Continuing the theme of adding branchless lowerings for simple selects, this time handle the case where one arm is 0. This is very common for various umin idioms, etc.
Differential Revision: https://reviews.llvm.org/D135600
return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
}
+ // (select c, 0, y) -> -!c & y
+ if (isNullConstant(TrueV)) {
+ SDValue C = DAG.getSetCC(DL, VT, LHS, RHS,
+ ISD::getSetCCInverse(CCVal, VT));
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+ }
+ // (select c, y, 0) -> -c & y
+ if (isNullConstant(FalseV)) {
+ SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ }
+
+
return SDValue();
}
case RISCVISD::BR_CC: {
;
; RV32I-LABEL: sltiu:
; RV32I: # %bb.0:
-; RV32I-NEXT: beqz a1, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: sltiu a0, a0, 3
+; RV32I-NEXT: snez a1, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
%1 = icmp ult i64 %a, 3
; RV32I-NEXT: bltz a3, .LBB11_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sll a1, a0, a3
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: ret
+; RV32I-NEXT: j .LBB11_3
; RV32I-NEXT: .LBB11_2:
; RV32I-NEXT: sll a1, a1, a2
-; RV32I-NEXT: xori a3, a2, 31
-; RV32I-NEXT: srli a4, a0, 1
-; RV32I-NEXT: srl a3, a4, a3
-; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: xori a4, a2, 31
+; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: srl a4, a5, a4
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: .LBB11_3:
; RV32I-NEXT: sll a0, a0, a2
+; RV32I-NEXT: slti a2, a3, 0
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: and a0, a2, a0
; RV32I-NEXT: ret
%1 = shl i64 %a, %b
ret i64 %1
; RV32I-NEXT: bltz a3, .LBB15_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a0, a1, a3
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: ret
+; RV32I-NEXT: j .LBB15_3
; RV32I-NEXT: .LBB15_2:
; RV32I-NEXT: srl a0, a0, a2
-; RV32I-NEXT: xori a3, a2, 31
-; RV32I-NEXT: slli a4, a1, 1
-; RV32I-NEXT: sll a3, a4, a3
-; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: xori a4, a2, 31
+; RV32I-NEXT: slli a5, a1, 1
+; RV32I-NEXT: sll a4, a5, a4
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: .LBB15_3:
; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: slti a2, a3, 0
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: and a1, a2, a1
; RV32I-NEXT: ret
%1 = lshr i64 %a, %b
ret i64 %1
define i1 @bittest_constant_by_var_shr_i64(i64 %b) nounwind {
; RV32-LABEL: bittest_constant_by_var_shr_i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi a1, a0, -32
-; RV32-NEXT: bltz a1, .LBB12_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: andi a0, zero, 1
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB12_2:
; RV32-NEXT: lui a1, 301408
; RV32-NEXT: addi a1, a1, 722
-; RV32-NEXT: srl a0, a1, a0
+; RV32-NEXT: srl a1, a1, a0
+; RV32-NEXT: addi a0, a0, -32
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: neg a0, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: ret
;
define i1 @bittest_constant_by_var_shl_i64(i64 %b) nounwind {
; RV32-LABEL: bittest_constant_by_var_shl_i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi a1, a0, -32
-; RV32-NEXT: bltz a1, .LBB13_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: andi a0, zero, 1
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB13_2:
; RV32-NEXT: lui a1, 301408
; RV32-NEXT: addi a1, a1, 722
-; RV32-NEXT: srl a0, a1, a0
+; RV32-NEXT: srl a1, a1, a0
+; RV32-NEXT: addi a0, a0, -32
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: neg a0, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: ret
;
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_small_pos(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, 20
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_small_neg(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, -20
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_small_edge_pos(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, 31
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_small_edge_neg(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, -32
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_medium_ledge_pos(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, 32
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_medium_ledge_neg(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, -33
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_medium_pos(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, 63
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_medium_neg(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, -63
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_medium_bedge_pos(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, 2047
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
define i32 @f_medium_bedge_neg(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, -2047
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; nothing to check.
define i32 @f_big_ledge_pos(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, 2048
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
; nothing to check.
define i32 @f_big_ledge_neg(i32 %in0) minsize {
%cmp = icmp CMPCOND i32 %in0, -2048
- %toRet = select i1 %cmp, i32 0, i32 42
+ %toRet = select i1 %cmp, i32 -99, i32 42
ret i32 %toRet
}
define i32 @fcvt_w_d_sat(double %a) nounwind {
; CHECKIFD-LABEL: fcvt_w_d_sat:
; CHECKIFD: # %bb.0: # %start
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB3_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rtz
-; CHECKIFD-NEXT: .LBB3_2: # %start
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
;
; RV32I-LABEL: fcvt_w_d_sat:
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, 269824
; RV32I-NEXT: lui a2, 1047552
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __gtdf2@plt
-; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: lui a3, 794112
-; RV32I-NEXT: li s2, 0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gedf2@plt
-; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __fixdfsi@plt
; RV32I-NEXT: lui a1, 524288
-; RV32I-NEXT: lui s5, 524288
-; RV32I-NEXT: bltz s4, .LBB3_2
+; RV32I-NEXT: lui s4, 524288
+; RV32I-NEXT: bltz s3, .LBB3_2
; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: .LBB3_2: # %start
-; RV32I-NEXT: bge s2, s3, .LBB3_4
+; RV32I-NEXT: blez s2, .LBB3_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: addi s5, a1, -1
+; RV32I-NEXT: addi s4, a1, -1
; RV32I-NEXT: .LBB3_4: # %start
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: call __unorddf2@plt
-; RV32I-NEXT: bnez a0, .LBB3_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv s2, s5
-; RV32I-NEXT: .LBB3_6: # %start
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bnez a1, .LBB3_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: .LBB3_6: # %start
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
define i32 @fcvt_wu_d_sat(double %a) nounwind {
; RV32IFD-LABEL: fcvt_wu_d_sat:
; RV32IFD: # %bb.0: # %start
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB6_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
-; RV32IFD-NEXT: .LBB6_2: # %start
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fcvt_wu_d_sat:
; RV64IFD: # %bb.0: # %start
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB6_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: slli a0, a0, 32
; RV64IFD-NEXT: srli a0, a0, 32
-; RV64IFD-NEXT: .LBB6_2: # %start
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_d_sat:
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gedf2@plt
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunsdfdi@plt
-; RV64I-NEXT: li s1, 0
-; RV64I-NEXT: bltz s2, .LBB6_2
-; RV64I-NEXT: # %bb.1: # %start
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: .LBB6_2: # %start
; RV64I-NEXT: li a0, 1055
; RV64I-NEXT: slli a0, a0, 31
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: slli a1, a0, 21
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtdf2@plt
-; RV64I-NEXT: blez a0, .LBB6_4
-; RV64I-NEXT: # %bb.3:
+; RV64I-NEXT: bgtz a0, .LBB6_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: j .LBB6_3
+; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: srli s1, a0, 32
-; RV64I-NEXT: .LBB6_4: # %start
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: .LBB6_3: # %start
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: addi a1, a3, -1
; RV32IFD-NEXT: .LBB12_4: # %start
; RV32IFD-NEXT: feq.d a3, fs0, fs0
-; RV32IFD-NEXT: bnez a3, .LBB12_6
-; RV32IFD-NEXT: # %bb.5: # %start
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: j .LBB12_7
-; RV32IFD-NEXT: .LBB12_6:
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
+; RV32IFD-NEXT: seqz a3, a3
+; RV32IFD-NEXT: addi a3, a3, -1
+; RV32IFD-NEXT: and a1, a3, a1
+; RV32IFD-NEXT: seqz a4, s0
+; RV32IFD-NEXT: addi a4, a4, -1
+; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
-; RV32IFD-NEXT: .LBB12_7: # %start
+; RV32IFD-NEXT: or a0, a2, a0
+; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: fcvt_l_d_sat:
; RV64IFD: # %bb.0: # %start
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB12_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
-; RV64IFD-NEXT: .LBB12_2: # %start
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_l_d_sat:
; RV32I: # %bb.0: # %start
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: lui a3, 802304
-; RV32I-NEXT: li s0, 0
-; RV32I-NEXT: li a2, 0
-; RV32I-NEXT: call __gedf2@plt
-; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, 278016
-; RV32I-NEXT: addi s4, a0, -1
+; RV32I-NEXT: addi s3, a0, -1
; RV32I-NEXT: li a2, -1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a3, s3
; RV32I-NEXT: call __gtdf2@plt
-; RV32I-NEXT: sgtz a0, a0
-; RV32I-NEXT: neg s6, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv s7, a0
+; RV32I-NEXT: lui a3, 802304
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __gedf2@plt
+; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __fixdfdi@plt
-; RV32I-NEXT: mv s5, a1
-; RV32I-NEXT: bltz s3, .LBB12_2
+; RV32I-NEXT: mv s6, a0
+; RV32I-NEXT: mv s4, a1
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: mv a2, s1
+; RV32I-NEXT: mv a3, s0
+; RV32I-NEXT: call __unorddf2@plt
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi s2, a0, -1
+; RV32I-NEXT: bgtz s7, .LBB12_2
; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: or s6, s6, a0
+; RV32I-NEXT: slti a0, s5, 0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s6
+; RV32I-NEXT: and s2, s2, a0
; RV32I-NEXT: .LBB12_2: # %start
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
-; RV32I-NEXT: call __unorddf2@plt
-; RV32I-NEXT: mv s3, s0
-; RV32I-NEXT: bnez a0, .LBB12_4
-; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv s3, s6
-; RV32I-NEXT: .LBB12_4: # %start
; RV32I-NEXT: li a2, -1
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: mv a3, s4
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: mv a3, s3
; RV32I-NEXT: call __gtdf2@plt
-; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: lui a3, 802304
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: mv a2, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gedf2@plt
; RV32I-NEXT: lui a1, 524288
-; RV32I-NEXT: lui s6, 524288
-; RV32I-NEXT: bltz a0, .LBB12_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv s6, s5
+; RV32I-NEXT: lui s5, 524288
+; RV32I-NEXT: bltz a0, .LBB12_4
+; RV32I-NEXT: # %bb.3: # %start
+; RV32I-NEXT: mv s5, s4
+; RV32I-NEXT: .LBB12_4: # %start
+; RV32I-NEXT: blez s3, .LBB12_6
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: addi s5, a1, -1
; RV32I-NEXT: .LBB12_6: # %start
-; RV32I-NEXT: bge s0, s4, .LBB12_8
-; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: addi s6, a1, -1
-; RV32I-NEXT: .LBB12_8: # %start
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
-; RV32I-NEXT: call __unorddf2@plt
-; RV32I-NEXT: bnez a0, .LBB12_10
-; RV32I-NEXT: # %bb.9: # %start
-; RV32I-NEXT: mv s0, s6
-; RV32I-NEXT: .LBB12_10: # %start
-; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: mv a2, s1
+; RV32I-NEXT: mv a3, s0
+; RV32I-NEXT: call __unorddf2@plt
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV64I-LABEL: fcvt_l_d_sat:
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bnez a1, .LBB12_7
-; RV64I-NEXT: # %bb.6: # %start
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: .LBB12_7: # %start
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fcvt.d.w ft0, zero
; RV32IFD-NEXT: fle.d a0, ft0, fa0
-; RV32IFD-NEXT: neg s0, a0
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi s0, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi@plt
; RV32IFD-NEXT: lui a2, %hi(.LCPI14_0)
; RV32IFD-NEXT: fld ft0, %lo(.LCPI14_0)(a2)
; RV32IFD-NEXT: flt.d a2, ft0, fs0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
+; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a1, s0, a1
-; RV32IFD-NEXT: or a1, a1, a2
+; RV32IFD-NEXT: or a1, a2, a1
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: fcvt_lu_d_sat:
; RV64IFD: # %bb.0: # %start
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB14_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rtz
-; RV64IFD-NEXT: .LBB14_2: # %start
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_lu_d_sat:
define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
; RV32IFD-LABEL: fcvt_w_s_sat_i16:
; RV32IFD: # %bb.0: # %start
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB26_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: lui a0, %hi(.LCPI26_0)
; RV32IFD-NEXT: fld ft0, %lo(.LCPI26_0)(a0)
; RV32IFD-NEXT: lui a0, %hi(.LCPI26_1)
; RV32IFD-NEXT: fmax.d ft0, fa0, ft0
; RV32IFD-NEXT: fmin.d ft0, ft0, ft1
; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
-; RV32IFD-NEXT: .LBB26_2: # %start
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fcvt_w_s_sat_i16:
; RV64IFD: # %bb.0: # %start
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB26_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: lui a0, %hi(.LCPI26_0)
; RV64IFD-NEXT: fld ft0, %lo(.LCPI26_0)(a0)
; RV64IFD-NEXT: lui a0, %hi(.LCPI26_1)
; RV64IFD-NEXT: fmax.d ft0, fa0, ft0
; RV64IFD-NEXT: fmin.d ft0, ft0, ft1
; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
-; RV64IFD-NEXT: .LBB26_2: # %start
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i16:
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, 265728
; RV32I-NEXT: addi a3, a0, -64
-; RV32I-NEXT: li s0, 0
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gtdf2@plt
-; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: lui a3, 790016
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gedf2@plt
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __fixdfsi@plt
-; RV32I-NEXT: lui s5, 1048568
-; RV32I-NEXT: bltz s4, .LBB26_2
+; RV32I-NEXT: lui s4, 1048568
+; RV32I-NEXT: bltz s3, .LBB26_2
; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: .LBB26_2: # %start
-; RV32I-NEXT: bge s0, s3, .LBB26_4
+; RV32I-NEXT: blez s2, .LBB26_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: lui a0, 8
-; RV32I-NEXT: addi s5, a0, -1
+; RV32I-NEXT: addi s4, a0, -1
; RV32I-NEXT: .LBB26_4: # %start
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: mv a2, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: call __unorddf2@plt
-; RV32I-NEXT: bnez a0, .LBB26_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv s0, s5
-; RV32I-NEXT: .LBB26_6: # %start
-; RV32I-NEXT: slli a0, s0, 16
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s4
+; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: bnez a0, .LBB26_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: .LBB26_6: # %start
-; RV64I-NEXT: slli a0, a1, 48
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
+; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gtdf2@plt
-; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __gedf2@plt
-; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __fixunsdfsi@plt
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: bltz s3, .LBB28_2
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: bgtz s3, .LBB28_2
; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: slti a2, s0, 0
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a2, a2, a0
; RV32I-NEXT: .LBB28_2: # %start
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: addi a0, a0, -1
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bgtz s0, .LBB28_4
-; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv a2, a1
-; RV32I-NEXT: .LBB28_4: # %start
-; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: and a0, a2, a1
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gedf2@plt
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunsdfdi@plt
-; RV64I-NEXT: li s2, 0
-; RV64I-NEXT: bltz s1, .LBB28_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: .LBB28_2: # %start
+; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a0, 8312
; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: slli a1, a0, 37
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtdf2@plt
; RV64I-NEXT: lui a1, 16
; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bgtz a0, .LBB28_4
-; RV64I-NEXT: # %bb.3: # %start
-; RV64I-NEXT: mv a2, s2
-; RV64I-NEXT: .LBB28_4: # %start
+; RV64I-NEXT: bgtz a0, .LBB28_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a2, a0, s1
+; RV64I-NEXT: .LBB28_2: # %start
; RV64I-NEXT: and a0, a2, a1
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
; RV32IFD-LABEL: fcvt_w_s_sat_i8:
; RV32IFD: # %bb.0: # %start
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB30_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: lui a0, %hi(.LCPI30_0)
; RV32IFD-NEXT: fld ft0, %lo(.LCPI30_0)(a0)
; RV32IFD-NEXT: lui a0, %hi(.LCPI30_1)
; RV32IFD-NEXT: fmax.d ft0, fa0, ft0
; RV32IFD-NEXT: fmin.d ft0, ft0, ft1
; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
-; RV32IFD-NEXT: .LBB30_2: # %start
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fcvt_w_s_sat_i8:
; RV64IFD: # %bb.0: # %start
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB30_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: lui a0, %hi(.LCPI30_0)
; RV64IFD-NEXT: fld ft0, %lo(.LCPI30_0)(a0)
; RV64IFD-NEXT: lui a0, %hi(.LCPI30_1)
; RV64IFD-NEXT: fmax.d ft0, fa0, ft0
; RV64IFD-NEXT: fmin.d ft0, ft0, ft1
; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
-; RV64IFD-NEXT: .LBB30_2: # %start
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i8:
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a3, 263676
-; RV32I-NEXT: li s0, 0
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gtdf2@plt
-; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: lui a3, 787968
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gedf2@plt
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __fixdfsi@plt
; RV32I-NEXT: li a1, -128
-; RV32I-NEXT: bltz s4, .LBB30_2
+; RV32I-NEXT: bltz s3, .LBB30_2
; RV32I-NEXT: # %bb.1: # %start
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: .LBB30_2: # %start
-; RV32I-NEXT: li s4, 127
-; RV32I-NEXT: blt s0, s3, .LBB30_4
+; RV32I-NEXT: li s3, 127
+; RV32I-NEXT: bgtz s2, .LBB30_4
; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv s4, a1
+; RV32I-NEXT: mv s3, a1
; RV32I-NEXT: .LBB30_4: # %start
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: mv a2, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: call __unorddf2@plt
-; RV32I-NEXT: bnez a0, .LBB30_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv s0, s4
-; RV32I-NEXT: .LBB30_6: # %start
-; RV32I-NEXT: slli a0, s0, 24
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: bnez a0, .LBB30_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: .LBB30_6: # %start
-; RV64I-NEXT: slli a0, a1, 56
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV32I-NEXT: lui a3, 263934
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gtdf2@plt
-; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __gedf2@plt
-; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __fixunsdfsi@plt
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: bltz s3, .LBB32_2
+; RV32I-NEXT: li a1, 255
+; RV32I-NEXT: bgtz s3, .LBB32_2
; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: slti a1, s0, 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a0
; RV32I-NEXT: .LBB32_2: # %start
-; RV32I-NEXT: li a0, 255
-; RV32I-NEXT: bgtz s0, .LBB32_4
-; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB32_4: # %start
-; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: andi a0, a1, 255
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gedf2@plt
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunsdfdi@plt
-; RV64I-NEXT: li s2, 0
-; RV64I-NEXT: bltz s1, .LBB32_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: .LBB32_2: # %start
+; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a0, 131967
; RV64I-NEXT: slli a1, a0, 33
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtdf2@plt
; RV64I-NEXT: li a1, 255
-; RV64I-NEXT: bgtz a0, .LBB32_4
-; RV64I-NEXT: # %bb.3: # %start
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: .LBB32_4: # %start
+; RV64I-NEXT: bgtz a0, .LBB32_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a1, a0, s1
+; RV64I-NEXT: .LBB32_2: # %start
; RV64I-NEXT: andi a0, a1, 255
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
define zeroext i32 @fcvt_wu_d_sat_zext(double %a) nounwind {
; RV32IFD-LABEL: fcvt_wu_d_sat_zext:
; RV32IFD: # %bb.0: # %start
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB33_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
-; RV32IFD-NEXT: .LBB33_2: # %start
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fcvt_wu_d_sat_zext:
; RV64IFD: # %bb.0: # %start
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB33_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: slli a0, a0, 32
; RV64IFD-NEXT: srli a0, a0, 32
-; RV64IFD-NEXT: .LBB33_2: # %start
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_d_sat_zext:
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gedf2@plt
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunsdfdi@plt
-; RV64I-NEXT: li s2, 0
-; RV64I-NEXT: bltz s1, .LBB33_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: .LBB33_2: # %start
+; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a0, 1055
; RV64I-NEXT: slli a0, a0, 31
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: slli a1, a0, 21
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtdf2@plt
-; RV64I-NEXT: blez a0, .LBB33_4
-; RV64I-NEXT: # %bb.3:
+; RV64I-NEXT: bgtz a0, .LBB33_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: j .LBB33_3
+; RV64I-NEXT: .LBB33_2:
; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: srli s2, a0, 32
-; RV64I-NEXT: .LBB33_4: # %start
-; RV64I-NEXT: slli a0, s2, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: .LBB33_3: # %start
+; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
define signext i32 @fcvt_w_d_sat_sext(double %a) nounwind {
; CHECKIFD-LABEL: fcvt_w_d_sat_sext:
; CHECKIFD: # %bb.0: # %start
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB34_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rtz
-; CHECKIFD-NEXT: .LBB34_2: # %start
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
;
; RV32I-LABEL: fcvt_w_d_sat_sext:
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, 269824
; RV32I-NEXT: lui a2, 1047552
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __gtdf2@plt
-; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: lui a3, 794112
-; RV32I-NEXT: li s2, 0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gedf2@plt
-; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __fixdfsi@plt
; RV32I-NEXT: lui a1, 524288
-; RV32I-NEXT: lui s5, 524288
-; RV32I-NEXT: bltz s4, .LBB34_2
+; RV32I-NEXT: lui s4, 524288
+; RV32I-NEXT: bltz s3, .LBB34_2
; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: .LBB34_2: # %start
-; RV32I-NEXT: bge s2, s3, .LBB34_4
+; RV32I-NEXT: blez s2, .LBB34_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: addi s5, a1, -1
+; RV32I-NEXT: addi s4, a1, -1
; RV32I-NEXT: .LBB34_4: # %start
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: call __unorddf2@plt
-; RV32I-NEXT: bnez a0, .LBB34_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv s2, s5
-; RV32I-NEXT: .LBB34_6: # %start
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: bnez a0, .LBB34_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: .LBB34_6: # %start
-; RV64I-NEXT: sext.w a0, a1
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
define signext i32 @test_floor_si32(double %x) {
; CHECKIFD-LABEL: test_floor_si32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB0_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rdn
-; CHECKIFD-NEXT: .LBB0_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.floor.f64(double %x)
%b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
; RV32IFD-NEXT: addi a1, a3, -1
; RV32IFD-NEXT: .LBB1_4:
; RV32IFD-NEXT: feq.d a3, fs0, fs0
-; RV32IFD-NEXT: bnez a3, .LBB1_6
-; RV32IFD-NEXT: # %bb.5:
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: j .LBB1_7
-; RV32IFD-NEXT: .LBB1_6:
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
+; RV32IFD-NEXT: seqz a3, a3
+; RV32IFD-NEXT: addi a3, a3, -1
+; RV32IFD-NEXT: and a1, a3, a1
+; RV32IFD-NEXT: seqz a4, s0
+; RV32IFD-NEXT: addi a4, a4, -1
+; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
-; RV32IFD-NEXT: .LBB1_7:
+; RV32IFD-NEXT: or a0, a2, a0
+; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_floor_si64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB1_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rdn
-; RV64IFD-NEXT: .LBB1_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.floor.f64(double %x)
%b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
define signext i32 @test_floor_ui32(double %x) {
; CHECKIFD-LABEL: test_floor_ui32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB2_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rdn
-; CHECKIFD-NEXT: .LBB2_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.floor.f64(double %x)
%b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fcvt.d.w ft0, zero
; RV32IFD-NEXT: fle.d a0, ft0, fa0
-; RV32IFD-NEXT: neg s0, a0
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi s0, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi@plt
; RV32IFD-NEXT: lui a2, %hi(.LCPI3_0)
; RV32IFD-NEXT: fld ft0, %lo(.LCPI3_0)(a2)
; RV32IFD-NEXT: flt.d a2, ft0, fs0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
+; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a1, s0, a1
-; RV32IFD-NEXT: or a1, a1, a2
+; RV32IFD-NEXT: or a1, a2, a1
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_floor_ui64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB3_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rdn
-; RV64IFD-NEXT: .LBB3_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.floor.f64(double %x)
%b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
define signext i32 @test_ceil_si32(double %x) {
; CHECKIFD-LABEL: test_ceil_si32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB4_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rup
-; CHECKIFD-NEXT: .LBB4_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.ceil.f64(double %x)
%b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
; RV32IFD-NEXT: addi a1, a3, -1
; RV32IFD-NEXT: .LBB5_4:
; RV32IFD-NEXT: feq.d a3, fs0, fs0
-; RV32IFD-NEXT: bnez a3, .LBB5_6
-; RV32IFD-NEXT: # %bb.5:
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: j .LBB5_7
-; RV32IFD-NEXT: .LBB5_6:
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
+; RV32IFD-NEXT: seqz a3, a3
+; RV32IFD-NEXT: addi a3, a3, -1
+; RV32IFD-NEXT: and a1, a3, a1
+; RV32IFD-NEXT: seqz a4, s0
+; RV32IFD-NEXT: addi a4, a4, -1
+; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
-; RV32IFD-NEXT: .LBB5_7:
+; RV32IFD-NEXT: or a0, a2, a0
+; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_ceil_si64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB5_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rup
-; RV64IFD-NEXT: .LBB5_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.ceil.f64(double %x)
%b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
define signext i32 @test_ceil_ui32(double %x) {
; CHECKIFD-LABEL: test_ceil_ui32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB6_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rup
-; CHECKIFD-NEXT: .LBB6_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.ceil.f64(double %x)
%b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fcvt.d.w ft0, zero
; RV32IFD-NEXT: fle.d a0, ft0, fa0
-; RV32IFD-NEXT: neg s0, a0
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi s0, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi@plt
; RV32IFD-NEXT: lui a2, %hi(.LCPI7_0)
; RV32IFD-NEXT: fld ft0, %lo(.LCPI7_0)(a2)
; RV32IFD-NEXT: flt.d a2, ft0, fs0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
+; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a1, s0, a1
-; RV32IFD-NEXT: or a1, a1, a2
+; RV32IFD-NEXT: or a1, a2, a1
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_ceil_ui64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB7_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rup
-; RV64IFD-NEXT: .LBB7_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.ceil.f64(double %x)
%b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
define signext i32 @test_trunc_si32(double %x) {
; CHECKIFD-LABEL: test_trunc_si32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB8_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rtz
-; CHECKIFD-NEXT: .LBB8_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.trunc.f64(double %x)
%b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
; RV32IFD-NEXT: addi a1, a3, -1
; RV32IFD-NEXT: .LBB9_4:
; RV32IFD-NEXT: feq.d a3, fs0, fs0
-; RV32IFD-NEXT: bnez a3, .LBB9_6
-; RV32IFD-NEXT: # %bb.5:
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: j .LBB9_7
-; RV32IFD-NEXT: .LBB9_6:
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
+; RV32IFD-NEXT: seqz a3, a3
+; RV32IFD-NEXT: addi a3, a3, -1
+; RV32IFD-NEXT: and a1, a3, a1
+; RV32IFD-NEXT: seqz a4, s0
+; RV32IFD-NEXT: addi a4, a4, -1
+; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
-; RV32IFD-NEXT: .LBB9_7:
+; RV32IFD-NEXT: or a0, a2, a0
+; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_trunc_si64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB9_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
-; RV64IFD-NEXT: .LBB9_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.trunc.f64(double %x)
%b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
define signext i32 @test_trunc_ui32(double %x) {
; CHECKIFD-LABEL: test_trunc_ui32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB10_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rtz
-; CHECKIFD-NEXT: .LBB10_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.trunc.f64(double %x)
%b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fcvt.d.w ft0, zero
; RV32IFD-NEXT: fle.d a0, ft0, fa0
-; RV32IFD-NEXT: neg s0, a0
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi s0, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi@plt
; RV32IFD-NEXT: lui a2, %hi(.LCPI11_0)
; RV32IFD-NEXT: fld ft0, %lo(.LCPI11_0)(a2)
; RV32IFD-NEXT: flt.d a2, ft0, fs0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
+; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a1, s0, a1
-; RV32IFD-NEXT: or a1, a1, a2
+; RV32IFD-NEXT: or a1, a2, a1
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_trunc_ui64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB11_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rtz
-; RV64IFD-NEXT: .LBB11_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.trunc.f64(double %x)
%b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
define signext i32 @test_round_si32(double %x) {
; CHECKIFD-LABEL: test_round_si32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB12_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rmm
-; CHECKIFD-NEXT: .LBB12_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.round.f64(double %x)
%b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
; RV32IFD-NEXT: addi a1, a3, -1
; RV32IFD-NEXT: .LBB13_4:
; RV32IFD-NEXT: feq.d a3, fs0, fs0
-; RV32IFD-NEXT: bnez a3, .LBB13_6
-; RV32IFD-NEXT: # %bb.5:
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: j .LBB13_7
-; RV32IFD-NEXT: .LBB13_6:
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
+; RV32IFD-NEXT: seqz a3, a3
+; RV32IFD-NEXT: addi a3, a3, -1
+; RV32IFD-NEXT: and a1, a3, a1
+; RV32IFD-NEXT: seqz a4, s0
+; RV32IFD-NEXT: addi a4, a4, -1
+; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
-; RV32IFD-NEXT: .LBB13_7:
+; RV32IFD-NEXT: or a0, a2, a0
+; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_round_si64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB13_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rmm
-; RV64IFD-NEXT: .LBB13_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.round.f64(double %x)
%b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
define signext i32 @test_round_ui32(double %x) {
; CHECKIFD-LABEL: test_round_ui32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB14_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rmm
-; CHECKIFD-NEXT: .LBB14_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.round.f64(double %x)
%b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fcvt.d.w ft0, zero
; RV32IFD-NEXT: fle.d a0, ft0, fa0
-; RV32IFD-NEXT: neg s0, a0
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi s0, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi@plt
; RV32IFD-NEXT: lui a2, %hi(.LCPI15_0)
; RV32IFD-NEXT: fld ft0, %lo(.LCPI15_0)(a2)
; RV32IFD-NEXT: flt.d a2, ft0, fs0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
+; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a1, s0, a1
-; RV32IFD-NEXT: or a1, a1, a2
+; RV32IFD-NEXT: or a1, a2, a1
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_round_ui64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB15_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rmm
-; RV64IFD-NEXT: .LBB15_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.round.f64(double %x)
%b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
define signext i32 @test_roundeven_si32(double %x) {
; CHECKIFD-LABEL: test_roundeven_si32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB16_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rne
-; CHECKIFD-NEXT: .LBB16_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.roundeven.f64(double %x)
%b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
; RV32IFD-NEXT: addi a1, a3, -1
; RV32IFD-NEXT: .LBB17_4:
; RV32IFD-NEXT: feq.d a3, fs0, fs0
-; RV32IFD-NEXT: bnez a3, .LBB17_6
-; RV32IFD-NEXT: # %bb.5:
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: j .LBB17_7
-; RV32IFD-NEXT: .LBB17_6:
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
+; RV32IFD-NEXT: seqz a3, a3
+; RV32IFD-NEXT: addi a3, a3, -1
+; RV32IFD-NEXT: and a1, a3, a1
+; RV32IFD-NEXT: seqz a4, s0
+; RV32IFD-NEXT: addi a4, a4, -1
+; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
-; RV32IFD-NEXT: .LBB17_7:
+; RV32IFD-NEXT: or a0, a2, a0
+; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_roundeven_si64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB17_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rne
-; RV64IFD-NEXT: .LBB17_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.roundeven.f64(double %x)
%b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
define signext i32 @test_roundeven_ui32(double %x) {
; CHECKIFD-LABEL: test_roundeven_ui32:
; CHECKIFD: # %bb.0:
-; CHECKIFD-NEXT: feq.d a0, fa0, fa0
-; CHECKIFD-NEXT: beqz a0, .LBB18_2
-; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rne
-; CHECKIFD-NEXT: .LBB18_2:
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
; CHECKIFD-NEXT: ret
%a = call double @llvm.roundeven.f64(double %x)
%b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fcvt.d.w ft0, zero
; RV32IFD-NEXT: fle.d a0, ft0, fa0
-; RV32IFD-NEXT: neg s0, a0
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi s0, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi@plt
; RV32IFD-NEXT: lui a2, %hi(.LCPI19_0)
; RV32IFD-NEXT: fld ft0, %lo(.LCPI19_0)(a2)
; RV32IFD-NEXT: flt.d a2, ft0, fs0
; RV32IFD-NEXT: seqz a2, a2
; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: or a0, a0, a2
+; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a1, s0, a1
-; RV32IFD-NEXT: or a1, a1, a2
+; RV32IFD-NEXT: or a1, a2, a1
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IFD-LABEL: test_roundeven_ui64:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB19_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rne
-; RV64IFD-NEXT: .LBB19_2:
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
%a = call double @llvm.roundeven.f64(double %x)
%b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
define i32 @fcvt_w_s_sat(float %a) nounwind {
; CHECKIF-LABEL: fcvt_w_s_sat:
; CHECKIF: # %bb.0: # %start
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB1_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.w.s a0, fa0, rtz
-; CHECKIF-NEXT: .LBB1_2: # %start
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bnez a1, .LBB1_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: .LBB1_6: # %start
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s2
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bnez a1, .LBB1_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: .LBB1_6: # %start
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
define i32 @fcvt_wu_s_sat(float %a) nounwind {
; RV32IF-LABEL: fcvt_wu_s_sat:
; RV32IF: # %bb.0: # %start
-; RV32IF-NEXT: feq.s a0, fa0, fa0
-; RV32IF-NEXT: beqz a0, .LBB4_2
-; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz
-; RV32IF-NEXT: .LBB4_2: # %start
+; RV32IF-NEXT: feq.s a1, fa0, fa0
+; RV32IF-NEXT: seqz a1, a1
+; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: and a0, a1, a0
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_wu_s_sat:
; RV64IF: # %bb.0: # %start
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB4_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a0, a1
; RV64IF-NEXT: slli a0, a0, 32
; RV64IF-NEXT: srli a0, a0, 32
-; RV64IF-NEXT: .LBB4_2: # %start
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_s_sat:
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2@plt
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunssfdi@plt
-; RV64I-NEXT: li s1, 0
-; RV64I-NEXT: bltz s2, .LBB4_2
-; RV64I-NEXT: # %bb.1: # %start
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: .LBB4_2: # %start
; RV64I-NEXT: lui a0, 325632
; RV64I-NEXT: addiw a1, a0, -1
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtsf2@plt
-; RV64I-NEXT: blez a0, .LBB4_4
-; RV64I-NEXT: # %bb.3:
+; RV64I-NEXT: bgtz a0, .LBB4_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: j .LBB4_3
+; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: srli s1, a0, 32
-; RV64I-NEXT: .LBB4_4: # %start
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: .LBB4_3: # %start
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV32IF-NEXT: addi a1, a3, -1
; RV32IF-NEXT: .LBB12_4: # %start
; RV32IF-NEXT: feq.s a3, fs0, fs0
-; RV32IF-NEXT: bnez a3, .LBB12_6
-; RV32IF-NEXT: # %bb.5: # %start
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: j .LBB12_7
-; RV32IF-NEXT: .LBB12_6:
-; RV32IF-NEXT: neg a3, s0
-; RV32IF-NEXT: and a0, a3, a0
+; RV32IF-NEXT: seqz a3, a3
+; RV32IF-NEXT: addi a3, a3, -1
+; RV32IF-NEXT: and a1, a3, a1
+; RV32IF-NEXT: seqz a4, s0
+; RV32IF-NEXT: addi a4, a4, -1
+; RV32IF-NEXT: and a0, a4, a0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
-; RV32IF-NEXT: .LBB12_7: # %start
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: and a0, a3, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: fcvt_l_s_sat:
; RV64IF: # %bb.0: # %start
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB12_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz
-; RV64IF-NEXT: .LBB12_2: # %start
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_l_s_sat:
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 389120
-; RV32I-NEXT: addi s2, a0, -1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: call __gtsf2@plt
-; RV32I-NEXT: li s0, 0
-; RV32I-NEXT: sgtz a0, a0
-; RV32I-NEXT: neg s5, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __fixsfdi@plt
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv s4, a1
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a1, 913408
-; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __gesf2@plt
-; RV32I-NEXT: bltz a0, .LBB12_2
+; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __fixsfdi@plt
+; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: call __unordsf2@plt
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi s1, a0, -1
+; RV32I-NEXT: lui a0, 389120
+; RV32I-NEXT: addi s3, a0, -1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __gtsf2@plt
+; RV32I-NEXT: bgtz a0, .LBB12_2
; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: or s5, s5, s3
+; RV32I-NEXT: slti a0, s4, 0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and s1, s1, a0
; RV32I-NEXT: .LBB12_2: # %start
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: mv s3, s0
-; RV32I-NEXT: bnez a0, .LBB12_4
-; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv s3, s5
-; RV32I-NEXT: .LBB12_4: # %start
; RV32I-NEXT: lui a1, 913408
-; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __gesf2@plt
-; RV32I-NEXT: lui s6, 524288
; RV32I-NEXT: lui s5, 524288
-; RV32I-NEXT: bltz a0, .LBB12_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv s5, s4
-; RV32I-NEXT: .LBB12_6: # %start
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s2
+; RV32I-NEXT: lui s4, 524288
+; RV32I-NEXT: bltz a0, .LBB12_4
+; RV32I-NEXT: # %bb.3: # %start
+; RV32I-NEXT: mv s4, s2
+; RV32I-NEXT: .LBB12_4: # %start
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __gtsf2@plt
-; RV32I-NEXT: bge s0, a0, .LBB12_8
-; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: addi s5, s6, -1
-; RV32I-NEXT: .LBB12_8: # %start
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: bnez a0, .LBB12_10
-; RV32I-NEXT: # %bb.9: # %start
-; RV32I-NEXT: mv s0, s5
-; RV32I-NEXT: .LBB12_10: # %start
-; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: blez a0, .LBB12_6
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: addi s4, s5, -1
+; RV32I-NEXT: .LBB12_6: # %start
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: call __unordsf2@plt
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a1, a0, s4
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bnez a1, .LBB12_7
-; RV64I-NEXT: # %bb.6: # %start
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: .LBB12_7: # %start
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: fmv.w.x ft0, zero
; RV32IF-NEXT: fle.s a0, ft0, fa0
-; RV32IF-NEXT: neg s0, a0
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi s0, a0, -1
; RV32IF-NEXT: call __fixunssfdi@plt
; RV32IF-NEXT: lui a2, %hi(.LCPI14_0)
; RV32IF-NEXT: flw ft0, %lo(.LCPI14_0)(a2)
; RV32IF-NEXT: flt.s a2, ft0, fs0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
+; RV32IF-NEXT: or a0, a2, a0
; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a1, a2
+; RV32IF-NEXT: or a1, a2, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: fcvt_lu_s_sat:
; RV64IF: # %bb.0: # %start
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB14_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.lu.s a0, fa0, rtz
-; RV64IF-NEXT: .LBB14_2: # %start
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_lu_s_sat:
define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
; RV32IF-LABEL: fcvt_w_s_sat_i16:
; RV32IF: # %bb.0: # %start
-; RV32IF-NEXT: feq.s a0, fa0, fa0
-; RV32IF-NEXT: beqz a0, .LBB24_2
-; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: lui a0, %hi(.LCPI24_0)
; RV32IF-NEXT: flw ft0, %lo(.LCPI24_0)(a0)
; RV32IF-NEXT: lui a0, %hi(.LCPI24_1)
; RV32IF-NEXT: fmax.s ft0, fa0, ft0
; RV32IF-NEXT: fmin.s ft0, ft0, ft1
; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IF-NEXT: .LBB24_2: # %start
+; RV32IF-NEXT: feq.s a1, fa0, fa0
+; RV32IF-NEXT: seqz a1, a1
+; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: and a0, a1, a0
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_w_s_sat_i16:
; RV64IF: # %bb.0: # %start
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB24_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: lui a0, %hi(.LCPI24_0)
; RV64IF-NEXT: flw ft0, %lo(.LCPI24_0)(a0)
; RV64IF-NEXT: lui a0, %hi(.LCPI24_1)
; RV64IF-NEXT: fmax.s ft0, fa0, ft0
; RV64IF-NEXT: fmin.s ft0, ft0, ft1
; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IF-NEXT: .LBB24_2: # %start
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i16:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: bnez a0, .LBB24_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: .LBB24_6: # %start
-; RV32I-NEXT: slli a0, a1, 16
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: bnez a0, .LBB24_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: .LBB24_6: # %start
-; RV64I-NEXT: slli a0, a1, 48
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
+; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2@plt
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __fixunssfsi@plt
-; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: bltz s1, .LBB26_2
-; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: .LBB26_2: # %start
+; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, 292864
; RV32I-NEXT: addi a1, a0, -256
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __gtsf2@plt
; RV32I-NEXT: lui a1, 16
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: mv a2, a1
-; RV32I-NEXT: bgtz a0, .LBB26_4
-; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: .LBB26_4: # %start
+; RV32I-NEXT: bgtz a0, .LBB26_2
+; RV32I-NEXT: # %bb.1: # %start
+; RV32I-NEXT: slti a0, s0, 0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a2, a0, s1
+; RV32I-NEXT: .LBB26_2: # %start
; RV32I-NEXT: and a0, a2, a1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2@plt
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunssfdi@plt
-; RV64I-NEXT: li s2, 0
-; RV64I-NEXT: bltz s1, .LBB26_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: .LBB26_2: # %start
+; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a0, 292864
; RV64I-NEXT: addiw a1, a0, -256
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtsf2@plt
; RV64I-NEXT: lui a1, 16
; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bgtz a0, .LBB26_4
-; RV64I-NEXT: # %bb.3: # %start
-; RV64I-NEXT: mv a2, s2
-; RV64I-NEXT: .LBB26_4: # %start
+; RV64I-NEXT: bgtz a0, .LBB26_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a2, a0, s1
+; RV64I-NEXT: .LBB26_2: # %start
; RV64I-NEXT: and a0, a2, a1
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
; RV32IF-LABEL: fcvt_w_s_sat_i8:
; RV32IF: # %bb.0: # %start
-; RV32IF-NEXT: feq.s a0, fa0, fa0
-; RV32IF-NEXT: beqz a0, .LBB28_2
-; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: lui a0, %hi(.LCPI28_0)
; RV32IF-NEXT: flw ft0, %lo(.LCPI28_0)(a0)
; RV32IF-NEXT: lui a0, %hi(.LCPI28_1)
; RV32IF-NEXT: fmax.s ft0, fa0, ft0
; RV32IF-NEXT: fmin.s ft0, ft0, ft1
; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IF-NEXT: .LBB28_2: # %start
+; RV32IF-NEXT: feq.s a1, fa0, fa0
+; RV32IF-NEXT: seqz a1, a1
+; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: and a0, a1, a0
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_w_s_sat_i8:
; RV64IF: # %bb.0: # %start
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB28_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: lui a0, %hi(.LCPI28_0)
; RV64IF-NEXT: flw ft0, %lo(.LCPI28_0)(a0)
; RV64IF-NEXT: lui a0, %hi(.LCPI28_1)
; RV64IF-NEXT: fmax.s ft0, fa0, ft0
; RV64IF-NEXT: fmin.s ft0, ft0, ft1
; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IF-NEXT: .LBB28_2: # %start
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i8:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: bnez a0, .LBB28_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: .LBB28_6: # %start
-; RV32I-NEXT: slli a0, a1, 24
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: bnez a0, .LBB28_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: .LBB28_6: # %start
-; RV64I-NEXT: slli a0, a1, 56
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2@plt
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __fixunssfsi@plt
-; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: bltz s1, .LBB30_2
-; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: .LBB30_2: # %start
+; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a1, 276464
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __gtsf2@plt
; RV32I-NEXT: li a1, 255
-; RV32I-NEXT: bgtz a0, .LBB30_4
-; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: .LBB30_4: # %start
+; RV32I-NEXT: bgtz a0, .LBB30_2
+; RV32I-NEXT: # %bb.1: # %start
+; RV32I-NEXT: slti a0, s0, 0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a1, a0, s1
+; RV32I-NEXT: .LBB30_2: # %start
; RV32I-NEXT: andi a0, a1, 255
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2@plt
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunssfdi@plt
-; RV64I-NEXT: li s2, 0
-; RV64I-NEXT: bltz s1, .LBB30_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: .LBB30_2: # %start
+; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a1, 276464
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtsf2@plt
; RV64I-NEXT: li a1, 255
-; RV64I-NEXT: bgtz a0, .LBB30_4
-; RV64I-NEXT: # %bb.3: # %start
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: .LBB30_4: # %start
+; RV64I-NEXT: bgtz a0, .LBB30_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a1, a0, s1
+; RV64I-NEXT: .LBB30_2: # %start
; RV64I-NEXT: andi a0, a1, 255
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind {
; RV32IF-LABEL: fcvt_wu_s_sat_zext:
; RV32IF: # %bb.0: # %start
-; RV32IF-NEXT: feq.s a0, fa0, fa0
-; RV32IF-NEXT: beqz a0, .LBB31_2
-; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz
-; RV32IF-NEXT: .LBB31_2: # %start
+; RV32IF-NEXT: feq.s a1, fa0, fa0
+; RV32IF-NEXT: seqz a1, a1
+; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: and a0, a1, a0
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_wu_s_sat_zext:
; RV64IF: # %bb.0: # %start
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB31_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a0, a1
; RV64IF-NEXT: slli a0, a0, 32
; RV64IF-NEXT: srli a0, a0, 32
-; RV64IF-NEXT: .LBB31_2: # %start
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_s_sat_zext:
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2@plt
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunssfdi@plt
-; RV64I-NEXT: li s2, 0
-; RV64I-NEXT: bltz s1, .LBB31_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: .LBB31_2: # %start
+; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a0, 325632
; RV64I-NEXT: addiw a1, a0, -1
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtsf2@plt
-; RV64I-NEXT: blez a0, .LBB31_4
-; RV64I-NEXT: # %bb.3:
+; RV64I-NEXT: bgtz a0, .LBB31_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: j .LBB31_3
+; RV64I-NEXT: .LBB31_2:
; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: srli s2, a0, 32
-; RV64I-NEXT: .LBB31_4: # %start
-; RV64I-NEXT: slli a0, s2, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: .LBB31_3: # %start
+; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
define signext i32 @fcvt_w_s_sat_sext(float %a) nounwind {
; CHECKIF-LABEL: fcvt_w_s_sat_sext:
; CHECKIF: # %bb.0: # %start
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB32_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.w.s a0, fa0, rtz
-; CHECKIF-NEXT: .LBB32_2: # %start
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_sext:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bnez a1, .LBB32_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: .LBB32_6: # %start
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s2
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: bnez a0, .LBB32_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: .LBB32_6: # %start
-; RV64I-NEXT: sext.w a0, a1
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
define signext i32 @test_floor_si32(float %x) {
; CHECKIF-LABEL: test_floor_si32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB0_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.w.s a0, fa0, rdn
-; CHECKIF-NEXT: .LBB0_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.floor.f32(float %x)
%b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
; RV32IF-NEXT: addi a1, a3, -1
; RV32IF-NEXT: .LBB1_4:
; RV32IF-NEXT: feq.s a3, fs0, fs0
-; RV32IF-NEXT: bnez a3, .LBB1_6
-; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: j .LBB1_7
-; RV32IF-NEXT: .LBB1_6:
-; RV32IF-NEXT: neg a3, s0
-; RV32IF-NEXT: and a0, a3, a0
+; RV32IF-NEXT: seqz a3, a3
+; RV32IF-NEXT: addi a3, a3, -1
+; RV32IF-NEXT: and a1, a3, a1
+; RV32IF-NEXT: seqz a4, s0
+; RV32IF-NEXT: addi a4, a4, -1
+; RV32IF-NEXT: and a0, a4, a0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
-; RV32IF-NEXT: .LBB1_7:
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: and a0, a3, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_floor_si64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB1_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.l.s a0, fa0, rdn
-; RV64IF-NEXT: .LBB1_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.floor.f32(float %x)
%b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
define signext i32 @test_floor_ui32(float %x) {
; CHECKIF-LABEL: test_floor_ui32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB2_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rdn
-; CHECKIF-NEXT: .LBB2_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.floor.f32(float %x)
%b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: fmv.w.x ft0, zero
; RV32IF-NEXT: fle.s a0, ft0, fa0
-; RV32IF-NEXT: neg s0, a0
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi s0, a0, -1
; RV32IF-NEXT: call __fixunssfdi@plt
; RV32IF-NEXT: lui a2, %hi(.LCPI3_0)
; RV32IF-NEXT: flw ft0, %lo(.LCPI3_0)(a2)
; RV32IF-NEXT: flt.s a2, ft0, fs0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
+; RV32IF-NEXT: or a0, a2, a0
; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a1, a2
+; RV32IF-NEXT: or a1, a2, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_floor_ui64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB3_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.lu.s a0, fa0, rdn
-; RV64IF-NEXT: .LBB3_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.floor.f32(float %x)
%b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
define signext i32 @test_ceil_si32(float %x) {
; CHECKIF-LABEL: test_ceil_si32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB4_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.w.s a0, fa0, rup
-; CHECKIF-NEXT: .LBB4_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.ceil.f32(float %x)
%b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
; RV32IF-NEXT: addi a1, a3, -1
; RV32IF-NEXT: .LBB5_4:
; RV32IF-NEXT: feq.s a3, fs0, fs0
-; RV32IF-NEXT: bnez a3, .LBB5_6
-; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: j .LBB5_7
-; RV32IF-NEXT: .LBB5_6:
-; RV32IF-NEXT: neg a3, s0
-; RV32IF-NEXT: and a0, a3, a0
+; RV32IF-NEXT: seqz a3, a3
+; RV32IF-NEXT: addi a3, a3, -1
+; RV32IF-NEXT: and a1, a3, a1
+; RV32IF-NEXT: seqz a4, s0
+; RV32IF-NEXT: addi a4, a4, -1
+; RV32IF-NEXT: and a0, a4, a0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
-; RV32IF-NEXT: .LBB5_7:
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: and a0, a3, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_ceil_si64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB5_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.l.s a0, fa0, rup
-; RV64IF-NEXT: .LBB5_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.ceil.f32(float %x)
%b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
define signext i32 @test_ceil_ui32(float %x) {
; CHECKIF-LABEL: test_ceil_ui32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB6_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rup
-; CHECKIF-NEXT: .LBB6_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.ceil.f32(float %x)
%b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: fmv.w.x ft0, zero
; RV32IF-NEXT: fle.s a0, ft0, fa0
-; RV32IF-NEXT: neg s0, a0
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi s0, a0, -1
; RV32IF-NEXT: call __fixunssfdi@plt
; RV32IF-NEXT: lui a2, %hi(.LCPI7_0)
; RV32IF-NEXT: flw ft0, %lo(.LCPI7_0)(a2)
; RV32IF-NEXT: flt.s a2, ft0, fs0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
+; RV32IF-NEXT: or a0, a2, a0
; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a1, a2
+; RV32IF-NEXT: or a1, a2, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_ceil_ui64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB7_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.lu.s a0, fa0, rup
-; RV64IF-NEXT: .LBB7_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.ceil.f32(float %x)
%b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
define signext i32 @test_trunc_si32(float %x) {
; CHECKIF-LABEL: test_trunc_si32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB8_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.w.s a0, fa0, rtz
-; CHECKIF-NEXT: .LBB8_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.trunc.f32(float %x)
%b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
; RV32IF-NEXT: addi a1, a3, -1
; RV32IF-NEXT: .LBB9_4:
; RV32IF-NEXT: feq.s a3, fs0, fs0
-; RV32IF-NEXT: bnez a3, .LBB9_6
-; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: j .LBB9_7
-; RV32IF-NEXT: .LBB9_6:
-; RV32IF-NEXT: neg a3, s0
-; RV32IF-NEXT: and a0, a3, a0
+; RV32IF-NEXT: seqz a3, a3
+; RV32IF-NEXT: addi a3, a3, -1
+; RV32IF-NEXT: and a1, a3, a1
+; RV32IF-NEXT: seqz a4, s0
+; RV32IF-NEXT: addi a4, a4, -1
+; RV32IF-NEXT: and a0, a4, a0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
-; RV32IF-NEXT: .LBB9_7:
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: and a0, a3, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_trunc_si64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB9_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz
-; RV64IF-NEXT: .LBB9_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.trunc.f32(float %x)
%b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
define signext i32 @test_trunc_ui32(float %x) {
; CHECKIF-LABEL: test_trunc_ui32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB10_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rtz
-; CHECKIF-NEXT: .LBB10_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.trunc.f32(float %x)
%b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: fmv.w.x ft0, zero
; RV32IF-NEXT: fle.s a0, ft0, fa0
-; RV32IF-NEXT: neg s0, a0
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi s0, a0, -1
; RV32IF-NEXT: call __fixunssfdi@plt
; RV32IF-NEXT: lui a2, %hi(.LCPI11_0)
; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a2)
; RV32IF-NEXT: flt.s a2, ft0, fs0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
+; RV32IF-NEXT: or a0, a2, a0
; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a1, a2
+; RV32IF-NEXT: or a1, a2, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_trunc_ui64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB11_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.lu.s a0, fa0, rtz
-; RV64IF-NEXT: .LBB11_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.trunc.f32(float %x)
%b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
define signext i32 @test_round_si32(float %x) {
; CHECKIF-LABEL: test_round_si32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB12_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.w.s a0, fa0, rmm
-; CHECKIF-NEXT: .LBB12_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.round.f32(float %x)
%b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
; RV32IF-NEXT: addi a1, a3, -1
; RV32IF-NEXT: .LBB13_4:
; RV32IF-NEXT: feq.s a3, fs0, fs0
-; RV32IF-NEXT: bnez a3, .LBB13_6
-; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: j .LBB13_7
-; RV32IF-NEXT: .LBB13_6:
-; RV32IF-NEXT: neg a3, s0
-; RV32IF-NEXT: and a0, a3, a0
+; RV32IF-NEXT: seqz a3, a3
+; RV32IF-NEXT: addi a3, a3, -1
+; RV32IF-NEXT: and a1, a3, a1
+; RV32IF-NEXT: seqz a4, s0
+; RV32IF-NEXT: addi a4, a4, -1
+; RV32IF-NEXT: and a0, a4, a0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
-; RV32IF-NEXT: .LBB13_7:
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: and a0, a3, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_round_si64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB13_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.l.s a0, fa0, rmm
-; RV64IF-NEXT: .LBB13_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.round.f32(float %x)
%b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
define signext i32 @test_round_ui32(float %x) {
; CHECKIF-LABEL: test_round_ui32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB14_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rmm
-; CHECKIF-NEXT: .LBB14_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.round.f32(float %x)
%b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: fmv.w.x ft0, zero
; RV32IF-NEXT: fle.s a0, ft0, fa0
-; RV32IF-NEXT: neg s0, a0
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi s0, a0, -1
; RV32IF-NEXT: call __fixunssfdi@plt
; RV32IF-NEXT: lui a2, %hi(.LCPI15_0)
; RV32IF-NEXT: flw ft0, %lo(.LCPI15_0)(a2)
; RV32IF-NEXT: flt.s a2, ft0, fs0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
+; RV32IF-NEXT: or a0, a2, a0
; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a1, a2
+; RV32IF-NEXT: or a1, a2, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_round_ui64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB15_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.lu.s a0, fa0, rmm
-; RV64IF-NEXT: .LBB15_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.round.f32(float %x)
%b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
define signext i32 @test_roundeven_si32(float %x) {
; CHECKIF-LABEL: test_roundeven_si32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB16_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.w.s a0, fa0, rne
-; CHECKIF-NEXT: .LBB16_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.roundeven.f32(float %x)
%b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
; RV32IF-NEXT: addi a1, a3, -1
; RV32IF-NEXT: .LBB17_4:
; RV32IF-NEXT: feq.s a3, fs0, fs0
-; RV32IF-NEXT: bnez a3, .LBB17_6
-; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: j .LBB17_7
-; RV32IF-NEXT: .LBB17_6:
-; RV32IF-NEXT: neg a3, s0
-; RV32IF-NEXT: and a0, a3, a0
+; RV32IF-NEXT: seqz a3, a3
+; RV32IF-NEXT: addi a3, a3, -1
+; RV32IF-NEXT: and a1, a3, a1
+; RV32IF-NEXT: seqz a4, s0
+; RV32IF-NEXT: addi a4, a4, -1
+; RV32IF-NEXT: and a0, a4, a0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
-; RV32IF-NEXT: .LBB17_7:
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: and a0, a3, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_roundeven_si64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB17_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.l.s a0, fa0, rne
-; RV64IF-NEXT: .LBB17_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.roundeven.f32(float %x)
%b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
define signext i32 @test_roundeven_ui32(float %x) {
; CHECKIF-LABEL: test_roundeven_ui32:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: feq.s a0, fa0, fa0
-; CHECKIF-NEXT: beqz a0, .LBB18_2
-; CHECKIF-NEXT: # %bb.1:
; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rne
-; CHECKIF-NEXT: .LBB18_2:
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
; CHECKIF-NEXT: ret
%a = call float @llvm.roundeven.f32(float %x)
%b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: fmv.w.x ft0, zero
; RV32IF-NEXT: fle.s a0, ft0, fa0
-; RV32IF-NEXT: neg s0, a0
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi s0, a0, -1
; RV32IF-NEXT: call __fixunssfdi@plt
; RV32IF-NEXT: lui a2, %hi(.LCPI19_0)
; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a2)
; RV32IF-NEXT: flt.s a2, ft0, fs0
; RV32IF-NEXT: seqz a2, a2
; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a0, a2
+; RV32IF-NEXT: or a0, a2, a0
; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a1, a2
+; RV32IF-NEXT: or a1, a2, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IF-LABEL: test_roundeven_ui64:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: beqz a0, .LBB19_2
-; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.lu.s a0, fa0, rne
-; RV64IF-NEXT: .LBB19_2:
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ret
%a = call float @llvm.roundeven.f32(float %x)
%b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
; RV32-NEXT: j .LBB49_2
; RV32-NEXT: .LBB49_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a3, a0, a1
; RV32-NEXT: sw a4, 0(sp)
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: mv a1, sp
; RV32-NEXT: call __atomic_compare_exchange_8@plt
; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
-; RV32-NEXT: bnez a0, .LBB49_7
+; RV32-NEXT: bnez a0, .LBB49_6
; RV32-NEXT: .LBB49_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beqz a1, .LBB49_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: sgtz a0, a1
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: bnez a0, .LBB49_1
; RV32-NEXT: j .LBB49_5
; RV32-NEXT: .LBB49_4: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: sltiu a0, a4, 2
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB49_5: # %atomicrmw.start
-; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: mv a2, a4
-; RV32-NEXT: mv a3, a1
; RV32-NEXT: bnez a0, .LBB49_1
-; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: .LBB49_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
-; RV32-NEXT: li a3, 0
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB49_1
-; RV32-NEXT: .LBB49_7: # %atomicrmw.end
+; RV32-NEXT: .LBB49_6: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: j .LBB50_2
; RV32-NEXT: .LBB50_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a3, a0, a1
; RV32-NEXT: sw a4, 0(sp)
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: mv a1, sp
; RV32-NEXT: call __atomic_compare_exchange_8@plt
; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
-; RV32-NEXT: bnez a0, .LBB50_7
+; RV32-NEXT: bnez a0, .LBB50_6
; RV32-NEXT: .LBB50_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beqz a1, .LBB50_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: slti a0, a1, 0
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: bnez a0, .LBB50_1
; RV32-NEXT: j .LBB50_5
; RV32-NEXT: .LBB50_4: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: sltiu a0, a4, 2
-; RV32-NEXT: .LBB50_5: # %atomicrmw.start
-; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: mv a2, a4
-; RV32-NEXT: mv a3, a1
; RV32-NEXT: bnez a0, .LBB50_1
-; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: .LBB50_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
-; RV32-NEXT: li a3, 0
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB50_1
-; RV32-NEXT: .LBB50_7: # %atomicrmw.end
+; RV32-NEXT: .LBB50_6: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: j .LBB51_2
; RV32-NEXT: .LBB51_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a3, a0, a1
; RV32-NEXT: sw a4, 0(sp)
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: mv a1, sp
; RV32-NEXT: call __atomic_compare_exchange_8@plt
; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
-; RV32-NEXT: bnez a0, .LBB51_7
+; RV32-NEXT: bnez a0, .LBB51_6
; RV32-NEXT: .LBB51_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beqz a1, .LBB51_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: snez a0, a1
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: bnez a0, .LBB51_1
; RV32-NEXT: j .LBB51_5
; RV32-NEXT: .LBB51_4: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: sltiu a0, a4, 2
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB51_5: # %atomicrmw.start
-; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: mv a2, a4
-; RV32-NEXT: mv a3, a1
; RV32-NEXT: bnez a0, .LBB51_1
-; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: .LBB51_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
-; RV32-NEXT: li a3, 0
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB51_1
-; RV32-NEXT: .LBB51_7: # %atomicrmw.end
+; RV32-NEXT: .LBB51_6: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: j .LBB52_2
; RV32-NEXT: .LBB52_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a3, a0, a1
; RV32-NEXT: sw a4, 0(sp)
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: mv a1, sp
; RV32-NEXT: call __atomic_compare_exchange_8@plt
; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
-; RV32-NEXT: bnez a0, .LBB52_7
+; RV32-NEXT: bnez a0, .LBB52_4
; RV32-NEXT: .LBB52_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beqz a1, .LBB52_4
-; RV32-NEXT: # %bb.3: # %atomicrmw.start
-; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: j .LBB52_5
-; RV32-NEXT: .LBB52_4: # in Loop: Header=BB52_2 Depth=1
; RV32-NEXT: sltiu a0, a4, 2
-; RV32-NEXT: .LBB52_5: # %atomicrmw.start
-; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
+; RV32-NEXT: snez a2, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
; RV32-NEXT: mv a2, a4
-; RV32-NEXT: mv a3, a1
; RV32-NEXT: bnez a0, .LBB52_1
-; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
-; RV32-NEXT: li a3, 0
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB52_1
-; RV32-NEXT: .LBB52_7: # %atomicrmw.end
+; RV32-NEXT: .LBB52_4: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: beqz a1, .LBB0_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: slti a4, a1, 0
-; RV32IF-NEXT: beqz a4, .LBB0_3
-; RV32IF-NEXT: j .LBB0_4
+; RV32IF-NEXT: j .LBB0_3
; RV32IF-NEXT: .LBB0_2:
; RV32IF-NEXT: sltu a4, a0, a3
-; RV32IF-NEXT: bnez a4, .LBB0_4
; RV32IF-NEXT: .LBB0_3: # %entry
-; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: seqz a5, a4
+; RV32IF-NEXT: addi a5, a5, -1
+; RV32IF-NEXT: and a1, a5, a1
+; RV32IF-NEXT: bnez a4, .LBB0_5
+; RV32IF-NEXT: # %bb.4: # %entry
; RV32IF-NEXT: mv a0, a3
-; RV32IF-NEXT: .LBB0_4: # %entry
+; RV32IF-NEXT: .LBB0_5: # %entry
; RV32IF-NEXT: li a3, -1
-; RV32IF-NEXT: beq a1, a3, .LBB0_6
-; RV32IF-NEXT: # %bb.5: # %entry
+; RV32IF-NEXT: beq a1, a3, .LBB0_7
+; RV32IF-NEXT: # %bb.6: # %entry
; RV32IF-NEXT: slti a1, a1, 0
; RV32IF-NEXT: xori a1, a1, 1
-; RV32IF-NEXT: beqz a1, .LBB0_7
-; RV32IF-NEXT: j .LBB0_8
-; RV32IF-NEXT: .LBB0_6:
+; RV32IF-NEXT: beqz a1, .LBB0_8
+; RV32IF-NEXT: j .LBB0_9
+; RV32IF-NEXT: .LBB0_7:
; RV32IF-NEXT: sltu a1, a2, a0
-; RV32IF-NEXT: bnez a1, .LBB0_8
-; RV32IF-NEXT: .LBB0_7: # %entry
-; RV32IF-NEXT: lui a0, 524288
+; RV32IF-NEXT: bnez a1, .LBB0_9
; RV32IF-NEXT: .LBB0_8: # %entry
+; RV32IF-NEXT: lui a0, 524288
+; RV32IF-NEXT: .LBB0_9: # %entry
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
; RV32IFD-LABEL: stest_f64i32:
; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB0_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
-; RV32IFD-NEXT: .LBB0_2: # %entry
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: stest_f64i32:
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: .cfi_offset ra, -4
; RV32IF-NEXT: call __fixunsdfdi@plt
-; RV32IF-NEXT: beqz a1, .LBB1_2
-; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: j .LBB1_3
-; RV32IF-NEXT: .LBB1_2:
-; RV32IF-NEXT: sltiu a1, a0, -1
-; RV32IF-NEXT: .LBB1_3: # %entry
+; RV32IF-NEXT: sltiu a2, a0, -1
; RV32IF-NEXT: snez a1, a1
; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: and a1, a1, a2
+; RV32IF-NEXT: addi a1, a1, -1
; RV32IF-NEXT: or a0, a1, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
;
; RV32IFD-LABEL: utest_f64i32:
; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB1_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
-; RV32IFD-NEXT: .LBB1_2: # %entry
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: utest_f64i32:
; RV32IF-NEXT: .LBB2_2:
; RV32IF-NEXT: sltiu a2, a0, -1
; RV32IF-NEXT: .LBB2_3: # %entry
-; RV32IF-NEXT: snez a3, a2
-; RV32IF-NEXT: addi a3, a3, -1
-; RV32IF-NEXT: bnez a2, .LBB2_5
-; RV32IF-NEXT: # %bb.4: # %entry
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: .LBB2_5: # %entry
+; RV32IF-NEXT: snez a2, a2
+; RV32IF-NEXT: addi a3, a2, -1
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: and a1, a2, a1
; RV32IF-NEXT: or a0, a3, a0
-; RV32IF-NEXT: beqz a1, .LBB2_7
-; RV32IF-NEXT: # %bb.6: # %entry
+; RV32IF-NEXT: beqz a1, .LBB2_5
+; RV32IF-NEXT: # %bb.4: # %entry
; RV32IF-NEXT: sgtz a1, a1
-; RV32IF-NEXT: beqz a1, .LBB2_8
-; RV32IF-NEXT: j .LBB2_9
-; RV32IF-NEXT: .LBB2_7:
+; RV32IF-NEXT: j .LBB2_6
+; RV32IF-NEXT: .LBB2_5:
; RV32IF-NEXT: snez a1, a0
-; RV32IF-NEXT: bnez a1, .LBB2_9
-; RV32IF-NEXT: .LBB2_8: # %entry
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: .LBB2_9: # %entry
+; RV32IF-NEXT: .LBB2_6: # %entry
+; RV32IF-NEXT: seqz a1, a1
+; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: and a0, a1, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
; RV64IF-NEXT: # %bb.1: # %entry
; RV64IF-NEXT: mv a0, a1
; RV64IF-NEXT: .LBB2_2: # %entry
-; RV64IF-NEXT: bgtz a0, .LBB2_4
-; RV64IF-NEXT: # %bb.3: # %entry
-; RV64IF-NEXT: li a0, 0
-; RV64IF-NEXT: .LBB2_4: # %entry
+; RV64IF-NEXT: sgtz a1, a0
+; RV64IF-NEXT: neg a1, a1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
;
; RV32IFD-LABEL: ustest_f64i32:
; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB2_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
-; RV32IFD-NEXT: .LBB2_2: # %entry
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: ustest_f64i32:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
; RV64IFD-NEXT: li a1, -1
; RV64IFD-NEXT: srli a1, a1, 32
-; RV64IFD-NEXT: bge a0, a1, .LBB2_3
+; RV64IFD-NEXT: blt a0, a1, .LBB2_2
; RV64IFD-NEXT: # %bb.1: # %entry
-; RV64IFD-NEXT: blez a0, .LBB2_4
-; RV64IFD-NEXT: .LBB2_2: # %entry
-; RV64IFD-NEXT: ret
-; RV64IFD-NEXT: .LBB2_3: # %entry
; RV64IFD-NEXT: mv a0, a1
-; RV64IFD-NEXT: bgtz a0, .LBB2_2
-; RV64IFD-NEXT: .LBB2_4: # %entry
-; RV64IFD-NEXT: li a0, 0
+; RV64IFD-NEXT: .LBB2_2: # %entry
+; RV64IFD-NEXT: sgtz a1, a0
+; RV64IFD-NEXT: neg a1, a1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
entry:
%conv = fptosi double %x to i64
define i32 @stest_f32i32(float %x) {
; RV32-LABEL: stest_f32i32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: feq.s a0, fa0, fa0
-; RV32-NEXT: beqz a0, .LBB3_2
-; RV32-NEXT: # %bb.1:
; RV32-NEXT: fcvt.w.s a0, fa0, rtz
-; RV32-NEXT: .LBB3_2: # %entry
+; RV32-NEXT: feq.s a1, fa0, fa0
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: stest_f32i32:
define i32 @utest_f32i32(float %x) {
; RV32-LABEL: utest_f32i32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: feq.s a0, fa0, fa0
-; RV32-NEXT: beqz a0, .LBB4_2
-; RV32-NEXT: # %bb.1:
; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
-; RV32-NEXT: .LBB4_2: # %entry
+; RV32-NEXT: feq.s a1, fa0, fa0
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: utest_f32i32:
define i32 @ustest_f32i32(float %x) {
; RV32-LABEL: ustest_f32i32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: feq.s a0, fa0, fa0
-; RV32-NEXT: beqz a0, .LBB5_2
-; RV32-NEXT: # %bb.1:
; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
-; RV32-NEXT: .LBB5_2: # %entry
+; RV32-NEXT: feq.s a1, fa0, fa0
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: ustest_f32i32:
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: li a1, -1
; RV64-NEXT: srli a1, a1, 32
-; RV64-NEXT: bge a0, a1, .LBB5_3
+; RV64-NEXT: blt a0, a1, .LBB5_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: blez a0, .LBB5_4
-; RV64-NEXT: .LBB5_2: # %entry
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB5_3: # %entry
; RV64-NEXT: mv a0, a1
-; RV64-NEXT: bgtz a0, .LBB5_2
-; RV64-NEXT: .LBB5_4: # %entry
-; RV64-NEXT: li a0, 0
+; RV64-NEXT: .LBB5_2: # %entry
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ret
entry:
%conv = fptosi float %x to i64
; RV32-NEXT: beqz a1, .LBB6_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: slti a4, a1, 0
-; RV32-NEXT: beqz a4, .LBB6_3
-; RV32-NEXT: j .LBB6_4
+; RV32-NEXT: j .LBB6_3
; RV32-NEXT: .LBB6_2:
; RV32-NEXT: sltu a4, a0, a3
-; RV32-NEXT: bnez a4, .LBB6_4
; RV32-NEXT: .LBB6_3: # %entry
-; RV32-NEXT: li a1, 0
+; RV32-NEXT: seqz a5, a4
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a1, a5, a1
+; RV32-NEXT: bnez a4, .LBB6_5
+; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: mv a0, a3
-; RV32-NEXT: .LBB6_4: # %entry
+; RV32-NEXT: .LBB6_5: # %entry
; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a1, a3, .LBB6_6
-; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: beq a1, a3, .LBB6_7
+; RV32-NEXT: # %bb.6: # %entry
; RV32-NEXT: slti a1, a1, 0
; RV32-NEXT: xori a1, a1, 1
-; RV32-NEXT: beqz a1, .LBB6_7
-; RV32-NEXT: j .LBB6_8
-; RV32-NEXT: .LBB6_6:
+; RV32-NEXT: beqz a1, .LBB6_8
+; RV32-NEXT: j .LBB6_9
+; RV32-NEXT: .LBB6_7:
; RV32-NEXT: sltu a1, a2, a0
-; RV32-NEXT: bnez a1, .LBB6_8
-; RV32-NEXT: .LBB6_7: # %entry
-; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: bnez a1, .LBB6_9
; RV32-NEXT: .LBB6_8: # %entry
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: .LBB6_9: # %entry
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2@plt
; RV32-NEXT: call __fixunssfdi@plt
-; RV32-NEXT: beqz a1, .LBB7_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: j .LBB7_3
-; RV32-NEXT: .LBB7_2:
-; RV32-NEXT: sltiu a1, a0, -1
-; RV32-NEXT: .LBB7_3: # %entry
+; RV32-NEXT: sltiu a2, a0, -1
; RV32-NEXT: snez a1, a1
; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .LBB8_2:
; RV32-NEXT: sltiu a2, a0, -1
; RV32-NEXT: .LBB8_3: # %entry
-; RV32-NEXT: snez a3, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: bnez a2, .LBB8_5
-; RV32-NEXT: # %bb.4: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: .LBB8_5: # %entry
+; RV32-NEXT: snez a2, a2
+; RV32-NEXT: addi a3, a2, -1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: or a0, a3, a0
-; RV32-NEXT: beqz a1, .LBB8_7
-; RV32-NEXT: # %bb.6: # %entry
+; RV32-NEXT: beqz a1, .LBB8_5
+; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: sgtz a1, a1
-; RV32-NEXT: beqz a1, .LBB8_8
-; RV32-NEXT: j .LBB8_9
-; RV32-NEXT: .LBB8_7:
+; RV32-NEXT: j .LBB8_6
+; RV32-NEXT: .LBB8_5:
; RV32-NEXT: snez a1, a0
-; RV32-NEXT: bnez a1, .LBB8_9
-; RV32-NEXT: .LBB8_8: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: .LBB8_9: # %entry
+; RV32-NEXT: .LBB8_6: # %entry
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB8_2: # %entry
-; RV64-NEXT: bgtz a0, .LBB8_4
-; RV64-NEXT: # %bb.3: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB8_4: # %entry
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: mv a0, a1
; RV32IF-NEXT: .LBB11_2: # %entry
-; RV32IF-NEXT: bgtz a0, .LBB11_4
-; RV32IF-NEXT: # %bb.3: # %entry
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: .LBB11_4: # %entry
+; RV32IF-NEXT: sgtz a1, a0
+; RV32IF-NEXT: neg a1, a1
+; RV32IF-NEXT: and a0, a1, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
; RV64IF-NEXT: # %bb.1: # %entry
; RV64IF-NEXT: mv a0, a1
; RV64IF-NEXT: .LBB11_2: # %entry
-; RV64IF-NEXT: bgtz a0, .LBB11_4
-; RV64IF-NEXT: # %bb.3: # %entry
-; RV64IF-NEXT: li a0, 0
-; RV64IF-NEXT: .LBB11_4: # %entry
+; RV64IF-NEXT: sgtz a1, a0
+; RV64IF-NEXT: neg a1, a1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
; RV32IFD-NEXT: lui a1, 16
; RV32IFD-NEXT: addi a1, a1, -1
-; RV32IFD-NEXT: bge a0, a1, .LBB11_3
+; RV32IFD-NEXT: blt a0, a1, .LBB11_2
; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: blez a0, .LBB11_4
-; RV32IFD-NEXT: .LBB11_2: # %entry
-; RV32IFD-NEXT: ret
-; RV32IFD-NEXT: .LBB11_3: # %entry
; RV32IFD-NEXT: mv a0, a1
-; RV32IFD-NEXT: bgtz a0, .LBB11_2
-; RV32IFD-NEXT: .LBB11_4: # %entry
-; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: .LBB11_2: # %entry
+; RV32IFD-NEXT: sgtz a1, a0
+; RV32IFD-NEXT: neg a1, a1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: ustest_f64i16:
; RV64IFD-NEXT: fcvt.w.d a0, fa0, rtz
; RV64IFD-NEXT: lui a1, 16
; RV64IFD-NEXT: addiw a1, a1, -1
-; RV64IFD-NEXT: bge a0, a1, .LBB11_3
+; RV64IFD-NEXT: blt a0, a1, .LBB11_2
; RV64IFD-NEXT: # %bb.1: # %entry
-; RV64IFD-NEXT: blez a0, .LBB11_4
-; RV64IFD-NEXT: .LBB11_2: # %entry
-; RV64IFD-NEXT: ret
-; RV64IFD-NEXT: .LBB11_3: # %entry
; RV64IFD-NEXT: mv a0, a1
-; RV64IFD-NEXT: bgtz a0, .LBB11_2
-; RV64IFD-NEXT: .LBB11_4: # %entry
-; RV64IFD-NEXT: li a0, 0
+; RV64IFD-NEXT: .LBB11_2: # %entry
+; RV64IFD-NEXT: sgtz a1, a0
+; RV64IFD-NEXT: neg a1, a1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
entry:
%conv = fptosi double %x to i32
; RV32-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-NEXT: lui a1, 16
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: bge a0, a1, .LBB14_3
+; RV32-NEXT: blt a0, a1, .LBB14_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: blez a0, .LBB14_4
-; RV32-NEXT: .LBB14_2: # %entry
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB14_3: # %entry
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: bgtz a0, .LBB14_2
-; RV32-NEXT: .LBB14_4: # %entry
-; RV32-NEXT: li a0, 0
+; RV32-NEXT: .LBB14_2: # %entry
+; RV32-NEXT: sgtz a1, a0
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: ustest_f32i16:
; RV64-NEXT: fcvt.w.s a0, fa0, rtz
; RV64-NEXT: lui a1, 16
; RV64-NEXT: addiw a1, a1, -1
-; RV64-NEXT: bge a0, a1, .LBB14_3
+; RV64-NEXT: blt a0, a1, .LBB14_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: blez a0, .LBB14_4
-; RV64-NEXT: .LBB14_2: # %entry
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB14_3: # %entry
; RV64-NEXT: mv a0, a1
-; RV64-NEXT: bgtz a0, .LBB14_2
-; RV64-NEXT: .LBB14_4: # %entry
-; RV64-NEXT: li a0, 0
+; RV64-NEXT: .LBB14_2: # %entry
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ret
entry:
%conv = fptosi float %x to i32
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: mv a0, a1
; RV32-NEXT: .LBB17_2: # %entry
-; RV32-NEXT: bgtz a0, .LBB17_4
-; RV32-NEXT: # %bb.3: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: .LBB17_4: # %entry
+; RV32-NEXT: sgtz a1, a0
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB17_2: # %entry
-; RV64-NEXT: bgtz a0, .LBB17_4
-; RV64-NEXT: # %bb.3: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB17_4: # %entry
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti@plt
-; RV32IF-NEXT: lw a2, 20(sp)
-; RV32IF-NEXT: lw a3, 16(sp)
+; RV32IF-NEXT: lw a0, 20(sp)
+; RV32IF-NEXT: lw a2, 16(sp)
; RV32IF-NEXT: lw a1, 12(sp)
-; RV32IF-NEXT: lw a0, 8(sp)
-; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: addi a5, a4, -1
+; RV32IF-NEXT: lw a4, 8(sp)
+; RV32IF-NEXT: lui a3, 524288
+; RV32IF-NEXT: addi a5, a3, -1
; RV32IF-NEXT: beq a1, a5, .LBB18_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: or a7, a3, a2
+; RV32IF-NEXT: or a7, a2, a0
; RV32IF-NEXT: bnez a7, .LBB18_3
; RV32IF-NEXT: j .LBB18_4
; RV32IF-NEXT: .LBB18_2:
-; RV32IF-NEXT: sltiu a6, a0, -1
-; RV32IF-NEXT: or a7, a3, a2
+; RV32IF-NEXT: sltiu a6, a4, -1
+; RV32IF-NEXT: or a7, a2, a0
; RV32IF-NEXT: beqz a7, .LBB18_4
; RV32IF-NEXT: .LBB18_3: # %entry
-; RV32IF-NEXT: slti a6, a2, 0
+; RV32IF-NEXT: slti a6, a0, 0
; RV32IF-NEXT: .LBB18_4: # %entry
-; RV32IF-NEXT: snez a7, a6
-; RV32IF-NEXT: addi a7, a7, -1
+; RV32IF-NEXT: seqz t0, a6
+; RV32IF-NEXT: addi a7, t0, -1
+; RV32IF-NEXT: neg t0, t0
; RV32IF-NEXT: bnez a6, .LBB18_6
; RV32IF-NEXT: # %bb.5: # %entry
-; RV32IF-NEXT: li a2, 0
-; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: mv a1, a5
; RV32IF-NEXT: .LBB18_6: # %entry
-; RV32IF-NEXT: or a0, a7, a0
-; RV32IF-NEXT: beq a1, a4, .LBB18_8
+; RV32IF-NEXT: or a4, t0, a4
+; RV32IF-NEXT: and a5, a7, a0
+; RV32IF-NEXT: and a2, a7, a2
+; RV32IF-NEXT: beq a1, a3, .LBB18_8
; RV32IF-NEXT: # %bb.7: # %entry
-; RV32IF-NEXT: sltu a4, a4, a1
+; RV32IF-NEXT: sltu a0, a3, a1
; RV32IF-NEXT: j .LBB18_9
; RV32IF-NEXT: .LBB18_8:
-; RV32IF-NEXT: snez a4, a0
+; RV32IF-NEXT: snez a0, a4
; RV32IF-NEXT: .LBB18_9: # %entry
-; RV32IF-NEXT: and a3, a3, a2
-; RV32IF-NEXT: li a5, -1
-; RV32IF-NEXT: beq a3, a5, .LBB18_11
+; RV32IF-NEXT: and a2, a2, a5
+; RV32IF-NEXT: li a3, -1
+; RV32IF-NEXT: beq a2, a3, .LBB18_11
; RV32IF-NEXT: # %bb.10: # %entry
-; RV32IF-NEXT: slti a2, a2, 0
-; RV32IF-NEXT: xori a4, a2, 1
+; RV32IF-NEXT: slti a0, a5, 0
+; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: .LBB18_11: # %entry
-; RV32IF-NEXT: bnez a4, .LBB18_13
+; RV32IF-NEXT: bnez a0, .LBB18_13
; RV32IF-NEXT: # %bb.12: # %entry
-; RV32IF-NEXT: li a0, 0
; RV32IF-NEXT: lui a1, 524288
; RV32IF-NEXT: .LBB18_13: # %entry
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi a0, a0, -1
+; RV32IF-NEXT: and a0, a0, a4
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
; RV64IF-NEXT: beqz a1, .LBB18_2
; RV64IF-NEXT: # %bb.1: # %entry
; RV64IF-NEXT: slti a4, a1, 0
-; RV64IF-NEXT: beqz a4, .LBB18_3
-; RV64IF-NEXT: j .LBB18_4
+; RV64IF-NEXT: j .LBB18_3
; RV64IF-NEXT: .LBB18_2:
; RV64IF-NEXT: sltu a4, a0, a3
-; RV64IF-NEXT: bnez a4, .LBB18_4
; RV64IF-NEXT: .LBB18_3: # %entry
-; RV64IF-NEXT: li a1, 0
-; RV64IF-NEXT: mv a0, a3
-; RV64IF-NEXT: .LBB18_4: # %entry
-; RV64IF-NEXT: slli a3, a2, 63
-; RV64IF-NEXT: beq a1, a2, .LBB18_6
-; RV64IF-NEXT: # %bb.5: # %entry
-; RV64IF-NEXT: slti a1, a1, 0
-; RV64IF-NEXT: xori a1, a1, 1
-; RV64IF-NEXT: beqz a1, .LBB18_7
-; RV64IF-NEXT: j .LBB18_8
-; RV64IF-NEXT: .LBB18_6:
-; RV64IF-NEXT: sltu a1, a3, a0
-; RV64IF-NEXT: bnez a1, .LBB18_8
-; RV64IF-NEXT: .LBB18_7: # %entry
+; RV64IF-NEXT: seqz a5, a4
+; RV64IF-NEXT: addi a5, a5, -1
+; RV64IF-NEXT: and a5, a5, a1
+; RV64IF-NEXT: bnez a4, .LBB18_5
+; RV64IF-NEXT: # %bb.4: # %entry
; RV64IF-NEXT: mv a0, a3
+; RV64IF-NEXT: .LBB18_5: # %entry
+; RV64IF-NEXT: slli a1, a2, 63
+; RV64IF-NEXT: beq a5, a2, .LBB18_7
+; RV64IF-NEXT: # %bb.6: # %entry
+; RV64IF-NEXT: slti a2, a5, 0
+; RV64IF-NEXT: xori a2, a2, 1
+; RV64IF-NEXT: beqz a2, .LBB18_8
+; RV64IF-NEXT: j .LBB18_9
+; RV64IF-NEXT: .LBB18_7:
+; RV64IF-NEXT: sltu a2, a1, a0
+; RV64IF-NEXT: bnez a2, .LBB18_9
; RV64IF-NEXT: .LBB18_8: # %entry
+; RV64IF-NEXT: mv a0, a1
+; RV64IF-NEXT: .LBB18_9: # %entry
; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti@plt
-; RV32IFD-NEXT: lw a2, 20(sp)
-; RV32IFD-NEXT: lw a3, 16(sp)
+; RV32IFD-NEXT: lw a0, 20(sp)
+; RV32IFD-NEXT: lw a2, 16(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: addi a5, a4, -1
+; RV32IFD-NEXT: lw a4, 8(sp)
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: addi a5, a3, -1
; RV32IFD-NEXT: beq a1, a5, .LBB18_2
; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: or a7, a3, a2
+; RV32IFD-NEXT: or a7, a2, a0
; RV32IFD-NEXT: bnez a7, .LBB18_3
; RV32IFD-NEXT: j .LBB18_4
; RV32IFD-NEXT: .LBB18_2:
-; RV32IFD-NEXT: sltiu a6, a0, -1
-; RV32IFD-NEXT: or a7, a3, a2
+; RV32IFD-NEXT: sltiu a6, a4, -1
+; RV32IFD-NEXT: or a7, a2, a0
; RV32IFD-NEXT: beqz a7, .LBB18_4
; RV32IFD-NEXT: .LBB18_3: # %entry
-; RV32IFD-NEXT: slti a6, a2, 0
+; RV32IFD-NEXT: slti a6, a0, 0
; RV32IFD-NEXT: .LBB18_4: # %entry
-; RV32IFD-NEXT: snez a7, a6
-; RV32IFD-NEXT: addi a7, a7, -1
+; RV32IFD-NEXT: seqz t0, a6
+; RV32IFD-NEXT: addi a7, t0, -1
+; RV32IFD-NEXT: neg t0, t0
; RV32IFD-NEXT: bnez a6, .LBB18_6
; RV32IFD-NEXT: # %bb.5: # %entry
-; RV32IFD-NEXT: li a2, 0
-; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: mv a1, a5
; RV32IFD-NEXT: .LBB18_6: # %entry
-; RV32IFD-NEXT: or a0, a7, a0
-; RV32IFD-NEXT: beq a1, a4, .LBB18_8
+; RV32IFD-NEXT: or a4, t0, a4
+; RV32IFD-NEXT: and a5, a7, a0
+; RV32IFD-NEXT: and a2, a7, a2
+; RV32IFD-NEXT: beq a1, a3, .LBB18_8
; RV32IFD-NEXT: # %bb.7: # %entry
-; RV32IFD-NEXT: sltu a4, a4, a1
+; RV32IFD-NEXT: sltu a0, a3, a1
; RV32IFD-NEXT: j .LBB18_9
; RV32IFD-NEXT: .LBB18_8:
-; RV32IFD-NEXT: snez a4, a0
+; RV32IFD-NEXT: snez a0, a4
; RV32IFD-NEXT: .LBB18_9: # %entry
-; RV32IFD-NEXT: and a3, a3, a2
-; RV32IFD-NEXT: li a5, -1
-; RV32IFD-NEXT: beq a3, a5, .LBB18_11
+; RV32IFD-NEXT: and a2, a2, a5
+; RV32IFD-NEXT: li a3, -1
+; RV32IFD-NEXT: beq a2, a3, .LBB18_11
; RV32IFD-NEXT: # %bb.10: # %entry
-; RV32IFD-NEXT: slti a2, a2, 0
-; RV32IFD-NEXT: xori a4, a2, 1
+; RV32IFD-NEXT: slti a0, a5, 0
+; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: .LBB18_11: # %entry
-; RV32IFD-NEXT: bnez a4, .LBB18_13
+; RV32IFD-NEXT: bnez a0, .LBB18_13
; RV32IFD-NEXT: # %bb.12: # %entry
-; RV32IFD-NEXT: li a0, 0
; RV32IFD-NEXT: lui a1, 524288
; RV32IFD-NEXT: .LBB18_13: # %entry
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi a0, a0, -1
+; RV32IFD-NEXT: and a0, a0, a4
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: stest_f64i64:
; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB18_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
-; RV64IFD-NEXT: .LBB18_2: # %entry
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
entry:
%conv = fptosi double %x to i128
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixunsdfti@plt
-; RV32IF-NEXT: lw a0, 20(sp)
-; RV32IF-NEXT: lw a1, 16(sp)
-; RV32IF-NEXT: beqz a0, .LBB19_2
-; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: li a2, 0
-; RV32IF-NEXT: j .LBB19_3
-; RV32IF-NEXT: .LBB19_2:
-; RV32IF-NEXT: seqz a2, a1
-; RV32IF-NEXT: .LBB19_3: # %entry
-; RV32IF-NEXT: xori a1, a1, 1
-; RV32IF-NEXT: or a1, a1, a0
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: beqz a1, .LBB19_5
-; RV32IF-NEXT: # %bb.4: # %entry
-; RV32IF-NEXT: mv a0, a2
-; RV32IF-NEXT: .LBB19_5: # %entry
-; RV32IF-NEXT: bnez a0, .LBB19_7
-; RV32IF-NEXT: # %bb.6: # %entry
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: j .LBB19_8
-; RV32IF-NEXT: .LBB19_7:
-; RV32IF-NEXT: lw a1, 12(sp)
-; RV32IF-NEXT: lw a0, 8(sp)
-; RV32IF-NEXT: .LBB19_8: # %entry
+; RV32IF-NEXT: lw a0, 16(sp)
+; RV32IF-NEXT: lw a1, 20(sp)
+; RV32IF-NEXT: lw a2, 12(sp)
+; RV32IF-NEXT: lw a3, 8(sp)
+; RV32IF-NEXT: seqz a4, a0
+; RV32IF-NEXT: snez a5, a1
+; RV32IF-NEXT: addi a5, a5, -1
+; RV32IF-NEXT: and a4, a5, a4
+; RV32IF-NEXT: xori a0, a0, 1
+; RV32IF-NEXT: or a0, a0, a1
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi a0, a0, -1
+; RV32IF-NEXT: and a0, a0, a4
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi a1, a0, -1
+; RV32IF-NEXT: and a0, a1, a3
+; RV32IF-NEXT: and a1, a1, a2
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixunsdfti@plt
-; RV64-NEXT: beqz a1, .LBB19_2
-; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB19_2: # %entry
+; RV64-NEXT: snez a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixunsdfti@plt
-; RV32IFD-NEXT: lw a0, 20(sp)
-; RV32IFD-NEXT: lw a1, 16(sp)
-; RV32IFD-NEXT: beqz a0, .LBB19_2
-; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: li a2, 0
-; RV32IFD-NEXT: j .LBB19_3
-; RV32IFD-NEXT: .LBB19_2:
-; RV32IFD-NEXT: seqz a2, a1
-; RV32IFD-NEXT: .LBB19_3: # %entry
-; RV32IFD-NEXT: xori a1, a1, 1
-; RV32IFD-NEXT: or a1, a1, a0
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: beqz a1, .LBB19_5
-; RV32IFD-NEXT: # %bb.4: # %entry
-; RV32IFD-NEXT: mv a0, a2
-; RV32IFD-NEXT: .LBB19_5: # %entry
-; RV32IFD-NEXT: bnez a0, .LBB19_7
-; RV32IFD-NEXT: # %bb.6: # %entry
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: j .LBB19_8
-; RV32IFD-NEXT: .LBB19_7:
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: .LBB19_8: # %entry
+; RV32IFD-NEXT: lw a0, 16(sp)
+; RV32IFD-NEXT: lw a1, 20(sp)
+; RV32IFD-NEXT: lw a2, 12(sp)
+; RV32IFD-NEXT: lw a3, 8(sp)
+; RV32IFD-NEXT: seqz a4, a0
+; RV32IFD-NEXT: snez a5, a1
+; RV32IFD-NEXT: addi a5, a5, -1
+; RV32IFD-NEXT: and a4, a5, a4
+; RV32IFD-NEXT: xori a0, a0, 1
+; RV32IFD-NEXT: or a0, a0, a1
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi a0, a0, -1
+; RV32IFD-NEXT: and a0, a0, a4
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi a1, a0, -1
+; RV32IFD-NEXT: and a0, a1, a3
+; RV32IFD-NEXT: and a1, a1, a2
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti@plt
-; RV32IF-NEXT: lw a2, 20(sp)
-; RV32IF-NEXT: lw a3, 16(sp)
-; RV32IF-NEXT: beqz a2, .LBB20_2
+; RV32IF-NEXT: lw a1, 20(sp)
+; RV32IF-NEXT: lw a0, 16(sp)
+; RV32IF-NEXT: beqz a1, .LBB20_2
; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: slti a0, a2, 0
+; RV32IF-NEXT: slti a2, a1, 0
; RV32IF-NEXT: j .LBB20_3
; RV32IF-NEXT: .LBB20_2:
-; RV32IF-NEXT: seqz a0, a3
+; RV32IF-NEXT: seqz a2, a0
; RV32IF-NEXT: .LBB20_3: # %entry
-; RV32IF-NEXT: xori a1, a3, 1
-; RV32IF-NEXT: or a4, a1, a2
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: beqz a4, .LBB20_5
+; RV32IF-NEXT: lw a4, 12(sp)
+; RV32IF-NEXT: xori a3, a0, 1
+; RV32IF-NEXT: or a3, a3, a1
+; RV32IF-NEXT: seqz a3, a3
+; RV32IF-NEXT: addi a3, a3, -1
+; RV32IF-NEXT: and a2, a3, a2
+; RV32IF-NEXT: seqz a3, a2
+; RV32IF-NEXT: addi a3, a3, -1
+; RV32IF-NEXT: bnez a2, .LBB20_5
; RV32IF-NEXT: # %bb.4: # %entry
-; RV32IF-NEXT: mv a1, a0
+; RV32IF-NEXT: li a0, 1
; RV32IF-NEXT: .LBB20_5: # %entry
-; RV32IF-NEXT: bnez a1, .LBB20_9
+; RV32IF-NEXT: lw a5, 8(sp)
+; RV32IF-NEXT: and a2, a3, a1
+; RV32IF-NEXT: and a1, a3, a4
+; RV32IF-NEXT: beqz a2, .LBB20_8
; RV32IF-NEXT: # %bb.6: # %entry
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: li a2, 0
-; RV32IF-NEXT: li a3, 1
-; RV32IF-NEXT: bnez a2, .LBB20_10
+; RV32IF-NEXT: sgtz a4, a2
+; RV32IF-NEXT: and a3, a3, a5
+; RV32IF-NEXT: bnez a1, .LBB20_9
; RV32IF-NEXT: .LBB20_7:
-; RV32IF-NEXT: snez a4, a3
-; RV32IF-NEXT: bnez a1, .LBB20_11
+; RV32IF-NEXT: snez a5, a3
+; RV32IF-NEXT: or a0, a0, a2
+; RV32IF-NEXT: bnez a0, .LBB20_10
+; RV32IF-NEXT: j .LBB20_11
; RV32IF-NEXT: .LBB20_8:
-; RV32IF-NEXT: snez a5, a0
-; RV32IF-NEXT: or a2, a3, a2
-; RV32IF-NEXT: bnez a2, .LBB20_12
-; RV32IF-NEXT: j .LBB20_13
-; RV32IF-NEXT: .LBB20_9:
-; RV32IF-NEXT: lw a1, 12(sp)
-; RV32IF-NEXT: lw a0, 8(sp)
-; RV32IF-NEXT: beqz a2, .LBB20_7
-; RV32IF-NEXT: .LBB20_10: # %entry
-; RV32IF-NEXT: sgtz a4, a2
-; RV32IF-NEXT: beqz a1, .LBB20_8
-; RV32IF-NEXT: .LBB20_11: # %entry
+; RV32IF-NEXT: snez a4, a0
+; RV32IF-NEXT: and a3, a3, a5
+; RV32IF-NEXT: beqz a1, .LBB20_7
+; RV32IF-NEXT: .LBB20_9: # %entry
; RV32IF-NEXT: snez a5, a1
-; RV32IF-NEXT: or a2, a3, a2
-; RV32IF-NEXT: beqz a2, .LBB20_13
-; RV32IF-NEXT: .LBB20_12: # %entry
+; RV32IF-NEXT: or a0, a0, a2
+; RV32IF-NEXT: beqz a0, .LBB20_11
+; RV32IF-NEXT: .LBB20_10: # %entry
; RV32IF-NEXT: mv a5, a4
-; RV32IF-NEXT: .LBB20_13: # %entry
-; RV32IF-NEXT: bnez a5, .LBB20_15
-; RV32IF-NEXT: # %bb.14: # %entry
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: .LBB20_15: # %entry
+; RV32IF-NEXT: .LBB20_11: # %entry
+; RV32IF-NEXT: seqz a0, a5
+; RV32IF-NEXT: addi a2, a0, -1
+; RV32IF-NEXT: and a0, a2, a3
+; RV32IF-NEXT: and a1, a2, a1
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixdfti@plt
+; RV64-NEXT: mv a2, a1
; RV64-NEXT: blez a1, .LBB20_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 1
; RV64-NEXT: .LBB20_2: # %entry
-; RV64-NEXT: beqz a1, .LBB20_4
-; RV64-NEXT: # %bb.3: # %entry
; RV64-NEXT: sgtz a1, a1
-; RV64-NEXT: beqz a1, .LBB20_5
-; RV64-NEXT: j .LBB20_6
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: beqz a2, .LBB20_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: sgtz a1, a2
+; RV64-NEXT: j .LBB20_5
; RV64-NEXT: .LBB20_4:
; RV64-NEXT: snez a1, a0
-; RV64-NEXT: bnez a1, .LBB20_6
; RV64-NEXT: .LBB20_5: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB20_6: # %entry
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti@plt
-; RV32IFD-NEXT: lw a2, 20(sp)
-; RV32IFD-NEXT: lw a3, 16(sp)
-; RV32IFD-NEXT: beqz a2, .LBB20_2
+; RV32IFD-NEXT: lw a1, 20(sp)
+; RV32IFD-NEXT: lw a0, 16(sp)
+; RV32IFD-NEXT: beqz a1, .LBB20_2
; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: slti a0, a2, 0
+; RV32IFD-NEXT: slti a2, a1, 0
; RV32IFD-NEXT: j .LBB20_3
; RV32IFD-NEXT: .LBB20_2:
-; RV32IFD-NEXT: seqz a0, a3
+; RV32IFD-NEXT: seqz a2, a0
; RV32IFD-NEXT: .LBB20_3: # %entry
-; RV32IFD-NEXT: xori a1, a3, 1
-; RV32IFD-NEXT: or a4, a1, a2
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: beqz a4, .LBB20_5
+; RV32IFD-NEXT: lw a4, 12(sp)
+; RV32IFD-NEXT: xori a3, a0, 1
+; RV32IFD-NEXT: or a3, a3, a1
+; RV32IFD-NEXT: seqz a3, a3
+; RV32IFD-NEXT: addi a3, a3, -1
+; RV32IFD-NEXT: and a2, a3, a2
+; RV32IFD-NEXT: seqz a3, a2
+; RV32IFD-NEXT: addi a3, a3, -1
+; RV32IFD-NEXT: bnez a2, .LBB20_5
; RV32IFD-NEXT: # %bb.4: # %entry
-; RV32IFD-NEXT: mv a1, a0
+; RV32IFD-NEXT: li a0, 1
; RV32IFD-NEXT: .LBB20_5: # %entry
-; RV32IFD-NEXT: bnez a1, .LBB20_9
+; RV32IFD-NEXT: lw a5, 8(sp)
+; RV32IFD-NEXT: and a2, a3, a1
+; RV32IFD-NEXT: and a1, a3, a4
+; RV32IFD-NEXT: beqz a2, .LBB20_8
; RV32IFD-NEXT: # %bb.6: # %entry
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: li a2, 0
-; RV32IFD-NEXT: li a3, 1
-; RV32IFD-NEXT: bnez a2, .LBB20_10
+; RV32IFD-NEXT: sgtz a4, a2
+; RV32IFD-NEXT: and a3, a3, a5
+; RV32IFD-NEXT: bnez a1, .LBB20_9
; RV32IFD-NEXT: .LBB20_7:
-; RV32IFD-NEXT: snez a4, a3
-; RV32IFD-NEXT: bnez a1, .LBB20_11
+; RV32IFD-NEXT: snez a5, a3
+; RV32IFD-NEXT: or a0, a0, a2
+; RV32IFD-NEXT: bnez a0, .LBB20_10
+; RV32IFD-NEXT: j .LBB20_11
; RV32IFD-NEXT: .LBB20_8:
-; RV32IFD-NEXT: snez a5, a0
-; RV32IFD-NEXT: or a2, a3, a2
-; RV32IFD-NEXT: bnez a2, .LBB20_12
-; RV32IFD-NEXT: j .LBB20_13
-; RV32IFD-NEXT: .LBB20_9:
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: beqz a2, .LBB20_7
-; RV32IFD-NEXT: .LBB20_10: # %entry
-; RV32IFD-NEXT: sgtz a4, a2
-; RV32IFD-NEXT: beqz a1, .LBB20_8
-; RV32IFD-NEXT: .LBB20_11: # %entry
+; RV32IFD-NEXT: snez a4, a0
+; RV32IFD-NEXT: and a3, a3, a5
+; RV32IFD-NEXT: beqz a1, .LBB20_7
+; RV32IFD-NEXT: .LBB20_9: # %entry
; RV32IFD-NEXT: snez a5, a1
-; RV32IFD-NEXT: or a2, a3, a2
-; RV32IFD-NEXT: beqz a2, .LBB20_13
-; RV32IFD-NEXT: .LBB20_12: # %entry
+; RV32IFD-NEXT: or a0, a0, a2
+; RV32IFD-NEXT: beqz a0, .LBB20_11
+; RV32IFD-NEXT: .LBB20_10: # %entry
; RV32IFD-NEXT: mv a5, a4
-; RV32IFD-NEXT: .LBB20_13: # %entry
-; RV32IFD-NEXT: bnez a5, .LBB20_15
-; RV32IFD-NEXT: # %bb.14: # %entry
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: .LBB20_15: # %entry
+; RV32IFD-NEXT: .LBB20_11: # %entry
+; RV32IFD-NEXT: seqz a0, a5
+; RV32IFD-NEXT: addi a2, a0, -1
+; RV32IFD-NEXT: and a0, a2, a3
+; RV32IFD-NEXT: and a1, a2, a1
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a2, 16(sp)
; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lui a4, 524288
-; RV32-NEXT: addi a5, a4, -1
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: lui a3, 524288
+; RV32-NEXT: addi a5, a3, -1
; RV32-NEXT: beq a1, a5, .LBB21_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a3, a2
+; RV32-NEXT: or a7, a2, a0
; RV32-NEXT: bnez a7, .LBB21_3
; RV32-NEXT: j .LBB21_4
; RV32-NEXT: .LBB21_2:
-; RV32-NEXT: sltiu a6, a0, -1
-; RV32-NEXT: or a7, a3, a2
+; RV32-NEXT: sltiu a6, a4, -1
+; RV32-NEXT: or a7, a2, a0
; RV32-NEXT: beqz a7, .LBB21_4
; RV32-NEXT: .LBB21_3: # %entry
-; RV32-NEXT: slti a6, a2, 0
+; RV32-NEXT: slti a6, a0, 0
; RV32-NEXT: .LBB21_4: # %entry
-; RV32-NEXT: snez a7, a6
-; RV32-NEXT: addi a7, a7, -1
+; RV32-NEXT: seqz t0, a6
+; RV32-NEXT: addi a7, t0, -1
+; RV32-NEXT: neg t0, t0
; RV32-NEXT: bnez a6, .LBB21_6
; RV32-NEXT: # %bb.5: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: li a3, 0
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB21_6: # %entry
-; RV32-NEXT: or a0, a7, a0
-; RV32-NEXT: beq a1, a4, .LBB21_8
+; RV32-NEXT: or a4, t0, a4
+; RV32-NEXT: and a5, a7, a0
+; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: beq a1, a3, .LBB21_8
; RV32-NEXT: # %bb.7: # %entry
-; RV32-NEXT: sltu a4, a4, a1
+; RV32-NEXT: sltu a0, a3, a1
; RV32-NEXT: j .LBB21_9
; RV32-NEXT: .LBB21_8:
-; RV32-NEXT: snez a4, a0
+; RV32-NEXT: snez a0, a4
; RV32-NEXT: .LBB21_9: # %entry
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a3, a5, .LBB21_11
+; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: li a3, -1
+; RV32-NEXT: beq a2, a3, .LBB21_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: xori a4, a2, 1
+; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB21_11: # %entry
-; RV32-NEXT: bnez a4, .LBB21_13
+; RV32-NEXT: bnez a0, .LBB21_13
; RV32-NEXT: # %bb.12: # %entry
-; RV32-NEXT: li a0, 0
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: .LBB21_13: # %entry
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a4
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: stest_f32i64:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: feq.s a0, fa0, fa0
-; RV64-NEXT: beqz a0, .LBB21_2
-; RV64-NEXT: # %bb.1:
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
-; RV64-NEXT: .LBB21_2: # %entry
+; RV64-NEXT: feq.s a1, fa0, fa0
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ret
entry:
%conv = fptosi float %x to i128
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixunssfti@plt
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a1, 16(sp)
-; RV32-NEXT: beqz a0, .LBB22_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: j .LBB22_3
-; RV32-NEXT: .LBB22_2:
-; RV32-NEXT: seqz a2, a1
-; RV32-NEXT: .LBB22_3: # %entry
-; RV32-NEXT: xori a1, a1, 1
-; RV32-NEXT: or a1, a1, a0
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: beqz a1, .LBB22_5
-; RV32-NEXT: # %bb.4: # %entry
-; RV32-NEXT: mv a0, a2
-; RV32-NEXT: .LBB22_5: # %entry
-; RV32-NEXT: bnez a0, .LBB22_7
-; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: j .LBB22_8
-; RV32-NEXT: .LBB22_7:
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: .LBB22_8: # %entry
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a2, 12(sp)
+; RV32-NEXT: lw a3, 8(sp)
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a1, a0, -1
+; RV32-NEXT: and a0, a1, a3
+; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixunssfti@plt
-; RV64-NEXT: beqz a1, .LBB22_2
-; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB22_2: # %entry
+; RV64-NEXT: snez a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB23_2
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: beqz a1, .LBB23_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a0, a2, 0
+; RV32-NEXT: slti a2, a1, 0
; RV32-NEXT: j .LBB23_3
; RV32-NEXT: .LBB23_2:
-; RV32-NEXT: seqz a0, a3
+; RV32-NEXT: seqz a2, a0
; RV32-NEXT: .LBB23_3: # %entry
-; RV32-NEXT: xori a1, a3, 1
-; RV32-NEXT: or a4, a1, a2
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: beqz a4, .LBB23_5
+; RV32-NEXT: lw a4, 12(sp)
+; RV32-NEXT: xori a3, a0, 1
+; RV32-NEXT: or a3, a3, a1
+; RV32-NEXT: seqz a3, a3
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
+; RV32-NEXT: seqz a3, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: bnez a2, .LBB23_5
; RV32-NEXT: # %bb.4: # %entry
-; RV32-NEXT: mv a1, a0
+; RV32-NEXT: li a0, 1
; RV32-NEXT: .LBB23_5: # %entry
-; RV32-NEXT: bnez a1, .LBB23_9
+; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: and a2, a3, a1
+; RV32-NEXT: and a1, a3, a4
+; RV32-NEXT: beqz a2, .LBB23_8
; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: li a3, 1
-; RV32-NEXT: bnez a2, .LBB23_10
+; RV32-NEXT: sgtz a4, a2
+; RV32-NEXT: and a3, a3, a5
+; RV32-NEXT: bnez a1, .LBB23_9
; RV32-NEXT: .LBB23_7:
-; RV32-NEXT: snez a4, a3
-; RV32-NEXT: bnez a1, .LBB23_11
+; RV32-NEXT: snez a5, a3
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: bnez a0, .LBB23_10
+; RV32-NEXT: j .LBB23_11
; RV32-NEXT: .LBB23_8:
-; RV32-NEXT: snez a5, a0
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: bnez a2, .LBB23_12
-; RV32-NEXT: j .LBB23_13
-; RV32-NEXT: .LBB23_9:
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: beqz a2, .LBB23_7
-; RV32-NEXT: .LBB23_10: # %entry
-; RV32-NEXT: sgtz a4, a2
-; RV32-NEXT: beqz a1, .LBB23_8
-; RV32-NEXT: .LBB23_11: # %entry
+; RV32-NEXT: snez a4, a0
+; RV32-NEXT: and a3, a3, a5
+; RV32-NEXT: beqz a1, .LBB23_7
+; RV32-NEXT: .LBB23_9: # %entry
; RV32-NEXT: snez a5, a1
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB23_13
-; RV32-NEXT: .LBB23_12: # %entry
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: beqz a0, .LBB23_11
+; RV32-NEXT: .LBB23_10: # %entry
; RV32-NEXT: mv a5, a4
-; RV32-NEXT: .LBB23_13: # %entry
-; RV32-NEXT: bnez a5, .LBB23_15
-; RV32-NEXT: # %bb.14: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: .LBB23_15: # %entry
+; RV32-NEXT: .LBB23_11: # %entry
+; RV32-NEXT: seqz a0, a5
+; RV32-NEXT: addi a2, a0, -1
+; RV32-NEXT: and a0, a2, a3
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixsfti@plt
+; RV64-NEXT: mv a2, a1
; RV64-NEXT: blez a1, .LBB23_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 1
; RV64-NEXT: .LBB23_2: # %entry
-; RV64-NEXT: beqz a1, .LBB23_4
-; RV64-NEXT: # %bb.3: # %entry
; RV64-NEXT: sgtz a1, a1
-; RV64-NEXT: beqz a1, .LBB23_5
-; RV64-NEXT: j .LBB23_6
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: beqz a2, .LBB23_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: sgtz a1, a2
+; RV64-NEXT: j .LBB23_5
; RV64-NEXT: .LBB23_4:
; RV64-NEXT: snez a1, a0
-; RV64-NEXT: bnez a1, .LBB23_6
; RV64-NEXT: .LBB23_5: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB23_6: # %entry
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: call __extendhfsf2@plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a2, 16(sp)
; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lui a4, 524288
-; RV32-NEXT: addi a5, a4, -1
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: lui a3, 524288
+; RV32-NEXT: addi a5, a3, -1
; RV32-NEXT: beq a1, a5, .LBB24_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a3, a2
+; RV32-NEXT: or a7, a2, a0
; RV32-NEXT: bnez a7, .LBB24_3
; RV32-NEXT: j .LBB24_4
; RV32-NEXT: .LBB24_2:
-; RV32-NEXT: sltiu a6, a0, -1
-; RV32-NEXT: or a7, a3, a2
+; RV32-NEXT: sltiu a6, a4, -1
+; RV32-NEXT: or a7, a2, a0
; RV32-NEXT: beqz a7, .LBB24_4
; RV32-NEXT: .LBB24_3: # %entry
-; RV32-NEXT: slti a6, a2, 0
+; RV32-NEXT: slti a6, a0, 0
; RV32-NEXT: .LBB24_4: # %entry
-; RV32-NEXT: snez a7, a6
-; RV32-NEXT: addi a7, a7, -1
+; RV32-NEXT: seqz t0, a6
+; RV32-NEXT: addi a7, t0, -1
+; RV32-NEXT: neg t0, t0
; RV32-NEXT: bnez a6, .LBB24_6
; RV32-NEXT: # %bb.5: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: li a3, 0
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB24_6: # %entry
-; RV32-NEXT: or a0, a7, a0
-; RV32-NEXT: beq a1, a4, .LBB24_8
+; RV32-NEXT: or a4, t0, a4
+; RV32-NEXT: and a5, a7, a0
+; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: beq a1, a3, .LBB24_8
; RV32-NEXT: # %bb.7: # %entry
-; RV32-NEXT: sltu a4, a4, a1
+; RV32-NEXT: sltu a0, a3, a1
; RV32-NEXT: j .LBB24_9
; RV32-NEXT: .LBB24_8:
-; RV32-NEXT: snez a4, a0
+; RV32-NEXT: snez a0, a4
; RV32-NEXT: .LBB24_9: # %entry
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a3, a5, .LBB24_11
+; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: li a3, -1
+; RV32-NEXT: beq a2, a3, .LBB24_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: xori a4, a2, 1
+; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB24_11: # %entry
-; RV32-NEXT: bnez a4, .LBB24_13
+; RV32-NEXT: bnez a0, .LBB24_13
; RV32-NEXT: # %bb.12: # %entry
-; RV32-NEXT: li a0, 0
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: .LBB24_13: # %entry
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a4
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV64-NEXT: beqz a1, .LBB24_2
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: slti a4, a1, 0
-; RV64-NEXT: beqz a4, .LBB24_3
-; RV64-NEXT: j .LBB24_4
+; RV64-NEXT: j .LBB24_3
; RV64-NEXT: .LBB24_2:
; RV64-NEXT: sltu a4, a0, a3
-; RV64-NEXT: bnez a4, .LBB24_4
; RV64-NEXT: .LBB24_3: # %entry
-; RV64-NEXT: li a1, 0
-; RV64-NEXT: mv a0, a3
-; RV64-NEXT: .LBB24_4: # %entry
-; RV64-NEXT: slli a3, a2, 63
-; RV64-NEXT: beq a1, a2, .LBB24_6
-; RV64-NEXT: # %bb.5: # %entry
-; RV64-NEXT: slti a1, a1, 0
-; RV64-NEXT: xori a1, a1, 1
-; RV64-NEXT: beqz a1, .LBB24_7
-; RV64-NEXT: j .LBB24_8
-; RV64-NEXT: .LBB24_6:
-; RV64-NEXT: sltu a1, a3, a0
-; RV64-NEXT: bnez a1, .LBB24_8
-; RV64-NEXT: .LBB24_7: # %entry
+; RV64-NEXT: seqz a5, a4
+; RV64-NEXT: addi a5, a5, -1
+; RV64-NEXT: and a5, a5, a1
+; RV64-NEXT: bnez a4, .LBB24_5
+; RV64-NEXT: # %bb.4: # %entry
; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB24_5: # %entry
+; RV64-NEXT: slli a1, a2, 63
+; RV64-NEXT: beq a5, a2, .LBB24_7
+; RV64-NEXT: # %bb.6: # %entry
+; RV64-NEXT: slti a2, a5, 0
+; RV64-NEXT: xori a2, a2, 1
+; RV64-NEXT: beqz a2, .LBB24_8
+; RV64-NEXT: j .LBB24_9
+; RV64-NEXT: .LBB24_7:
+; RV64-NEXT: sltu a2, a1, a0
+; RV64-NEXT: bnez a2, .LBB24_9
; RV64-NEXT: .LBB24_8: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB24_9: # %entry
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: call __extendhfsf2@plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixunssfti@plt
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a1, 16(sp)
-; RV32-NEXT: beqz a0, .LBB25_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: j .LBB25_3
-; RV32-NEXT: .LBB25_2:
-; RV32-NEXT: seqz a2, a1
-; RV32-NEXT: .LBB25_3: # %entry
-; RV32-NEXT: xori a1, a1, 1
-; RV32-NEXT: or a1, a1, a0
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: beqz a1, .LBB25_5
-; RV32-NEXT: # %bb.4: # %entry
-; RV32-NEXT: mv a0, a2
-; RV32-NEXT: .LBB25_5: # %entry
-; RV32-NEXT: bnez a0, .LBB25_7
-; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: j .LBB25_8
-; RV32-NEXT: .LBB25_7:
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: .LBB25_8: # %entry
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a2, 12(sp)
+; RV32-NEXT: lw a3, 8(sp)
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a1, a0, -1
+; RV32-NEXT: and a0, a1, a3
+; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV64-NEXT: fmv.x.w a0, fa0
; RV64-NEXT: call __extendhfsf2@plt
; RV64-NEXT: call __fixunssfti@plt
-; RV64-NEXT: beqz a1, .LBB25_2
-; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB25_2: # %entry
+; RV64-NEXT: snez a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: call __extendhfsf2@plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB26_2
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: beqz a1, .LBB26_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a0, a2, 0
+; RV32-NEXT: slti a2, a1, 0
; RV32-NEXT: j .LBB26_3
; RV32-NEXT: .LBB26_2:
-; RV32-NEXT: seqz a0, a3
+; RV32-NEXT: seqz a2, a0
; RV32-NEXT: .LBB26_3: # %entry
-; RV32-NEXT: xori a1, a3, 1
-; RV32-NEXT: or a4, a1, a2
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: beqz a4, .LBB26_5
+; RV32-NEXT: lw a4, 12(sp)
+; RV32-NEXT: xori a3, a0, 1
+; RV32-NEXT: or a3, a3, a1
+; RV32-NEXT: seqz a3, a3
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
+; RV32-NEXT: seqz a3, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: bnez a2, .LBB26_5
; RV32-NEXT: # %bb.4: # %entry
-; RV32-NEXT: mv a1, a0
+; RV32-NEXT: li a0, 1
; RV32-NEXT: .LBB26_5: # %entry
-; RV32-NEXT: bnez a1, .LBB26_9
+; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: and a2, a3, a1
+; RV32-NEXT: and a1, a3, a4
+; RV32-NEXT: beqz a2, .LBB26_8
; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: li a3, 1
-; RV32-NEXT: bnez a2, .LBB26_10
+; RV32-NEXT: sgtz a4, a2
+; RV32-NEXT: and a3, a3, a5
+; RV32-NEXT: bnez a1, .LBB26_9
; RV32-NEXT: .LBB26_7:
-; RV32-NEXT: snez a4, a3
-; RV32-NEXT: bnez a1, .LBB26_11
+; RV32-NEXT: snez a5, a3
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: bnez a0, .LBB26_10
+; RV32-NEXT: j .LBB26_11
; RV32-NEXT: .LBB26_8:
-; RV32-NEXT: snez a5, a0
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: bnez a2, .LBB26_12
-; RV32-NEXT: j .LBB26_13
-; RV32-NEXT: .LBB26_9:
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: beqz a2, .LBB26_7
-; RV32-NEXT: .LBB26_10: # %entry
-; RV32-NEXT: sgtz a4, a2
-; RV32-NEXT: beqz a1, .LBB26_8
-; RV32-NEXT: .LBB26_11: # %entry
+; RV32-NEXT: snez a4, a0
+; RV32-NEXT: and a3, a3, a5
+; RV32-NEXT: beqz a1, .LBB26_7
+; RV32-NEXT: .LBB26_9: # %entry
; RV32-NEXT: snez a5, a1
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB26_13
-; RV32-NEXT: .LBB26_12: # %entry
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: beqz a0, .LBB26_11
+; RV32-NEXT: .LBB26_10: # %entry
; RV32-NEXT: mv a5, a4
-; RV32-NEXT: .LBB26_13: # %entry
-; RV32-NEXT: bnez a5, .LBB26_15
-; RV32-NEXT: # %bb.14: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: .LBB26_15: # %entry
+; RV32-NEXT: .LBB26_11: # %entry
+; RV32-NEXT: seqz a0, a5
+; RV32-NEXT: addi a2, a0, -1
+; RV32-NEXT: and a0, a2, a3
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV64-NEXT: fmv.x.w a0, fa0
; RV64-NEXT: call __extendhfsf2@plt
; RV64-NEXT: call __fixsfti@plt
+; RV64-NEXT: mv a2, a1
; RV64-NEXT: blez a1, .LBB26_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 1
; RV64-NEXT: .LBB26_2: # %entry
-; RV64-NEXT: beqz a1, .LBB26_4
-; RV64-NEXT: # %bb.3: # %entry
; RV64-NEXT: sgtz a1, a1
-; RV64-NEXT: beqz a1, .LBB26_5
-; RV64-NEXT: j .LBB26_6
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: beqz a2, .LBB26_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: sgtz a1, a2
+; RV64-NEXT: j .LBB26_5
; RV64-NEXT: .LBB26_4:
; RV64-NEXT: snez a1, a0
-; RV64-NEXT: bnez a1, .LBB26_6
; RV64-NEXT: .LBB26_5: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB26_6: # %entry
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: bgeu a0, a4, .LBB27_10
; RV32IF-NEXT: .LBB27_2: # %entry
-; RV32IF-NEXT: bnez a1, .LBB27_11
+; RV32IF-NEXT: beqz a1, .LBB27_4
; RV32IF-NEXT: .LBB27_3: # %entry
-; RV32IF-NEXT: bgez a1, .LBB27_12
+; RV32IF-NEXT: mv a0, a3
; RV32IF-NEXT: .LBB27_4: # %entry
+; RV32IF-NEXT: slti a3, a1, 0
+; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: and a1, a3, a1
; RV32IF-NEXT: mv a3, a0
-; RV32IF-NEXT: bltz a1, .LBB27_13
-; RV32IF-NEXT: .LBB27_5: # %entry
-; RV32IF-NEXT: bgeu a2, a0, .LBB27_14
+; RV32IF-NEXT: bltz a1, .LBB27_11
+; RV32IF-NEXT: # %bb.5: # %entry
+; RV32IF-NEXT: bgeu a2, a0, .LBB27_12
; RV32IF-NEXT: .LBB27_6: # %entry
; RV32IF-NEXT: li a2, -1
; RV32IF-NEXT: beq a1, a2, .LBB27_8
; RV32IF-NEXT: bltu a0, a4, .LBB27_2
; RV32IF-NEXT: .LBB27_10: # %entry
; RV32IF-NEXT: mv a0, a4
-; RV32IF-NEXT: beqz a1, .LBB27_3
+; RV32IF-NEXT: bnez a1, .LBB27_3
+; RV32IF-NEXT: j .LBB27_4
; RV32IF-NEXT: .LBB27_11: # %entry
-; RV32IF-NEXT: mv a0, a3
-; RV32IF-NEXT: bltz a1, .LBB27_4
-; RV32IF-NEXT: .LBB27_12: # %entry
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: mv a3, a0
-; RV32IF-NEXT: bgez a1, .LBB27_5
-; RV32IF-NEXT: .LBB27_13: # %entry
; RV32IF-NEXT: lui a3, 524288
; RV32IF-NEXT: bltu a2, a0, .LBB27_6
-; RV32IF-NEXT: .LBB27_14: # %entry
+; RV32IF-NEXT: .LBB27_12: # %entry
; RV32IF-NEXT: lui a0, 524288
; RV32IF-NEXT: li a2, -1
; RV32IF-NEXT: bne a1, a2, .LBB27_7
;
; RV32IFD-LABEL: stest_f64i32_mm:
; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB27_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
-; RV32IFD-NEXT: .LBB27_2: # %entry
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: stest_f64i32_mm:
;
; RV32IFD-LABEL: utest_f64i32_mm:
; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB28_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
-; RV32IFD-NEXT: .LBB28_2: # %entry
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: utest_f64i32_mm:
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: .cfi_offset ra, -4
; RV32IF-NEXT: call __fixdfdi@plt
-; RV32IF-NEXT: bnez a1, .LBB29_6
+; RV32IF-NEXT: slti a2, a1, 0
+; RV32IF-NEXT: beqz a1, .LBB29_2
; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: bgez a1, .LBB29_7
+; RV32IF-NEXT: addi a3, a2, -1
+; RV32IF-NEXT: or a0, a3, a0
; RV32IF-NEXT: .LBB29_2: # %entry
-; RV32IF-NEXT: mv a2, a0
-; RV32IF-NEXT: blez a1, .LBB29_8
-; RV32IF-NEXT: .LBB29_3: # %entry
-; RV32IF-NEXT: beqz a1, .LBB29_5
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: beqz a1, .LBB29_4
+; RV32IF-NEXT: # %bb.3: # %entry
+; RV32IF-NEXT: sgtz a1, a1
+; RV32IF-NEXT: neg a1, a1
+; RV32IF-NEXT: and a0, a1, a0
; RV32IF-NEXT: .LBB29_4: # %entry
-; RV32IF-NEXT: mv a0, a2
-; RV32IF-NEXT: .LBB29_5: # %entry
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
-; RV32IF-NEXT: .LBB29_6: # %entry
-; RV32IF-NEXT: slti a2, a1, 0
-; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: bltz a1, .LBB29_2
-; RV32IF-NEXT: .LBB29_7: # %entry
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: mv a2, a0
-; RV32IF-NEXT: bgtz a1, .LBB29_3
-; RV32IF-NEXT: .LBB29_8: # %entry
-; RV32IF-NEXT: li a2, 0
-; RV32IF-NEXT: bnez a1, .LBB29_4
-; RV32IF-NEXT: j .LBB29_5
;
; RV64IF-LABEL: ustest_f64i32_mm:
; RV64IF: # %bb.0: # %entry
; RV64IF-NEXT: # %bb.1: # %entry
; RV64IF-NEXT: mv a0, a1
; RV64IF-NEXT: .LBB29_2: # %entry
-; RV64IF-NEXT: bgtz a0, .LBB29_4
-; RV64IF-NEXT: # %bb.3: # %entry
-; RV64IF-NEXT: li a0, 0
-; RV64IF-NEXT: .LBB29_4: # %entry
+; RV64IF-NEXT: sgtz a1, a0
+; RV64IF-NEXT: neg a1, a1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
;
; RV32IFD-LABEL: ustest_f64i32_mm:
; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: beqz a0, .LBB29_2
-; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
-; RV32IFD-NEXT: .LBB29_2: # %entry
+; RV32IFD-NEXT: feq.d a1, fa0, fa0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: ustest_f64i32_mm:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
; RV64IFD-NEXT: li a1, -1
; RV64IFD-NEXT: srli a1, a1, 32
-; RV64IFD-NEXT: bge a0, a1, .LBB29_3
+; RV64IFD-NEXT: blt a0, a1, .LBB29_2
; RV64IFD-NEXT: # %bb.1: # %entry
-; RV64IFD-NEXT: blez a0, .LBB29_4
-; RV64IFD-NEXT: .LBB29_2: # %entry
-; RV64IFD-NEXT: ret
-; RV64IFD-NEXT: .LBB29_3: # %entry
; RV64IFD-NEXT: mv a0, a1
-; RV64IFD-NEXT: bgtz a0, .LBB29_2
-; RV64IFD-NEXT: .LBB29_4: # %entry
-; RV64IFD-NEXT: li a0, 0
+; RV64IFD-NEXT: .LBB29_2: # %entry
+; RV64IFD-NEXT: sgtz a1, a0
+; RV64IFD-NEXT: neg a1, a1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
entry:
%conv = fptosi double %x to i64
define i32 @stest_f32i32_mm(float %x) {
; RV32-LABEL: stest_f32i32_mm:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: feq.s a0, fa0, fa0
-; RV32-NEXT: beqz a0, .LBB30_2
-; RV32-NEXT: # %bb.1:
; RV32-NEXT: fcvt.w.s a0, fa0, rtz
-; RV32-NEXT: .LBB30_2: # %entry
+; RV32-NEXT: feq.s a1, fa0, fa0
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: stest_f32i32_mm:
define i32 @utest_f32i32_mm(float %x) {
; RV32-LABEL: utest_f32i32_mm:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: feq.s a0, fa0, fa0
-; RV32-NEXT: beqz a0, .LBB31_2
-; RV32-NEXT: # %bb.1:
; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
-; RV32-NEXT: .LBB31_2: # %entry
+; RV32-NEXT: feq.s a1, fa0, fa0
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: utest_f32i32_mm:
define i32 @ustest_f32i32_mm(float %x) {
; RV32-LABEL: ustest_f32i32_mm:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: feq.s a0, fa0, fa0
-; RV32-NEXT: beqz a0, .LBB32_2
-; RV32-NEXT: # %bb.1:
; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
-; RV32-NEXT: .LBB32_2: # %entry
+; RV32-NEXT: feq.s a1, fa0, fa0
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: ustest_f32i32_mm:
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: li a1, -1
; RV64-NEXT: srli a1, a1, 32
-; RV64-NEXT: bge a0, a1, .LBB32_3
+; RV64-NEXT: blt a0, a1, .LBB32_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: blez a0, .LBB32_4
-; RV64-NEXT: .LBB32_2: # %entry
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB32_3: # %entry
; RV64-NEXT: mv a0, a1
-; RV64-NEXT: bgtz a0, .LBB32_2
-; RV64-NEXT: .LBB32_4: # %entry
-; RV64-NEXT: li a0, 0
+; RV64-NEXT: .LBB32_2: # %entry
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ret
entry:
%conv = fptosi float %x to i64
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: bgeu a0, a4, .LBB33_10
; RV32-NEXT: .LBB33_2: # %entry
-; RV32-NEXT: bnez a1, .LBB33_11
+; RV32-NEXT: beqz a1, .LBB33_4
; RV32-NEXT: .LBB33_3: # %entry
-; RV32-NEXT: bgez a1, .LBB33_12
+; RV32-NEXT: mv a0, a3
; RV32-NEXT: .LBB33_4: # %entry
+; RV32-NEXT: slti a3, a1, 0
+; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a1, a3, a1
; RV32-NEXT: mv a3, a0
-; RV32-NEXT: bltz a1, .LBB33_13
-; RV32-NEXT: .LBB33_5: # %entry
-; RV32-NEXT: bgeu a2, a0, .LBB33_14
+; RV32-NEXT: bltz a1, .LBB33_11
+; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: bgeu a2, a0, .LBB33_12
; RV32-NEXT: .LBB33_6: # %entry
; RV32-NEXT: li a2, -1
; RV32-NEXT: beq a1, a2, .LBB33_8
; RV32-NEXT: bltu a0, a4, .LBB33_2
; RV32-NEXT: .LBB33_10: # %entry
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: beqz a1, .LBB33_3
+; RV32-NEXT: bnez a1, .LBB33_3
+; RV32-NEXT: j .LBB33_4
; RV32-NEXT: .LBB33_11: # %entry
-; RV32-NEXT: mv a0, a3
-; RV32-NEXT: bltz a1, .LBB33_4
-; RV32-NEXT: .LBB33_12: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a3, a0
-; RV32-NEXT: bgez a1, .LBB33_5
-; RV32-NEXT: .LBB33_13: # %entry
; RV32-NEXT: lui a3, 524288
; RV32-NEXT: bltu a2, a0, .LBB33_6
-; RV32-NEXT: .LBB33_14: # %entry
+; RV32-NEXT: .LBB33_12: # %entry
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: li a2, -1
; RV32-NEXT: bne a1, a2, .LBB33_7
; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2@plt
; RV32-NEXT: call __fixsfdi@plt
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bgez a1, .LBB35_6
+; RV32-NEXT: slti a2, a1, 0
+; RV32-NEXT: beqz a1, .LBB35_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: bnez a1, .LBB35_7
+; RV32-NEXT: addi a3, a2, -1
+; RV32-NEXT: or a0, a3, a0
; RV32-NEXT: .LBB35_2: # %entry
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: blez a2, .LBB35_8
-; RV32-NEXT: .LBB35_3: # %entry
-; RV32-NEXT: beqz a2, .LBB35_5
-; RV32-NEXT: .LBB35_4: # %entry
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB35_5: # %entry
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: beqz a1, .LBB35_4
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: sgtz a1, a1
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: .LBB35_4: # %entry
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
-; RV32-NEXT: .LBB35_6: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: beqz a1, .LBB35_2
-; RV32-NEXT: .LBB35_7: # %entry
-; RV32-NEXT: slti a1, a1, 0
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: or a0, a1, a0
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: bgtz a2, .LBB35_3
-; RV32-NEXT: .LBB35_8: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: bnez a2, .LBB35_4
-; RV32-NEXT: j .LBB35_5
;
; RV64-LABEL: ustest_f16i32_mm:
; RV64: # %bb.0: # %entry
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB35_2: # %entry
-; RV64-NEXT: bgtz a0, .LBB35_4
-; RV64-NEXT: # %bb.3: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB35_4: # %entry
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: mv a0, a1
; RV32IF-NEXT: .LBB38_2: # %entry
-; RV32IF-NEXT: bgtz a0, .LBB38_4
-; RV32IF-NEXT: # %bb.3: # %entry
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: .LBB38_4: # %entry
+; RV32IF-NEXT: sgtz a1, a0
+; RV32IF-NEXT: neg a1, a1
+; RV32IF-NEXT: and a0, a1, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
; RV64IF-NEXT: # %bb.1: # %entry
; RV64IF-NEXT: mv a0, a1
; RV64IF-NEXT: .LBB38_2: # %entry
-; RV64IF-NEXT: bgtz a0, .LBB38_4
-; RV64IF-NEXT: # %bb.3: # %entry
-; RV64IF-NEXT: li a0, 0
-; RV64IF-NEXT: .LBB38_4: # %entry
+; RV64IF-NEXT: sgtz a1, a0
+; RV64IF-NEXT: neg a1, a1
+; RV64IF-NEXT: and a0, a1, a0
; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
; RV32IFD-NEXT: lui a1, 16
; RV32IFD-NEXT: addi a1, a1, -1
-; RV32IFD-NEXT: bge a0, a1, .LBB38_3
+; RV32IFD-NEXT: blt a0, a1, .LBB38_2
; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: blez a0, .LBB38_4
-; RV32IFD-NEXT: .LBB38_2: # %entry
-; RV32IFD-NEXT: ret
-; RV32IFD-NEXT: .LBB38_3: # %entry
; RV32IFD-NEXT: mv a0, a1
-; RV32IFD-NEXT: bgtz a0, .LBB38_2
-; RV32IFD-NEXT: .LBB38_4: # %entry
-; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: .LBB38_2: # %entry
+; RV32IFD-NEXT: sgtz a1, a0
+; RV32IFD-NEXT: neg a1, a1
+; RV32IFD-NEXT: and a0, a1, a0
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: ustest_f64i16_mm:
; RV64IFD-NEXT: fcvt.w.d a0, fa0, rtz
; RV64IFD-NEXT: lui a1, 16
; RV64IFD-NEXT: addiw a1, a1, -1
-; RV64IFD-NEXT: bge a0, a1, .LBB38_3
+; RV64IFD-NEXT: blt a0, a1, .LBB38_2
; RV64IFD-NEXT: # %bb.1: # %entry
-; RV64IFD-NEXT: blez a0, .LBB38_4
-; RV64IFD-NEXT: .LBB38_2: # %entry
-; RV64IFD-NEXT: ret
-; RV64IFD-NEXT: .LBB38_3: # %entry
; RV64IFD-NEXT: mv a0, a1
-; RV64IFD-NEXT: bgtz a0, .LBB38_2
-; RV64IFD-NEXT: .LBB38_4: # %entry
-; RV64IFD-NEXT: li a0, 0
+; RV64IFD-NEXT: .LBB38_2: # %entry
+; RV64IFD-NEXT: sgtz a1, a0
+; RV64IFD-NEXT: neg a1, a1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
entry:
%conv = fptosi double %x to i32
; RV32-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-NEXT: lui a1, 16
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: bge a0, a1, .LBB41_3
+; RV32-NEXT: blt a0, a1, .LBB41_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: blez a0, .LBB41_4
-; RV32-NEXT: .LBB41_2: # %entry
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB41_3: # %entry
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: bgtz a0, .LBB41_2
-; RV32-NEXT: .LBB41_4: # %entry
-; RV32-NEXT: li a0, 0
+; RV32-NEXT: .LBB41_2: # %entry
+; RV32-NEXT: sgtz a1, a0
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: ustest_f32i16_mm:
; RV64-NEXT: fcvt.w.s a0, fa0, rtz
; RV64-NEXT: lui a1, 16
; RV64-NEXT: addiw a1, a1, -1
-; RV64-NEXT: bge a0, a1, .LBB41_3
+; RV64-NEXT: blt a0, a1, .LBB41_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: blez a0, .LBB41_4
-; RV64-NEXT: .LBB41_2: # %entry
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB41_3: # %entry
; RV64-NEXT: mv a0, a1
-; RV64-NEXT: bgtz a0, .LBB41_2
-; RV64-NEXT: .LBB41_4: # %entry
-; RV64-NEXT: li a0, 0
+; RV64-NEXT: .LBB41_2: # %entry
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ret
entry:
%conv = fptosi float %x to i32
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: mv a0, a1
; RV32-NEXT: .LBB44_2: # %entry
-; RV32-NEXT: bgtz a0, .LBB44_4
-; RV32-NEXT: # %bb.3: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: .LBB44_4: # %entry
+; RV32-NEXT: sgtz a1, a0
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB44_2: # %entry
-; RV64-NEXT: bgtz a0, .LBB44_4
-; RV64-NEXT: # %bb.3: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: .LBB44_4: # %entry
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti@plt
-; RV32IF-NEXT: lw a2, 20(sp)
-; RV32IF-NEXT: lw a7, 8(sp)
-; RV32IF-NEXT: lw a5, 12(sp)
-; RV32IF-NEXT: lw a0, 16(sp)
-; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: addi a1, a4, -1
-; RV32IF-NEXT: mv a3, a7
-; RV32IF-NEXT: bne a5, a1, .LBB45_17
+; RV32IF-NEXT: lw a0, 20(sp)
+; RV32IF-NEXT: lw t0, 8(sp)
+; RV32IF-NEXT: lw a4, 12(sp)
+; RV32IF-NEXT: lw a1, 16(sp)
+; RV32IF-NEXT: lui a3, 524288
+; RV32IF-NEXT: addi a6, a3, -1
+; RV32IF-NEXT: mv a2, t0
+; RV32IF-NEXT: beq a4, a6, .LBB45_2
; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: or a6, a0, a2
-; RV32IF-NEXT: bnez a6, .LBB45_18
+; RV32IF-NEXT: sltu a2, a4, a6
+; RV32IF-NEXT: addi a2, a2, -1
+; RV32IF-NEXT: or a2, a2, t0
; RV32IF-NEXT: .LBB45_2: # %entry
-; RV32IF-NEXT: mv a7, a5
-; RV32IF-NEXT: bgez a2, .LBB45_19
-; RV32IF-NEXT: .LBB45_3: # %entry
-; RV32IF-NEXT: bgeu a5, a1, .LBB45_20
+; RV32IF-NEXT: or a7, a1, a0
+; RV32IF-NEXT: slti a5, a0, 0
+; RV32IF-NEXT: bnez a7, .LBB45_16
+; RV32IF-NEXT: # %bb.3: # %entry
+; RV32IF-NEXT: mv t0, a4
+; RV32IF-NEXT: bgez a0, .LBB45_17
; RV32IF-NEXT: .LBB45_4: # %entry
-; RV32IF-NEXT: bnez a6, .LBB45_21
+; RV32IF-NEXT: bgeu a4, a6, .LBB45_18
; RV32IF-NEXT: .LBB45_5: # %entry
-; RV32IF-NEXT: li a6, 0
-; RV32IF-NEXT: bnez a2, .LBB45_22
+; RV32IF-NEXT: beqz a7, .LBB45_7
; RV32IF-NEXT: .LBB45_6: # %entry
-; RV32IF-NEXT: bgez a2, .LBB45_23
+; RV32IF-NEXT: mv a4, t0
; RV32IF-NEXT: .LBB45_7: # %entry
-; RV32IF-NEXT: mv a0, a5
-; RV32IF-NEXT: bltz a2, .LBB45_24
-; RV32IF-NEXT: .LBB45_8: # %entry
-; RV32IF-NEXT: mv a1, a5
-; RV32IF-NEXT: bltu a4, a5, .LBB45_10
+; RV32IF-NEXT: srai a6, a0, 31
+; RV32IF-NEXT: and a1, a6, a1
+; RV32IF-NEXT: seqz a6, a0
+; RV32IF-NEXT: neg a5, a5
+; RV32IF-NEXT: and a5, a5, a0
+; RV32IF-NEXT: addi a6, a6, -1
+; RV32IF-NEXT: mv a0, a4
+; RV32IF-NEXT: bgez a5, .LBB45_9
+; RV32IF-NEXT: # %bb.8: # %entry
+; RV32IF-NEXT: lui a0, 524288
; RV32IF-NEXT: .LBB45_9: # %entry
+; RV32IF-NEXT: and a6, a6, a1
+; RV32IF-NEXT: mv a1, a4
+; RV32IF-NEXT: bltu a3, a4, .LBB45_11
+; RV32IF-NEXT: # %bb.10: # %entry
; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: .LBB45_10: # %entry
-; RV32IF-NEXT: and a6, a6, a2
+; RV32IF-NEXT: .LBB45_11: # %entry
+; RV32IF-NEXT: and a6, a6, a5
; RV32IF-NEXT: li a7, -1
-; RV32IF-NEXT: bne a6, a7, .LBB45_25
-; RV32IF-NEXT: # %bb.11: # %entry
-; RV32IF-NEXT: mv t0, a3
-; RV32IF-NEXT: bgeu a4, a5, .LBB45_26
-; RV32IF-NEXT: .LBB45_12: # %entry
-; RV32IF-NEXT: mv a0, a3
-; RV32IF-NEXT: bne a5, a4, .LBB45_27
+; RV32IF-NEXT: bne a6, a7, .LBB45_19
+; RV32IF-NEXT: # %bb.12: # %entry
+; RV32IF-NEXT: mv a0, a2
+; RV32IF-NEXT: bne a4, a3, .LBB45_20
; RV32IF-NEXT: .LBB45_13: # %entry
-; RV32IF-NEXT: bltz a2, .LBB45_28
+; RV32IF-NEXT: beq a6, a7, .LBB45_15
; RV32IF-NEXT: .LBB45_14: # %entry
-; RV32IF-NEXT: beq a6, a7, .LBB45_16
+; RV32IF-NEXT: slti a0, a5, 0
+; RV32IF-NEXT: addi a0, a0, -1
+; RV32IF-NEXT: and a0, a0, a2
; RV32IF-NEXT: .LBB45_15: # %entry
-; RV32IF-NEXT: mv a0, a3
-; RV32IF-NEXT: .LBB45_16: # %entry
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
+; RV32IF-NEXT: .LBB45_16: # %entry
+; RV32IF-NEXT: addi a2, a5, -1
+; RV32IF-NEXT: or a2, a2, t0
+; RV32IF-NEXT: mv t0, a4
+; RV32IF-NEXT: bltz a0, .LBB45_4
; RV32IF-NEXT: .LBB45_17: # %entry
-; RV32IF-NEXT: sltu a3, a5, a1
-; RV32IF-NEXT: addi a3, a3, -1
-; RV32IF-NEXT: or a3, a3, a7
-; RV32IF-NEXT: or a6, a0, a2
-; RV32IF-NEXT: beqz a6, .LBB45_2
+; RV32IF-NEXT: mv t0, a6
+; RV32IF-NEXT: bltu a4, a6, .LBB45_5
; RV32IF-NEXT: .LBB45_18: # %entry
-; RV32IF-NEXT: slti a3, a2, 0
-; RV32IF-NEXT: addi a3, a3, -1
-; RV32IF-NEXT: or a3, a3, a7
-; RV32IF-NEXT: mv a7, a5
-; RV32IF-NEXT: bltz a2, .LBB45_3
+; RV32IF-NEXT: mv a4, a6
+; RV32IF-NEXT: bnez a7, .LBB45_6
+; RV32IF-NEXT: j .LBB45_7
; RV32IF-NEXT: .LBB45_19: # %entry
-; RV32IF-NEXT: mv a7, a1
-; RV32IF-NEXT: bltu a5, a1, .LBB45_4
-; RV32IF-NEXT: .LBB45_20: # %entry
-; RV32IF-NEXT: mv a5, a1
-; RV32IF-NEXT: beqz a6, .LBB45_5
-; RV32IF-NEXT: .LBB45_21: # %entry
-; RV32IF-NEXT: mv a5, a7
-; RV32IF-NEXT: li a6, 0
-; RV32IF-NEXT: beqz a2, .LBB45_6
-; RV32IF-NEXT: .LBB45_22: # %entry
-; RV32IF-NEXT: srai a1, a2, 31
-; RV32IF-NEXT: and a6, a1, a0
-; RV32IF-NEXT: bltz a2, .LBB45_7
-; RV32IF-NEXT: .LBB45_23: # %entry
-; RV32IF-NEXT: li a2, 0
-; RV32IF-NEXT: mv a0, a5
-; RV32IF-NEXT: bgez a2, .LBB45_8
-; RV32IF-NEXT: .LBB45_24: # %entry
-; RV32IF-NEXT: lui a0, 524288
-; RV32IF-NEXT: mv a1, a5
-; RV32IF-NEXT: bgeu a4, a5, .LBB45_9
-; RV32IF-NEXT: j .LBB45_10
-; RV32IF-NEXT: .LBB45_25: # %entry
; RV32IF-NEXT: mv a1, a0
-; RV32IF-NEXT: mv t0, a3
-; RV32IF-NEXT: bltu a4, a5, .LBB45_12
-; RV32IF-NEXT: .LBB45_26: # %entry
-; RV32IF-NEXT: li t0, 0
-; RV32IF-NEXT: mv a0, a3
-; RV32IF-NEXT: beq a5, a4, .LBB45_13
-; RV32IF-NEXT: .LBB45_27: # %entry
-; RV32IF-NEXT: mv a0, t0
-; RV32IF-NEXT: bgez a2, .LBB45_14
-; RV32IF-NEXT: .LBB45_28: # %entry
-; RV32IF-NEXT: li a3, 0
-; RV32IF-NEXT: bne a6, a7, .LBB45_15
-; RV32IF-NEXT: j .LBB45_16
+; RV32IF-NEXT: mv a0, a2
+; RV32IF-NEXT: beq a4, a3, .LBB45_13
+; RV32IF-NEXT: .LBB45_20: # %entry
+; RV32IF-NEXT: sltu a0, a3, a4
+; RV32IF-NEXT: neg a0, a0
+; RV32IF-NEXT: and a0, a0, a2
+; RV32IF-NEXT: bne a6, a7, .LBB45_14
+; RV32IF-NEXT: j .LBB45_15
;
; RV64IF-LABEL: stest_f64i64_mm:
; RV64IF: # %bb.0: # %entry
; RV64IF-NEXT: li a2, -1
; RV64IF-NEXT: srli a4, a2, 1
; RV64IF-NEXT: mv a3, a0
-; RV64IF-NEXT: bgez a1, .LBB45_10
+; RV64IF-NEXT: bgez a1, .LBB45_9
; RV64IF-NEXT: # %bb.1: # %entry
-; RV64IF-NEXT: bgeu a0, a4, .LBB45_11
+; RV64IF-NEXT: bgeu a0, a4, .LBB45_10
; RV64IF-NEXT: .LBB45_2: # %entry
-; RV64IF-NEXT: bnez a1, .LBB45_12
+; RV64IF-NEXT: beqz a1, .LBB45_4
; RV64IF-NEXT: .LBB45_3: # %entry
-; RV64IF-NEXT: bltz a1, .LBB45_5
+; RV64IF-NEXT: mv a0, a3
; RV64IF-NEXT: .LBB45_4: # %entry
-; RV64IF-NEXT: li a1, 0
-; RV64IF-NEXT: .LBB45_5: # %entry
+; RV64IF-NEXT: slti a3, a1, 0
+; RV64IF-NEXT: neg a3, a3
+; RV64IF-NEXT: and a1, a3, a1
; RV64IF-NEXT: slli a4, a2, 63
; RV64IF-NEXT: mv a3, a0
-; RV64IF-NEXT: bltz a1, .LBB45_13
-; RV64IF-NEXT: # %bb.6: # %entry
-; RV64IF-NEXT: bgeu a4, a0, .LBB45_14
+; RV64IF-NEXT: bltz a1, .LBB45_11
+; RV64IF-NEXT: # %bb.5: # %entry
+; RV64IF-NEXT: bgeu a4, a0, .LBB45_12
+; RV64IF-NEXT: .LBB45_6: # %entry
+; RV64IF-NEXT: beq a1, a2, .LBB45_8
; RV64IF-NEXT: .LBB45_7: # %entry
-; RV64IF-NEXT: beq a1, a2, .LBB45_9
-; RV64IF-NEXT: .LBB45_8: # %entry
; RV64IF-NEXT: mv a0, a3
-; RV64IF-NEXT: .LBB45_9: # %entry
+; RV64IF-NEXT: .LBB45_8: # %entry
; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
-; RV64IF-NEXT: .LBB45_10: # %entry
+; RV64IF-NEXT: .LBB45_9: # %entry
; RV64IF-NEXT: mv a3, a4
; RV64IF-NEXT: bltu a0, a4, .LBB45_2
-; RV64IF-NEXT: .LBB45_11: # %entry
+; RV64IF-NEXT: .LBB45_10: # %entry
; RV64IF-NEXT: mv a0, a4
-; RV64IF-NEXT: beqz a1, .LBB45_3
-; RV64IF-NEXT: .LBB45_12: # %entry
-; RV64IF-NEXT: mv a0, a3
-; RV64IF-NEXT: bgez a1, .LBB45_4
-; RV64IF-NEXT: j .LBB45_5
-; RV64IF-NEXT: .LBB45_13: # %entry
+; RV64IF-NEXT: bnez a1, .LBB45_3
+; RV64IF-NEXT: j .LBB45_4
+; RV64IF-NEXT: .LBB45_11: # %entry
; RV64IF-NEXT: mv a3, a4
-; RV64IF-NEXT: bltu a4, a0, .LBB45_7
-; RV64IF-NEXT: .LBB45_14: # %entry
+; RV64IF-NEXT: bltu a4, a0, .LBB45_6
+; RV64IF-NEXT: .LBB45_12: # %entry
; RV64IF-NEXT: mv a0, a4
-; RV64IF-NEXT: bne a1, a2, .LBB45_8
-; RV64IF-NEXT: j .LBB45_9
+; RV64IF-NEXT: bne a1, a2, .LBB45_7
+; RV64IF-NEXT: j .LBB45_8
;
; RV32IFD-LABEL: stest_f64i64_mm:
; RV32IFD: # %bb.0: # %entry
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti@plt
-; RV32IFD-NEXT: lw a2, 20(sp)
-; RV32IFD-NEXT: lw a7, 8(sp)
-; RV32IFD-NEXT: lw a5, 12(sp)
-; RV32IFD-NEXT: lw a0, 16(sp)
-; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: addi a1, a4, -1
-; RV32IFD-NEXT: mv a3, a7
-; RV32IFD-NEXT: bne a5, a1, .LBB45_17
+; RV32IFD-NEXT: lw a0, 20(sp)
+; RV32IFD-NEXT: lw t0, 8(sp)
+; RV32IFD-NEXT: lw a4, 12(sp)
+; RV32IFD-NEXT: lw a1, 16(sp)
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: addi a6, a3, -1
+; RV32IFD-NEXT: mv a2, t0
+; RV32IFD-NEXT: beq a4, a6, .LBB45_2
; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: or a6, a0, a2
-; RV32IFD-NEXT: bnez a6, .LBB45_18
+; RV32IFD-NEXT: sltu a2, a4, a6
+; RV32IFD-NEXT: addi a2, a2, -1
+; RV32IFD-NEXT: or a2, a2, t0
; RV32IFD-NEXT: .LBB45_2: # %entry
-; RV32IFD-NEXT: mv a7, a5
-; RV32IFD-NEXT: bgez a2, .LBB45_19
-; RV32IFD-NEXT: .LBB45_3: # %entry
-; RV32IFD-NEXT: bgeu a5, a1, .LBB45_20
+; RV32IFD-NEXT: or a7, a1, a0
+; RV32IFD-NEXT: slti a5, a0, 0
+; RV32IFD-NEXT: bnez a7, .LBB45_16
+; RV32IFD-NEXT: # %bb.3: # %entry
+; RV32IFD-NEXT: mv t0, a4
+; RV32IFD-NEXT: bgez a0, .LBB45_17
; RV32IFD-NEXT: .LBB45_4: # %entry
-; RV32IFD-NEXT: bnez a6, .LBB45_21
+; RV32IFD-NEXT: bgeu a4, a6, .LBB45_18
; RV32IFD-NEXT: .LBB45_5: # %entry
-; RV32IFD-NEXT: li a6, 0
-; RV32IFD-NEXT: bnez a2, .LBB45_22
+; RV32IFD-NEXT: beqz a7, .LBB45_7
; RV32IFD-NEXT: .LBB45_6: # %entry
-; RV32IFD-NEXT: bgez a2, .LBB45_23
+; RV32IFD-NEXT: mv a4, t0
; RV32IFD-NEXT: .LBB45_7: # %entry
-; RV32IFD-NEXT: mv a0, a5
-; RV32IFD-NEXT: bltz a2, .LBB45_24
-; RV32IFD-NEXT: .LBB45_8: # %entry
-; RV32IFD-NEXT: mv a1, a5
-; RV32IFD-NEXT: bltu a4, a5, .LBB45_10
+; RV32IFD-NEXT: srai a6, a0, 31
+; RV32IFD-NEXT: and a1, a6, a1
+; RV32IFD-NEXT: seqz a6, a0
+; RV32IFD-NEXT: neg a5, a5
+; RV32IFD-NEXT: and a5, a5, a0
+; RV32IFD-NEXT: addi a6, a6, -1
+; RV32IFD-NEXT: mv a0, a4
+; RV32IFD-NEXT: bgez a5, .LBB45_9
+; RV32IFD-NEXT: # %bb.8: # %entry
+; RV32IFD-NEXT: lui a0, 524288
; RV32IFD-NEXT: .LBB45_9: # %entry
+; RV32IFD-NEXT: and a6, a6, a1
+; RV32IFD-NEXT: mv a1, a4
+; RV32IFD-NEXT: bltu a3, a4, .LBB45_11
+; RV32IFD-NEXT: # %bb.10: # %entry
; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: .LBB45_10: # %entry
-; RV32IFD-NEXT: and a6, a6, a2
+; RV32IFD-NEXT: .LBB45_11: # %entry
+; RV32IFD-NEXT: and a6, a6, a5
; RV32IFD-NEXT: li a7, -1
-; RV32IFD-NEXT: bne a6, a7, .LBB45_25
-; RV32IFD-NEXT: # %bb.11: # %entry
-; RV32IFD-NEXT: mv t0, a3
-; RV32IFD-NEXT: bgeu a4, a5, .LBB45_26
-; RV32IFD-NEXT: .LBB45_12: # %entry
-; RV32IFD-NEXT: mv a0, a3
-; RV32IFD-NEXT: bne a5, a4, .LBB45_27
+; RV32IFD-NEXT: bne a6, a7, .LBB45_19
+; RV32IFD-NEXT: # %bb.12: # %entry
+; RV32IFD-NEXT: mv a0, a2
+; RV32IFD-NEXT: bne a4, a3, .LBB45_20
; RV32IFD-NEXT: .LBB45_13: # %entry
-; RV32IFD-NEXT: bltz a2, .LBB45_28
+; RV32IFD-NEXT: beq a6, a7, .LBB45_15
; RV32IFD-NEXT: .LBB45_14: # %entry
-; RV32IFD-NEXT: beq a6, a7, .LBB45_16
+; RV32IFD-NEXT: slti a0, a5, 0
+; RV32IFD-NEXT: addi a0, a0, -1
+; RV32IFD-NEXT: and a0, a0, a2
; RV32IFD-NEXT: .LBB45_15: # %entry
-; RV32IFD-NEXT: mv a0, a3
-; RV32IFD-NEXT: .LBB45_16: # %entry
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB45_16: # %entry
+; RV32IFD-NEXT: addi a2, a5, -1
+; RV32IFD-NEXT: or a2, a2, t0
+; RV32IFD-NEXT: mv t0, a4
+; RV32IFD-NEXT: bltz a0, .LBB45_4
; RV32IFD-NEXT: .LBB45_17: # %entry
-; RV32IFD-NEXT: sltu a3, a5, a1
-; RV32IFD-NEXT: addi a3, a3, -1
-; RV32IFD-NEXT: or a3, a3, a7
-; RV32IFD-NEXT: or a6, a0, a2
-; RV32IFD-NEXT: beqz a6, .LBB45_2
+; RV32IFD-NEXT: mv t0, a6
+; RV32IFD-NEXT: bltu a4, a6, .LBB45_5
; RV32IFD-NEXT: .LBB45_18: # %entry
-; RV32IFD-NEXT: slti a3, a2, 0
-; RV32IFD-NEXT: addi a3, a3, -1
-; RV32IFD-NEXT: or a3, a3, a7
-; RV32IFD-NEXT: mv a7, a5
-; RV32IFD-NEXT: bltz a2, .LBB45_3
+; RV32IFD-NEXT: mv a4, a6
+; RV32IFD-NEXT: bnez a7, .LBB45_6
+; RV32IFD-NEXT: j .LBB45_7
; RV32IFD-NEXT: .LBB45_19: # %entry
-; RV32IFD-NEXT: mv a7, a1
-; RV32IFD-NEXT: bltu a5, a1, .LBB45_4
-; RV32IFD-NEXT: .LBB45_20: # %entry
-; RV32IFD-NEXT: mv a5, a1
-; RV32IFD-NEXT: beqz a6, .LBB45_5
-; RV32IFD-NEXT: .LBB45_21: # %entry
-; RV32IFD-NEXT: mv a5, a7
-; RV32IFD-NEXT: li a6, 0
-; RV32IFD-NEXT: beqz a2, .LBB45_6
-; RV32IFD-NEXT: .LBB45_22: # %entry
-; RV32IFD-NEXT: srai a1, a2, 31
-; RV32IFD-NEXT: and a6, a1, a0
-; RV32IFD-NEXT: bltz a2, .LBB45_7
-; RV32IFD-NEXT: .LBB45_23: # %entry
-; RV32IFD-NEXT: li a2, 0
-; RV32IFD-NEXT: mv a0, a5
-; RV32IFD-NEXT: bgez a2, .LBB45_8
-; RV32IFD-NEXT: .LBB45_24: # %entry
-; RV32IFD-NEXT: lui a0, 524288
-; RV32IFD-NEXT: mv a1, a5
-; RV32IFD-NEXT: bgeu a4, a5, .LBB45_9
-; RV32IFD-NEXT: j .LBB45_10
-; RV32IFD-NEXT: .LBB45_25: # %entry
; RV32IFD-NEXT: mv a1, a0
-; RV32IFD-NEXT: mv t0, a3
-; RV32IFD-NEXT: bltu a4, a5, .LBB45_12
-; RV32IFD-NEXT: .LBB45_26: # %entry
-; RV32IFD-NEXT: li t0, 0
-; RV32IFD-NEXT: mv a0, a3
-; RV32IFD-NEXT: beq a5, a4, .LBB45_13
-; RV32IFD-NEXT: .LBB45_27: # %entry
-; RV32IFD-NEXT: mv a0, t0
-; RV32IFD-NEXT: bgez a2, .LBB45_14
-; RV32IFD-NEXT: .LBB45_28: # %entry
-; RV32IFD-NEXT: li a3, 0
-; RV32IFD-NEXT: bne a6, a7, .LBB45_15
-; RV32IFD-NEXT: j .LBB45_16
+; RV32IFD-NEXT: mv a0, a2
+; RV32IFD-NEXT: beq a4, a3, .LBB45_13
+; RV32IFD-NEXT: .LBB45_20: # %entry
+; RV32IFD-NEXT: sltu a0, a3, a4
+; RV32IFD-NEXT: neg a0, a0
+; RV32IFD-NEXT: and a0, a0, a2
+; RV32IFD-NEXT: bne a6, a7, .LBB45_14
+; RV32IFD-NEXT: j .LBB45_15
;
; RV64IFD-LABEL: stest_f64i64_mm:
; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: beqz a0, .LBB45_2
-; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
-; RV64IFD-NEXT: .LBB45_2: # %entry
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
; RV64IFD-NEXT: ret
entry:
%conv = fptosi double %x to i128
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixunsdfti@plt
-; RV32IF-NEXT: lw a0, 20(sp)
-; RV32IF-NEXT: lw a3, 16(sp)
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: beqz a0, .LBB46_3
-; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: mv a2, a1
-; RV32IF-NEXT: beqz a2, .LBB46_4
-; RV32IF-NEXT: .LBB46_2:
-; RV32IF-NEXT: lw a4, 8(sp)
-; RV32IF-NEXT: j .LBB46_5
-; RV32IF-NEXT: .LBB46_3:
-; RV32IF-NEXT: seqz a2, a3
-; RV32IF-NEXT: bnez a2, .LBB46_2
-; RV32IF-NEXT: .LBB46_4: # %entry
-; RV32IF-NEXT: mv a4, a1
-; RV32IF-NEXT: .LBB46_5: # %entry
-; RV32IF-NEXT: xori a3, a3, 1
-; RV32IF-NEXT: or a3, a3, a0
-; RV32IF-NEXT: mv a0, a1
-; RV32IF-NEXT: beqz a3, .LBB46_7
-; RV32IF-NEXT: # %bb.6: # %entry
-; RV32IF-NEXT: mv a0, a4
-; RV32IF-NEXT: .LBB46_7: # %entry
-; RV32IF-NEXT: bnez a2, .LBB46_9
-; RV32IF-NEXT: # %bb.8: # %entry
-; RV32IF-NEXT: mv a2, a1
-; RV32IF-NEXT: bnez a3, .LBB46_10
-; RV32IF-NEXT: j .LBB46_11
-; RV32IF-NEXT: .LBB46_9:
+; RV32IF-NEXT: lw a0, 16(sp)
+; RV32IF-NEXT: lw a1, 20(sp)
; RV32IF-NEXT: lw a2, 12(sp)
-; RV32IF-NEXT: beqz a3, .LBB46_11
-; RV32IF-NEXT: .LBB46_10: # %entry
-; RV32IF-NEXT: mv a1, a2
-; RV32IF-NEXT: .LBB46_11: # %entry
+; RV32IF-NEXT: lw a3, 8(sp)
+; RV32IF-NEXT: seqz a4, a0
+; RV32IF-NEXT: snez a5, a1
+; RV32IF-NEXT: addi a5, a5, -1
+; RV32IF-NEXT: and a4, a5, a4
+; RV32IF-NEXT: seqz a4, a4
+; RV32IF-NEXT: addi a4, a4, -1
+; RV32IF-NEXT: and a3, a4, a3
+; RV32IF-NEXT: xori a0, a0, 1
+; RV32IF-NEXT: or a0, a0, a1
+; RV32IF-NEXT: seqz a0, a0
+; RV32IF-NEXT: addi a1, a0, -1
+; RV32IF-NEXT: and a0, a1, a3
+; RV32IF-NEXT: and a2, a4, a2
+; RV32IF-NEXT: and a1, a1, a2
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixunsdfti@plt
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: beqz a1, .LBB46_2
-; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: .LBB46_2: # %entry
-; RV64-NEXT: li a3, 1
-; RV64-NEXT: beq a1, a3, .LBB46_4
-; RV64-NEXT: # %bb.3: # %entry
-; RV64-NEXT: mv a0, a2
-; RV64-NEXT: .LBB46_4: # %entry
+; RV64-NEXT: snez a2, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixunsdfti@plt
-; RV32IFD-NEXT: lw a0, 20(sp)
-; RV32IFD-NEXT: lw a3, 16(sp)
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: beqz a0, .LBB46_3
-; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: mv a2, a1
-; RV32IFD-NEXT: beqz a2, .LBB46_4
-; RV32IFD-NEXT: .LBB46_2:
-; RV32IFD-NEXT: lw a4, 8(sp)
-; RV32IFD-NEXT: j .LBB46_5
-; RV32IFD-NEXT: .LBB46_3:
-; RV32IFD-NEXT: seqz a2, a3
-; RV32IFD-NEXT: bnez a2, .LBB46_2
-; RV32IFD-NEXT: .LBB46_4: # %entry
-; RV32IFD-NEXT: mv a4, a1
-; RV32IFD-NEXT: .LBB46_5: # %entry
-; RV32IFD-NEXT: xori a3, a3, 1
-; RV32IFD-NEXT: or a3, a3, a0
-; RV32IFD-NEXT: mv a0, a1
-; RV32IFD-NEXT: beqz a3, .LBB46_7
-; RV32IFD-NEXT: # %bb.6: # %entry
-; RV32IFD-NEXT: mv a0, a4
-; RV32IFD-NEXT: .LBB46_7: # %entry
-; RV32IFD-NEXT: bnez a2, .LBB46_9
-; RV32IFD-NEXT: # %bb.8: # %entry
-; RV32IFD-NEXT: mv a2, a1
-; RV32IFD-NEXT: bnez a3, .LBB46_10
-; RV32IFD-NEXT: j .LBB46_11
-; RV32IFD-NEXT: .LBB46_9:
+; RV32IFD-NEXT: lw a0, 16(sp)
+; RV32IFD-NEXT: lw a1, 20(sp)
; RV32IFD-NEXT: lw a2, 12(sp)
-; RV32IFD-NEXT: beqz a3, .LBB46_11
-; RV32IFD-NEXT: .LBB46_10: # %entry
-; RV32IFD-NEXT: mv a1, a2
-; RV32IFD-NEXT: .LBB46_11: # %entry
+; RV32IFD-NEXT: lw a3, 8(sp)
+; RV32IFD-NEXT: seqz a4, a0
+; RV32IFD-NEXT: snez a5, a1
+; RV32IFD-NEXT: addi a5, a5, -1
+; RV32IFD-NEXT: and a4, a5, a4
+; RV32IFD-NEXT: seqz a4, a4
+; RV32IFD-NEXT: addi a4, a4, -1
+; RV32IFD-NEXT: and a3, a4, a3
+; RV32IFD-NEXT: xori a0, a0, 1
+; RV32IFD-NEXT: or a0, a0, a1
+; RV32IFD-NEXT: seqz a0, a0
+; RV32IFD-NEXT: addi a1, a0, -1
+; RV32IFD-NEXT: and a0, a1, a3
+; RV32IFD-NEXT: and a2, a4, a2
+; RV32IFD-NEXT: and a1, a1, a2
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti@plt
-; RV32IF-NEXT: lw a2, 20(sp)
-; RV32IF-NEXT: lw a3, 16(sp)
-; RV32IF-NEXT: beqz a2, .LBB47_3
+; RV32IF-NEXT: lw a1, 16(sp)
+; RV32IF-NEXT: lw a0, 20(sp)
+; RV32IF-NEXT: li a3, 1
+; RV32IF-NEXT: mv a6, a1
+; RV32IF-NEXT: bltz a0, .LBB47_2
; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: slti a0, a2, 0
-; RV32IF-NEXT: beqz a0, .LBB47_4
-; RV32IF-NEXT: .LBB47_2:
-; RV32IF-NEXT: lw a5, 12(sp)
-; RV32IF-NEXT: j .LBB47_5
-; RV32IF-NEXT: .LBB47_3:
-; RV32IF-NEXT: seqz a0, a3
-; RV32IF-NEXT: bnez a0, .LBB47_2
+; RV32IF-NEXT: li a6, 1
+; RV32IF-NEXT: .LBB47_2: # %entry
+; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: bltu a1, a3, .LBB47_4
+; RV32IF-NEXT: # %bb.3: # %entry
+; RV32IF-NEXT: li a2, 1
; RV32IF-NEXT: .LBB47_4: # %entry
-; RV32IF-NEXT: li a5, 0
-; RV32IF-NEXT: .LBB47_5: # %entry
-; RV32IF-NEXT: xori a1, a3, 1
-; RV32IF-NEXT: or a4, a1, a2
-; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: beqz a4, .LBB47_7
-; RV32IF-NEXT: # %bb.6: # %entry
-; RV32IF-NEXT: mv a1, a5
+; RV32IF-NEXT: lw a5, 12(sp)
+; RV32IF-NEXT: lw a3, 8(sp)
+; RV32IF-NEXT: slti a4, a0, 0
+; RV32IF-NEXT: beqz a0, .LBB47_6
+; RV32IF-NEXT: # %bb.5: # %entry
+; RV32IF-NEXT: mv a2, a6
+; RV32IF-NEXT: mv a6, a4
+; RV32IF-NEXT: j .LBB47_7
+; RV32IF-NEXT: .LBB47_6:
+; RV32IF-NEXT: seqz a6, a1
; RV32IF-NEXT: .LBB47_7: # %entry
-; RV32IF-NEXT: bnez a0, .LBB47_9
+; RV32IF-NEXT: seqz a6, a6
+; RV32IF-NEXT: addi a6, a6, -1
+; RV32IF-NEXT: and a3, a6, a3
+; RV32IF-NEXT: xori a1, a1, 1
+; RV32IF-NEXT: or a1, a1, a0
+; RV32IF-NEXT: seqz a1, a1
+; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: and a3, a1, a3
+; RV32IF-NEXT: and a5, a6, a5
+; RV32IF-NEXT: and a1, a1, a5
+; RV32IF-NEXT: neg a4, a4
+; RV32IF-NEXT: and a4, a4, a0
+; RV32IF-NEXT: mv a0, a3
+; RV32IF-NEXT: beqz a1, .LBB47_9
; RV32IF-NEXT: # %bb.8: # %entry
-; RV32IF-NEXT: li a5, 0
-; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: bnez a4, .LBB47_10
-; RV32IF-NEXT: j .LBB47_11
-; RV32IF-NEXT: .LBB47_9:
-; RV32IF-NEXT: lw a5, 8(sp)
-; RV32IF-NEXT: li a0, 0
+; RV32IF-NEXT: seqz a0, a1
+; RV32IF-NEXT: addi a0, a0, -1
+; RV32IF-NEXT: and a0, a0, a3
+; RV32IF-NEXT: .LBB47_9: # %entry
; RV32IF-NEXT: beqz a4, .LBB47_11
-; RV32IF-NEXT: .LBB47_10: # %entry
-; RV32IF-NEXT: mv a0, a5
-; RV32IF-NEXT: .LBB47_11: # %entry
-; RV32IF-NEXT: li a5, 1
-; RV32IF-NEXT: mv a4, a3
-; RV32IF-NEXT: bgez a2, .LBB47_17
-; RV32IF-NEXT: # %bb.12: # %entry
-; RV32IF-NEXT: bgeu a3, a5, .LBB47_18
+; RV32IF-NEXT: # %bb.10: # %entry
+; RV32IF-NEXT: sgtz a5, a4
+; RV32IF-NEXT: or a2, a2, a4
+; RV32IF-NEXT: bnez a2, .LBB47_12
+; RV32IF-NEXT: j .LBB47_13
+; RV32IF-NEXT: .LBB47_11:
+; RV32IF-NEXT: snez a5, a2
+; RV32IF-NEXT: or a2, a2, a4
+; RV32IF-NEXT: beqz a2, .LBB47_13
+; RV32IF-NEXT: .LBB47_12: # %entry
+; RV32IF-NEXT: seqz a0, a5
+; RV32IF-NEXT: addi a2, a0, -1
+; RV32IF-NEXT: and a0, a2, a3
+; RV32IF-NEXT: and a1, a2, a1
; RV32IF-NEXT: .LBB47_13: # %entry
-; RV32IF-NEXT: bnez a2, .LBB47_19
-; RV32IF-NEXT: .LBB47_14: # %entry
-; RV32IF-NEXT: bgez a2, .LBB47_20
-; RV32IF-NEXT: .LBB47_15: # %entry
-; RV32IF-NEXT: beqz a2, .LBB47_21
-; RV32IF-NEXT: .LBB47_16: # %entry
-; RV32IF-NEXT: sgtz a4, a2
-; RV32IF-NEXT: mv a5, a0
-; RV32IF-NEXT: beqz a4, .LBB47_22
-; RV32IF-NEXT: j .LBB47_23
-; RV32IF-NEXT: .LBB47_17: # %entry
-; RV32IF-NEXT: li a4, 1
-; RV32IF-NEXT: bltu a3, a5, .LBB47_13
-; RV32IF-NEXT: .LBB47_18: # %entry
-; RV32IF-NEXT: li a3, 1
-; RV32IF-NEXT: beqz a2, .LBB47_14
-; RV32IF-NEXT: .LBB47_19: # %entry
-; RV32IF-NEXT: mv a3, a4
-; RV32IF-NEXT: bltz a2, .LBB47_15
-; RV32IF-NEXT: .LBB47_20: # %entry
-; RV32IF-NEXT: li a2, 0
-; RV32IF-NEXT: bnez a2, .LBB47_16
-; RV32IF-NEXT: .LBB47_21:
-; RV32IF-NEXT: snez a4, a3
-; RV32IF-NEXT: mv a5, a0
-; RV32IF-NEXT: bnez a4, .LBB47_23
-; RV32IF-NEXT: .LBB47_22: # %entry
-; RV32IF-NEXT: li a5, 0
-; RV32IF-NEXT: .LBB47_23: # %entry
-; RV32IF-NEXT: mv a6, a0
-; RV32IF-NEXT: beqz a1, .LBB47_30
-; RV32IF-NEXT: # %bb.24: # %entry
-; RV32IF-NEXT: bnez a1, .LBB47_31
-; RV32IF-NEXT: .LBB47_25: # %entry
-; RV32IF-NEXT: or a2, a3, a2
-; RV32IF-NEXT: bnez a2, .LBB47_32
-; RV32IF-NEXT: .LBB47_26: # %entry
-; RV32IF-NEXT: mv a3, a1
-; RV32IF-NEXT: beqz a4, .LBB47_33
-; RV32IF-NEXT: .LBB47_27: # %entry
-; RV32IF-NEXT: beqz a2, .LBB47_29
-; RV32IF-NEXT: .LBB47_28: # %entry
-; RV32IF-NEXT: mv a1, a3
-; RV32IF-NEXT: .LBB47_29: # %entry
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
-; RV32IF-NEXT: .LBB47_30: # %entry
-; RV32IF-NEXT: li a6, 0
-; RV32IF-NEXT: beqz a1, .LBB47_25
-; RV32IF-NEXT: .LBB47_31: # %entry
-; RV32IF-NEXT: mv a0, a6
-; RV32IF-NEXT: or a2, a3, a2
-; RV32IF-NEXT: beqz a2, .LBB47_26
-; RV32IF-NEXT: .LBB47_32: # %entry
-; RV32IF-NEXT: mv a0, a5
-; RV32IF-NEXT: mv a3, a1
-; RV32IF-NEXT: bnez a4, .LBB47_27
-; RV32IF-NEXT: .LBB47_33: # %entry
-; RV32IF-NEXT: li a3, 0
-; RV32IF-NEXT: bnez a2, .LBB47_28
-; RV32IF-NEXT: j .LBB47_29
;
; RV64-LABEL: ustest_f64i64_mm:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixdfti@plt
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: li a4, 1
-; RV64-NEXT: mv a3, a1
-; RV64-NEXT: bgtz a1, .LBB47_6
+; RV64-NEXT: mv a2, a1
+; RV64-NEXT: blez a1, .LBB47_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: bne a1, a4, .LBB47_7
+; RV64-NEXT: li a2, 1
; RV64-NEXT: .LBB47_2: # %entry
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: blez a3, .LBB47_8
-; RV64-NEXT: .LBB47_3: # %entry
-; RV64-NEXT: beqz a3, .LBB47_5
+; RV64-NEXT: sgtz a3, a1
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: beqz a2, .LBB47_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: sgtz a1, a2
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: .LBB47_4: # %entry
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB47_5: # %entry
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
-; RV64-NEXT: .LBB47_6: # %entry
-; RV64-NEXT: li a2, 0
-; RV64-NEXT: li a3, 1
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: beq a1, a4, .LBB47_2
-; RV64-NEXT: .LBB47_7: # %entry
-; RV64-NEXT: mv a0, a2
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: bgtz a3, .LBB47_3
-; RV64-NEXT: .LBB47_8: # %entry
-; RV64-NEXT: li a1, 0
-; RV64-NEXT: bnez a3, .LBB47_4
-; RV64-NEXT: j .LBB47_5
;
; RV32IFD-LABEL: ustest_f64i64_mm:
; RV32IFD: # %bb.0: # %entry
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti@plt
-; RV32IFD-NEXT: lw a2, 20(sp)
-; RV32IFD-NEXT: lw a3, 16(sp)
-; RV32IFD-NEXT: beqz a2, .LBB47_3
+; RV32IFD-NEXT: lw a1, 16(sp)
+; RV32IFD-NEXT: lw a0, 20(sp)
+; RV32IFD-NEXT: li a3, 1
+; RV32IFD-NEXT: mv a6, a1
+; RV32IFD-NEXT: bltz a0, .LBB47_2
; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: slti a0, a2, 0
-; RV32IFD-NEXT: beqz a0, .LBB47_4
-; RV32IFD-NEXT: .LBB47_2:
-; RV32IFD-NEXT: lw a5, 12(sp)
-; RV32IFD-NEXT: j .LBB47_5
-; RV32IFD-NEXT: .LBB47_3:
-; RV32IFD-NEXT: seqz a0, a3
-; RV32IFD-NEXT: bnez a0, .LBB47_2
+; RV32IFD-NEXT: li a6, 1
+; RV32IFD-NEXT: .LBB47_2: # %entry
+; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: bltu a1, a3, .LBB47_4
+; RV32IFD-NEXT: # %bb.3: # %entry
+; RV32IFD-NEXT: li a2, 1
; RV32IFD-NEXT: .LBB47_4: # %entry
-; RV32IFD-NEXT: li a5, 0
-; RV32IFD-NEXT: .LBB47_5: # %entry
-; RV32IFD-NEXT: xori a1, a3, 1
-; RV32IFD-NEXT: or a4, a1, a2
-; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: beqz a4, .LBB47_7
-; RV32IFD-NEXT: # %bb.6: # %entry
-; RV32IFD-NEXT: mv a1, a5
+; RV32IFD-NEXT: lw a5, 12(sp)
+; RV32IFD-NEXT: lw a3, 8(sp)
+; RV32IFD-NEXT: slti a4, a0, 0
+; RV32IFD-NEXT: beqz a0, .LBB47_6
+; RV32IFD-NEXT: # %bb.5: # %entry
+; RV32IFD-NEXT: mv a2, a6
+; RV32IFD-NEXT: mv a6, a4
+; RV32IFD-NEXT: j .LBB47_7
+; RV32IFD-NEXT: .LBB47_6:
+; RV32IFD-NEXT: seqz a6, a1
; RV32IFD-NEXT: .LBB47_7: # %entry
-; RV32IFD-NEXT: bnez a0, .LBB47_9
+; RV32IFD-NEXT: seqz a6, a6
+; RV32IFD-NEXT: addi a6, a6, -1
+; RV32IFD-NEXT: and a3, a6, a3
+; RV32IFD-NEXT: xori a1, a1, 1
+; RV32IFD-NEXT: or a1, a1, a0
+; RV32IFD-NEXT: seqz a1, a1
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: and a3, a1, a3
+; RV32IFD-NEXT: and a5, a6, a5
+; RV32IFD-NEXT: and a1, a1, a5
+; RV32IFD-NEXT: neg a4, a4
+; RV32IFD-NEXT: and a4, a4, a0
+; RV32IFD-NEXT: mv a0, a3
+; RV32IFD-NEXT: beqz a1, .LBB47_9
; RV32IFD-NEXT: # %bb.8: # %entry
-; RV32IFD-NEXT: li a5, 0
-; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: bnez a4, .LBB47_10
-; RV32IFD-NEXT: j .LBB47_11
-; RV32IFD-NEXT: .LBB47_9:
-; RV32IFD-NEXT: lw a5, 8(sp)
-; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: seqz a0, a1
+; RV32IFD-NEXT: addi a0, a0, -1
+; RV32IFD-NEXT: and a0, a0, a3
+; RV32IFD-NEXT: .LBB47_9: # %entry
; RV32IFD-NEXT: beqz a4, .LBB47_11
-; RV32IFD-NEXT: .LBB47_10: # %entry
-; RV32IFD-NEXT: mv a0, a5
-; RV32IFD-NEXT: .LBB47_11: # %entry
-; RV32IFD-NEXT: li a5, 1
-; RV32IFD-NEXT: mv a4, a3
-; RV32IFD-NEXT: bgez a2, .LBB47_17
-; RV32IFD-NEXT: # %bb.12: # %entry
-; RV32IFD-NEXT: bgeu a3, a5, .LBB47_18
+; RV32IFD-NEXT: # %bb.10: # %entry
+; RV32IFD-NEXT: sgtz a5, a4
+; RV32IFD-NEXT: or a2, a2, a4
+; RV32IFD-NEXT: bnez a2, .LBB47_12
+; RV32IFD-NEXT: j .LBB47_13
+; RV32IFD-NEXT: .LBB47_11:
+; RV32IFD-NEXT: snez a5, a2
+; RV32IFD-NEXT: or a2, a2, a4
+; RV32IFD-NEXT: beqz a2, .LBB47_13
+; RV32IFD-NEXT: .LBB47_12: # %entry
+; RV32IFD-NEXT: seqz a0, a5
+; RV32IFD-NEXT: addi a2, a0, -1
+; RV32IFD-NEXT: and a0, a2, a3
+; RV32IFD-NEXT: and a1, a2, a1
; RV32IFD-NEXT: .LBB47_13: # %entry
-; RV32IFD-NEXT: bnez a2, .LBB47_19
-; RV32IFD-NEXT: .LBB47_14: # %entry
-; RV32IFD-NEXT: bgez a2, .LBB47_20
-; RV32IFD-NEXT: .LBB47_15: # %entry
-; RV32IFD-NEXT: beqz a2, .LBB47_21
-; RV32IFD-NEXT: .LBB47_16: # %entry
-; RV32IFD-NEXT: sgtz a4, a2
-; RV32IFD-NEXT: mv a5, a0
-; RV32IFD-NEXT: beqz a4, .LBB47_22
-; RV32IFD-NEXT: j .LBB47_23
-; RV32IFD-NEXT: .LBB47_17: # %entry
-; RV32IFD-NEXT: li a4, 1
-; RV32IFD-NEXT: bltu a3, a5, .LBB47_13
-; RV32IFD-NEXT: .LBB47_18: # %entry
-; RV32IFD-NEXT: li a3, 1
-; RV32IFD-NEXT: beqz a2, .LBB47_14
-; RV32IFD-NEXT: .LBB47_19: # %entry
-; RV32IFD-NEXT: mv a3, a4
-; RV32IFD-NEXT: bltz a2, .LBB47_15
-; RV32IFD-NEXT: .LBB47_20: # %entry
-; RV32IFD-NEXT: li a2, 0
-; RV32IFD-NEXT: bnez a2, .LBB47_16
-; RV32IFD-NEXT: .LBB47_21:
-; RV32IFD-NEXT: snez a4, a3
-; RV32IFD-NEXT: mv a5, a0
-; RV32IFD-NEXT: bnez a4, .LBB47_23
-; RV32IFD-NEXT: .LBB47_22: # %entry
-; RV32IFD-NEXT: li a5, 0
-; RV32IFD-NEXT: .LBB47_23: # %entry
-; RV32IFD-NEXT: mv a6, a0
-; RV32IFD-NEXT: beqz a1, .LBB47_30
-; RV32IFD-NEXT: # %bb.24: # %entry
-; RV32IFD-NEXT: bnez a1, .LBB47_31
-; RV32IFD-NEXT: .LBB47_25: # %entry
-; RV32IFD-NEXT: or a2, a3, a2
-; RV32IFD-NEXT: bnez a2, .LBB47_32
-; RV32IFD-NEXT: .LBB47_26: # %entry
-; RV32IFD-NEXT: mv a3, a1
-; RV32IFD-NEXT: beqz a4, .LBB47_33
-; RV32IFD-NEXT: .LBB47_27: # %entry
-; RV32IFD-NEXT: beqz a2, .LBB47_29
-; RV32IFD-NEXT: .LBB47_28: # %entry
-; RV32IFD-NEXT: mv a1, a3
-; RV32IFD-NEXT: .LBB47_29: # %entry
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
-; RV32IFD-NEXT: .LBB47_30: # %entry
-; RV32IFD-NEXT: li a6, 0
-; RV32IFD-NEXT: beqz a1, .LBB47_25
-; RV32IFD-NEXT: .LBB47_31: # %entry
-; RV32IFD-NEXT: mv a0, a6
-; RV32IFD-NEXT: or a2, a3, a2
-; RV32IFD-NEXT: beqz a2, .LBB47_26
-; RV32IFD-NEXT: .LBB47_32: # %entry
-; RV32IFD-NEXT: mv a0, a5
-; RV32IFD-NEXT: mv a3, a1
-; RV32IFD-NEXT: bnez a4, .LBB47_27
-; RV32IFD-NEXT: .LBB47_33: # %entry
-; RV32IFD-NEXT: li a3, 0
-; RV32IFD-NEXT: bnez a2, .LBB47_28
-; RV32IFD-NEXT: j .LBB47_29
entry:
%conv = fptosi double %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a7, 8(sp)
-; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: lw a0, 16(sp)
-; RV32-NEXT: lui a4, 524288
-; RV32-NEXT: addi a1, a4, -1
-; RV32-NEXT: mv a3, a7
-; RV32-NEXT: bne a5, a1, .LBB48_17
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw t0, 8(sp)
+; RV32-NEXT: lw a4, 12(sp)
+; RV32-NEXT: lw a1, 16(sp)
+; RV32-NEXT: lui a3, 524288
+; RV32-NEXT: addi a6, a3, -1
+; RV32-NEXT: mv a2, t0
+; RV32-NEXT: beq a4, a6, .LBB48_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: or a6, a0, a2
-; RV32-NEXT: bnez a6, .LBB48_18
+; RV32-NEXT: sltu a2, a4, a6
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: or a2, a2, t0
; RV32-NEXT: .LBB48_2: # %entry
-; RV32-NEXT: mv a7, a5
-; RV32-NEXT: bgez a2, .LBB48_19
-; RV32-NEXT: .LBB48_3: # %entry
-; RV32-NEXT: bgeu a5, a1, .LBB48_20
+; RV32-NEXT: or a7, a1, a0
+; RV32-NEXT: slti a5, a0, 0
+; RV32-NEXT: bnez a7, .LBB48_16
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: mv t0, a4
+; RV32-NEXT: bgez a0, .LBB48_17
; RV32-NEXT: .LBB48_4: # %entry
-; RV32-NEXT: bnez a6, .LBB48_21
+; RV32-NEXT: bgeu a4, a6, .LBB48_18
; RV32-NEXT: .LBB48_5: # %entry
-; RV32-NEXT: li a6, 0
-; RV32-NEXT: bnez a2, .LBB48_22
+; RV32-NEXT: beqz a7, .LBB48_7
; RV32-NEXT: .LBB48_6: # %entry
-; RV32-NEXT: bgez a2, .LBB48_23
+; RV32-NEXT: mv a4, t0
; RV32-NEXT: .LBB48_7: # %entry
-; RV32-NEXT: mv a0, a5
-; RV32-NEXT: bltz a2, .LBB48_24
-; RV32-NEXT: .LBB48_8: # %entry
-; RV32-NEXT: mv a1, a5
-; RV32-NEXT: bltu a4, a5, .LBB48_10
+; RV32-NEXT: srai a6, a0, 31
+; RV32-NEXT: and a1, a6, a1
+; RV32-NEXT: seqz a6, a0
+; RV32-NEXT: neg a5, a5
+; RV32-NEXT: and a5, a5, a0
+; RV32-NEXT: addi a6, a6, -1
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: bgez a5, .LBB48_9
+; RV32-NEXT: # %bb.8: # %entry
+; RV32-NEXT: lui a0, 524288
; RV32-NEXT: .LBB48_9: # %entry
+; RV32-NEXT: and a6, a6, a1
+; RV32-NEXT: mv a1, a4
+; RV32-NEXT: bltu a3, a4, .LBB48_11
+; RV32-NEXT: # %bb.10: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB48_10: # %entry
-; RV32-NEXT: and a6, a6, a2
+; RV32-NEXT: .LBB48_11: # %entry
+; RV32-NEXT: and a6, a6, a5
; RV32-NEXT: li a7, -1
-; RV32-NEXT: bne a6, a7, .LBB48_25
-; RV32-NEXT: # %bb.11: # %entry
-; RV32-NEXT: mv t0, a3
-; RV32-NEXT: bgeu a4, a5, .LBB48_26
-; RV32-NEXT: .LBB48_12: # %entry
-; RV32-NEXT: mv a0, a3
-; RV32-NEXT: bne a5, a4, .LBB48_27
+; RV32-NEXT: bne a6, a7, .LBB48_19
+; RV32-NEXT: # %bb.12: # %entry
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: bne a4, a3, .LBB48_20
; RV32-NEXT: .LBB48_13: # %entry
-; RV32-NEXT: bltz a2, .LBB48_28
+; RV32-NEXT: beq a6, a7, .LBB48_15
; RV32-NEXT: .LBB48_14: # %entry
-; RV32-NEXT: beq a6, a7, .LBB48_16
+; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: .LBB48_15: # %entry
-; RV32-NEXT: mv a0, a3
-; RV32-NEXT: .LBB48_16: # %entry
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
+; RV32-NEXT: .LBB48_16: # %entry
+; RV32-NEXT: addi a2, a5, -1
+; RV32-NEXT: or a2, a2, t0
+; RV32-NEXT: mv t0, a4
+; RV32-NEXT: bltz a0, .LBB48_4
; RV32-NEXT: .LBB48_17: # %entry
-; RV32-NEXT: sltu a3, a5, a1
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: or a3, a3, a7
-; RV32-NEXT: or a6, a0, a2
-; RV32-NEXT: beqz a6, .LBB48_2
+; RV32-NEXT: mv t0, a6
+; RV32-NEXT: bltu a4, a6, .LBB48_5
; RV32-NEXT: .LBB48_18: # %entry
-; RV32-NEXT: slti a3, a2, 0
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: or a3, a3, a7
-; RV32-NEXT: mv a7, a5
-; RV32-NEXT: bltz a2, .LBB48_3
+; RV32-NEXT: mv a4, a6
+; RV32-NEXT: bnez a7, .LBB48_6
+; RV32-NEXT: j .LBB48_7
; RV32-NEXT: .LBB48_19: # %entry
-; RV32-NEXT: mv a7, a1
-; RV32-NEXT: bltu a5, a1, .LBB48_4
-; RV32-NEXT: .LBB48_20: # %entry
-; RV32-NEXT: mv a5, a1
-; RV32-NEXT: beqz a6, .LBB48_5
-; RV32-NEXT: .LBB48_21: # %entry
-; RV32-NEXT: mv a5, a7
-; RV32-NEXT: li a6, 0
-; RV32-NEXT: beqz a2, .LBB48_6
-; RV32-NEXT: .LBB48_22: # %entry
-; RV32-NEXT: srai a1, a2, 31
-; RV32-NEXT: and a6, a1, a0
-; RV32-NEXT: bltz a2, .LBB48_7
-; RV32-NEXT: .LBB48_23: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: mv a0, a5
-; RV32-NEXT: bgez a2, .LBB48_8
-; RV32-NEXT: .LBB48_24: # %entry
-; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: mv a1, a5
-; RV32-NEXT: bgeu a4, a5, .LBB48_9
-; RV32-NEXT: j .LBB48_10
-; RV32-NEXT: .LBB48_25: # %entry
; RV32-NEXT: mv a1, a0
-; RV32-NEXT: mv t0, a3
-; RV32-NEXT: bltu a4, a5, .LBB48_12
-; RV32-NEXT: .LBB48_26: # %entry
-; RV32-NEXT: li t0, 0
-; RV32-NEXT: mv a0, a3
-; RV32-NEXT: beq a5, a4, .LBB48_13
-; RV32-NEXT: .LBB48_27: # %entry
-; RV32-NEXT: mv a0, t0
-; RV32-NEXT: bgez a2, .LBB48_14
-; RV32-NEXT: .LBB48_28: # %entry
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: bne a6, a7, .LBB48_15
-; RV32-NEXT: j .LBB48_16
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: beq a4, a3, .LBB48_13
+; RV32-NEXT: .LBB48_20: # %entry
+; RV32-NEXT: sltu a0, a3, a4
+; RV32-NEXT: neg a0, a0
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: bne a6, a7, .LBB48_14
+; RV32-NEXT: j .LBB48_15
;
; RV64-LABEL: stest_f32i64_mm:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: feq.s a0, fa0, fa0
-; RV64-NEXT: beqz a0, .LBB48_2
-; RV64-NEXT: # %bb.1:
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
-; RV64-NEXT: .LBB48_2: # %entry
+; RV64-NEXT: feq.s a1, fa0, fa0
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ret
entry:
%conv = fptosi float %x to i128
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixunssfti@plt
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: beqz a0, .LBB49_3
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: beqz a2, .LBB49_4
-; RV32-NEXT: .LBB49_2:
-; RV32-NEXT: lw a4, 8(sp)
-; RV32-NEXT: j .LBB49_5
-; RV32-NEXT: .LBB49_3:
-; RV32-NEXT: seqz a2, a3
-; RV32-NEXT: bnez a2, .LBB49_2
-; RV32-NEXT: .LBB49_4: # %entry
-; RV32-NEXT: mv a4, a1
-; RV32-NEXT: .LBB49_5: # %entry
-; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a0
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: beqz a3, .LBB49_7
-; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: mv a0, a4
-; RV32-NEXT: .LBB49_7: # %entry
-; RV32-NEXT: bnez a2, .LBB49_9
-; RV32-NEXT: # %bb.8: # %entry
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bnez a3, .LBB49_10
-; RV32-NEXT: j .LBB49_11
-; RV32-NEXT: .LBB49_9:
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a1, 20(sp)
; RV32-NEXT: lw a2, 12(sp)
-; RV32-NEXT: beqz a3, .LBB49_11
-; RV32-NEXT: .LBB49_10: # %entry
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB49_11: # %entry
+; RV32-NEXT: lw a3, 8(sp)
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
+; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: addi a4, a4, -1
+; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a1, a0, -1
+; RV32-NEXT: and a0, a1, a3
+; RV32-NEXT: and a2, a4, a2
+; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixunssfti@plt
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: beqz a1, .LBB49_2
-; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: .LBB49_2: # %entry
-; RV64-NEXT: li a3, 1
-; RV64-NEXT: beq a1, a3, .LBB49_4
-; RV64-NEXT: # %bb.3: # %entry
-; RV64-NEXT: mv a0, a2
-; RV64-NEXT: .LBB49_4: # %entry
+; RV64-NEXT: snez a2, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB50_3
+; RV32-NEXT: lw a1, 16(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: li a3, 1
+; RV32-NEXT: mv a6, a1
+; RV32-NEXT: bltz a0, .LBB50_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a0, a2, 0
-; RV32-NEXT: beqz a0, .LBB50_4
-; RV32-NEXT: .LBB50_2:
-; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: j .LBB50_5
-; RV32-NEXT: .LBB50_3:
-; RV32-NEXT: seqz a0, a3
-; RV32-NEXT: bnez a0, .LBB50_2
+; RV32-NEXT: li a6, 1
+; RV32-NEXT: .LBB50_2: # %entry
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: bltu a1, a3, .LBB50_4
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: li a2, 1
; RV32-NEXT: .LBB50_4: # %entry
-; RV32-NEXT: li a5, 0
-; RV32-NEXT: .LBB50_5: # %entry
-; RV32-NEXT: xori a1, a3, 1
-; RV32-NEXT: or a4, a1, a2
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: beqz a4, .LBB50_7
-; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: lw a5, 12(sp)
+; RV32-NEXT: lw a3, 8(sp)
+; RV32-NEXT: slti a4, a0, 0
+; RV32-NEXT: beqz a0, .LBB50_6
+; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: mv a2, a6
+; RV32-NEXT: mv a6, a4
+; RV32-NEXT: j .LBB50_7
+; RV32-NEXT: .LBB50_6:
+; RV32-NEXT: seqz a6, a1
; RV32-NEXT: .LBB50_7: # %entry
-; RV32-NEXT: bnez a0, .LBB50_9
+; RV32-NEXT: seqz a6, a6
+; RV32-NEXT: addi a6, a6, -1
+; RV32-NEXT: and a3, a6, a3
+; RV32-NEXT: xori a1, a1, 1
+; RV32-NEXT: or a1, a1, a0
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a3, a1, a3
+; RV32-NEXT: and a5, a6, a5
+; RV32-NEXT: and a1, a1, a5
+; RV32-NEXT: neg a4, a4
+; RV32-NEXT: and a4, a4, a0
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: beqz a1, .LBB50_9
; RV32-NEXT: # %bb.8: # %entry
-; RV32-NEXT: li a5, 0
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: bnez a4, .LBB50_10
-; RV32-NEXT: j .LBB50_11
-; RV32-NEXT: .LBB50_9:
-; RV32-NEXT: lw a5, 8(sp)
-; RV32-NEXT: li a0, 0
+; RV32-NEXT: seqz a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: .LBB50_9: # %entry
; RV32-NEXT: beqz a4, .LBB50_11
-; RV32-NEXT: .LBB50_10: # %entry
-; RV32-NEXT: mv a0, a5
-; RV32-NEXT: .LBB50_11: # %entry
-; RV32-NEXT: li a5, 1
-; RV32-NEXT: mv a4, a3
-; RV32-NEXT: bgez a2, .LBB50_17
-; RV32-NEXT: # %bb.12: # %entry
-; RV32-NEXT: bgeu a3, a5, .LBB50_18
+; RV32-NEXT: # %bb.10: # %entry
+; RV32-NEXT: sgtz a5, a4
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: bnez a2, .LBB50_12
+; RV32-NEXT: j .LBB50_13
+; RV32-NEXT: .LBB50_11:
+; RV32-NEXT: snez a5, a2
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: beqz a2, .LBB50_13
+; RV32-NEXT: .LBB50_12: # %entry
+; RV32-NEXT: seqz a0, a5
+; RV32-NEXT: addi a2, a0, -1
+; RV32-NEXT: and a0, a2, a3
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: .LBB50_13: # %entry
-; RV32-NEXT: bnez a2, .LBB50_19
-; RV32-NEXT: .LBB50_14: # %entry
-; RV32-NEXT: bgez a2, .LBB50_20
-; RV32-NEXT: .LBB50_15: # %entry
-; RV32-NEXT: beqz a2, .LBB50_21
-; RV32-NEXT: .LBB50_16: # %entry
-; RV32-NEXT: sgtz a4, a2
-; RV32-NEXT: mv a5, a0
-; RV32-NEXT: beqz a4, .LBB50_22
-; RV32-NEXT: j .LBB50_23
-; RV32-NEXT: .LBB50_17: # %entry
-; RV32-NEXT: li a4, 1
-; RV32-NEXT: bltu a3, a5, .LBB50_13
-; RV32-NEXT: .LBB50_18: # %entry
-; RV32-NEXT: li a3, 1
-; RV32-NEXT: beqz a2, .LBB50_14
-; RV32-NEXT: .LBB50_19: # %entry
-; RV32-NEXT: mv a3, a4
-; RV32-NEXT: bltz a2, .LBB50_15
-; RV32-NEXT: .LBB50_20: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bnez a2, .LBB50_16
-; RV32-NEXT: .LBB50_21:
-; RV32-NEXT: snez a4, a3
-; RV32-NEXT: mv a5, a0
-; RV32-NEXT: bnez a4, .LBB50_23
-; RV32-NEXT: .LBB50_22: # %entry
-; RV32-NEXT: li a5, 0
-; RV32-NEXT: .LBB50_23: # %entry
-; RV32-NEXT: mv a6, a0
-; RV32-NEXT: beqz a1, .LBB50_30
-; RV32-NEXT: # %bb.24: # %entry
-; RV32-NEXT: bnez a1, .LBB50_31
-; RV32-NEXT: .LBB50_25: # %entry
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: bnez a2, .LBB50_32
-; RV32-NEXT: .LBB50_26: # %entry
-; RV32-NEXT: mv a3, a1
-; RV32-NEXT: beqz a4, .LBB50_33
-; RV32-NEXT: .LBB50_27: # %entry
-; RV32-NEXT: beqz a2, .LBB50_29
-; RV32-NEXT: .LBB50_28: # %entry
-; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB50_29: # %entry
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
-; RV32-NEXT: .LBB50_30: # %entry
-; RV32-NEXT: li a6, 0
-; RV32-NEXT: beqz a1, .LBB50_25
-; RV32-NEXT: .LBB50_31: # %entry
-; RV32-NEXT: mv a0, a6
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB50_26
-; RV32-NEXT: .LBB50_32: # %entry
-; RV32-NEXT: mv a0, a5
-; RV32-NEXT: mv a3, a1
-; RV32-NEXT: bnez a4, .LBB50_27
-; RV32-NEXT: .LBB50_33: # %entry
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: bnez a2, .LBB50_28
-; RV32-NEXT: j .LBB50_29
;
; RV64-LABEL: ustest_f32i64_mm:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixsfti@plt
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: li a4, 1
-; RV64-NEXT: mv a3, a1
-; RV64-NEXT: bgtz a1, .LBB50_6
+; RV64-NEXT: mv a2, a1
+; RV64-NEXT: blez a1, .LBB50_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: bne a1, a4, .LBB50_7
+; RV64-NEXT: li a2, 1
; RV64-NEXT: .LBB50_2: # %entry
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: blez a3, .LBB50_8
-; RV64-NEXT: .LBB50_3: # %entry
-; RV64-NEXT: beqz a3, .LBB50_5
+; RV64-NEXT: sgtz a3, a1
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: beqz a2, .LBB50_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: sgtz a1, a2
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: .LBB50_4: # %entry
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB50_5: # %entry
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
-; RV64-NEXT: .LBB50_6: # %entry
-; RV64-NEXT: li a2, 0
-; RV64-NEXT: li a3, 1
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: beq a1, a4, .LBB50_2
-; RV64-NEXT: .LBB50_7: # %entry
-; RV64-NEXT: mv a0, a2
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: bgtz a3, .LBB50_3
-; RV64-NEXT: .LBB50_8: # %entry
-; RV64-NEXT: li a1, 0
-; RV64-NEXT: bnez a3, .LBB50_4
-; RV64-NEXT: j .LBB50_5
entry:
%conv = fptosi float %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
; RV32-NEXT: call __extendhfsf2@plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a7, 8(sp)
-; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: lw a0, 16(sp)
-; RV32-NEXT: lui a4, 524288
-; RV32-NEXT: addi a1, a4, -1
-; RV32-NEXT: mv a3, a7
-; RV32-NEXT: bne a5, a1, .LBB51_17
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw t0, 8(sp)
+; RV32-NEXT: lw a4, 12(sp)
+; RV32-NEXT: lw a1, 16(sp)
+; RV32-NEXT: lui a3, 524288
+; RV32-NEXT: addi a6, a3, -1
+; RV32-NEXT: mv a2, t0
+; RV32-NEXT: beq a4, a6, .LBB51_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: or a6, a0, a2
-; RV32-NEXT: bnez a6, .LBB51_18
+; RV32-NEXT: sltu a2, a4, a6
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: or a2, a2, t0
; RV32-NEXT: .LBB51_2: # %entry
-; RV32-NEXT: mv a7, a5
-; RV32-NEXT: bgez a2, .LBB51_19
-; RV32-NEXT: .LBB51_3: # %entry
-; RV32-NEXT: bgeu a5, a1, .LBB51_20
+; RV32-NEXT: or a7, a1, a0
+; RV32-NEXT: slti a5, a0, 0
+; RV32-NEXT: bnez a7, .LBB51_16
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: mv t0, a4
+; RV32-NEXT: bgez a0, .LBB51_17
; RV32-NEXT: .LBB51_4: # %entry
-; RV32-NEXT: bnez a6, .LBB51_21
+; RV32-NEXT: bgeu a4, a6, .LBB51_18
; RV32-NEXT: .LBB51_5: # %entry
-; RV32-NEXT: li a6, 0
-; RV32-NEXT: bnez a2, .LBB51_22
+; RV32-NEXT: beqz a7, .LBB51_7
; RV32-NEXT: .LBB51_6: # %entry
-; RV32-NEXT: bgez a2, .LBB51_23
+; RV32-NEXT: mv a4, t0
; RV32-NEXT: .LBB51_7: # %entry
-; RV32-NEXT: mv a0, a5
-; RV32-NEXT: bltz a2, .LBB51_24
-; RV32-NEXT: .LBB51_8: # %entry
-; RV32-NEXT: mv a1, a5
-; RV32-NEXT: bltu a4, a5, .LBB51_10
+; RV32-NEXT: srai a6, a0, 31
+; RV32-NEXT: and a1, a6, a1
+; RV32-NEXT: seqz a6, a0
+; RV32-NEXT: neg a5, a5
+; RV32-NEXT: and a5, a5, a0
+; RV32-NEXT: addi a6, a6, -1
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: bgez a5, .LBB51_9
+; RV32-NEXT: # %bb.8: # %entry
+; RV32-NEXT: lui a0, 524288
; RV32-NEXT: .LBB51_9: # %entry
+; RV32-NEXT: and a6, a6, a1
+; RV32-NEXT: mv a1, a4
+; RV32-NEXT: bltu a3, a4, .LBB51_11
+; RV32-NEXT: # %bb.10: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB51_10: # %entry
-; RV32-NEXT: and a6, a6, a2
+; RV32-NEXT: .LBB51_11: # %entry
+; RV32-NEXT: and a6, a6, a5
; RV32-NEXT: li a7, -1
-; RV32-NEXT: bne a6, a7, .LBB51_25
-; RV32-NEXT: # %bb.11: # %entry
-; RV32-NEXT: mv t0, a3
-; RV32-NEXT: bgeu a4, a5, .LBB51_26
-; RV32-NEXT: .LBB51_12: # %entry
-; RV32-NEXT: mv a0, a3
-; RV32-NEXT: bne a5, a4, .LBB51_27
+; RV32-NEXT: bne a6, a7, .LBB51_19
+; RV32-NEXT: # %bb.12: # %entry
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: bne a4, a3, .LBB51_20
; RV32-NEXT: .LBB51_13: # %entry
-; RV32-NEXT: bltz a2, .LBB51_28
+; RV32-NEXT: beq a6, a7, .LBB51_15
; RV32-NEXT: .LBB51_14: # %entry
-; RV32-NEXT: beq a6, a7, .LBB51_16
+; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: .LBB51_15: # %entry
-; RV32-NEXT: mv a0, a3
-; RV32-NEXT: .LBB51_16: # %entry
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
+; RV32-NEXT: .LBB51_16: # %entry
+; RV32-NEXT: addi a2, a5, -1
+; RV32-NEXT: or a2, a2, t0
+; RV32-NEXT: mv t0, a4
+; RV32-NEXT: bltz a0, .LBB51_4
; RV32-NEXT: .LBB51_17: # %entry
-; RV32-NEXT: sltu a3, a5, a1
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: or a3, a3, a7
-; RV32-NEXT: or a6, a0, a2
-; RV32-NEXT: beqz a6, .LBB51_2
+; RV32-NEXT: mv t0, a6
+; RV32-NEXT: bltu a4, a6, .LBB51_5
; RV32-NEXT: .LBB51_18: # %entry
-; RV32-NEXT: slti a3, a2, 0
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: or a3, a3, a7
-; RV32-NEXT: mv a7, a5
-; RV32-NEXT: bltz a2, .LBB51_3
+; RV32-NEXT: mv a4, a6
+; RV32-NEXT: bnez a7, .LBB51_6
+; RV32-NEXT: j .LBB51_7
; RV32-NEXT: .LBB51_19: # %entry
-; RV32-NEXT: mv a7, a1
-; RV32-NEXT: bltu a5, a1, .LBB51_4
-; RV32-NEXT: .LBB51_20: # %entry
-; RV32-NEXT: mv a5, a1
-; RV32-NEXT: beqz a6, .LBB51_5
-; RV32-NEXT: .LBB51_21: # %entry
-; RV32-NEXT: mv a5, a7
-; RV32-NEXT: li a6, 0
-; RV32-NEXT: beqz a2, .LBB51_6
-; RV32-NEXT: .LBB51_22: # %entry
-; RV32-NEXT: srai a1, a2, 31
-; RV32-NEXT: and a6, a1, a0
-; RV32-NEXT: bltz a2, .LBB51_7
-; RV32-NEXT: .LBB51_23: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: mv a0, a5
-; RV32-NEXT: bgez a2, .LBB51_8
-; RV32-NEXT: .LBB51_24: # %entry
-; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: mv a1, a5
-; RV32-NEXT: bgeu a4, a5, .LBB51_9
-; RV32-NEXT: j .LBB51_10
-; RV32-NEXT: .LBB51_25: # %entry
; RV32-NEXT: mv a1, a0
-; RV32-NEXT: mv t0, a3
-; RV32-NEXT: bltu a4, a5, .LBB51_12
-; RV32-NEXT: .LBB51_26: # %entry
-; RV32-NEXT: li t0, 0
-; RV32-NEXT: mv a0, a3
-; RV32-NEXT: beq a5, a4, .LBB51_13
-; RV32-NEXT: .LBB51_27: # %entry
-; RV32-NEXT: mv a0, t0
-; RV32-NEXT: bgez a2, .LBB51_14
-; RV32-NEXT: .LBB51_28: # %entry
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: bne a6, a7, .LBB51_15
-; RV32-NEXT: j .LBB51_16
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: beq a4, a3, .LBB51_13
+; RV32-NEXT: .LBB51_20: # %entry
+; RV32-NEXT: sltu a0, a3, a4
+; RV32-NEXT: neg a0, a0
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: bne a6, a7, .LBB51_14
+; RV32-NEXT: j .LBB51_15
;
; RV64-LABEL: stest_f16i64_mm:
; RV64: # %bb.0: # %entry
; RV64-NEXT: li a2, -1
; RV64-NEXT: srli a4, a2, 1
; RV64-NEXT: mv a3, a0
-; RV64-NEXT: bgez a1, .LBB51_10
+; RV64-NEXT: bgez a1, .LBB51_9
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: bgeu a0, a4, .LBB51_11
+; RV64-NEXT: bgeu a0, a4, .LBB51_10
; RV64-NEXT: .LBB51_2: # %entry
-; RV64-NEXT: bnez a1, .LBB51_12
+; RV64-NEXT: beqz a1, .LBB51_4
; RV64-NEXT: .LBB51_3: # %entry
-; RV64-NEXT: bltz a1, .LBB51_5
+; RV64-NEXT: mv a0, a3
; RV64-NEXT: .LBB51_4: # %entry
-; RV64-NEXT: li a1, 0
-; RV64-NEXT: .LBB51_5: # %entry
+; RV64-NEXT: slti a3, a1, 0
+; RV64-NEXT: neg a3, a3
+; RV64-NEXT: and a1, a3, a1
; RV64-NEXT: slli a4, a2, 63
; RV64-NEXT: mv a3, a0
-; RV64-NEXT: bltz a1, .LBB51_13
-; RV64-NEXT: # %bb.6: # %entry
-; RV64-NEXT: bgeu a4, a0, .LBB51_14
+; RV64-NEXT: bltz a1, .LBB51_11
+; RV64-NEXT: # %bb.5: # %entry
+; RV64-NEXT: bgeu a4, a0, .LBB51_12
+; RV64-NEXT: .LBB51_6: # %entry
+; RV64-NEXT: beq a1, a2, .LBB51_8
; RV64-NEXT: .LBB51_7: # %entry
-; RV64-NEXT: beq a1, a2, .LBB51_9
-; RV64-NEXT: .LBB51_8: # %entry
; RV64-NEXT: mv a0, a3
-; RV64-NEXT: .LBB51_9: # %entry
+; RV64-NEXT: .LBB51_8: # %entry
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
-; RV64-NEXT: .LBB51_10: # %entry
+; RV64-NEXT: .LBB51_9: # %entry
; RV64-NEXT: mv a3, a4
; RV64-NEXT: bltu a0, a4, .LBB51_2
-; RV64-NEXT: .LBB51_11: # %entry
+; RV64-NEXT: .LBB51_10: # %entry
; RV64-NEXT: mv a0, a4
-; RV64-NEXT: beqz a1, .LBB51_3
-; RV64-NEXT: .LBB51_12: # %entry
-; RV64-NEXT: mv a0, a3
-; RV64-NEXT: bgez a1, .LBB51_4
-; RV64-NEXT: j .LBB51_5
-; RV64-NEXT: .LBB51_13: # %entry
+; RV64-NEXT: bnez a1, .LBB51_3
+; RV64-NEXT: j .LBB51_4
+; RV64-NEXT: .LBB51_11: # %entry
; RV64-NEXT: mv a3, a4
-; RV64-NEXT: bltu a4, a0, .LBB51_7
-; RV64-NEXT: .LBB51_14: # %entry
+; RV64-NEXT: bltu a4, a0, .LBB51_6
+; RV64-NEXT: .LBB51_12: # %entry
; RV64-NEXT: mv a0, a4
-; RV64-NEXT: bne a1, a2, .LBB51_8
-; RV64-NEXT: j .LBB51_9
+; RV64-NEXT: bne a1, a2, .LBB51_7
+; RV64-NEXT: j .LBB51_8
entry:
%conv = fptosi half %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
; RV32-NEXT: call __extendhfsf2@plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixunssfti@plt
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: beqz a0, .LBB52_3
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: beqz a2, .LBB52_4
-; RV32-NEXT: .LBB52_2:
-; RV32-NEXT: lw a4, 8(sp)
-; RV32-NEXT: j .LBB52_5
-; RV32-NEXT: .LBB52_3:
-; RV32-NEXT: seqz a2, a3
-; RV32-NEXT: bnez a2, .LBB52_2
-; RV32-NEXT: .LBB52_4: # %entry
-; RV32-NEXT: mv a4, a1
-; RV32-NEXT: .LBB52_5: # %entry
-; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a0
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: beqz a3, .LBB52_7
-; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: mv a0, a4
-; RV32-NEXT: .LBB52_7: # %entry
-; RV32-NEXT: bnez a2, .LBB52_9
-; RV32-NEXT: # %bb.8: # %entry
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bnez a3, .LBB52_10
-; RV32-NEXT: j .LBB52_11
-; RV32-NEXT: .LBB52_9:
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a1, 20(sp)
; RV32-NEXT: lw a2, 12(sp)
-; RV32-NEXT: beqz a3, .LBB52_11
-; RV32-NEXT: .LBB52_10: # %entry
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB52_11: # %entry
+; RV32-NEXT: lw a3, 8(sp)
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
+; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: addi a4, a4, -1
+; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a1, a0, -1
+; RV32-NEXT: and a0, a1, a3
+; RV32-NEXT: and a2, a4, a2
+; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV64-NEXT: fmv.x.w a0, fa0
; RV64-NEXT: call __extendhfsf2@plt
; RV64-NEXT: call __fixunssfti@plt
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: beqz a1, .LBB52_2
-; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: .LBB52_2: # %entry
-; RV64-NEXT: li a3, 1
-; RV64-NEXT: beq a1, a3, .LBB52_4
-; RV64-NEXT: # %bb.3: # %entry
-; RV64-NEXT: mv a0, a2
-; RV64-NEXT: .LBB52_4: # %entry
+; RV64-NEXT: snez a2, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: call __extendhfsf2@plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB53_3
+; RV32-NEXT: lw a1, 16(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: li a3, 1
+; RV32-NEXT: mv a6, a1
+; RV32-NEXT: bltz a0, .LBB53_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a0, a2, 0
-; RV32-NEXT: beqz a0, .LBB53_4
-; RV32-NEXT: .LBB53_2:
-; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: j .LBB53_5
-; RV32-NEXT: .LBB53_3:
-; RV32-NEXT: seqz a0, a3
-; RV32-NEXT: bnez a0, .LBB53_2
+; RV32-NEXT: li a6, 1
+; RV32-NEXT: .LBB53_2: # %entry
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: bltu a1, a3, .LBB53_4
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: li a2, 1
; RV32-NEXT: .LBB53_4: # %entry
-; RV32-NEXT: li a5, 0
-; RV32-NEXT: .LBB53_5: # %entry
-; RV32-NEXT: xori a1, a3, 1
-; RV32-NEXT: or a4, a1, a2
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: beqz a4, .LBB53_7
-; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: lw a5, 12(sp)
+; RV32-NEXT: lw a3, 8(sp)
+; RV32-NEXT: slti a4, a0, 0
+; RV32-NEXT: beqz a0, .LBB53_6
+; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: mv a2, a6
+; RV32-NEXT: mv a6, a4
+; RV32-NEXT: j .LBB53_7
+; RV32-NEXT: .LBB53_6:
+; RV32-NEXT: seqz a6, a1
; RV32-NEXT: .LBB53_7: # %entry
-; RV32-NEXT: bnez a0, .LBB53_9
+; RV32-NEXT: seqz a6, a6
+; RV32-NEXT: addi a6, a6, -1
+; RV32-NEXT: and a3, a6, a3
+; RV32-NEXT: xori a1, a1, 1
+; RV32-NEXT: or a1, a1, a0
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a3, a1, a3
+; RV32-NEXT: and a5, a6, a5
+; RV32-NEXT: and a1, a1, a5
+; RV32-NEXT: neg a4, a4
+; RV32-NEXT: and a4, a4, a0
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: beqz a1, .LBB53_9
; RV32-NEXT: # %bb.8: # %entry
-; RV32-NEXT: li a5, 0
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: bnez a4, .LBB53_10
-; RV32-NEXT: j .LBB53_11
-; RV32-NEXT: .LBB53_9:
-; RV32-NEXT: lw a5, 8(sp)
-; RV32-NEXT: li a0, 0
+; RV32-NEXT: seqz a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: .LBB53_9: # %entry
; RV32-NEXT: beqz a4, .LBB53_11
-; RV32-NEXT: .LBB53_10: # %entry
-; RV32-NEXT: mv a0, a5
-; RV32-NEXT: .LBB53_11: # %entry
-; RV32-NEXT: li a5, 1
-; RV32-NEXT: mv a4, a3
-; RV32-NEXT: bgez a2, .LBB53_17
-; RV32-NEXT: # %bb.12: # %entry
-; RV32-NEXT: bgeu a3, a5, .LBB53_18
+; RV32-NEXT: # %bb.10: # %entry
+; RV32-NEXT: sgtz a5, a4
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: bnez a2, .LBB53_12
+; RV32-NEXT: j .LBB53_13
+; RV32-NEXT: .LBB53_11:
+; RV32-NEXT: snez a5, a2
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: beqz a2, .LBB53_13
+; RV32-NEXT: .LBB53_12: # %entry
+; RV32-NEXT: seqz a0, a5
+; RV32-NEXT: addi a2, a0, -1
+; RV32-NEXT: and a0, a2, a3
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: .LBB53_13: # %entry
-; RV32-NEXT: bnez a2, .LBB53_19
-; RV32-NEXT: .LBB53_14: # %entry
-; RV32-NEXT: bgez a2, .LBB53_20
-; RV32-NEXT: .LBB53_15: # %entry
-; RV32-NEXT: beqz a2, .LBB53_21
-; RV32-NEXT: .LBB53_16: # %entry
-; RV32-NEXT: sgtz a4, a2
-; RV32-NEXT: mv a5, a0
-; RV32-NEXT: beqz a4, .LBB53_22
-; RV32-NEXT: j .LBB53_23
-; RV32-NEXT: .LBB53_17: # %entry
-; RV32-NEXT: li a4, 1
-; RV32-NEXT: bltu a3, a5, .LBB53_13
-; RV32-NEXT: .LBB53_18: # %entry
-; RV32-NEXT: li a3, 1
-; RV32-NEXT: beqz a2, .LBB53_14
-; RV32-NEXT: .LBB53_19: # %entry
-; RV32-NEXT: mv a3, a4
-; RV32-NEXT: bltz a2, .LBB53_15
-; RV32-NEXT: .LBB53_20: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bnez a2, .LBB53_16
-; RV32-NEXT: .LBB53_21:
-; RV32-NEXT: snez a4, a3
-; RV32-NEXT: mv a5, a0
-; RV32-NEXT: bnez a4, .LBB53_23
-; RV32-NEXT: .LBB53_22: # %entry
-; RV32-NEXT: li a5, 0
-; RV32-NEXT: .LBB53_23: # %entry
-; RV32-NEXT: mv a6, a0
-; RV32-NEXT: beqz a1, .LBB53_30
-; RV32-NEXT: # %bb.24: # %entry
-; RV32-NEXT: bnez a1, .LBB53_31
-; RV32-NEXT: .LBB53_25: # %entry
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: bnez a2, .LBB53_32
-; RV32-NEXT: .LBB53_26: # %entry
-; RV32-NEXT: mv a3, a1
-; RV32-NEXT: beqz a4, .LBB53_33
-; RV32-NEXT: .LBB53_27: # %entry
-; RV32-NEXT: beqz a2, .LBB53_29
-; RV32-NEXT: .LBB53_28: # %entry
-; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB53_29: # %entry
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
-; RV32-NEXT: .LBB53_30: # %entry
-; RV32-NEXT: li a6, 0
-; RV32-NEXT: beqz a1, .LBB53_25
-; RV32-NEXT: .LBB53_31: # %entry
-; RV32-NEXT: mv a0, a6
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB53_26
-; RV32-NEXT: .LBB53_32: # %entry
-; RV32-NEXT: mv a0, a5
-; RV32-NEXT: mv a3, a1
-; RV32-NEXT: bnez a4, .LBB53_27
-; RV32-NEXT: .LBB53_33: # %entry
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: bnez a2, .LBB53_28
-; RV32-NEXT: j .LBB53_29
;
; RV64-LABEL: ustest_f16i64_mm:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.x.w a0, fa0
; RV64-NEXT: call __extendhfsf2@plt
; RV64-NEXT: call __fixsfti@plt
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: li a4, 1
-; RV64-NEXT: mv a3, a1
-; RV64-NEXT: bgtz a1, .LBB53_6
+; RV64-NEXT: mv a2, a1
+; RV64-NEXT: blez a1, .LBB53_2
; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: bne a1, a4, .LBB53_7
+; RV64-NEXT: li a2, 1
; RV64-NEXT: .LBB53_2: # %entry
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: blez a3, .LBB53_8
-; RV64-NEXT: .LBB53_3: # %entry
-; RV64-NEXT: beqz a3, .LBB53_5
+; RV64-NEXT: sgtz a3, a1
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: beqz a2, .LBB53_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: sgtz a1, a2
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: .LBB53_4: # %entry
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB53_5: # %entry
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
-; RV64-NEXT: .LBB53_6: # %entry
-; RV64-NEXT: li a2, 0
-; RV64-NEXT: li a3, 1
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: beq a1, a4, .LBB53_2
-; RV64-NEXT: .LBB53_7: # %entry
-; RV64-NEXT: mv a0, a2
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: bgtz a3, .LBB53_3
-; RV64-NEXT: .LBB53_8: # %entry
-; RV64-NEXT: li a1, 0
-; RV64-NEXT: bnez a3, .LBB53_4
-; RV64-NEXT: j .LBB53_5
entry:
%conv = fptosi half %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
; CHECK-NOV-NEXT: li a0, -1
; CHECK-NOV-NEXT: srli a2, a0, 32
; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz
-; CHECK-NOV-NEXT: bge a1, a2, .LBB2_5
+; CHECK-NOV-NEXT: blt a1, a2, .LBB2_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: bge a0, a2, .LBB2_6
-; CHECK-NOV-NEXT: .LBB2_2: # %entry
-; CHECK-NOV-NEXT: blez a0, .LBB2_7
-; CHECK-NOV-NEXT: .LBB2_3: # %entry
-; CHECK-NOV-NEXT: blez a1, .LBB2_8
-; CHECK-NOV-NEXT: .LBB2_4: # %entry
-; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB2_5: # %entry
; CHECK-NOV-NEXT: mv a1, a2
-; CHECK-NOV-NEXT: blt a0, a2, .LBB2_2
-; CHECK-NOV-NEXT: .LBB2_6: # %entry
+; CHECK-NOV-NEXT: .LBB2_2: # %entry
+; CHECK-NOV-NEXT: blt a0, a2, .LBB2_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: bgtz a0, .LBB2_3
-; CHECK-NOV-NEXT: .LBB2_7: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: bgtz a1, .LBB2_4
-; CHECK-NOV-NEXT: .LBB2_8: # %entry
-; CHECK-NOV-NEXT: li a1, 0
+; CHECK-NOV-NEXT: .LBB2_4: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a0
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: sgtz a2, a1
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ret
;
; CHECK-V-LABEL: ustest_f64i32:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz
; CHECK-NOV-NEXT: li a2, -1
-; CHECK-NOV-NEXT: srli a5, a2, 32
+; CHECK-NOV-NEXT: srli a4, a2, 32
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
-; CHECK-NOV-NEXT: bge a1, a5, .LBB5_10
+; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
-; CHECK-NOV-NEXT: bge a2, a5, .LBB5_11
+; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7
; CHECK-NOV-NEXT: .LBB5_2: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a4, fa0, rtz
-; CHECK-NOV-NEXT: bge a3, a5, .LBB5_12
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8
; CHECK-NOV-NEXT: .LBB5_3: # %entry
-; CHECK-NOV-NEXT: bge a4, a5, .LBB5_13
+; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5
; CHECK-NOV-NEXT: .LBB5_4: # %entry
-; CHECK-NOV-NEXT: blez a4, .LBB5_14
+; CHECK-NOV-NEXT: mv a5, a4
; CHECK-NOV-NEXT: .LBB5_5: # %entry
-; CHECK-NOV-NEXT: blez a3, .LBB5_15
-; CHECK-NOV-NEXT: .LBB5_6: # %entry
-; CHECK-NOV-NEXT: blez a2, .LBB5_16
-; CHECK-NOV-NEXT: .LBB5_7: # %entry
-; CHECK-NOV-NEXT: bgtz a1, .LBB5_9
-; CHECK-NOV-NEXT: .LBB5_8: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: .LBB5_9: # %entry
+; CHECK-NOV-NEXT: sgtz a4, a5
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a4, a4, a5
+; CHECK-NOV-NEXT: sgtz a5, a3
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a3, a5, a3
+; CHECK-NOV-NEXT: sgtz a5, a2
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a2, a5, a2
+; CHECK-NOV-NEXT: sgtz a5, a1
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: sw a1, 12(a0)
; CHECK-NOV-NEXT: sw a2, 8(a0)
; CHECK-NOV-NEXT: sw a3, 4(a0)
; CHECK-NOV-NEXT: sw a4, 0(a0)
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB5_10: # %entry
-; CHECK-NOV-NEXT: mv a1, a5
+; CHECK-NOV-NEXT: .LBB5_6: # %entry
+; CHECK-NOV-NEXT: mv a1, a4
; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a5, .LBB5_2
-; CHECK-NOV-NEXT: .LBB5_11: # %entry
-; CHECK-NOV-NEXT: mv a2, a5
-; CHECK-NOV-NEXT: fcvt.l.s a4, fa0, rtz
-; CHECK-NOV-NEXT: blt a3, a5, .LBB5_3
-; CHECK-NOV-NEXT: .LBB5_12: # %entry
-; CHECK-NOV-NEXT: mv a3, a5
-; CHECK-NOV-NEXT: blt a4, a5, .LBB5_4
-; CHECK-NOV-NEXT: .LBB5_13: # %entry
-; CHECK-NOV-NEXT: mv a4, a5
-; CHECK-NOV-NEXT: bgtz a4, .LBB5_5
-; CHECK-NOV-NEXT: .LBB5_14: # %entry
-; CHECK-NOV-NEXT: li a4, 0
-; CHECK-NOV-NEXT: bgtz a3, .LBB5_6
-; CHECK-NOV-NEXT: .LBB5_15: # %entry
-; CHECK-NOV-NEXT: li a3, 0
-; CHECK-NOV-NEXT: bgtz a2, .LBB5_7
-; CHECK-NOV-NEXT: .LBB5_16: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: blez a1, .LBB5_8
-; CHECK-NOV-NEXT: j .LBB5_9
+; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2
+; CHECK-NOV-NEXT: .LBB5_7: # %entry
+; CHECK-NOV-NEXT: mv a2, a4
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3
+; CHECK-NOV-NEXT: .LBB5_8: # %entry
+; CHECK-NOV-NEXT: mv a3, a4
+; CHECK-NOV-NEXT: bge a5, a4, .LBB5_4
+; CHECK-NOV-NEXT: j .LBB5_5
;
; CHECK-V-LABEL: ustest_f32i32:
; CHECK-V: # %bb.0: # %entry
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: li a1, -1
-; CHECK-NOV-NEXT: srli a3, a1, 32
-; CHECK-NOV-NEXT: bge a0, a3, .LBB8_10
+; CHECK-NOV-NEXT: srli a2, a1, 32
+; CHECK-NOV-NEXT: bge a0, a2, .LBB8_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: bge s2, a3, .LBB8_11
+; CHECK-NOV-NEXT: bge s2, a2, .LBB8_7
; CHECK-NOV-NEXT: .LBB8_2: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz
-; CHECK-NOV-NEXT: bge a1, a3, .LBB8_12
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
+; CHECK-NOV-NEXT: bge a1, a2, .LBB8_8
; CHECK-NOV-NEXT: .LBB8_3: # %entry
-; CHECK-NOV-NEXT: bge a2, a3, .LBB8_13
+; CHECK-NOV-NEXT: blt a3, a2, .LBB8_5
; CHECK-NOV-NEXT: .LBB8_4: # %entry
-; CHECK-NOV-NEXT: blez a2, .LBB8_14
+; CHECK-NOV-NEXT: mv a3, a2
; CHECK-NOV-NEXT: .LBB8_5: # %entry
-; CHECK-NOV-NEXT: blez a1, .LBB8_15
-; CHECK-NOV-NEXT: .LBB8_6: # %entry
-; CHECK-NOV-NEXT: blez s2, .LBB8_16
-; CHECK-NOV-NEXT: .LBB8_7: # %entry
-; CHECK-NOV-NEXT: bgtz a0, .LBB8_9
-; CHECK-NOV-NEXT: .LBB8_8: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB8_9: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a3
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a2, a2, a3
+; CHECK-NOV-NEXT: sgtz a3, a1
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a1, a3, a1
+; CHECK-NOV-NEXT: sgtz a3, s2
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a3, a3, s2
+; CHECK-NOV-NEXT: sgtz a4, a0
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: sw a0, 12(s0)
-; CHECK-NOV-NEXT: sw s2, 8(s0)
+; CHECK-NOV-NEXT: sw a3, 8(s0)
; CHECK-NOV-NEXT: sw a1, 4(s0)
; CHECK-NOV-NEXT: sw a2, 0(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 64
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB8_10: # %entry
-; CHECK-NOV-NEXT: mv a0, a3
+; CHECK-NOV-NEXT: .LBB8_6: # %entry
+; CHECK-NOV-NEXT: mv a0, a2
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: blt s2, a3, .LBB8_2
-; CHECK-NOV-NEXT: .LBB8_11: # %entry
-; CHECK-NOV-NEXT: mv s2, a3
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz
-; CHECK-NOV-NEXT: blt a1, a3, .LBB8_3
-; CHECK-NOV-NEXT: .LBB8_12: # %entry
-; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: blt a2, a3, .LBB8_4
-; CHECK-NOV-NEXT: .LBB8_13: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: bgtz a2, .LBB8_5
-; CHECK-NOV-NEXT: .LBB8_14: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: bgtz a1, .LBB8_6
-; CHECK-NOV-NEXT: .LBB8_15: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bgtz s2, .LBB8_7
-; CHECK-NOV-NEXT: .LBB8_16: # %entry
-; CHECK-NOV-NEXT: li s2, 0
-; CHECK-NOV-NEXT: blez a0, .LBB8_8
-; CHECK-NOV-NEXT: j .LBB8_9
+; CHECK-NOV-NEXT: blt s2, a2, .LBB8_2
+; CHECK-NOV-NEXT: .LBB8_7: # %entry
+; CHECK-NOV-NEXT: mv s2, a2
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
+; CHECK-NOV-NEXT: blt a1, a2, .LBB8_3
+; CHECK-NOV-NEXT: .LBB8_8: # %entry
+; CHECK-NOV-NEXT: mv a1, a2
+; CHECK-NOV-NEXT: bge a3, a2, .LBB8_4
+; CHECK-NOV-NEXT: j .LBB8_5
;
; CHECK-V-LABEL: ustest_f16i32:
; CHECK-V: # %bb.0: # %entry
; CHECK-NOV-NEXT: lui a0, 16
; CHECK-NOV-NEXT: addiw a2, a0, -1
; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz
-; CHECK-NOV-NEXT: bge a1, a2, .LBB11_5
+; CHECK-NOV-NEXT: blt a1, a2, .LBB11_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: bge a0, a2, .LBB11_6
-; CHECK-NOV-NEXT: .LBB11_2: # %entry
-; CHECK-NOV-NEXT: blez a0, .LBB11_7
-; CHECK-NOV-NEXT: .LBB11_3: # %entry
-; CHECK-NOV-NEXT: blez a1, .LBB11_8
-; CHECK-NOV-NEXT: .LBB11_4: # %entry
-; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB11_5: # %entry
; CHECK-NOV-NEXT: mv a1, a2
-; CHECK-NOV-NEXT: blt a0, a2, .LBB11_2
-; CHECK-NOV-NEXT: .LBB11_6: # %entry
+; CHECK-NOV-NEXT: .LBB11_2: # %entry
+; CHECK-NOV-NEXT: blt a0, a2, .LBB11_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: bgtz a0, .LBB11_3
-; CHECK-NOV-NEXT: .LBB11_7: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: bgtz a1, .LBB11_4
-; CHECK-NOV-NEXT: .LBB11_8: # %entry
-; CHECK-NOV-NEXT: li a1, 0
+; CHECK-NOV-NEXT: .LBB11_4: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a0
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: sgtz a2, a1
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ret
;
; CHECK-V-LABEL: ustest_f64i16:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz
; CHECK-NOV-NEXT: lui a2, 16
-; CHECK-NOV-NEXT: addiw a5, a2, -1
+; CHECK-NOV-NEXT: addiw a4, a2, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
-; CHECK-NOV-NEXT: bge a1, a5, .LBB14_10
+; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
-; CHECK-NOV-NEXT: bge a2, a5, .LBB14_11
+; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7
; CHECK-NOV-NEXT: .LBB14_2: # %entry
-; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
-; CHECK-NOV-NEXT: bge a3, a5, .LBB14_12
+; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
+; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8
; CHECK-NOV-NEXT: .LBB14_3: # %entry
-; CHECK-NOV-NEXT: bge a4, a5, .LBB14_13
+; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5
; CHECK-NOV-NEXT: .LBB14_4: # %entry
-; CHECK-NOV-NEXT: blez a4, .LBB14_14
+; CHECK-NOV-NEXT: mv a5, a4
; CHECK-NOV-NEXT: .LBB14_5: # %entry
-; CHECK-NOV-NEXT: blez a3, .LBB14_15
-; CHECK-NOV-NEXT: .LBB14_6: # %entry
-; CHECK-NOV-NEXT: blez a2, .LBB14_16
-; CHECK-NOV-NEXT: .LBB14_7: # %entry
-; CHECK-NOV-NEXT: bgtz a1, .LBB14_9
-; CHECK-NOV-NEXT: .LBB14_8: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: .LBB14_9: # %entry
+; CHECK-NOV-NEXT: sgtz a4, a5
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a4, a4, a5
+; CHECK-NOV-NEXT: sgtz a5, a3
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a3, a5, a3
+; CHECK-NOV-NEXT: sgtz a5, a2
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a2, a5, a2
+; CHECK-NOV-NEXT: sgtz a5, a1
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: sh a1, 6(a0)
; CHECK-NOV-NEXT: sh a2, 4(a0)
; CHECK-NOV-NEXT: sh a3, 2(a0)
; CHECK-NOV-NEXT: sh a4, 0(a0)
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB14_10: # %entry
-; CHECK-NOV-NEXT: mv a1, a5
+; CHECK-NOV-NEXT: .LBB14_6: # %entry
+; CHECK-NOV-NEXT: mv a1, a4
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a5, .LBB14_2
-; CHECK-NOV-NEXT: .LBB14_11: # %entry
-; CHECK-NOV-NEXT: mv a2, a5
-; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
-; CHECK-NOV-NEXT: blt a3, a5, .LBB14_3
-; CHECK-NOV-NEXT: .LBB14_12: # %entry
-; CHECK-NOV-NEXT: mv a3, a5
-; CHECK-NOV-NEXT: blt a4, a5, .LBB14_4
-; CHECK-NOV-NEXT: .LBB14_13: # %entry
-; CHECK-NOV-NEXT: mv a4, a5
-; CHECK-NOV-NEXT: bgtz a4, .LBB14_5
-; CHECK-NOV-NEXT: .LBB14_14: # %entry
-; CHECK-NOV-NEXT: li a4, 0
-; CHECK-NOV-NEXT: bgtz a3, .LBB14_6
-; CHECK-NOV-NEXT: .LBB14_15: # %entry
-; CHECK-NOV-NEXT: li a3, 0
-; CHECK-NOV-NEXT: bgtz a2, .LBB14_7
-; CHECK-NOV-NEXT: .LBB14_16: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: blez a1, .LBB14_8
-; CHECK-NOV-NEXT: j .LBB14_9
+; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2
+; CHECK-NOV-NEXT: .LBB14_7: # %entry
+; CHECK-NOV-NEXT: mv a2, a4
+; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3
+; CHECK-NOV-NEXT: .LBB14_8: # %entry
+; CHECK-NOV-NEXT: mv a3, a4
+; CHECK-NOV-NEXT: bge a5, a4, .LBB14_4
+; CHECK-NOV-NEXT: j .LBB14_5
;
; CHECK-V-LABEL: ustest_f32i16:
; CHECK-V: # %bb.0: # %entry
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: lui a1, 16
-; CHECK-NOV-NEXT: addiw a7, a1, -1
-; CHECK-NOV-NEXT: bge a0, a7, .LBB17_18
+; CHECK-NOV-NEXT: addiw a3, a1, -1
+; CHECK-NOV-NEXT: bge a0, a3, .LBB17_10
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: bge s2, a7, .LBB17_19
+; CHECK-NOV-NEXT: bge s2, a3, .LBB17_11
; CHECK-NOV-NEXT: .LBB17_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
-; CHECK-NOV-NEXT: bge a1, a7, .LBB17_20
+; CHECK-NOV-NEXT: bge a1, a3, .LBB17_12
; CHECK-NOV-NEXT: .LBB17_3: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
-; CHECK-NOV-NEXT: bge a2, a7, .LBB17_21
+; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz
+; CHECK-NOV-NEXT: bge a2, a3, .LBB17_13
; CHECK-NOV-NEXT: .LBB17_4: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: bge a3, a7, .LBB17_22
+; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz
+; CHECK-NOV-NEXT: bge a4, a3, .LBB17_14
; CHECK-NOV-NEXT: .LBB17_5: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
-; CHECK-NOV-NEXT: bge a4, a7, .LBB17_23
+; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz
+; CHECK-NOV-NEXT: bge a5, a3, .LBB17_15
; CHECK-NOV-NEXT: .LBB17_6: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
-; CHECK-NOV-NEXT: bge a5, a7, .LBB17_24
+; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
+; CHECK-NOV-NEXT: bge a6, a3, .LBB17_16
; CHECK-NOV-NEXT: .LBB17_7: # %entry
-; CHECK-NOV-NEXT: bge a6, a7, .LBB17_25
+; CHECK-NOV-NEXT: blt a7, a3, .LBB17_9
; CHECK-NOV-NEXT: .LBB17_8: # %entry
-; CHECK-NOV-NEXT: blez a6, .LBB17_26
+; CHECK-NOV-NEXT: mv a7, a3
; CHECK-NOV-NEXT: .LBB17_9: # %entry
-; CHECK-NOV-NEXT: blez a5, .LBB17_27
-; CHECK-NOV-NEXT: .LBB17_10: # %entry
-; CHECK-NOV-NEXT: blez a4, .LBB17_28
-; CHECK-NOV-NEXT: .LBB17_11: # %entry
-; CHECK-NOV-NEXT: blez a3, .LBB17_29
-; CHECK-NOV-NEXT: .LBB17_12: # %entry
-; CHECK-NOV-NEXT: blez a2, .LBB17_30
-; CHECK-NOV-NEXT: .LBB17_13: # %entry
-; CHECK-NOV-NEXT: blez a1, .LBB17_31
-; CHECK-NOV-NEXT: .LBB17_14: # %entry
-; CHECK-NOV-NEXT: blez s2, .LBB17_32
-; CHECK-NOV-NEXT: .LBB17_15: # %entry
-; CHECK-NOV-NEXT: bgtz a0, .LBB17_17
-; CHECK-NOV-NEXT: .LBB17_16: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB17_17: # %entry
+; CHECK-NOV-NEXT: sgtz a3, a7
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a3, a3, a7
+; CHECK-NOV-NEXT: sgtz a7, a6
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a6, a7, a6
+; CHECK-NOV-NEXT: sgtz a7, a5
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a5, a7, a5
+; CHECK-NOV-NEXT: sgtz a7, a4
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a4, a7, a4
+; CHECK-NOV-NEXT: sgtz a7, a2
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a2, a7, a2
+; CHECK-NOV-NEXT: sgtz a7, a1
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a1, a7, a1
+; CHECK-NOV-NEXT: sgtz a7, s2
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a7, a7, s2
+; CHECK-NOV-NEXT: sgtz t0, a0
+; CHECK-NOV-NEXT: neg t0, t0
+; CHECK-NOV-NEXT: and a0, t0, a0
; CHECK-NOV-NEXT: sh a0, 14(s0)
-; CHECK-NOV-NEXT: sh s2, 12(s0)
+; CHECK-NOV-NEXT: sh a7, 12(s0)
; CHECK-NOV-NEXT: sh a1, 10(s0)
; CHECK-NOV-NEXT: sh a2, 8(s0)
-; CHECK-NOV-NEXT: sh a3, 6(s0)
-; CHECK-NOV-NEXT: sh a4, 4(s0)
-; CHECK-NOV-NEXT: sh a5, 2(s0)
-; CHECK-NOV-NEXT: sh a6, 0(s0)
+; CHECK-NOV-NEXT: sh a4, 6(s0)
+; CHECK-NOV-NEXT: sh a5, 4(s0)
+; CHECK-NOV-NEXT: sh a6, 2(s0)
+; CHECK-NOV-NEXT: sh a3, 0(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 128
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB17_18: # %entry
-; CHECK-NOV-NEXT: mv a0, a7
+; CHECK-NOV-NEXT: .LBB17_10: # %entry
+; CHECK-NOV-NEXT: mv a0, a3
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s2, a7, .LBB17_2
-; CHECK-NOV-NEXT: .LBB17_19: # %entry
-; CHECK-NOV-NEXT: mv s2, a7
+; CHECK-NOV-NEXT: blt s2, a3, .LBB17_2
+; CHECK-NOV-NEXT: .LBB17_11: # %entry
+; CHECK-NOV-NEXT: mv s2, a3
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
-; CHECK-NOV-NEXT: blt a1, a7, .LBB17_3
-; CHECK-NOV-NEXT: .LBB17_20: # %entry
-; CHECK-NOV-NEXT: mv a1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
-; CHECK-NOV-NEXT: blt a2, a7, .LBB17_4
-; CHECK-NOV-NEXT: .LBB17_21: # %entry
-; CHECK-NOV-NEXT: mv a2, a7
-; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: blt a3, a7, .LBB17_5
-; CHECK-NOV-NEXT: .LBB17_22: # %entry
-; CHECK-NOV-NEXT: mv a3, a7
-; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
-; CHECK-NOV-NEXT: blt a4, a7, .LBB17_6
-; CHECK-NOV-NEXT: .LBB17_23: # %entry
-; CHECK-NOV-NEXT: mv a4, a7
-; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
-; CHECK-NOV-NEXT: blt a5, a7, .LBB17_7
-; CHECK-NOV-NEXT: .LBB17_24: # %entry
-; CHECK-NOV-NEXT: mv a5, a7
-; CHECK-NOV-NEXT: blt a6, a7, .LBB17_8
-; CHECK-NOV-NEXT: .LBB17_25: # %entry
-; CHECK-NOV-NEXT: mv a6, a7
-; CHECK-NOV-NEXT: bgtz a6, .LBB17_9
-; CHECK-NOV-NEXT: .LBB17_26: # %entry
-; CHECK-NOV-NEXT: li a6, 0
-; CHECK-NOV-NEXT: bgtz a5, .LBB17_10
-; CHECK-NOV-NEXT: .LBB17_27: # %entry
-; CHECK-NOV-NEXT: li a5, 0
-; CHECK-NOV-NEXT: bgtz a4, .LBB17_11
-; CHECK-NOV-NEXT: .LBB17_28: # %entry
-; CHECK-NOV-NEXT: li a4, 0
-; CHECK-NOV-NEXT: bgtz a3, .LBB17_12
-; CHECK-NOV-NEXT: .LBB17_29: # %entry
-; CHECK-NOV-NEXT: li a3, 0
-; CHECK-NOV-NEXT: bgtz a2, .LBB17_13
-; CHECK-NOV-NEXT: .LBB17_30: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: bgtz a1, .LBB17_14
-; CHECK-NOV-NEXT: .LBB17_31: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bgtz s2, .LBB17_15
-; CHECK-NOV-NEXT: .LBB17_32: # %entry
-; CHECK-NOV-NEXT: li s2, 0
-; CHECK-NOV-NEXT: blez a0, .LBB17_16
-; CHECK-NOV-NEXT: j .LBB17_17
+; CHECK-NOV-NEXT: blt a1, a3, .LBB17_3
+; CHECK-NOV-NEXT: .LBB17_12: # %entry
+; CHECK-NOV-NEXT: mv a1, a3
+; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz
+; CHECK-NOV-NEXT: blt a2, a3, .LBB17_4
+; CHECK-NOV-NEXT: .LBB17_13: # %entry
+; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz
+; CHECK-NOV-NEXT: blt a4, a3, .LBB17_5
+; CHECK-NOV-NEXT: .LBB17_14: # %entry
+; CHECK-NOV-NEXT: mv a4, a3
+; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz
+; CHECK-NOV-NEXT: blt a5, a3, .LBB17_6
+; CHECK-NOV-NEXT: .LBB17_15: # %entry
+; CHECK-NOV-NEXT: mv a5, a3
+; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
+; CHECK-NOV-NEXT: blt a6, a3, .LBB17_7
+; CHECK-NOV-NEXT: .LBB17_16: # %entry
+; CHECK-NOV-NEXT: mv a6, a3
+; CHECK-NOV-NEXT: bge a7, a3, .LBB17_8
+; CHECK-NOV-NEXT: j .LBB17_9
;
; CHECK-V-LABEL: ustest_f16i16:
; CHECK-V: # %bb.0: # %entry
; CHECK-NOV-NEXT: bnez s1, .LBB18_4
; CHECK-NOV-NEXT: .LBB18_2:
; CHECK-NOV-NEXT: sltu a5, s0, a3
+; CHECK-NOV-NEXT: seqz a6, a5
; CHECK-NOV-NEXT: beqz a5, .LBB18_5
; CHECK-NOV-NEXT: j .LBB18_6
; CHECK-NOV-NEXT: .LBB18_3:
; CHECK-NOV-NEXT: beqz s1, .LBB18_2
; CHECK-NOV-NEXT: .LBB18_4: # %entry
; CHECK-NOV-NEXT: slti a5, s1, 0
+; CHECK-NOV-NEXT: seqz a6, a5
; CHECK-NOV-NEXT: bnez a5, .LBB18_6
; CHECK-NOV-NEXT: .LBB18_5: # %entry
-; CHECK-NOV-NEXT: li s1, 0
; CHECK-NOV-NEXT: mv s0, a3
; CHECK-NOV-NEXT: .LBB18_6: # %entry
-; CHECK-NOV-NEXT: beqz a4, .LBB18_10
+; CHECK-NOV-NEXT: addi a6, a6, -1
+; CHECK-NOV-NEXT: seqz a5, a4
+; CHECK-NOV-NEXT: addi a5, a5, -1
+; CHECK-NOV-NEXT: and a5, a5, a1
+; CHECK-NOV-NEXT: bnez a4, .LBB18_8
; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: slli a3, a0, 63
-; CHECK-NOV-NEXT: beq a1, a0, .LBB18_11
+; CHECK-NOV-NEXT: mv a2, a3
; CHECK-NOV-NEXT: .LBB18_8: # %entry
-; CHECK-NOV-NEXT: slti a1, a1, 0
-; CHECK-NOV-NEXT: xori a1, a1, 1
-; CHECK-NOV-NEXT: bne s1, a0, .LBB18_12
-; CHECK-NOV-NEXT: .LBB18_9:
-; CHECK-NOV-NEXT: sltu a0, a3, s0
+; CHECK-NOV-NEXT: and a4, a6, s1
+; CHECK-NOV-NEXT: slli a1, a0, 63
+; CHECK-NOV-NEXT: beq a5, a0, .LBB18_11
+; CHECK-NOV-NEXT: # %bb.9: # %entry
+; CHECK-NOV-NEXT: slti a3, a5, 0
+; CHECK-NOV-NEXT: xori a3, a3, 1
+; CHECK-NOV-NEXT: bne a4, a0, .LBB18_12
+; CHECK-NOV-NEXT: .LBB18_10:
+; CHECK-NOV-NEXT: sltu a0, a1, s0
; CHECK-NOV-NEXT: beqz a0, .LBB18_13
; CHECK-NOV-NEXT: j .LBB18_14
-; CHECK-NOV-NEXT: .LBB18_10: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: slli a3, a0, 63
-; CHECK-NOV-NEXT: bne a1, a0, .LBB18_8
; CHECK-NOV-NEXT: .LBB18_11:
-; CHECK-NOV-NEXT: sltu a1, a3, a2
-; CHECK-NOV-NEXT: beq s1, a0, .LBB18_9
+; CHECK-NOV-NEXT: sltu a3, a1, a2
+; CHECK-NOV-NEXT: beq a4, a0, .LBB18_10
; CHECK-NOV-NEXT: .LBB18_12: # %entry
-; CHECK-NOV-NEXT: slti a0, s1, 0
+; CHECK-NOV-NEXT: slti a0, a4, 0
; CHECK-NOV-NEXT: xori a0, a0, 1
; CHECK-NOV-NEXT: bnez a0, .LBB18_14
; CHECK-NOV-NEXT: .LBB18_13: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: mv s0, a1
; CHECK-NOV-NEXT: .LBB18_14: # %entry
-; CHECK-NOV-NEXT: bnez a1, .LBB18_16
+; CHECK-NOV-NEXT: bnez a3, .LBB18_16
; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: .LBB18_16: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-V-NEXT: bnez a1, .LBB18_4
; CHECK-V-NEXT: .LBB18_2:
; CHECK-V-NEXT: sltu a5, a0, a3
-; CHECK-V-NEXT: beqz a4, .LBB18_5
-; CHECK-V-NEXT: j .LBB18_6
+; CHECK-V-NEXT: j .LBB18_5
; CHECK-V-NEXT: .LBB18_3:
; CHECK-V-NEXT: sltu a4, s0, a3
; CHECK-V-NEXT: beqz a1, .LBB18_2
; CHECK-V-NEXT: .LBB18_4: # %entry
; CHECK-V-NEXT: slti a5, a1, 0
-; CHECK-V-NEXT: bnez a4, .LBB18_6
; CHECK-V-NEXT: .LBB18_5: # %entry
-; CHECK-V-NEXT: li s1, 0
+; CHECK-V-NEXT: seqz a6, a4
+; CHECK-V-NEXT: addi a6, a6, -1
+; CHECK-V-NEXT: bnez a4, .LBB18_7
+; CHECK-V-NEXT: # %bb.6: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: .LBB18_6: # %entry
-; CHECK-V-NEXT: beqz a5, .LBB18_10
-; CHECK-V-NEXT: # %bb.7: # %entry
-; CHECK-V-NEXT: slli a3, a2, 63
-; CHECK-V-NEXT: beq s1, a2, .LBB18_11
-; CHECK-V-NEXT: .LBB18_8: # %entry
-; CHECK-V-NEXT: slti a4, s1, 0
-; CHECK-V-NEXT: xori a4, a4, 1
-; CHECK-V-NEXT: bne a1, a2, .LBB18_12
-; CHECK-V-NEXT: .LBB18_9:
-; CHECK-V-NEXT: sltu a1, a3, a0
-; CHECK-V-NEXT: beqz a4, .LBB18_13
-; CHECK-V-NEXT: j .LBB18_14
-; CHECK-V-NEXT: .LBB18_10: # %entry
-; CHECK-V-NEXT: li a1, 0
+; CHECK-V-NEXT: .LBB18_7: # %entry
+; CHECK-V-NEXT: and a6, a6, s1
+; CHECK-V-NEXT: seqz a4, a5
+; CHECK-V-NEXT: addi a4, a4, -1
+; CHECK-V-NEXT: bnez a5, .LBB18_9
+; CHECK-V-NEXT: # %bb.8: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: slli a3, a2, 63
-; CHECK-V-NEXT: bne s1, a2, .LBB18_8
+; CHECK-V-NEXT: .LBB18_9: # %entry
+; CHECK-V-NEXT: and a4, a4, a1
+; CHECK-V-NEXT: slli a1, a2, 63
+; CHECK-V-NEXT: beq a6, a2, .LBB18_12
+; CHECK-V-NEXT: # %bb.10: # %entry
+; CHECK-V-NEXT: slti a3, a6, 0
+; CHECK-V-NEXT: xori a3, a3, 1
+; CHECK-V-NEXT: bne a4, a2, .LBB18_13
; CHECK-V-NEXT: .LBB18_11:
-; CHECK-V-NEXT: sltu a4, a3, s0
-; CHECK-V-NEXT: beq a1, a2, .LBB18_9
-; CHECK-V-NEXT: .LBB18_12: # %entry
-; CHECK-V-NEXT: slti a1, a1, 0
-; CHECK-V-NEXT: xori a1, a1, 1
-; CHECK-V-NEXT: bnez a4, .LBB18_14
+; CHECK-V-NEXT: sltu a2, a1, a0
+; CHECK-V-NEXT: beqz a3, .LBB18_14
+; CHECK-V-NEXT: j .LBB18_15
+; CHECK-V-NEXT: .LBB18_12:
+; CHECK-V-NEXT: sltu a3, a1, s0
+; CHECK-V-NEXT: beq a4, a2, .LBB18_11
; CHECK-V-NEXT: .LBB18_13: # %entry
-; CHECK-V-NEXT: mv s0, a3
+; CHECK-V-NEXT: slti a2, a4, 0
+; CHECK-V-NEXT: xori a2, a2, 1
+; CHECK-V-NEXT: bnez a3, .LBB18_15
; CHECK-V-NEXT: .LBB18_14: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB18_16
-; CHECK-V-NEXT: # %bb.15: # %entry
-; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: .LBB18_16: # %entry
+; CHECK-V-NEXT: mv s0, a1
+; CHECK-V-NEXT: .LBB18_15: # %entry
+; CHECK-V-NEXT: bnez a2, .LBB18_17
+; CHECK-V-NEXT: # %bb.16: # %entry
+; CHECK-V-NEXT: mv a0, a1
+; CHECK-V-NEXT: .LBB18_17: # %entry
; CHECK-V-NEXT: sd a0, 24(sp)
; CHECK-V-NEXT: sd s0, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.d fa0, fs0
; CHECK-NOV-NEXT: call __fixunsdfti@plt
-; CHECK-NOV-NEXT: beqz a1, .LBB19_2
-; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB19_2: # %entry
-; CHECK-NOV-NEXT: beqz s1, .LBB19_4
-; CHECK-NOV-NEXT: # %bb.3: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: .LBB19_4: # %entry
-; CHECK-NOV-NEXT: mv a1, s0
+; CHECK-NOV-NEXT: snez a1, a1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: snez a1, s1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunsdfti@plt
-; CHECK-V-NEXT: beqz s1, .LBB19_2
-; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: .LBB19_2: # %entry
-; CHECK-V-NEXT: beqz a1, .LBB19_4
-; CHECK-V-NEXT: # %bb.3: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: .LBB19_4: # %entry
+; CHECK-V-NEXT: snez a2, s1
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s0
+; CHECK-V-NEXT: snez a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd s0, 32(sp)
+; CHECK-V-NEXT: sd a2, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-NOV-NEXT: fmv.d fa0, fs0
; CHECK-NOV-NEXT: call __fixdfti@plt
; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: bgtz a1, .LBB20_7
+; CHECK-NOV-NEXT: blez a1, .LBB20_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a3, s1
-; CHECK-NOV-NEXT: bgtz s1, .LBB20_8
+; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB20_2: # %entry
-; CHECK-NOV-NEXT: bgtz a1, .LBB20_9
-; CHECK-NOV-NEXT: .LBB20_3: # %entry
-; CHECK-NOV-NEXT: bgtz s1, .LBB20_10
+; CHECK-NOV-NEXT: mv a3, s1
+; CHECK-NOV-NEXT: blez s1, .LBB20_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: li a3, 1
; CHECK-NOV-NEXT: .LBB20_4: # %entry
-; CHECK-NOV-NEXT: beqz a3, .LBB20_11
-; CHECK-NOV-NEXT: .LBB20_5: # %entry
-; CHECK-NOV-NEXT: sgtz a1, a3
-; CHECK-NOV-NEXT: bnez a2, .LBB20_12
+; CHECK-NOV-NEXT: sgtz a1, a1
+; CHECK-NOV-NEXT: addi a4, a1, -1
+; CHECK-NOV-NEXT: sgtz a1, s1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
+; CHECK-NOV-NEXT: beqz a3, .LBB20_7
+; CHECK-NOV-NEXT: # %bb.5: # %entry
+; CHECK-NOV-NEXT: sgtz a3, a3
+; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: bnez a2, .LBB20_8
; CHECK-NOV-NEXT: .LBB20_6:
; CHECK-NOV-NEXT: snez a2, a0
-; CHECK-NOV-NEXT: beqz a2, .LBB20_13
-; CHECK-NOV-NEXT: j .LBB20_14
-; CHECK-NOV-NEXT: .LBB20_7: # %entry
-; CHECK-NOV-NEXT: li a2, 1
-; CHECK-NOV-NEXT: mv a3, s1
-; CHECK-NOV-NEXT: blez s1, .LBB20_2
-; CHECK-NOV-NEXT: .LBB20_8: # %entry
-; CHECK-NOV-NEXT: li a3, 1
-; CHECK-NOV-NEXT: blez a1, .LBB20_3
-; CHECK-NOV-NEXT: .LBB20_9: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: blez s1, .LBB20_4
-; CHECK-NOV-NEXT: .LBB20_10: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: bnez a3, .LBB20_5
-; CHECK-NOV-NEXT: .LBB20_11:
-; CHECK-NOV-NEXT: snez a1, s0
+; CHECK-NOV-NEXT: j .LBB20_9
+; CHECK-NOV-NEXT: .LBB20_7:
+; CHECK-NOV-NEXT: snez a3, a1
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: beqz a2, .LBB20_6
-; CHECK-NOV-NEXT: .LBB20_12: # %entry
+; CHECK-NOV-NEXT: .LBB20_8: # %entry
; CHECK-NOV-NEXT: sgtz a2, a2
-; CHECK-NOV-NEXT: bnez a2, .LBB20_14
-; CHECK-NOV-NEXT: .LBB20_13: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB20_14: # %entry
-; CHECK-NOV-NEXT: bnez a1, .LBB20_16
-; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: .LBB20_16: # %entry
-; CHECK-NOV-NEXT: mv a1, s0
+; CHECK-NOV-NEXT: .LBB20_9: # %entry
+; CHECK-NOV-NEXT: seqz a2, a2
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: seqz a2, a3
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
; CHECK-V-NEXT: vfmv.f.s fa0, v9
; CHECK-V-NEXT: call __fixdfti@plt
-; CHECK-V-NEXT: mv s0, a0
-; CHECK-V-NEXT: mv s1, a1
+; CHECK-V-NEXT: mv s1, a0
+; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; CHECK-V-NEXT: addi a0, sp, 48
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixdfti@plt
; CHECK-V-NEXT: mv a2, a1
-; CHECK-V-NEXT: bgtz a1, .LBB20_6
+; CHECK-V-NEXT: blez a1, .LBB20_2
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: bgtz s1, .LBB20_7
+; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB20_2: # %entry
-; CHECK-V-NEXT: bgtz a1, .LBB20_8
-; CHECK-V-NEXT: .LBB20_3: # %entry
-; CHECK-V-NEXT: beqz a2, .LBB20_9
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: blez s0, .LBB20_4
+; CHECK-V-NEXT: # %bb.3: # %entry
+; CHECK-V-NEXT: li s0, 1
; CHECK-V-NEXT: .LBB20_4: # %entry
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: sgtz a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: beqz a2, .LBB20_7
+; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a2
-; CHECK-V-NEXT: bnez s1, .LBB20_10
-; CHECK-V-NEXT: .LBB20_5:
-; CHECK-V-NEXT: snez a2, s0
-; CHECK-V-NEXT: beqz a2, .LBB20_11
-; CHECK-V-NEXT: j .LBB20_12
-; CHECK-V-NEXT: .LBB20_6: # %entry
-; CHECK-V-NEXT: li a2, 1
-; CHECK-V-NEXT: blez s1, .LBB20_2
-; CHECK-V-NEXT: .LBB20_7: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: li s1, 1
-; CHECK-V-NEXT: blez a1, .LBB20_3
-; CHECK-V-NEXT: .LBB20_8: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: bnez a2, .LBB20_4
-; CHECK-V-NEXT: .LBB20_9:
+; CHECK-V-NEXT: and a2, a3, s1
+; CHECK-V-NEXT: bnez s0, .LBB20_8
+; CHECK-V-NEXT: .LBB20_6:
+; CHECK-V-NEXT: snez a3, a2
+; CHECK-V-NEXT: j .LBB20_9
+; CHECK-V-NEXT: .LBB20_7:
; CHECK-V-NEXT: snez a1, a0
-; CHECK-V-NEXT: beqz s1, .LBB20_5
-; CHECK-V-NEXT: .LBB20_10: # %entry
-; CHECK-V-NEXT: sgtz a2, s1
-; CHECK-V-NEXT: bnez a2, .LBB20_12
-; CHECK-V-NEXT: .LBB20_11: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: .LBB20_12: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB20_14
-; CHECK-V-NEXT: # %bb.13: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: .LBB20_14: # %entry
+; CHECK-V-NEXT: and a2, a3, s1
+; CHECK-V-NEXT: beqz s0, .LBB20_6
+; CHECK-V-NEXT: .LBB20_8: # %entry
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: .LBB20_9: # %entry
+; CHECK-V-NEXT: seqz a3, a3
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a2, a3, a2
+; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd s0, 32(sp)
+; CHECK-V-NEXT: sd a2, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-NOV-NEXT: bnez s1, .LBB21_4
; CHECK-NOV-NEXT: .LBB21_2:
; CHECK-NOV-NEXT: sltu a5, s0, a3
+; CHECK-NOV-NEXT: seqz a6, a5
; CHECK-NOV-NEXT: beqz a5, .LBB21_5
; CHECK-NOV-NEXT: j .LBB21_6
; CHECK-NOV-NEXT: .LBB21_3:
; CHECK-NOV-NEXT: beqz s1, .LBB21_2
; CHECK-NOV-NEXT: .LBB21_4: # %entry
; CHECK-NOV-NEXT: slti a5, s1, 0
+; CHECK-NOV-NEXT: seqz a6, a5
; CHECK-NOV-NEXT: bnez a5, .LBB21_6
; CHECK-NOV-NEXT: .LBB21_5: # %entry
-; CHECK-NOV-NEXT: li s1, 0
; CHECK-NOV-NEXT: mv s0, a3
; CHECK-NOV-NEXT: .LBB21_6: # %entry
-; CHECK-NOV-NEXT: beqz a4, .LBB21_10
+; CHECK-NOV-NEXT: addi a6, a6, -1
+; CHECK-NOV-NEXT: seqz a5, a4
+; CHECK-NOV-NEXT: addi a5, a5, -1
+; CHECK-NOV-NEXT: and a5, a5, a1
+; CHECK-NOV-NEXT: bnez a4, .LBB21_8
; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: slli a3, a0, 63
-; CHECK-NOV-NEXT: beq a1, a0, .LBB21_11
+; CHECK-NOV-NEXT: mv a2, a3
; CHECK-NOV-NEXT: .LBB21_8: # %entry
-; CHECK-NOV-NEXT: slti a1, a1, 0
-; CHECK-NOV-NEXT: xori a1, a1, 1
-; CHECK-NOV-NEXT: bne s1, a0, .LBB21_12
-; CHECK-NOV-NEXT: .LBB21_9:
-; CHECK-NOV-NEXT: sltu a0, a3, s0
+; CHECK-NOV-NEXT: and a4, a6, s1
+; CHECK-NOV-NEXT: slli a1, a0, 63
+; CHECK-NOV-NEXT: beq a5, a0, .LBB21_11
+; CHECK-NOV-NEXT: # %bb.9: # %entry
+; CHECK-NOV-NEXT: slti a3, a5, 0
+; CHECK-NOV-NEXT: xori a3, a3, 1
+; CHECK-NOV-NEXT: bne a4, a0, .LBB21_12
+; CHECK-NOV-NEXT: .LBB21_10:
+; CHECK-NOV-NEXT: sltu a0, a1, s0
; CHECK-NOV-NEXT: beqz a0, .LBB21_13
; CHECK-NOV-NEXT: j .LBB21_14
-; CHECK-NOV-NEXT: .LBB21_10: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: slli a3, a0, 63
-; CHECK-NOV-NEXT: bne a1, a0, .LBB21_8
; CHECK-NOV-NEXT: .LBB21_11:
-; CHECK-NOV-NEXT: sltu a1, a3, a2
-; CHECK-NOV-NEXT: beq s1, a0, .LBB21_9
+; CHECK-NOV-NEXT: sltu a3, a1, a2
+; CHECK-NOV-NEXT: beq a4, a0, .LBB21_10
; CHECK-NOV-NEXT: .LBB21_12: # %entry
-; CHECK-NOV-NEXT: slti a0, s1, 0
+; CHECK-NOV-NEXT: slti a0, a4, 0
; CHECK-NOV-NEXT: xori a0, a0, 1
; CHECK-NOV-NEXT: bnez a0, .LBB21_14
; CHECK-NOV-NEXT: .LBB21_13: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: mv s0, a1
; CHECK-NOV-NEXT: .LBB21_14: # %entry
-; CHECK-NOV-NEXT: bnez a1, .LBB21_16
+; CHECK-NOV-NEXT: bnez a3, .LBB21_16
; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: .LBB21_16: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-V-NEXT: bnez a1, .LBB21_4
; CHECK-V-NEXT: .LBB21_2:
; CHECK-V-NEXT: sltu a5, a0, a3
-; CHECK-V-NEXT: beqz a4, .LBB21_5
-; CHECK-V-NEXT: j .LBB21_6
+; CHECK-V-NEXT: j .LBB21_5
; CHECK-V-NEXT: .LBB21_3:
; CHECK-V-NEXT: sltu a4, s0, a3
; CHECK-V-NEXT: beqz a1, .LBB21_2
; CHECK-V-NEXT: .LBB21_4: # %entry
; CHECK-V-NEXT: slti a5, a1, 0
-; CHECK-V-NEXT: bnez a4, .LBB21_6
; CHECK-V-NEXT: .LBB21_5: # %entry
-; CHECK-V-NEXT: li s1, 0
+; CHECK-V-NEXT: seqz a6, a4
+; CHECK-V-NEXT: addi a6, a6, -1
+; CHECK-V-NEXT: bnez a4, .LBB21_7
+; CHECK-V-NEXT: # %bb.6: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: .LBB21_6: # %entry
-; CHECK-V-NEXT: beqz a5, .LBB21_10
-; CHECK-V-NEXT: # %bb.7: # %entry
-; CHECK-V-NEXT: slli a3, a2, 63
-; CHECK-V-NEXT: beq s1, a2, .LBB21_11
-; CHECK-V-NEXT: .LBB21_8: # %entry
-; CHECK-V-NEXT: slti a4, s1, 0
-; CHECK-V-NEXT: xori a4, a4, 1
-; CHECK-V-NEXT: bne a1, a2, .LBB21_12
-; CHECK-V-NEXT: .LBB21_9:
-; CHECK-V-NEXT: sltu a1, a3, a0
-; CHECK-V-NEXT: beqz a4, .LBB21_13
-; CHECK-V-NEXT: j .LBB21_14
-; CHECK-V-NEXT: .LBB21_10: # %entry
-; CHECK-V-NEXT: li a1, 0
+; CHECK-V-NEXT: .LBB21_7: # %entry
+; CHECK-V-NEXT: and a6, a6, s1
+; CHECK-V-NEXT: seqz a4, a5
+; CHECK-V-NEXT: addi a4, a4, -1
+; CHECK-V-NEXT: bnez a5, .LBB21_9
+; CHECK-V-NEXT: # %bb.8: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: slli a3, a2, 63
-; CHECK-V-NEXT: bne s1, a2, .LBB21_8
+; CHECK-V-NEXT: .LBB21_9: # %entry
+; CHECK-V-NEXT: and a4, a4, a1
+; CHECK-V-NEXT: slli a1, a2, 63
+; CHECK-V-NEXT: beq a6, a2, .LBB21_12
+; CHECK-V-NEXT: # %bb.10: # %entry
+; CHECK-V-NEXT: slti a3, a6, 0
+; CHECK-V-NEXT: xori a3, a3, 1
+; CHECK-V-NEXT: bne a4, a2, .LBB21_13
; CHECK-V-NEXT: .LBB21_11:
-; CHECK-V-NEXT: sltu a4, a3, s0
-; CHECK-V-NEXT: beq a1, a2, .LBB21_9
-; CHECK-V-NEXT: .LBB21_12: # %entry
-; CHECK-V-NEXT: slti a1, a1, 0
-; CHECK-V-NEXT: xori a1, a1, 1
-; CHECK-V-NEXT: bnez a4, .LBB21_14
+; CHECK-V-NEXT: sltu a2, a1, a0
+; CHECK-V-NEXT: beqz a3, .LBB21_14
+; CHECK-V-NEXT: j .LBB21_15
+; CHECK-V-NEXT: .LBB21_12:
+; CHECK-V-NEXT: sltu a3, a1, s0
+; CHECK-V-NEXT: beq a4, a2, .LBB21_11
; CHECK-V-NEXT: .LBB21_13: # %entry
-; CHECK-V-NEXT: mv s0, a3
+; CHECK-V-NEXT: slti a2, a4, 0
+; CHECK-V-NEXT: xori a2, a2, 1
+; CHECK-V-NEXT: bnez a3, .LBB21_15
; CHECK-V-NEXT: .LBB21_14: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB21_16
-; CHECK-V-NEXT: # %bb.15: # %entry
-; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: .LBB21_16: # %entry
+; CHECK-V-NEXT: mv s0, a1
+; CHECK-V-NEXT: .LBB21_15: # %entry
+; CHECK-V-NEXT: bnez a2, .LBB21_17
+; CHECK-V-NEXT: # %bb.16: # %entry
+; CHECK-V-NEXT: mv a0, a1
+; CHECK-V-NEXT: .LBB21_17: # %entry
; CHECK-V-NEXT: sd a0, 24(sp)
; CHECK-V-NEXT: sd s0, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.s fa0, fs0
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: beqz a1, .LBB22_2
-; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB22_2: # %entry
-; CHECK-NOV-NEXT: beqz s1, .LBB22_4
-; CHECK-NOV-NEXT: # %bb.3: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: .LBB22_4: # %entry
-; CHECK-NOV-NEXT: mv a1, s0
+; CHECK-NOV-NEXT: snez a1, a1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: snez a1, s1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunssfti@plt
-; CHECK-V-NEXT: beqz s1, .LBB22_2
-; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: .LBB22_2: # %entry
-; CHECK-V-NEXT: beqz a1, .LBB22_4
-; CHECK-V-NEXT: # %bb.3: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: .LBB22_4: # %entry
+; CHECK-V-NEXT: snez a2, s1
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s0
+; CHECK-V-NEXT: snez a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd s0, 32(sp)
+; CHECK-V-NEXT: sd a2, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-NOV-NEXT: fmv.s fa0, fs0
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: bgtz a1, .LBB23_7
+; CHECK-NOV-NEXT: blez a1, .LBB23_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a3, s1
-; CHECK-NOV-NEXT: bgtz s1, .LBB23_8
+; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB23_2: # %entry
-; CHECK-NOV-NEXT: bgtz a1, .LBB23_9
-; CHECK-NOV-NEXT: .LBB23_3: # %entry
-; CHECK-NOV-NEXT: bgtz s1, .LBB23_10
+; CHECK-NOV-NEXT: mv a3, s1
+; CHECK-NOV-NEXT: blez s1, .LBB23_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: li a3, 1
; CHECK-NOV-NEXT: .LBB23_4: # %entry
-; CHECK-NOV-NEXT: beqz a3, .LBB23_11
-; CHECK-NOV-NEXT: .LBB23_5: # %entry
-; CHECK-NOV-NEXT: sgtz a1, a3
-; CHECK-NOV-NEXT: bnez a2, .LBB23_12
+; CHECK-NOV-NEXT: sgtz a1, a1
+; CHECK-NOV-NEXT: addi a4, a1, -1
+; CHECK-NOV-NEXT: sgtz a1, s1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
+; CHECK-NOV-NEXT: beqz a3, .LBB23_7
+; CHECK-NOV-NEXT: # %bb.5: # %entry
+; CHECK-NOV-NEXT: sgtz a3, a3
+; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: bnez a2, .LBB23_8
; CHECK-NOV-NEXT: .LBB23_6:
; CHECK-NOV-NEXT: snez a2, a0
-; CHECK-NOV-NEXT: beqz a2, .LBB23_13
-; CHECK-NOV-NEXT: j .LBB23_14
-; CHECK-NOV-NEXT: .LBB23_7: # %entry
-; CHECK-NOV-NEXT: li a2, 1
-; CHECK-NOV-NEXT: mv a3, s1
-; CHECK-NOV-NEXT: blez s1, .LBB23_2
-; CHECK-NOV-NEXT: .LBB23_8: # %entry
-; CHECK-NOV-NEXT: li a3, 1
-; CHECK-NOV-NEXT: blez a1, .LBB23_3
-; CHECK-NOV-NEXT: .LBB23_9: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: blez s1, .LBB23_4
-; CHECK-NOV-NEXT: .LBB23_10: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: bnez a3, .LBB23_5
-; CHECK-NOV-NEXT: .LBB23_11:
-; CHECK-NOV-NEXT: snez a1, s0
+; CHECK-NOV-NEXT: j .LBB23_9
+; CHECK-NOV-NEXT: .LBB23_7:
+; CHECK-NOV-NEXT: snez a3, a1
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: beqz a2, .LBB23_6
-; CHECK-NOV-NEXT: .LBB23_12: # %entry
+; CHECK-NOV-NEXT: .LBB23_8: # %entry
; CHECK-NOV-NEXT: sgtz a2, a2
-; CHECK-NOV-NEXT: bnez a2, .LBB23_14
-; CHECK-NOV-NEXT: .LBB23_13: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB23_14: # %entry
-; CHECK-NOV-NEXT: bnez a1, .LBB23_16
-; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: .LBB23_16: # %entry
-; CHECK-NOV-NEXT: mv a1, s0
+; CHECK-NOV-NEXT: .LBB23_9: # %entry
+; CHECK-NOV-NEXT: seqz a2, a2
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: seqz a2, a3
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
; CHECK-V-NEXT: vfmv.f.s fa0, v9
; CHECK-V-NEXT: call __fixsfti@plt
-; CHECK-V-NEXT: mv s0, a0
-; CHECK-V-NEXT: mv s1, a1
+; CHECK-V-NEXT: mv s1, a0
+; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
; CHECK-V-NEXT: addi a0, sp, 48
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: mv a2, a1
-; CHECK-V-NEXT: bgtz a1, .LBB23_6
+; CHECK-V-NEXT: blez a1, .LBB23_2
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: bgtz s1, .LBB23_7
+; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB23_2: # %entry
-; CHECK-V-NEXT: bgtz a1, .LBB23_8
-; CHECK-V-NEXT: .LBB23_3: # %entry
-; CHECK-V-NEXT: beqz a2, .LBB23_9
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: blez s0, .LBB23_4
+; CHECK-V-NEXT: # %bb.3: # %entry
+; CHECK-V-NEXT: li s0, 1
; CHECK-V-NEXT: .LBB23_4: # %entry
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: sgtz a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: beqz a2, .LBB23_7
+; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a2
-; CHECK-V-NEXT: bnez s1, .LBB23_10
-; CHECK-V-NEXT: .LBB23_5:
-; CHECK-V-NEXT: snez a2, s0
-; CHECK-V-NEXT: beqz a2, .LBB23_11
-; CHECK-V-NEXT: j .LBB23_12
-; CHECK-V-NEXT: .LBB23_6: # %entry
-; CHECK-V-NEXT: li a2, 1
-; CHECK-V-NEXT: blez s1, .LBB23_2
-; CHECK-V-NEXT: .LBB23_7: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: li s1, 1
-; CHECK-V-NEXT: blez a1, .LBB23_3
-; CHECK-V-NEXT: .LBB23_8: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: bnez a2, .LBB23_4
-; CHECK-V-NEXT: .LBB23_9:
+; CHECK-V-NEXT: and a2, a3, s1
+; CHECK-V-NEXT: bnez s0, .LBB23_8
+; CHECK-V-NEXT: .LBB23_6:
+; CHECK-V-NEXT: snez a3, a2
+; CHECK-V-NEXT: j .LBB23_9
+; CHECK-V-NEXT: .LBB23_7:
; CHECK-V-NEXT: snez a1, a0
-; CHECK-V-NEXT: beqz s1, .LBB23_5
-; CHECK-V-NEXT: .LBB23_10: # %entry
-; CHECK-V-NEXT: sgtz a2, s1
-; CHECK-V-NEXT: bnez a2, .LBB23_12
-; CHECK-V-NEXT: .LBB23_11: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: .LBB23_12: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB23_14
-; CHECK-V-NEXT: # %bb.13: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: .LBB23_14: # %entry
+; CHECK-V-NEXT: and a2, a3, s1
+; CHECK-V-NEXT: beqz s0, .LBB23_6
+; CHECK-V-NEXT: .LBB23_8: # %entry
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: .LBB23_9: # %entry
+; CHECK-V-NEXT: seqz a3, a3
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a2, a3, a2
+; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd s0, 32(sp)
+; CHECK-V-NEXT: sd a2, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-NOV-NEXT: bnez s1, .LBB24_4
; CHECK-NOV-NEXT: .LBB24_2:
; CHECK-NOV-NEXT: sltu a5, s0, a3
+; CHECK-NOV-NEXT: seqz a6, a5
; CHECK-NOV-NEXT: beqz a5, .LBB24_5
; CHECK-NOV-NEXT: j .LBB24_6
; CHECK-NOV-NEXT: .LBB24_3:
; CHECK-NOV-NEXT: beqz s1, .LBB24_2
; CHECK-NOV-NEXT: .LBB24_4: # %entry
; CHECK-NOV-NEXT: slti a5, s1, 0
+; CHECK-NOV-NEXT: seqz a6, a5
; CHECK-NOV-NEXT: bnez a5, .LBB24_6
; CHECK-NOV-NEXT: .LBB24_5: # %entry
-; CHECK-NOV-NEXT: li s1, 0
; CHECK-NOV-NEXT: mv s0, a3
; CHECK-NOV-NEXT: .LBB24_6: # %entry
-; CHECK-NOV-NEXT: beqz a4, .LBB24_10
+; CHECK-NOV-NEXT: addi a6, a6, -1
+; CHECK-NOV-NEXT: seqz a5, a4
+; CHECK-NOV-NEXT: addi a5, a5, -1
+; CHECK-NOV-NEXT: and a5, a5, a1
+; CHECK-NOV-NEXT: bnez a4, .LBB24_8
; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: slli a3, a0, 63
-; CHECK-NOV-NEXT: beq a1, a0, .LBB24_11
+; CHECK-NOV-NEXT: mv a2, a3
; CHECK-NOV-NEXT: .LBB24_8: # %entry
-; CHECK-NOV-NEXT: slti a1, a1, 0
-; CHECK-NOV-NEXT: xori a1, a1, 1
-; CHECK-NOV-NEXT: bne s1, a0, .LBB24_12
-; CHECK-NOV-NEXT: .LBB24_9:
-; CHECK-NOV-NEXT: sltu a0, a3, s0
+; CHECK-NOV-NEXT: and a4, a6, s1
+; CHECK-NOV-NEXT: slli a1, a0, 63
+; CHECK-NOV-NEXT: beq a5, a0, .LBB24_11
+; CHECK-NOV-NEXT: # %bb.9: # %entry
+; CHECK-NOV-NEXT: slti a3, a5, 0
+; CHECK-NOV-NEXT: xori a3, a3, 1
+; CHECK-NOV-NEXT: bne a4, a0, .LBB24_12
+; CHECK-NOV-NEXT: .LBB24_10:
+; CHECK-NOV-NEXT: sltu a0, a1, s0
; CHECK-NOV-NEXT: beqz a0, .LBB24_13
; CHECK-NOV-NEXT: j .LBB24_14
-; CHECK-NOV-NEXT: .LBB24_10: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: slli a3, a0, 63
-; CHECK-NOV-NEXT: bne a1, a0, .LBB24_8
; CHECK-NOV-NEXT: .LBB24_11:
-; CHECK-NOV-NEXT: sltu a1, a3, a2
-; CHECK-NOV-NEXT: beq s1, a0, .LBB24_9
+; CHECK-NOV-NEXT: sltu a3, a1, a2
+; CHECK-NOV-NEXT: beq a4, a0, .LBB24_10
; CHECK-NOV-NEXT: .LBB24_12: # %entry
-; CHECK-NOV-NEXT: slti a0, s1, 0
+; CHECK-NOV-NEXT: slti a0, a4, 0
; CHECK-NOV-NEXT: xori a0, a0, 1
; CHECK-NOV-NEXT: bnez a0, .LBB24_14
; CHECK-NOV-NEXT: .LBB24_13: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: mv s0, a1
; CHECK-NOV-NEXT: .LBB24_14: # %entry
-; CHECK-NOV-NEXT: bnez a1, .LBB24_16
+; CHECK-NOV-NEXT: bnez a3, .LBB24_16
; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: .LBB24_16: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-V-NEXT: bnez s1, .LBB24_4
; CHECK-V-NEXT: .LBB24_2:
; CHECK-V-NEXT: sltu a5, s0, a3
+; CHECK-V-NEXT: seqz a6, a5
; CHECK-V-NEXT: beqz a5, .LBB24_5
; CHECK-V-NEXT: j .LBB24_6
; CHECK-V-NEXT: .LBB24_3:
; CHECK-V-NEXT: beqz s1, .LBB24_2
; CHECK-V-NEXT: .LBB24_4: # %entry
; CHECK-V-NEXT: slti a5, s1, 0
+; CHECK-V-NEXT: seqz a6, a5
; CHECK-V-NEXT: bnez a5, .LBB24_6
; CHECK-V-NEXT: .LBB24_5: # %entry
-; CHECK-V-NEXT: li s1, 0
; CHECK-V-NEXT: mv s0, a3
; CHECK-V-NEXT: .LBB24_6: # %entry
-; CHECK-V-NEXT: beqz a4, .LBB24_10
+; CHECK-V-NEXT: addi a6, a6, -1
+; CHECK-V-NEXT: seqz a5, a4
+; CHECK-V-NEXT: addi a5, a5, -1
+; CHECK-V-NEXT: and a5, a5, a1
+; CHECK-V-NEXT: bnez a4, .LBB24_8
; CHECK-V-NEXT: # %bb.7: # %entry
-; CHECK-V-NEXT: slli a3, a2, 63
-; CHECK-V-NEXT: beq a1, a2, .LBB24_11
+; CHECK-V-NEXT: mv a0, a3
; CHECK-V-NEXT: .LBB24_8: # %entry
-; CHECK-V-NEXT: slti a1, a1, 0
-; CHECK-V-NEXT: xori a1, a1, 1
-; CHECK-V-NEXT: bne s1, a2, .LBB24_12
-; CHECK-V-NEXT: .LBB24_9:
-; CHECK-V-NEXT: sltu a2, a3, s0
+; CHECK-V-NEXT: and a4, a6, s1
+; CHECK-V-NEXT: slli a1, a2, 63
+; CHECK-V-NEXT: beq a5, a2, .LBB24_11
+; CHECK-V-NEXT: # %bb.9: # %entry
+; CHECK-V-NEXT: slti a3, a5, 0
+; CHECK-V-NEXT: xori a3, a3, 1
+; CHECK-V-NEXT: bne a4, a2, .LBB24_12
+; CHECK-V-NEXT: .LBB24_10:
+; CHECK-V-NEXT: sltu a2, a1, s0
; CHECK-V-NEXT: beqz a2, .LBB24_13
; CHECK-V-NEXT: j .LBB24_14
-; CHECK-V-NEXT: .LBB24_10: # %entry
-; CHECK-V-NEXT: li a1, 0
-; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: slli a3, a2, 63
-; CHECK-V-NEXT: bne a1, a2, .LBB24_8
; CHECK-V-NEXT: .LBB24_11:
-; CHECK-V-NEXT: sltu a1, a3, a0
-; CHECK-V-NEXT: beq s1, a2, .LBB24_9
+; CHECK-V-NEXT: sltu a3, a1, a0
+; CHECK-V-NEXT: beq a4, a2, .LBB24_10
; CHECK-V-NEXT: .LBB24_12: # %entry
-; CHECK-V-NEXT: slti a2, s1, 0
+; CHECK-V-NEXT: slti a2, a4, 0
; CHECK-V-NEXT: xori a2, a2, 1
; CHECK-V-NEXT: bnez a2, .LBB24_14
; CHECK-V-NEXT: .LBB24_13: # %entry
-; CHECK-V-NEXT: mv s0, a3
+; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: .LBB24_14: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB24_16
+; CHECK-V-NEXT: bnez a3, .LBB24_16
; CHECK-V-NEXT: # %bb.15: # %entry
-; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: mv a0, a1
; CHECK-V-NEXT: .LBB24_16: # %entry
; CHECK-V-NEXT: sd a0, 8(sp)
; CHECK-V-NEXT: sd s0, 0(sp)
; CHECK-NOV-NEXT: .cfi_offset s0, -16
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
-; CHECK-NOV-NEXT: mv s2, a0
+; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv a0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: mv s1, a0
+; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: beqz a1, .LBB25_2
-; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB25_2: # %entry
-; CHECK-NOV-NEXT: beqz s1, .LBB25_4
-; CHECK-NOV-NEXT: # %bb.3: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: .LBB25_4: # %entry
-; CHECK-NOV-NEXT: mv a1, s0
+; CHECK-NOV-NEXT: snez a1, a1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: snez a1, s2
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a1, a1, s1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
-; CHECK-V-NEXT: mv s2, a0
+; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv a0, a1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
-; CHECK-V-NEXT: mv s0, a0
-; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: mv s1, a0
+; CHECK-V-NEXT: mv s2, a1
+; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
-; CHECK-V-NEXT: beqz a1, .LBB25_2
-; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: .LBB25_2: # %entry
-; CHECK-V-NEXT: beqz s1, .LBB25_4
-; CHECK-V-NEXT: # %bb.3: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: .LBB25_4: # %entry
-; CHECK-V-NEXT: sd s0, 8(sp)
+; CHECK-V-NEXT: snez a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: snez a1, s2
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a1, a1, s1
+; CHECK-V-NEXT: sd a1, 8(sp)
; CHECK-V-NEXT: sd a0, 0(sp)
; CHECK-V-NEXT: addi a0, sp, 8
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: bgtz a1, .LBB26_7
+; CHECK-NOV-NEXT: blez a1, .LBB26_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a3, s1
-; CHECK-NOV-NEXT: bgtz s1, .LBB26_8
+; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB26_2: # %entry
-; CHECK-NOV-NEXT: bgtz a1, .LBB26_9
-; CHECK-NOV-NEXT: .LBB26_3: # %entry
-; CHECK-NOV-NEXT: bgtz s1, .LBB26_10
+; CHECK-NOV-NEXT: mv a3, s1
+; CHECK-NOV-NEXT: blez s1, .LBB26_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: li a3, 1
; CHECK-NOV-NEXT: .LBB26_4: # %entry
-; CHECK-NOV-NEXT: beqz a3, .LBB26_11
-; CHECK-NOV-NEXT: .LBB26_5: # %entry
-; CHECK-NOV-NEXT: sgtz a1, a3
-; CHECK-NOV-NEXT: bnez a2, .LBB26_12
+; CHECK-NOV-NEXT: sgtz a1, a1
+; CHECK-NOV-NEXT: addi a4, a1, -1
+; CHECK-NOV-NEXT: sgtz a1, s1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
+; CHECK-NOV-NEXT: beqz a3, .LBB26_7
+; CHECK-NOV-NEXT: # %bb.5: # %entry
+; CHECK-NOV-NEXT: sgtz a3, a3
+; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: bnez a2, .LBB26_8
; CHECK-NOV-NEXT: .LBB26_6:
; CHECK-NOV-NEXT: snez a2, a0
-; CHECK-NOV-NEXT: beqz a2, .LBB26_13
-; CHECK-NOV-NEXT: j .LBB26_14
-; CHECK-NOV-NEXT: .LBB26_7: # %entry
-; CHECK-NOV-NEXT: li a2, 1
-; CHECK-NOV-NEXT: mv a3, s1
-; CHECK-NOV-NEXT: blez s1, .LBB26_2
-; CHECK-NOV-NEXT: .LBB26_8: # %entry
-; CHECK-NOV-NEXT: li a3, 1
-; CHECK-NOV-NEXT: blez a1, .LBB26_3
-; CHECK-NOV-NEXT: .LBB26_9: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: blez s1, .LBB26_4
-; CHECK-NOV-NEXT: .LBB26_10: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: bnez a3, .LBB26_5
-; CHECK-NOV-NEXT: .LBB26_11:
-; CHECK-NOV-NEXT: snez a1, s0
+; CHECK-NOV-NEXT: j .LBB26_9
+; CHECK-NOV-NEXT: .LBB26_7:
+; CHECK-NOV-NEXT: snez a3, a1
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: beqz a2, .LBB26_6
-; CHECK-NOV-NEXT: .LBB26_12: # %entry
+; CHECK-NOV-NEXT: .LBB26_8: # %entry
; CHECK-NOV-NEXT: sgtz a2, a2
-; CHECK-NOV-NEXT: bnez a2, .LBB26_14
-; CHECK-NOV-NEXT: .LBB26_13: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB26_14: # %entry
-; CHECK-NOV-NEXT: bnez a1, .LBB26_16
-; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: .LBB26_16: # %entry
-; CHECK-NOV-NEXT: mv a1, s0
+; CHECK-NOV-NEXT: .LBB26_9: # %entry
+; CHECK-NOV-NEXT: seqz a2, a2
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: seqz a2, a3
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: mv a2, a1
-; CHECK-V-NEXT: bgtz a1, .LBB26_7
+; CHECK-V-NEXT: blez a1, .LBB26_2
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: mv a3, s1
-; CHECK-V-NEXT: bgtz s1, .LBB26_8
+; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB26_2: # %entry
-; CHECK-V-NEXT: bgtz a1, .LBB26_9
-; CHECK-V-NEXT: .LBB26_3: # %entry
-; CHECK-V-NEXT: bgtz s1, .LBB26_10
+; CHECK-V-NEXT: mv a3, s1
+; CHECK-V-NEXT: blez s1, .LBB26_4
+; CHECK-V-NEXT: # %bb.3: # %entry
+; CHECK-V-NEXT: li a3, 1
; CHECK-V-NEXT: .LBB26_4: # %entry
-; CHECK-V-NEXT: beqz a3, .LBB26_11
-; CHECK-V-NEXT: .LBB26_5: # %entry
-; CHECK-V-NEXT: sgtz a1, a3
-; CHECK-V-NEXT: bnez a2, .LBB26_12
+; CHECK-V-NEXT: sgtz a1, a1
+; CHECK-V-NEXT: addi a4, a1, -1
+; CHECK-V-NEXT: sgtz a1, s1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a1, a1, s0
+; CHECK-V-NEXT: beqz a3, .LBB26_7
+; CHECK-V-NEXT: # %bb.5: # %entry
+; CHECK-V-NEXT: sgtz a3, a3
+; CHECK-V-NEXT: and a0, a4, a0
+; CHECK-V-NEXT: bnez a2, .LBB26_8
; CHECK-V-NEXT: .LBB26_6:
; CHECK-V-NEXT: snez a2, a0
-; CHECK-V-NEXT: beqz a2, .LBB26_13
-; CHECK-V-NEXT: j .LBB26_14
-; CHECK-V-NEXT: .LBB26_7: # %entry
-; CHECK-V-NEXT: li a2, 1
-; CHECK-V-NEXT: mv a3, s1
-; CHECK-V-NEXT: blez s1, .LBB26_2
-; CHECK-V-NEXT: .LBB26_8: # %entry
-; CHECK-V-NEXT: li a3, 1
-; CHECK-V-NEXT: blez a1, .LBB26_3
-; CHECK-V-NEXT: .LBB26_9: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: blez s1, .LBB26_4
-; CHECK-V-NEXT: .LBB26_10: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: bnez a3, .LBB26_5
-; CHECK-V-NEXT: .LBB26_11:
-; CHECK-V-NEXT: snez a1, s0
+; CHECK-V-NEXT: j .LBB26_9
+; CHECK-V-NEXT: .LBB26_7:
+; CHECK-V-NEXT: snez a3, a1
+; CHECK-V-NEXT: and a0, a4, a0
; CHECK-V-NEXT: beqz a2, .LBB26_6
-; CHECK-V-NEXT: .LBB26_12: # %entry
+; CHECK-V-NEXT: .LBB26_8: # %entry
; CHECK-V-NEXT: sgtz a2, a2
-; CHECK-V-NEXT: bnez a2, .LBB26_14
-; CHECK-V-NEXT: .LBB26_13: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: .LBB26_14: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB26_16
-; CHECK-V-NEXT: # %bb.15: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: .LBB26_16: # %entry
-; CHECK-V-NEXT: sd s0, 8(sp)
+; CHECK-V-NEXT: .LBB26_9: # %entry
+; CHECK-V-NEXT: seqz a2, a2
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a0, a2, a0
+; CHECK-V-NEXT: seqz a2, a3
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a1, a2, a1
+; CHECK-V-NEXT: sd a1, 8(sp)
; CHECK-V-NEXT: sd a0, 0(sp)
; CHECK-V-NEXT: addi a0, sp, 8
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NOV-NEXT: li a0, -1
; CHECK-NOV-NEXT: srli a2, a0, 32
; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz
-; CHECK-NOV-NEXT: bge a1, a2, .LBB29_5
+; CHECK-NOV-NEXT: blt a1, a2, .LBB29_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: bge a0, a2, .LBB29_6
-; CHECK-NOV-NEXT: .LBB29_2: # %entry
-; CHECK-NOV-NEXT: blez a0, .LBB29_7
-; CHECK-NOV-NEXT: .LBB29_3: # %entry
-; CHECK-NOV-NEXT: blez a1, .LBB29_8
-; CHECK-NOV-NEXT: .LBB29_4: # %entry
-; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB29_5: # %entry
; CHECK-NOV-NEXT: mv a1, a2
-; CHECK-NOV-NEXT: blt a0, a2, .LBB29_2
-; CHECK-NOV-NEXT: .LBB29_6: # %entry
+; CHECK-NOV-NEXT: .LBB29_2: # %entry
+; CHECK-NOV-NEXT: blt a0, a2, .LBB29_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: bgtz a0, .LBB29_3
-; CHECK-NOV-NEXT: .LBB29_7: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: bgtz a1, .LBB29_4
-; CHECK-NOV-NEXT: .LBB29_8: # %entry
-; CHECK-NOV-NEXT: li a1, 0
+; CHECK-NOV-NEXT: .LBB29_4: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a0
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: sgtz a2, a1
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ret
;
; CHECK-V-LABEL: ustest_f64i32_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz
; CHECK-NOV-NEXT: li a2, -1
-; CHECK-NOV-NEXT: srli a5, a2, 32
+; CHECK-NOV-NEXT: srli a4, a2, 32
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
-; CHECK-NOV-NEXT: bge a1, a5, .LBB32_10
+; CHECK-NOV-NEXT: bge a1, a4, .LBB32_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
-; CHECK-NOV-NEXT: bge a2, a5, .LBB32_11
+; CHECK-NOV-NEXT: bge a2, a4, .LBB32_7
; CHECK-NOV-NEXT: .LBB32_2: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a4, fa0, rtz
-; CHECK-NOV-NEXT: bge a3, a5, .LBB32_12
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NOV-NEXT: bge a3, a4, .LBB32_8
; CHECK-NOV-NEXT: .LBB32_3: # %entry
-; CHECK-NOV-NEXT: bge a4, a5, .LBB32_13
+; CHECK-NOV-NEXT: blt a5, a4, .LBB32_5
; CHECK-NOV-NEXT: .LBB32_4: # %entry
-; CHECK-NOV-NEXT: blez a4, .LBB32_14
+; CHECK-NOV-NEXT: mv a5, a4
; CHECK-NOV-NEXT: .LBB32_5: # %entry
-; CHECK-NOV-NEXT: blez a3, .LBB32_15
-; CHECK-NOV-NEXT: .LBB32_6: # %entry
-; CHECK-NOV-NEXT: blez a2, .LBB32_16
-; CHECK-NOV-NEXT: .LBB32_7: # %entry
-; CHECK-NOV-NEXT: bgtz a1, .LBB32_9
-; CHECK-NOV-NEXT: .LBB32_8: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: .LBB32_9: # %entry
+; CHECK-NOV-NEXT: sgtz a4, a5
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a4, a4, a5
+; CHECK-NOV-NEXT: sgtz a5, a3
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a3, a5, a3
+; CHECK-NOV-NEXT: sgtz a5, a2
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a2, a5, a2
+; CHECK-NOV-NEXT: sgtz a5, a1
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: sw a1, 12(a0)
; CHECK-NOV-NEXT: sw a2, 8(a0)
; CHECK-NOV-NEXT: sw a3, 4(a0)
; CHECK-NOV-NEXT: sw a4, 0(a0)
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB32_10: # %entry
-; CHECK-NOV-NEXT: mv a1, a5
+; CHECK-NOV-NEXT: .LBB32_6: # %entry
+; CHECK-NOV-NEXT: mv a1, a4
; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a5, .LBB32_2
-; CHECK-NOV-NEXT: .LBB32_11: # %entry
-; CHECK-NOV-NEXT: mv a2, a5
-; CHECK-NOV-NEXT: fcvt.l.s a4, fa0, rtz
-; CHECK-NOV-NEXT: blt a3, a5, .LBB32_3
-; CHECK-NOV-NEXT: .LBB32_12: # %entry
-; CHECK-NOV-NEXT: mv a3, a5
-; CHECK-NOV-NEXT: blt a4, a5, .LBB32_4
-; CHECK-NOV-NEXT: .LBB32_13: # %entry
-; CHECK-NOV-NEXT: mv a4, a5
-; CHECK-NOV-NEXT: bgtz a4, .LBB32_5
-; CHECK-NOV-NEXT: .LBB32_14: # %entry
-; CHECK-NOV-NEXT: li a4, 0
-; CHECK-NOV-NEXT: bgtz a3, .LBB32_6
-; CHECK-NOV-NEXT: .LBB32_15: # %entry
-; CHECK-NOV-NEXT: li a3, 0
-; CHECK-NOV-NEXT: bgtz a2, .LBB32_7
-; CHECK-NOV-NEXT: .LBB32_16: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: blez a1, .LBB32_8
-; CHECK-NOV-NEXT: j .LBB32_9
+; CHECK-NOV-NEXT: blt a2, a4, .LBB32_2
+; CHECK-NOV-NEXT: .LBB32_7: # %entry
+; CHECK-NOV-NEXT: mv a2, a4
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a3, a4, .LBB32_3
+; CHECK-NOV-NEXT: .LBB32_8: # %entry
+; CHECK-NOV-NEXT: mv a3, a4
+; CHECK-NOV-NEXT: bge a5, a4, .LBB32_4
+; CHECK-NOV-NEXT: j .LBB32_5
;
; CHECK-V-LABEL: ustest_f32i32_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: li a1, -1
-; CHECK-NOV-NEXT: srli a3, a1, 32
-; CHECK-NOV-NEXT: bge a0, a3, .LBB35_10
+; CHECK-NOV-NEXT: srli a2, a1, 32
+; CHECK-NOV-NEXT: bge a0, a2, .LBB35_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: bge s2, a3, .LBB35_11
+; CHECK-NOV-NEXT: bge s2, a2, .LBB35_7
; CHECK-NOV-NEXT: .LBB35_2: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz
-; CHECK-NOV-NEXT: bge a1, a3, .LBB35_12
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
+; CHECK-NOV-NEXT: bge a1, a2, .LBB35_8
; CHECK-NOV-NEXT: .LBB35_3: # %entry
-; CHECK-NOV-NEXT: bge a2, a3, .LBB35_13
+; CHECK-NOV-NEXT: blt a3, a2, .LBB35_5
; CHECK-NOV-NEXT: .LBB35_4: # %entry
-; CHECK-NOV-NEXT: blez a2, .LBB35_14
+; CHECK-NOV-NEXT: mv a3, a2
; CHECK-NOV-NEXT: .LBB35_5: # %entry
-; CHECK-NOV-NEXT: blez a1, .LBB35_15
-; CHECK-NOV-NEXT: .LBB35_6: # %entry
-; CHECK-NOV-NEXT: blez s2, .LBB35_16
-; CHECK-NOV-NEXT: .LBB35_7: # %entry
-; CHECK-NOV-NEXT: bgtz a0, .LBB35_9
-; CHECK-NOV-NEXT: .LBB35_8: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB35_9: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a3
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a2, a2, a3
+; CHECK-NOV-NEXT: sgtz a3, a1
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a1, a3, a1
+; CHECK-NOV-NEXT: sgtz a3, s2
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a3, a3, s2
+; CHECK-NOV-NEXT: sgtz a4, a0
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: sw a0, 12(s0)
-; CHECK-NOV-NEXT: sw s2, 8(s0)
+; CHECK-NOV-NEXT: sw a3, 8(s0)
; CHECK-NOV-NEXT: sw a1, 4(s0)
; CHECK-NOV-NEXT: sw a2, 0(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 64
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB35_10: # %entry
-; CHECK-NOV-NEXT: mv a0, a3
+; CHECK-NOV-NEXT: .LBB35_6: # %entry
+; CHECK-NOV-NEXT: mv a0, a2
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: blt s2, a3, .LBB35_2
-; CHECK-NOV-NEXT: .LBB35_11: # %entry
-; CHECK-NOV-NEXT: mv s2, a3
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz
-; CHECK-NOV-NEXT: blt a1, a3, .LBB35_3
-; CHECK-NOV-NEXT: .LBB35_12: # %entry
-; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: blt a2, a3, .LBB35_4
-; CHECK-NOV-NEXT: .LBB35_13: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: bgtz a2, .LBB35_5
-; CHECK-NOV-NEXT: .LBB35_14: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: bgtz a1, .LBB35_6
-; CHECK-NOV-NEXT: .LBB35_15: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bgtz s2, .LBB35_7
-; CHECK-NOV-NEXT: .LBB35_16: # %entry
-; CHECK-NOV-NEXT: li s2, 0
-; CHECK-NOV-NEXT: blez a0, .LBB35_8
-; CHECK-NOV-NEXT: j .LBB35_9
+; CHECK-NOV-NEXT: blt s2, a2, .LBB35_2
+; CHECK-NOV-NEXT: .LBB35_7: # %entry
+; CHECK-NOV-NEXT: mv s2, a2
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
+; CHECK-NOV-NEXT: blt a1, a2, .LBB35_3
+; CHECK-NOV-NEXT: .LBB35_8: # %entry
+; CHECK-NOV-NEXT: mv a1, a2
+; CHECK-NOV-NEXT: bge a3, a2, .LBB35_4
+; CHECK-NOV-NEXT: j .LBB35_5
;
; CHECK-V-LABEL: ustest_f16i32_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-NOV-NEXT: lui a0, 16
; CHECK-NOV-NEXT: addiw a2, a0, -1
; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz
-; CHECK-NOV-NEXT: bge a1, a2, .LBB38_5
+; CHECK-NOV-NEXT: blt a1, a2, .LBB38_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: bge a0, a2, .LBB38_6
-; CHECK-NOV-NEXT: .LBB38_2: # %entry
-; CHECK-NOV-NEXT: blez a0, .LBB38_7
-; CHECK-NOV-NEXT: .LBB38_3: # %entry
-; CHECK-NOV-NEXT: blez a1, .LBB38_8
-; CHECK-NOV-NEXT: .LBB38_4: # %entry
-; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB38_5: # %entry
; CHECK-NOV-NEXT: mv a1, a2
-; CHECK-NOV-NEXT: blt a0, a2, .LBB38_2
-; CHECK-NOV-NEXT: .LBB38_6: # %entry
+; CHECK-NOV-NEXT: .LBB38_2: # %entry
+; CHECK-NOV-NEXT: blt a0, a2, .LBB38_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: bgtz a0, .LBB38_3
-; CHECK-NOV-NEXT: .LBB38_7: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: bgtz a1, .LBB38_4
-; CHECK-NOV-NEXT: .LBB38_8: # %entry
-; CHECK-NOV-NEXT: li a1, 0
+; CHECK-NOV-NEXT: .LBB38_4: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a0
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: sgtz a2, a1
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ret
;
; CHECK-V-LABEL: ustest_f64i16_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz
; CHECK-NOV-NEXT: lui a2, 16
-; CHECK-NOV-NEXT: addiw a5, a2, -1
+; CHECK-NOV-NEXT: addiw a4, a2, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
-; CHECK-NOV-NEXT: bge a1, a5, .LBB41_10
+; CHECK-NOV-NEXT: bge a1, a4, .LBB41_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
-; CHECK-NOV-NEXT: bge a2, a5, .LBB41_11
+; CHECK-NOV-NEXT: bge a2, a4, .LBB41_7
; CHECK-NOV-NEXT: .LBB41_2: # %entry
-; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
-; CHECK-NOV-NEXT: bge a3, a5, .LBB41_12
+; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
+; CHECK-NOV-NEXT: bge a3, a4, .LBB41_8
; CHECK-NOV-NEXT: .LBB41_3: # %entry
-; CHECK-NOV-NEXT: bge a4, a5, .LBB41_13
+; CHECK-NOV-NEXT: blt a5, a4, .LBB41_5
; CHECK-NOV-NEXT: .LBB41_4: # %entry
-; CHECK-NOV-NEXT: blez a4, .LBB41_14
+; CHECK-NOV-NEXT: mv a5, a4
; CHECK-NOV-NEXT: .LBB41_5: # %entry
-; CHECK-NOV-NEXT: blez a3, .LBB41_15
-; CHECK-NOV-NEXT: .LBB41_6: # %entry
-; CHECK-NOV-NEXT: blez a2, .LBB41_16
-; CHECK-NOV-NEXT: .LBB41_7: # %entry
-; CHECK-NOV-NEXT: bgtz a1, .LBB41_9
-; CHECK-NOV-NEXT: .LBB41_8: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: .LBB41_9: # %entry
+; CHECK-NOV-NEXT: sgtz a4, a5
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a4, a4, a5
+; CHECK-NOV-NEXT: sgtz a5, a3
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a3, a5, a3
+; CHECK-NOV-NEXT: sgtz a5, a2
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a2, a5, a2
+; CHECK-NOV-NEXT: sgtz a5, a1
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: sh a1, 6(a0)
; CHECK-NOV-NEXT: sh a2, 4(a0)
; CHECK-NOV-NEXT: sh a3, 2(a0)
; CHECK-NOV-NEXT: sh a4, 0(a0)
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB41_10: # %entry
-; CHECK-NOV-NEXT: mv a1, a5
+; CHECK-NOV-NEXT: .LBB41_6: # %entry
+; CHECK-NOV-NEXT: mv a1, a4
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a5, .LBB41_2
-; CHECK-NOV-NEXT: .LBB41_11: # %entry
-; CHECK-NOV-NEXT: mv a2, a5
-; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
-; CHECK-NOV-NEXT: blt a3, a5, .LBB41_3
-; CHECK-NOV-NEXT: .LBB41_12: # %entry
-; CHECK-NOV-NEXT: mv a3, a5
-; CHECK-NOV-NEXT: blt a4, a5, .LBB41_4
-; CHECK-NOV-NEXT: .LBB41_13: # %entry
-; CHECK-NOV-NEXT: mv a4, a5
-; CHECK-NOV-NEXT: bgtz a4, .LBB41_5
-; CHECK-NOV-NEXT: .LBB41_14: # %entry
-; CHECK-NOV-NEXT: li a4, 0
-; CHECK-NOV-NEXT: bgtz a3, .LBB41_6
-; CHECK-NOV-NEXT: .LBB41_15: # %entry
-; CHECK-NOV-NEXT: li a3, 0
-; CHECK-NOV-NEXT: bgtz a2, .LBB41_7
-; CHECK-NOV-NEXT: .LBB41_16: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: blez a1, .LBB41_8
-; CHECK-NOV-NEXT: j .LBB41_9
+; CHECK-NOV-NEXT: blt a2, a4, .LBB41_2
+; CHECK-NOV-NEXT: .LBB41_7: # %entry
+; CHECK-NOV-NEXT: mv a2, a4
+; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a3, a4, .LBB41_3
+; CHECK-NOV-NEXT: .LBB41_8: # %entry
+; CHECK-NOV-NEXT: mv a3, a4
+; CHECK-NOV-NEXT: bge a5, a4, .LBB41_4
+; CHECK-NOV-NEXT: j .LBB41_5
;
; CHECK-V-LABEL: ustest_f32i16_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: lui a1, 16
-; CHECK-NOV-NEXT: addiw a7, a1, -1
-; CHECK-NOV-NEXT: bge a0, a7, .LBB44_18
+; CHECK-NOV-NEXT: addiw a3, a1, -1
+; CHECK-NOV-NEXT: bge a0, a3, .LBB44_10
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: bge s2, a7, .LBB44_19
+; CHECK-NOV-NEXT: bge s2, a3, .LBB44_11
; CHECK-NOV-NEXT: .LBB44_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
-; CHECK-NOV-NEXT: bge a1, a7, .LBB44_20
+; CHECK-NOV-NEXT: bge a1, a3, .LBB44_12
; CHECK-NOV-NEXT: .LBB44_3: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
-; CHECK-NOV-NEXT: bge a2, a7, .LBB44_21
+; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz
+; CHECK-NOV-NEXT: bge a2, a3, .LBB44_13
; CHECK-NOV-NEXT: .LBB44_4: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: bge a3, a7, .LBB44_22
+; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz
+; CHECK-NOV-NEXT: bge a4, a3, .LBB44_14
; CHECK-NOV-NEXT: .LBB44_5: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
-; CHECK-NOV-NEXT: bge a4, a7, .LBB44_23
+; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz
+; CHECK-NOV-NEXT: bge a5, a3, .LBB44_15
; CHECK-NOV-NEXT: .LBB44_6: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
-; CHECK-NOV-NEXT: bge a5, a7, .LBB44_24
+; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
+; CHECK-NOV-NEXT: bge a6, a3, .LBB44_16
; CHECK-NOV-NEXT: .LBB44_7: # %entry
-; CHECK-NOV-NEXT: bge a6, a7, .LBB44_25
+; CHECK-NOV-NEXT: blt a7, a3, .LBB44_9
; CHECK-NOV-NEXT: .LBB44_8: # %entry
-; CHECK-NOV-NEXT: blez a6, .LBB44_26
+; CHECK-NOV-NEXT: mv a7, a3
; CHECK-NOV-NEXT: .LBB44_9: # %entry
-; CHECK-NOV-NEXT: blez a5, .LBB44_27
-; CHECK-NOV-NEXT: .LBB44_10: # %entry
-; CHECK-NOV-NEXT: blez a4, .LBB44_28
-; CHECK-NOV-NEXT: .LBB44_11: # %entry
-; CHECK-NOV-NEXT: blez a3, .LBB44_29
-; CHECK-NOV-NEXT: .LBB44_12: # %entry
-; CHECK-NOV-NEXT: blez a2, .LBB44_30
-; CHECK-NOV-NEXT: .LBB44_13: # %entry
-; CHECK-NOV-NEXT: blez a1, .LBB44_31
-; CHECK-NOV-NEXT: .LBB44_14: # %entry
-; CHECK-NOV-NEXT: blez s2, .LBB44_32
-; CHECK-NOV-NEXT: .LBB44_15: # %entry
-; CHECK-NOV-NEXT: bgtz a0, .LBB44_17
-; CHECK-NOV-NEXT: .LBB44_16: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: .LBB44_17: # %entry
+; CHECK-NOV-NEXT: sgtz a3, a7
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a3, a3, a7
+; CHECK-NOV-NEXT: sgtz a7, a6
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a6, a7, a6
+; CHECK-NOV-NEXT: sgtz a7, a5
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a5, a7, a5
+; CHECK-NOV-NEXT: sgtz a7, a4
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a4, a7, a4
+; CHECK-NOV-NEXT: sgtz a7, a2
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a2, a7, a2
+; CHECK-NOV-NEXT: sgtz a7, a1
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a1, a7, a1
+; CHECK-NOV-NEXT: sgtz a7, s2
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: and a7, a7, s2
+; CHECK-NOV-NEXT: sgtz t0, a0
+; CHECK-NOV-NEXT: neg t0, t0
+; CHECK-NOV-NEXT: and a0, t0, a0
; CHECK-NOV-NEXT: sh a0, 14(s0)
-; CHECK-NOV-NEXT: sh s2, 12(s0)
+; CHECK-NOV-NEXT: sh a7, 12(s0)
; CHECK-NOV-NEXT: sh a1, 10(s0)
; CHECK-NOV-NEXT: sh a2, 8(s0)
-; CHECK-NOV-NEXT: sh a3, 6(s0)
-; CHECK-NOV-NEXT: sh a4, 4(s0)
-; CHECK-NOV-NEXT: sh a5, 2(s0)
-; CHECK-NOV-NEXT: sh a6, 0(s0)
+; CHECK-NOV-NEXT: sh a4, 6(s0)
+; CHECK-NOV-NEXT: sh a5, 4(s0)
+; CHECK-NOV-NEXT: sh a6, 2(s0)
+; CHECK-NOV-NEXT: sh a3, 0(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 128
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB44_18: # %entry
-; CHECK-NOV-NEXT: mv a0, a7
+; CHECK-NOV-NEXT: .LBB44_10: # %entry
+; CHECK-NOV-NEXT: mv a0, a3
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s2, a7, .LBB44_2
-; CHECK-NOV-NEXT: .LBB44_19: # %entry
-; CHECK-NOV-NEXT: mv s2, a7
+; CHECK-NOV-NEXT: blt s2, a3, .LBB44_2
+; CHECK-NOV-NEXT: .LBB44_11: # %entry
+; CHECK-NOV-NEXT: mv s2, a3
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
-; CHECK-NOV-NEXT: blt a1, a7, .LBB44_3
-; CHECK-NOV-NEXT: .LBB44_20: # %entry
-; CHECK-NOV-NEXT: mv a1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
-; CHECK-NOV-NEXT: blt a2, a7, .LBB44_4
-; CHECK-NOV-NEXT: .LBB44_21: # %entry
-; CHECK-NOV-NEXT: mv a2, a7
-; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: blt a3, a7, .LBB44_5
-; CHECK-NOV-NEXT: .LBB44_22: # %entry
-; CHECK-NOV-NEXT: mv a3, a7
-; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
-; CHECK-NOV-NEXT: blt a4, a7, .LBB44_6
-; CHECK-NOV-NEXT: .LBB44_23: # %entry
-; CHECK-NOV-NEXT: mv a4, a7
-; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
-; CHECK-NOV-NEXT: blt a5, a7, .LBB44_7
-; CHECK-NOV-NEXT: .LBB44_24: # %entry
-; CHECK-NOV-NEXT: mv a5, a7
-; CHECK-NOV-NEXT: blt a6, a7, .LBB44_8
-; CHECK-NOV-NEXT: .LBB44_25: # %entry
-; CHECK-NOV-NEXT: mv a6, a7
-; CHECK-NOV-NEXT: bgtz a6, .LBB44_9
-; CHECK-NOV-NEXT: .LBB44_26: # %entry
-; CHECK-NOV-NEXT: li a6, 0
-; CHECK-NOV-NEXT: bgtz a5, .LBB44_10
-; CHECK-NOV-NEXT: .LBB44_27: # %entry
-; CHECK-NOV-NEXT: li a5, 0
-; CHECK-NOV-NEXT: bgtz a4, .LBB44_11
-; CHECK-NOV-NEXT: .LBB44_28: # %entry
-; CHECK-NOV-NEXT: li a4, 0
-; CHECK-NOV-NEXT: bgtz a3, .LBB44_12
-; CHECK-NOV-NEXT: .LBB44_29: # %entry
-; CHECK-NOV-NEXT: li a3, 0
-; CHECK-NOV-NEXT: bgtz a2, .LBB44_13
-; CHECK-NOV-NEXT: .LBB44_30: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: bgtz a1, .LBB44_14
-; CHECK-NOV-NEXT: .LBB44_31: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bgtz s2, .LBB44_15
-; CHECK-NOV-NEXT: .LBB44_32: # %entry
-; CHECK-NOV-NEXT: li s2, 0
-; CHECK-NOV-NEXT: blez a0, .LBB44_16
-; CHECK-NOV-NEXT: j .LBB44_17
+; CHECK-NOV-NEXT: blt a1, a3, .LBB44_3
+; CHECK-NOV-NEXT: .LBB44_12: # %entry
+; CHECK-NOV-NEXT: mv a1, a3
+; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz
+; CHECK-NOV-NEXT: blt a2, a3, .LBB44_4
+; CHECK-NOV-NEXT: .LBB44_13: # %entry
+; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz
+; CHECK-NOV-NEXT: blt a4, a3, .LBB44_5
+; CHECK-NOV-NEXT: .LBB44_14: # %entry
+; CHECK-NOV-NEXT: mv a4, a3
+; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz
+; CHECK-NOV-NEXT: blt a5, a3, .LBB44_6
+; CHECK-NOV-NEXT: .LBB44_15: # %entry
+; CHECK-NOV-NEXT: mv a5, a3
+; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
+; CHECK-NOV-NEXT: blt a6, a3, .LBB44_7
+; CHECK-NOV-NEXT: .LBB44_16: # %entry
+; CHECK-NOV-NEXT: mv a6, a3
+; CHECK-NOV-NEXT: bge a7, a3, .LBB44_8
+; CHECK-NOV-NEXT: j .LBB44_9
;
; CHECK-V-LABEL: ustest_f16i16_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-NOV-NEXT: li a0, -1
; CHECK-NOV-NEXT: srli a3, a0, 1
; CHECK-NOV-NEXT: mv a4, a2
-; CHECK-NOV-NEXT: bgez a1, .LBB45_17
+; CHECK-NOV-NEXT: bgez a1, .LBB45_15
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: bgeu a2, a3, .LBB45_18
+; CHECK-NOV-NEXT: bgeu a2, a3, .LBB45_16
; CHECK-NOV-NEXT: .LBB45_2: # %entry
-; CHECK-NOV-NEXT: bnez a1, .LBB45_19
+; CHECK-NOV-NEXT: bnez a1, .LBB45_17
; CHECK-NOV-NEXT: .LBB45_3: # %entry
; CHECK-NOV-NEXT: mv a4, s0
-; CHECK-NOV-NEXT: bgez s1, .LBB45_20
+; CHECK-NOV-NEXT: bgez s1, .LBB45_18
; CHECK-NOV-NEXT: .LBB45_4: # %entry
-; CHECK-NOV-NEXT: bgeu s0, a3, .LBB45_21
+; CHECK-NOV-NEXT: bgeu s0, a3, .LBB45_19
; CHECK-NOV-NEXT: .LBB45_5: # %entry
-; CHECK-NOV-NEXT: bnez s1, .LBB45_22
+; CHECK-NOV-NEXT: beqz s1, .LBB45_7
; CHECK-NOV-NEXT: .LBB45_6: # %entry
-; CHECK-NOV-NEXT: bgez a1, .LBB45_23
+; CHECK-NOV-NEXT: mv s0, a4
; CHECK-NOV-NEXT: .LBB45_7: # %entry
-; CHECK-NOV-NEXT: bltz s1, .LBB45_9
-; CHECK-NOV-NEXT: .LBB45_8: # %entry
-; CHECK-NOV-NEXT: li s1, 0
-; CHECK-NOV-NEXT: .LBB45_9: # %entry
+; CHECK-NOV-NEXT: slti a6, a1, 0
+; CHECK-NOV-NEXT: slti a3, s1, 0
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a4, a3, s1
; CHECK-NOV-NEXT: slli a3, a0, 63
-; CHECK-NOV-NEXT: mv a4, s0
-; CHECK-NOV-NEXT: bltz s1, .LBB45_24
-; CHECK-NOV-NEXT: # %bb.10: # %entry
-; CHECK-NOV-NEXT: bgeu a3, s0, .LBB45_25
+; CHECK-NOV-NEXT: mv a5, s0
+; CHECK-NOV-NEXT: bltz a4, .LBB45_20
+; CHECK-NOV-NEXT: # %bb.8: # %entry
+; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: bgeu a3, s0, .LBB45_21
+; CHECK-NOV-NEXT: .LBB45_9: # %entry
+; CHECK-NOV-NEXT: and a1, a6, a1
+; CHECK-NOV-NEXT: bne a4, a0, .LBB45_22
+; CHECK-NOV-NEXT: .LBB45_10: # %entry
+; CHECK-NOV-NEXT: mv a4, a2
+; CHECK-NOV-NEXT: bltz a1, .LBB45_23
; CHECK-NOV-NEXT: .LBB45_11: # %entry
-; CHECK-NOV-NEXT: bne s1, a0, .LBB45_26
+; CHECK-NOV-NEXT: bgeu a3, a2, .LBB45_24
; CHECK-NOV-NEXT: .LBB45_12: # %entry
-; CHECK-NOV-NEXT: mv a4, a2
-; CHECK-NOV-NEXT: bltz a1, .LBB45_27
+; CHECK-NOV-NEXT: beq a1, a0, .LBB45_14
; CHECK-NOV-NEXT: .LBB45_13: # %entry
-; CHECK-NOV-NEXT: bgeu a3, a2, .LBB45_28
-; CHECK-NOV-NEXT: .LBB45_14: # %entry
-; CHECK-NOV-NEXT: beq a1, a0, .LBB45_16
-; CHECK-NOV-NEXT: .LBB45_15: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: .LBB45_16: # %entry
+; CHECK-NOV-NEXT: .LBB45_14: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 32
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB45_17: # %entry
+; CHECK-NOV-NEXT: .LBB45_15: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bltu a2, a3, .LBB45_2
-; CHECK-NOV-NEXT: .LBB45_18: # %entry
+; CHECK-NOV-NEXT: .LBB45_16: # %entry
; CHECK-NOV-NEXT: mv a2, a3
; CHECK-NOV-NEXT: beqz a1, .LBB45_3
-; CHECK-NOV-NEXT: .LBB45_19: # %entry
+; CHECK-NOV-NEXT: .LBB45_17: # %entry
; CHECK-NOV-NEXT: mv a2, a4
; CHECK-NOV-NEXT: mv a4, s0
; CHECK-NOV-NEXT: bltz s1, .LBB45_4
-; CHECK-NOV-NEXT: .LBB45_20: # %entry
+; CHECK-NOV-NEXT: .LBB45_18: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bltu s0, a3, .LBB45_5
+; CHECK-NOV-NEXT: .LBB45_19: # %entry
+; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: bnez s1, .LBB45_6
+; CHECK-NOV-NEXT: j .LBB45_7
+; CHECK-NOV-NEXT: .LBB45_20: # %entry
+; CHECK-NOV-NEXT: mv a5, a3
+; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: bltu a3, s0, .LBB45_9
; CHECK-NOV-NEXT: .LBB45_21: # %entry
; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: beqz s1, .LBB45_6
+; CHECK-NOV-NEXT: and a1, a6, a1
+; CHECK-NOV-NEXT: beq a4, a0, .LBB45_10
; CHECK-NOV-NEXT: .LBB45_22: # %entry
-; CHECK-NOV-NEXT: mv s0, a4
-; CHECK-NOV-NEXT: bltz a1, .LBB45_7
-; CHECK-NOV-NEXT: .LBB45_23: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bgez s1, .LBB45_8
-; CHECK-NOV-NEXT: j .LBB45_9
-; CHECK-NOV-NEXT: .LBB45_24: # %entry
-; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: bltu a3, s0, .LBB45_11
-; CHECK-NOV-NEXT: .LBB45_25: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: beq s1, a0, .LBB45_12
-; CHECK-NOV-NEXT: .LBB45_26: # %entry
-; CHECK-NOV-NEXT: mv s0, a4
+; CHECK-NOV-NEXT: mv s0, a5
; CHECK-NOV-NEXT: mv a4, a2
-; CHECK-NOV-NEXT: bgez a1, .LBB45_13
-; CHECK-NOV-NEXT: .LBB45_27: # %entry
+; CHECK-NOV-NEXT: bgez a1, .LBB45_11
+; CHECK-NOV-NEXT: .LBB45_23: # %entry
; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: bltu a3, a2, .LBB45_14
-; CHECK-NOV-NEXT: .LBB45_28: # %entry
+; CHECK-NOV-NEXT: bltu a3, a2, .LBB45_12
+; CHECK-NOV-NEXT: .LBB45_24: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: bne a1, a0, .LBB45_15
-; CHECK-NOV-NEXT: j .LBB45_16
+; CHECK-NOV-NEXT: bne a1, a0, .LBB45_13
+; CHECK-NOV-NEXT: j .LBB45_14
;
; CHECK-V-LABEL: stest_f64i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
; CHECK-V-NEXT: mv a4, s0
-; CHECK-V-NEXT: bgez s1, .LBB45_17
+; CHECK-V-NEXT: bgez s1, .LBB45_15
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: bgeu s0, a3, .LBB45_18
+; CHECK-V-NEXT: bgeu s0, a3, .LBB45_16
; CHECK-V-NEXT: .LBB45_2: # %entry
-; CHECK-V-NEXT: bnez s1, .LBB45_19
+; CHECK-V-NEXT: bnez s1, .LBB45_17
; CHECK-V-NEXT: .LBB45_3: # %entry
; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: bgez a1, .LBB45_20
+; CHECK-V-NEXT: bgez a1, .LBB45_18
; CHECK-V-NEXT: .LBB45_4: # %entry
-; CHECK-V-NEXT: bgeu a0, a3, .LBB45_21
+; CHECK-V-NEXT: bgeu a0, a3, .LBB45_19
; CHECK-V-NEXT: .LBB45_5: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB45_22
+; CHECK-V-NEXT: beqz a1, .LBB45_7
; CHECK-V-NEXT: .LBB45_6: # %entry
-; CHECK-V-NEXT: bgez a1, .LBB45_23
+; CHECK-V-NEXT: mv a0, a4
; CHECK-V-NEXT: .LBB45_7: # %entry
-; CHECK-V-NEXT: bltz s1, .LBB45_9
-; CHECK-V-NEXT: .LBB45_8: # %entry
-; CHECK-V-NEXT: li s1, 0
-; CHECK-V-NEXT: .LBB45_9: # %entry
+; CHECK-V-NEXT: slti a3, s1, 0
+; CHECK-V-NEXT: neg a3, a3
+; CHECK-V-NEXT: and a4, a3, s1
+; CHECK-V-NEXT: slti a6, a1, 0
; CHECK-V-NEXT: slli a3, a2, 63
-; CHECK-V-NEXT: mv a4, s0
-; CHECK-V-NEXT: bltz s1, .LBB45_24
-; CHECK-V-NEXT: # %bb.10: # %entry
-; CHECK-V-NEXT: bgeu a3, s0, .LBB45_25
+; CHECK-V-NEXT: mv a5, s0
+; CHECK-V-NEXT: bltz a4, .LBB45_20
+; CHECK-V-NEXT: # %bb.8: # %entry
+; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: bgeu a3, s0, .LBB45_21
+; CHECK-V-NEXT: .LBB45_9: # %entry
+; CHECK-V-NEXT: and a1, a6, a1
+; CHECK-V-NEXT: bne a4, a2, .LBB45_22
+; CHECK-V-NEXT: .LBB45_10: # %entry
+; CHECK-V-NEXT: mv a4, a0
+; CHECK-V-NEXT: bltz a1, .LBB45_23
; CHECK-V-NEXT: .LBB45_11: # %entry
-; CHECK-V-NEXT: bne s1, a2, .LBB45_26
+; CHECK-V-NEXT: bgeu a3, a0, .LBB45_24
; CHECK-V-NEXT: .LBB45_12: # %entry
-; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: bltz a1, .LBB45_27
+; CHECK-V-NEXT: beq a1, a2, .LBB45_14
; CHECK-V-NEXT: .LBB45_13: # %entry
-; CHECK-V-NEXT: bgeu a3, a0, .LBB45_28
-; CHECK-V-NEXT: .LBB45_14: # %entry
-; CHECK-V-NEXT: beq a1, a2, .LBB45_16
-; CHECK-V-NEXT: .LBB45_15: # %entry
; CHECK-V-NEXT: mv a0, a4
-; CHECK-V-NEXT: .LBB45_16: # %entry
+; CHECK-V-NEXT: .LBB45_14: # %entry
; CHECK-V-NEXT: sd a0, 24(sp)
; CHECK-V-NEXT: sd s0, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
-; CHECK-V-NEXT: .LBB45_17: # %entry
+; CHECK-V-NEXT: .LBB45_15: # %entry
; CHECK-V-NEXT: mv a4, a3
; CHECK-V-NEXT: bltu s0, a3, .LBB45_2
-; CHECK-V-NEXT: .LBB45_18: # %entry
+; CHECK-V-NEXT: .LBB45_16: # %entry
; CHECK-V-NEXT: mv s0, a3
; CHECK-V-NEXT: beqz s1, .LBB45_3
-; CHECK-V-NEXT: .LBB45_19: # %entry
+; CHECK-V-NEXT: .LBB45_17: # %entry
; CHECK-V-NEXT: mv s0, a4
; CHECK-V-NEXT: mv a4, a0
; CHECK-V-NEXT: bltz a1, .LBB45_4
-; CHECK-V-NEXT: .LBB45_20: # %entry
+; CHECK-V-NEXT: .LBB45_18: # %entry
; CHECK-V-NEXT: mv a4, a3
; CHECK-V-NEXT: bltu a0, a3, .LBB45_5
-; CHECK-V-NEXT: .LBB45_21: # %entry
+; CHECK-V-NEXT: .LBB45_19: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: beqz a1, .LBB45_6
-; CHECK-V-NEXT: .LBB45_22: # %entry
-; CHECK-V-NEXT: mv a0, a4
-; CHECK-V-NEXT: bltz a1, .LBB45_7
-; CHECK-V-NEXT: .LBB45_23: # %entry
-; CHECK-V-NEXT: li a1, 0
-; CHECK-V-NEXT: bgez s1, .LBB45_8
-; CHECK-V-NEXT: j .LBB45_9
-; CHECK-V-NEXT: .LBB45_24: # %entry
-; CHECK-V-NEXT: mv a4, a3
-; CHECK-V-NEXT: bltu a3, s0, .LBB45_11
-; CHECK-V-NEXT: .LBB45_25: # %entry
+; CHECK-V-NEXT: bnez a1, .LBB45_6
+; CHECK-V-NEXT: j .LBB45_7
+; CHECK-V-NEXT: .LBB45_20: # %entry
+; CHECK-V-NEXT: mv a5, a3
+; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: bltu a3, s0, .LBB45_9
+; CHECK-V-NEXT: .LBB45_21: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: beq s1, a2, .LBB45_12
-; CHECK-V-NEXT: .LBB45_26: # %entry
-; CHECK-V-NEXT: mv s0, a4
+; CHECK-V-NEXT: and a1, a6, a1
+; CHECK-V-NEXT: beq a4, a2, .LBB45_10
+; CHECK-V-NEXT: .LBB45_22: # %entry
+; CHECK-V-NEXT: mv s0, a5
; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: bgez a1, .LBB45_13
-; CHECK-V-NEXT: .LBB45_27: # %entry
+; CHECK-V-NEXT: bgez a1, .LBB45_11
+; CHECK-V-NEXT: .LBB45_23: # %entry
; CHECK-V-NEXT: mv a4, a3
-; CHECK-V-NEXT: bltu a3, a0, .LBB45_14
-; CHECK-V-NEXT: .LBB45_28: # %entry
+; CHECK-V-NEXT: bltu a3, a0, .LBB45_12
+; CHECK-V-NEXT: .LBB45_24: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: bne a1, a2, .LBB45_15
-; CHECK-V-NEXT: j .LBB45_16
+; CHECK-V-NEXT: bne a1, a2, .LBB45_13
+; CHECK-V-NEXT: j .LBB45_14
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.d fa0, fs0
; CHECK-NOV-NEXT: call __fixunsdfti@plt
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: mv a3, a1
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: beqz a3, .LBB46_2
-; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: .LBB46_2: # %entry
-; CHECK-NOV-NEXT: li a4, 1
-; CHECK-NOV-NEXT: mv a0, a1
-; CHECK-NOV-NEXT: bne a3, a4, .LBB46_7
-; CHECK-NOV-NEXT: # %bb.3: # %entry
-; CHECK-NOV-NEXT: bnez s1, .LBB46_8
-; CHECK-NOV-NEXT: .LBB46_4: # %entry
-; CHECK-NOV-NEXT: beq s1, a4, .LBB46_6
-; CHECK-NOV-NEXT: .LBB46_5: # %entry
-; CHECK-NOV-NEXT: mv a1, s0
-; CHECK-NOV-NEXT: .LBB46_6: # %entry
+; CHECK-NOV-NEXT: snez a2, a1
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: snez a1, s1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
+; CHECK-NOV-NEXT: addi a2, s1, -1
+; CHECK-NOV-NEXT: seqz a2, a2
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 32
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB46_7: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: beqz s1, .LBB46_4
-; CHECK-NOV-NEXT: .LBB46_8: # %entry
-; CHECK-NOV-NEXT: mv s0, a1
-; CHECK-NOV-NEXT: bne s1, a4, .LBB46_5
-; CHECK-NOV-NEXT: j .LBB46_6
;
; CHECK-V-LABEL: utest_f64i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunsdfti@plt
-; CHECK-V-NEXT: li a2, 0
-; CHECK-V-NEXT: beqz s1, .LBB46_2
-; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: mv s0, a2
-; CHECK-V-NEXT: .LBB46_2: # %entry
-; CHECK-V-NEXT: li a4, 1
-; CHECK-V-NEXT: mv a3, a2
-; CHECK-V-NEXT: bne s1, a4, .LBB46_7
-; CHECK-V-NEXT: # %bb.3: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB46_8
-; CHECK-V-NEXT: .LBB46_4: # %entry
-; CHECK-V-NEXT: beq a1, a4, .LBB46_6
-; CHECK-V-NEXT: .LBB46_5: # %entry
-; CHECK-V-NEXT: mv a2, a0
-; CHECK-V-NEXT: .LBB46_6: # %entry
-; CHECK-V-NEXT: sd a2, 24(sp)
-; CHECK-V-NEXT: sd a3, 32(sp)
+; CHECK-V-NEXT: snez a2, s1
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s0
+; CHECK-V-NEXT: addi a3, s1, -1
+; CHECK-V-NEXT: seqz a3, a3
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a2, a3, a2
+; CHECK-V-NEXT: snez a3, a1
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a0, a3, a0
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: sd a2, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
-; CHECK-V-NEXT: .LBB46_7: # %entry
-; CHECK-V-NEXT: mv a3, s0
-; CHECK-V-NEXT: beqz a1, .LBB46_4
-; CHECK-V-NEXT: .LBB46_8: # %entry
-; CHECK-V-NEXT: mv a0, a2
-; CHECK-V-NEXT: bne a1, a4, .LBB46_5
-; CHECK-V-NEXT: j .LBB46_6
entry:
%conv = fptoui <2 x double> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; CHECK-NOV-NEXT: fmv.d fa0, fs0
; CHECK-NOV-NEXT: call __fixdfti@plt
; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: li a5, 1
-; CHECK-NOV-NEXT: mv a3, a1
-; CHECK-NOV-NEXT: bgtz a1, .LBB47_12
+; CHECK-NOV-NEXT: blez a1, .LBB47_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a4, s1
-; CHECK-NOV-NEXT: bgtz s1, .LBB47_13
+; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB47_2: # %entry
-; CHECK-NOV-NEXT: bgtz a2, .LBB47_14
-; CHECK-NOV-NEXT: .LBB47_3: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bne a2, a5, .LBB47_15
+; CHECK-NOV-NEXT: mv a4, s1
+; CHECK-NOV-NEXT: blez s1, .LBB47_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: li a4, 1
; CHECK-NOV-NEXT: .LBB47_4: # %entry
-; CHECK-NOV-NEXT: bgtz s1, .LBB47_16
-; CHECK-NOV-NEXT: .LBB47_5: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: bne s1, a5, .LBB47_17
+; CHECK-NOV-NEXT: sgtz a3, a1
+; CHECK-NOV-NEXT: addi a3, a3, -1
+; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: addi a0, a1, -1
+; CHECK-NOV-NEXT: seqz a0, a0
+; CHECK-NOV-NEXT: addi a1, a0, -1
+; CHECK-NOV-NEXT: sgtz a0, s1
+; CHECK-NOV-NEXT: addi a0, a0, -1
+; CHECK-NOV-NEXT: and a0, a0, s0
+; CHECK-NOV-NEXT: addi a5, s1, -1
+; CHECK-NOV-NEXT: seqz a5, a5
+; CHECK-NOV-NEXT: addi a5, a5, -1
+; CHECK-NOV-NEXT: and a0, a5, a0
+; CHECK-NOV-NEXT: beqz a4, .LBB47_6
+; CHECK-NOV-NEXT: # %bb.5: # %entry
+; CHECK-NOV-NEXT: sgtz a4, a4
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: .LBB47_6: # %entry
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: blez a4, .LBB47_18
-; CHECK-NOV-NEXT: .LBB47_7: # %entry
-; CHECK-NOV-NEXT: bnez a4, .LBB47_19
+; CHECK-NOV-NEXT: and a1, a1, a3
+; CHECK-NOV-NEXT: beqz a2, .LBB47_8
+; CHECK-NOV-NEXT: # %bb.7: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: .LBB47_8: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: blez a3, .LBB47_20
-; CHECK-NOV-NEXT: .LBB47_9: # %entry
-; CHECK-NOV-NEXT: beqz a3, .LBB47_11
-; CHECK-NOV-NEXT: .LBB47_10: # %entry
-; CHECK-NOV-NEXT: mv a1, a2
-; CHECK-NOV-NEXT: .LBB47_11: # %entry
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 32
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB47_12: # %entry
-; CHECK-NOV-NEXT: li a3, 1
-; CHECK-NOV-NEXT: mv a4, s1
-; CHECK-NOV-NEXT: blez s1, .LBB47_2
-; CHECK-NOV-NEXT: .LBB47_13: # %entry
-; CHECK-NOV-NEXT: li a4, 1
-; CHECK-NOV-NEXT: blez a2, .LBB47_3
-; CHECK-NOV-NEXT: .LBB47_14: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: beq a2, a5, .LBB47_4
-; CHECK-NOV-NEXT: .LBB47_15: # %entry
-; CHECK-NOV-NEXT: mv a1, a0
-; CHECK-NOV-NEXT: blez s1, .LBB47_5
-; CHECK-NOV-NEXT: .LBB47_16: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: beq s1, a5, .LBB47_6
-; CHECK-NOV-NEXT: .LBB47_17: # %entry
-; CHECK-NOV-NEXT: mv a0, s0
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: bgtz a4, .LBB47_7
-; CHECK-NOV-NEXT: .LBB47_18: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: beqz a4, .LBB47_8
-; CHECK-NOV-NEXT: .LBB47_19: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: bgtz a3, .LBB47_9
-; CHECK-NOV-NEXT: .LBB47_20: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: bnez a3, .LBB47_10
-; CHECK-NOV-NEXT: j .LBB47_11
;
; CHECK-V-LABEL: ustest_f64i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixdfti@plt
-; CHECK-V-NEXT: li a5, 1
; CHECK-V-NEXT: mv a2, a1
-; CHECK-V-NEXT: bgtz a1, .LBB47_12
+; CHECK-V-NEXT: blez a1, .LBB47_2
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: bgtz s0, .LBB47_13
+; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB47_2: # %entry
-; CHECK-V-NEXT: li a3, 0
-; CHECK-V-NEXT: bne s0, a5, .LBB47_14
-; CHECK-V-NEXT: .LBB47_3: # %entry
-; CHECK-V-NEXT: bgtz a1, .LBB47_15
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a3, a3, s1
+; CHECK-V-NEXT: addi a4, s0, -1
+; CHECK-V-NEXT: seqz a4, a4
+; CHECK-V-NEXT: addi a4, a4, -1
+; CHECK-V-NEXT: sgtz a5, a1
+; CHECK-V-NEXT: addi a5, a5, -1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a6, a1
+; CHECK-V-NEXT: blez s0, .LBB47_4
+; CHECK-V-NEXT: # %bb.3: # %entry
+; CHECK-V-NEXT: li s0, 1
; CHECK-V-NEXT: .LBB47_4: # %entry
-; CHECK-V-NEXT: li a4, 0
-; CHECK-V-NEXT: bne a1, a5, .LBB47_16
-; CHECK-V-NEXT: .LBB47_5: # %entry
-; CHECK-V-NEXT: bgtz s0, .LBB47_17
+; CHECK-V-NEXT: and a1, a5, a0
+; CHECK-V-NEXT: addi a5, a6, -1
+; CHECK-V-NEXT: and a0, a4, a3
+; CHECK-V-NEXT: beqz s0, .LBB47_6
+; CHECK-V-NEXT: # %bb.5: # %entry
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: neg a3, a3
+; CHECK-V-NEXT: and a0, a3, a0
; CHECK-V-NEXT: .LBB47_6: # %entry
-; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: blez s0, .LBB47_18
-; CHECK-V-NEXT: .LBB47_7: # %entry
-; CHECK-V-NEXT: bnez s0, .LBB47_19
+; CHECK-V-NEXT: and a1, a5, a1
+; CHECK-V-NEXT: beqz a2, .LBB47_8
+; CHECK-V-NEXT: # %bb.7: # %entry
+; CHECK-V-NEXT: sgtz a2, a2
+; CHECK-V-NEXT: neg a2, a2
+; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB47_8: # %entry
-; CHECK-V-NEXT: mv a0, a4
-; CHECK-V-NEXT: blez a2, .LBB47_20
-; CHECK-V-NEXT: .LBB47_9: # %entry
-; CHECK-V-NEXT: beqz a2, .LBB47_11
-; CHECK-V-NEXT: .LBB47_10: # %entry
-; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: .LBB47_11: # %entry
-; CHECK-V-NEXT: sd a4, 24(sp)
-; CHECK-V-NEXT: sd a3, 32(sp)
+; CHECK-V-NEXT: sd a1, 24(sp)
+; CHECK-V-NEXT: sd a0, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
-; CHECK-V-NEXT: .LBB47_12: # %entry
-; CHECK-V-NEXT: li a2, 1
-; CHECK-V-NEXT: blez s0, .LBB47_2
-; CHECK-V-NEXT: .LBB47_13: # %entry
-; CHECK-V-NEXT: li s1, 0
-; CHECK-V-NEXT: li a3, 0
-; CHECK-V-NEXT: beq s0, a5, .LBB47_3
-; CHECK-V-NEXT: .LBB47_14: # %entry
-; CHECK-V-NEXT: mv a3, s1
-; CHECK-V-NEXT: blez a1, .LBB47_4
-; CHECK-V-NEXT: .LBB47_15: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: li a4, 0
-; CHECK-V-NEXT: beq a1, a5, .LBB47_5
-; CHECK-V-NEXT: .LBB47_16: # %entry
-; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: blez s0, .LBB47_6
-; CHECK-V-NEXT: .LBB47_17: # %entry
-; CHECK-V-NEXT: li s0, 1
-; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: bgtz s0, .LBB47_7
-; CHECK-V-NEXT: .LBB47_18: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: beqz s0, .LBB47_8
-; CHECK-V-NEXT: .LBB47_19: # %entry
-; CHECK-V-NEXT: mv a3, a0
-; CHECK-V-NEXT: mv a0, a4
-; CHECK-V-NEXT: bgtz a2, .LBB47_9
-; CHECK-V-NEXT: .LBB47_20: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: bnez a2, .LBB47_10
-; CHECK-V-NEXT: j .LBB47_11
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; CHECK-NOV-NEXT: li a0, -1
; CHECK-NOV-NEXT: srli a3, a0, 1
; CHECK-NOV-NEXT: mv a4, a2
-; CHECK-NOV-NEXT: bgez a1, .LBB48_17
+; CHECK-NOV-NEXT: bgez a1, .LBB48_15
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: bgeu a2, a3, .LBB48_18
+; CHECK-NOV-NEXT: bgeu a2, a3, .LBB48_16
; CHECK-NOV-NEXT: .LBB48_2: # %entry
-; CHECK-NOV-NEXT: bnez a1, .LBB48_19
+; CHECK-NOV-NEXT: bnez a1, .LBB48_17
; CHECK-NOV-NEXT: .LBB48_3: # %entry
; CHECK-NOV-NEXT: mv a4, s0
-; CHECK-NOV-NEXT: bgez s1, .LBB48_20
+; CHECK-NOV-NEXT: bgez s1, .LBB48_18
; CHECK-NOV-NEXT: .LBB48_4: # %entry
-; CHECK-NOV-NEXT: bgeu s0, a3, .LBB48_21
+; CHECK-NOV-NEXT: bgeu s0, a3, .LBB48_19
; CHECK-NOV-NEXT: .LBB48_5: # %entry
-; CHECK-NOV-NEXT: bnez s1, .LBB48_22
+; CHECK-NOV-NEXT: beqz s1, .LBB48_7
; CHECK-NOV-NEXT: .LBB48_6: # %entry
-; CHECK-NOV-NEXT: bgez a1, .LBB48_23
+; CHECK-NOV-NEXT: mv s0, a4
; CHECK-NOV-NEXT: .LBB48_7: # %entry
-; CHECK-NOV-NEXT: bltz s1, .LBB48_9
-; CHECK-NOV-NEXT: .LBB48_8: # %entry
-; CHECK-NOV-NEXT: li s1, 0
-; CHECK-NOV-NEXT: .LBB48_9: # %entry
+; CHECK-NOV-NEXT: slti a6, a1, 0
+; CHECK-NOV-NEXT: slti a3, s1, 0
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a4, a3, s1
; CHECK-NOV-NEXT: slli a3, a0, 63
-; CHECK-NOV-NEXT: mv a4, s0
-; CHECK-NOV-NEXT: bltz s1, .LBB48_24
-; CHECK-NOV-NEXT: # %bb.10: # %entry
-; CHECK-NOV-NEXT: bgeu a3, s0, .LBB48_25
+; CHECK-NOV-NEXT: mv a5, s0
+; CHECK-NOV-NEXT: bltz a4, .LBB48_20
+; CHECK-NOV-NEXT: # %bb.8: # %entry
+; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: bgeu a3, s0, .LBB48_21
+; CHECK-NOV-NEXT: .LBB48_9: # %entry
+; CHECK-NOV-NEXT: and a1, a6, a1
+; CHECK-NOV-NEXT: bne a4, a0, .LBB48_22
+; CHECK-NOV-NEXT: .LBB48_10: # %entry
+; CHECK-NOV-NEXT: mv a4, a2
+; CHECK-NOV-NEXT: bltz a1, .LBB48_23
; CHECK-NOV-NEXT: .LBB48_11: # %entry
-; CHECK-NOV-NEXT: bne s1, a0, .LBB48_26
+; CHECK-NOV-NEXT: bgeu a3, a2, .LBB48_24
; CHECK-NOV-NEXT: .LBB48_12: # %entry
-; CHECK-NOV-NEXT: mv a4, a2
-; CHECK-NOV-NEXT: bltz a1, .LBB48_27
+; CHECK-NOV-NEXT: beq a1, a0, .LBB48_14
; CHECK-NOV-NEXT: .LBB48_13: # %entry
-; CHECK-NOV-NEXT: bgeu a3, a2, .LBB48_28
-; CHECK-NOV-NEXT: .LBB48_14: # %entry
-; CHECK-NOV-NEXT: beq a1, a0, .LBB48_16
-; CHECK-NOV-NEXT: .LBB48_15: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: .LBB48_16: # %entry
+; CHECK-NOV-NEXT: .LBB48_14: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 32
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB48_17: # %entry
+; CHECK-NOV-NEXT: .LBB48_15: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bltu a2, a3, .LBB48_2
-; CHECK-NOV-NEXT: .LBB48_18: # %entry
+; CHECK-NOV-NEXT: .LBB48_16: # %entry
; CHECK-NOV-NEXT: mv a2, a3
; CHECK-NOV-NEXT: beqz a1, .LBB48_3
-; CHECK-NOV-NEXT: .LBB48_19: # %entry
+; CHECK-NOV-NEXT: .LBB48_17: # %entry
; CHECK-NOV-NEXT: mv a2, a4
; CHECK-NOV-NEXT: mv a4, s0
; CHECK-NOV-NEXT: bltz s1, .LBB48_4
-; CHECK-NOV-NEXT: .LBB48_20: # %entry
+; CHECK-NOV-NEXT: .LBB48_18: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bltu s0, a3, .LBB48_5
+; CHECK-NOV-NEXT: .LBB48_19: # %entry
+; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: bnez s1, .LBB48_6
+; CHECK-NOV-NEXT: j .LBB48_7
+; CHECK-NOV-NEXT: .LBB48_20: # %entry
+; CHECK-NOV-NEXT: mv a5, a3
+; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: bltu a3, s0, .LBB48_9
; CHECK-NOV-NEXT: .LBB48_21: # %entry
; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: beqz s1, .LBB48_6
+; CHECK-NOV-NEXT: and a1, a6, a1
+; CHECK-NOV-NEXT: beq a4, a0, .LBB48_10
; CHECK-NOV-NEXT: .LBB48_22: # %entry
-; CHECK-NOV-NEXT: mv s0, a4
-; CHECK-NOV-NEXT: bltz a1, .LBB48_7
-; CHECK-NOV-NEXT: .LBB48_23: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bgez s1, .LBB48_8
-; CHECK-NOV-NEXT: j .LBB48_9
-; CHECK-NOV-NEXT: .LBB48_24: # %entry
-; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: bltu a3, s0, .LBB48_11
-; CHECK-NOV-NEXT: .LBB48_25: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: beq s1, a0, .LBB48_12
-; CHECK-NOV-NEXT: .LBB48_26: # %entry
-; CHECK-NOV-NEXT: mv s0, a4
+; CHECK-NOV-NEXT: mv s0, a5
; CHECK-NOV-NEXT: mv a4, a2
-; CHECK-NOV-NEXT: bgez a1, .LBB48_13
-; CHECK-NOV-NEXT: .LBB48_27: # %entry
+; CHECK-NOV-NEXT: bgez a1, .LBB48_11
+; CHECK-NOV-NEXT: .LBB48_23: # %entry
; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: bltu a3, a2, .LBB48_14
-; CHECK-NOV-NEXT: .LBB48_28: # %entry
+; CHECK-NOV-NEXT: bltu a3, a2, .LBB48_12
+; CHECK-NOV-NEXT: .LBB48_24: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: bne a1, a0, .LBB48_15
-; CHECK-NOV-NEXT: j .LBB48_16
+; CHECK-NOV-NEXT: bne a1, a0, .LBB48_13
+; CHECK-NOV-NEXT: j .LBB48_14
;
; CHECK-V-LABEL: stest_f32i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
; CHECK-V-NEXT: mv a4, s0
-; CHECK-V-NEXT: bgez s1, .LBB48_17
+; CHECK-V-NEXT: bgez s1, .LBB48_15
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: bgeu s0, a3, .LBB48_18
+; CHECK-V-NEXT: bgeu s0, a3, .LBB48_16
; CHECK-V-NEXT: .LBB48_2: # %entry
-; CHECK-V-NEXT: bnez s1, .LBB48_19
+; CHECK-V-NEXT: bnez s1, .LBB48_17
; CHECK-V-NEXT: .LBB48_3: # %entry
; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: bgez a1, .LBB48_20
+; CHECK-V-NEXT: bgez a1, .LBB48_18
; CHECK-V-NEXT: .LBB48_4: # %entry
-; CHECK-V-NEXT: bgeu a0, a3, .LBB48_21
+; CHECK-V-NEXT: bgeu a0, a3, .LBB48_19
; CHECK-V-NEXT: .LBB48_5: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB48_22
+; CHECK-V-NEXT: beqz a1, .LBB48_7
; CHECK-V-NEXT: .LBB48_6: # %entry
-; CHECK-V-NEXT: bgez a1, .LBB48_23
+; CHECK-V-NEXT: mv a0, a4
; CHECK-V-NEXT: .LBB48_7: # %entry
-; CHECK-V-NEXT: bltz s1, .LBB48_9
-; CHECK-V-NEXT: .LBB48_8: # %entry
-; CHECK-V-NEXT: li s1, 0
-; CHECK-V-NEXT: .LBB48_9: # %entry
+; CHECK-V-NEXT: slti a3, s1, 0
+; CHECK-V-NEXT: neg a3, a3
+; CHECK-V-NEXT: and a4, a3, s1
+; CHECK-V-NEXT: slti a6, a1, 0
; CHECK-V-NEXT: slli a3, a2, 63
-; CHECK-V-NEXT: mv a4, s0
-; CHECK-V-NEXT: bltz s1, .LBB48_24
-; CHECK-V-NEXT: # %bb.10: # %entry
-; CHECK-V-NEXT: bgeu a3, s0, .LBB48_25
+; CHECK-V-NEXT: mv a5, s0
+; CHECK-V-NEXT: bltz a4, .LBB48_20
+; CHECK-V-NEXT: # %bb.8: # %entry
+; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: bgeu a3, s0, .LBB48_21
+; CHECK-V-NEXT: .LBB48_9: # %entry
+; CHECK-V-NEXT: and a1, a6, a1
+; CHECK-V-NEXT: bne a4, a2, .LBB48_22
+; CHECK-V-NEXT: .LBB48_10: # %entry
+; CHECK-V-NEXT: mv a4, a0
+; CHECK-V-NEXT: bltz a1, .LBB48_23
; CHECK-V-NEXT: .LBB48_11: # %entry
-; CHECK-V-NEXT: bne s1, a2, .LBB48_26
+; CHECK-V-NEXT: bgeu a3, a0, .LBB48_24
; CHECK-V-NEXT: .LBB48_12: # %entry
-; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: bltz a1, .LBB48_27
+; CHECK-V-NEXT: beq a1, a2, .LBB48_14
; CHECK-V-NEXT: .LBB48_13: # %entry
-; CHECK-V-NEXT: bgeu a3, a0, .LBB48_28
-; CHECK-V-NEXT: .LBB48_14: # %entry
-; CHECK-V-NEXT: beq a1, a2, .LBB48_16
-; CHECK-V-NEXT: .LBB48_15: # %entry
; CHECK-V-NEXT: mv a0, a4
-; CHECK-V-NEXT: .LBB48_16: # %entry
+; CHECK-V-NEXT: .LBB48_14: # %entry
; CHECK-V-NEXT: sd a0, 24(sp)
; CHECK-V-NEXT: sd s0, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
-; CHECK-V-NEXT: .LBB48_17: # %entry
+; CHECK-V-NEXT: .LBB48_15: # %entry
; CHECK-V-NEXT: mv a4, a3
; CHECK-V-NEXT: bltu s0, a3, .LBB48_2
-; CHECK-V-NEXT: .LBB48_18: # %entry
+; CHECK-V-NEXT: .LBB48_16: # %entry
; CHECK-V-NEXT: mv s0, a3
; CHECK-V-NEXT: beqz s1, .LBB48_3
-; CHECK-V-NEXT: .LBB48_19: # %entry
+; CHECK-V-NEXT: .LBB48_17: # %entry
; CHECK-V-NEXT: mv s0, a4
; CHECK-V-NEXT: mv a4, a0
; CHECK-V-NEXT: bltz a1, .LBB48_4
-; CHECK-V-NEXT: .LBB48_20: # %entry
+; CHECK-V-NEXT: .LBB48_18: # %entry
; CHECK-V-NEXT: mv a4, a3
; CHECK-V-NEXT: bltu a0, a3, .LBB48_5
-; CHECK-V-NEXT: .LBB48_21: # %entry
+; CHECK-V-NEXT: .LBB48_19: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: beqz a1, .LBB48_6
-; CHECK-V-NEXT: .LBB48_22: # %entry
-; CHECK-V-NEXT: mv a0, a4
-; CHECK-V-NEXT: bltz a1, .LBB48_7
-; CHECK-V-NEXT: .LBB48_23: # %entry
-; CHECK-V-NEXT: li a1, 0
-; CHECK-V-NEXT: bgez s1, .LBB48_8
-; CHECK-V-NEXT: j .LBB48_9
-; CHECK-V-NEXT: .LBB48_24: # %entry
-; CHECK-V-NEXT: mv a4, a3
-; CHECK-V-NEXT: bltu a3, s0, .LBB48_11
-; CHECK-V-NEXT: .LBB48_25: # %entry
+; CHECK-V-NEXT: bnez a1, .LBB48_6
+; CHECK-V-NEXT: j .LBB48_7
+; CHECK-V-NEXT: .LBB48_20: # %entry
+; CHECK-V-NEXT: mv a5, a3
+; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: bltu a3, s0, .LBB48_9
+; CHECK-V-NEXT: .LBB48_21: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: beq s1, a2, .LBB48_12
-; CHECK-V-NEXT: .LBB48_26: # %entry
-; CHECK-V-NEXT: mv s0, a4
+; CHECK-V-NEXT: and a1, a6, a1
+; CHECK-V-NEXT: beq a4, a2, .LBB48_10
+; CHECK-V-NEXT: .LBB48_22: # %entry
+; CHECK-V-NEXT: mv s0, a5
; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: bgez a1, .LBB48_13
-; CHECK-V-NEXT: .LBB48_27: # %entry
+; CHECK-V-NEXT: bgez a1, .LBB48_11
+; CHECK-V-NEXT: .LBB48_23: # %entry
; CHECK-V-NEXT: mv a4, a3
-; CHECK-V-NEXT: bltu a3, a0, .LBB48_14
-; CHECK-V-NEXT: .LBB48_28: # %entry
+; CHECK-V-NEXT: bltu a3, a0, .LBB48_12
+; CHECK-V-NEXT: .LBB48_24: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: bne a1, a2, .LBB48_15
-; CHECK-V-NEXT: j .LBB48_16
+; CHECK-V-NEXT: bne a1, a2, .LBB48_13
+; CHECK-V-NEXT: j .LBB48_14
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.s fa0, fs0
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: mv a3, a1
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: beqz a3, .LBB49_2
-; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: .LBB49_2: # %entry
-; CHECK-NOV-NEXT: li a4, 1
-; CHECK-NOV-NEXT: mv a0, a1
-; CHECK-NOV-NEXT: bne a3, a4, .LBB49_7
-; CHECK-NOV-NEXT: # %bb.3: # %entry
-; CHECK-NOV-NEXT: bnez s1, .LBB49_8
-; CHECK-NOV-NEXT: .LBB49_4: # %entry
-; CHECK-NOV-NEXT: beq s1, a4, .LBB49_6
-; CHECK-NOV-NEXT: .LBB49_5: # %entry
-; CHECK-NOV-NEXT: mv a1, s0
-; CHECK-NOV-NEXT: .LBB49_6: # %entry
+; CHECK-NOV-NEXT: snez a2, a1
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: snez a1, s1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
+; CHECK-NOV-NEXT: addi a2, s1, -1
+; CHECK-NOV-NEXT: seqz a2, a2
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 32
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB49_7: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: beqz s1, .LBB49_4
-; CHECK-NOV-NEXT: .LBB49_8: # %entry
-; CHECK-NOV-NEXT: mv s0, a1
-; CHECK-NOV-NEXT: bne s1, a4, .LBB49_5
-; CHECK-NOV-NEXT: j .LBB49_6
;
; CHECK-V-LABEL: utest_f32i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunssfti@plt
-; CHECK-V-NEXT: li a2, 0
-; CHECK-V-NEXT: beqz s1, .LBB49_2
-; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: mv s0, a2
-; CHECK-V-NEXT: .LBB49_2: # %entry
-; CHECK-V-NEXT: li a4, 1
-; CHECK-V-NEXT: mv a3, a2
-; CHECK-V-NEXT: bne s1, a4, .LBB49_7
-; CHECK-V-NEXT: # %bb.3: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB49_8
-; CHECK-V-NEXT: .LBB49_4: # %entry
-; CHECK-V-NEXT: beq a1, a4, .LBB49_6
-; CHECK-V-NEXT: .LBB49_5: # %entry
-; CHECK-V-NEXT: mv a2, a0
-; CHECK-V-NEXT: .LBB49_6: # %entry
-; CHECK-V-NEXT: sd a2, 24(sp)
-; CHECK-V-NEXT: sd a3, 32(sp)
+; CHECK-V-NEXT: snez a2, s1
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s0
+; CHECK-V-NEXT: addi a3, s1, -1
+; CHECK-V-NEXT: seqz a3, a3
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a2, a3, a2
+; CHECK-V-NEXT: snez a3, a1
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a0, a3, a0
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: sd a2, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
-; CHECK-V-NEXT: .LBB49_7: # %entry
-; CHECK-V-NEXT: mv a3, s0
-; CHECK-V-NEXT: beqz a1, .LBB49_4
-; CHECK-V-NEXT: .LBB49_8: # %entry
-; CHECK-V-NEXT: mv a0, a2
-; CHECK-V-NEXT: bne a1, a4, .LBB49_5
-; CHECK-V-NEXT: j .LBB49_6
entry:
%conv = fptoui <2 x float> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; CHECK-NOV-NEXT: fmv.s fa0, fs0
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: li a5, 1
-; CHECK-NOV-NEXT: mv a3, a1
-; CHECK-NOV-NEXT: bgtz a1, .LBB50_12
+; CHECK-NOV-NEXT: blez a1, .LBB50_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a4, s1
-; CHECK-NOV-NEXT: bgtz s1, .LBB50_13
+; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB50_2: # %entry
-; CHECK-NOV-NEXT: bgtz a2, .LBB50_14
-; CHECK-NOV-NEXT: .LBB50_3: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bne a2, a5, .LBB50_15
+; CHECK-NOV-NEXT: mv a4, s1
+; CHECK-NOV-NEXT: blez s1, .LBB50_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: li a4, 1
; CHECK-NOV-NEXT: .LBB50_4: # %entry
-; CHECK-NOV-NEXT: bgtz s1, .LBB50_16
-; CHECK-NOV-NEXT: .LBB50_5: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: bne s1, a5, .LBB50_17
+; CHECK-NOV-NEXT: sgtz a3, a1
+; CHECK-NOV-NEXT: addi a3, a3, -1
+; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: addi a0, a1, -1
+; CHECK-NOV-NEXT: seqz a0, a0
+; CHECK-NOV-NEXT: addi a1, a0, -1
+; CHECK-NOV-NEXT: sgtz a0, s1
+; CHECK-NOV-NEXT: addi a0, a0, -1
+; CHECK-NOV-NEXT: and a0, a0, s0
+; CHECK-NOV-NEXT: addi a5, s1, -1
+; CHECK-NOV-NEXT: seqz a5, a5
+; CHECK-NOV-NEXT: addi a5, a5, -1
+; CHECK-NOV-NEXT: and a0, a5, a0
+; CHECK-NOV-NEXT: beqz a4, .LBB50_6
+; CHECK-NOV-NEXT: # %bb.5: # %entry
+; CHECK-NOV-NEXT: sgtz a4, a4
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: .LBB50_6: # %entry
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: blez a4, .LBB50_18
-; CHECK-NOV-NEXT: .LBB50_7: # %entry
-; CHECK-NOV-NEXT: bnez a4, .LBB50_19
+; CHECK-NOV-NEXT: and a1, a1, a3
+; CHECK-NOV-NEXT: beqz a2, .LBB50_8
+; CHECK-NOV-NEXT: # %bb.7: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: .LBB50_8: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: blez a3, .LBB50_20
-; CHECK-NOV-NEXT: .LBB50_9: # %entry
-; CHECK-NOV-NEXT: beqz a3, .LBB50_11
-; CHECK-NOV-NEXT: .LBB50_10: # %entry
-; CHECK-NOV-NEXT: mv a1, a2
-; CHECK-NOV-NEXT: .LBB50_11: # %entry
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 32
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB50_12: # %entry
-; CHECK-NOV-NEXT: li a3, 1
-; CHECK-NOV-NEXT: mv a4, s1
-; CHECK-NOV-NEXT: blez s1, .LBB50_2
-; CHECK-NOV-NEXT: .LBB50_13: # %entry
-; CHECK-NOV-NEXT: li a4, 1
-; CHECK-NOV-NEXT: blez a2, .LBB50_3
-; CHECK-NOV-NEXT: .LBB50_14: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: beq a2, a5, .LBB50_4
-; CHECK-NOV-NEXT: .LBB50_15: # %entry
-; CHECK-NOV-NEXT: mv a1, a0
-; CHECK-NOV-NEXT: blez s1, .LBB50_5
-; CHECK-NOV-NEXT: .LBB50_16: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: beq s1, a5, .LBB50_6
-; CHECK-NOV-NEXT: .LBB50_17: # %entry
-; CHECK-NOV-NEXT: mv a0, s0
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: bgtz a4, .LBB50_7
-; CHECK-NOV-NEXT: .LBB50_18: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: beqz a4, .LBB50_8
-; CHECK-NOV-NEXT: .LBB50_19: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: bgtz a3, .LBB50_9
-; CHECK-NOV-NEXT: .LBB50_20: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: bnez a3, .LBB50_10
-; CHECK-NOV-NEXT: j .LBB50_11
;
; CHECK-V-LABEL: ustest_f32i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti@plt
-; CHECK-V-NEXT: li a5, 1
; CHECK-V-NEXT: mv a2, a1
-; CHECK-V-NEXT: bgtz a1, .LBB50_12
+; CHECK-V-NEXT: blez a1, .LBB50_2
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: bgtz s0, .LBB50_13
+; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB50_2: # %entry
-; CHECK-V-NEXT: li a3, 0
-; CHECK-V-NEXT: bne s0, a5, .LBB50_14
-; CHECK-V-NEXT: .LBB50_3: # %entry
-; CHECK-V-NEXT: bgtz a1, .LBB50_15
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a3, a3, s1
+; CHECK-V-NEXT: addi a4, s0, -1
+; CHECK-V-NEXT: seqz a4, a4
+; CHECK-V-NEXT: addi a4, a4, -1
+; CHECK-V-NEXT: sgtz a5, a1
+; CHECK-V-NEXT: addi a5, a5, -1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a6, a1
+; CHECK-V-NEXT: blez s0, .LBB50_4
+; CHECK-V-NEXT: # %bb.3: # %entry
+; CHECK-V-NEXT: li s0, 1
; CHECK-V-NEXT: .LBB50_4: # %entry
-; CHECK-V-NEXT: li a4, 0
-; CHECK-V-NEXT: bne a1, a5, .LBB50_16
-; CHECK-V-NEXT: .LBB50_5: # %entry
-; CHECK-V-NEXT: bgtz s0, .LBB50_17
+; CHECK-V-NEXT: and a1, a5, a0
+; CHECK-V-NEXT: addi a5, a6, -1
+; CHECK-V-NEXT: and a0, a4, a3
+; CHECK-V-NEXT: beqz s0, .LBB50_6
+; CHECK-V-NEXT: # %bb.5: # %entry
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: neg a3, a3
+; CHECK-V-NEXT: and a0, a3, a0
; CHECK-V-NEXT: .LBB50_6: # %entry
-; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: blez s0, .LBB50_18
-; CHECK-V-NEXT: .LBB50_7: # %entry
-; CHECK-V-NEXT: bnez s0, .LBB50_19
+; CHECK-V-NEXT: and a1, a5, a1
+; CHECK-V-NEXT: beqz a2, .LBB50_8
+; CHECK-V-NEXT: # %bb.7: # %entry
+; CHECK-V-NEXT: sgtz a2, a2
+; CHECK-V-NEXT: neg a2, a2
+; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB50_8: # %entry
-; CHECK-V-NEXT: mv a0, a4
-; CHECK-V-NEXT: blez a2, .LBB50_20
-; CHECK-V-NEXT: .LBB50_9: # %entry
-; CHECK-V-NEXT: beqz a2, .LBB50_11
-; CHECK-V-NEXT: .LBB50_10: # %entry
-; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: .LBB50_11: # %entry
-; CHECK-V-NEXT: sd a4, 24(sp)
-; CHECK-V-NEXT: sd a3, 32(sp)
+; CHECK-V-NEXT: sd a1, 24(sp)
+; CHECK-V-NEXT: sd a0, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: addi sp, sp, 80
; CHECK-V-NEXT: ret
-; CHECK-V-NEXT: .LBB50_12: # %entry
-; CHECK-V-NEXT: li a2, 1
-; CHECK-V-NEXT: blez s0, .LBB50_2
-; CHECK-V-NEXT: .LBB50_13: # %entry
-; CHECK-V-NEXT: li s1, 0
-; CHECK-V-NEXT: li a3, 0
-; CHECK-V-NEXT: beq s0, a5, .LBB50_3
-; CHECK-V-NEXT: .LBB50_14: # %entry
-; CHECK-V-NEXT: mv a3, s1
-; CHECK-V-NEXT: blez a1, .LBB50_4
-; CHECK-V-NEXT: .LBB50_15: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: li a4, 0
-; CHECK-V-NEXT: beq a1, a5, .LBB50_5
-; CHECK-V-NEXT: .LBB50_16: # %entry
-; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: blez s0, .LBB50_6
-; CHECK-V-NEXT: .LBB50_17: # %entry
-; CHECK-V-NEXT: li s0, 1
-; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: bgtz s0, .LBB50_7
-; CHECK-V-NEXT: .LBB50_18: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: beqz s0, .LBB50_8
-; CHECK-V-NEXT: .LBB50_19: # %entry
-; CHECK-V-NEXT: mv a3, a0
-; CHECK-V-NEXT: mv a0, a4
-; CHECK-V-NEXT: bgtz a2, .LBB50_9
-; CHECK-V-NEXT: .LBB50_20: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: bnez a2, .LBB50_10
-; CHECK-V-NEXT: j .LBB50_11
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; CHECK-NOV-NEXT: li a0, -1
; CHECK-NOV-NEXT: srli a3, a0, 1
; CHECK-NOV-NEXT: mv a4, a2
-; CHECK-NOV-NEXT: bgez a1, .LBB51_17
+; CHECK-NOV-NEXT: bgez a1, .LBB51_15
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: bgeu a2, a3, .LBB51_18
+; CHECK-NOV-NEXT: bgeu a2, a3, .LBB51_16
; CHECK-NOV-NEXT: .LBB51_2: # %entry
-; CHECK-NOV-NEXT: bnez a1, .LBB51_19
+; CHECK-NOV-NEXT: bnez a1, .LBB51_17
; CHECK-NOV-NEXT: .LBB51_3: # %entry
; CHECK-NOV-NEXT: mv a4, s0
-; CHECK-NOV-NEXT: bgez s1, .LBB51_20
+; CHECK-NOV-NEXT: bgez s1, .LBB51_18
; CHECK-NOV-NEXT: .LBB51_4: # %entry
-; CHECK-NOV-NEXT: bgeu s0, a3, .LBB51_21
+; CHECK-NOV-NEXT: bgeu s0, a3, .LBB51_19
; CHECK-NOV-NEXT: .LBB51_5: # %entry
-; CHECK-NOV-NEXT: bnez s1, .LBB51_22
+; CHECK-NOV-NEXT: beqz s1, .LBB51_7
; CHECK-NOV-NEXT: .LBB51_6: # %entry
-; CHECK-NOV-NEXT: bgez a1, .LBB51_23
+; CHECK-NOV-NEXT: mv s0, a4
; CHECK-NOV-NEXT: .LBB51_7: # %entry
-; CHECK-NOV-NEXT: bltz s1, .LBB51_9
-; CHECK-NOV-NEXT: .LBB51_8: # %entry
-; CHECK-NOV-NEXT: li s1, 0
-; CHECK-NOV-NEXT: .LBB51_9: # %entry
+; CHECK-NOV-NEXT: slti a6, a1, 0
+; CHECK-NOV-NEXT: slti a3, s1, 0
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a4, a3, s1
; CHECK-NOV-NEXT: slli a3, a0, 63
-; CHECK-NOV-NEXT: mv a4, s0
-; CHECK-NOV-NEXT: bltz s1, .LBB51_24
-; CHECK-NOV-NEXT: # %bb.10: # %entry
-; CHECK-NOV-NEXT: bgeu a3, s0, .LBB51_25
+; CHECK-NOV-NEXT: mv a5, s0
+; CHECK-NOV-NEXT: bltz a4, .LBB51_20
+; CHECK-NOV-NEXT: # %bb.8: # %entry
+; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: bgeu a3, s0, .LBB51_21
+; CHECK-NOV-NEXT: .LBB51_9: # %entry
+; CHECK-NOV-NEXT: and a1, a6, a1
+; CHECK-NOV-NEXT: bne a4, a0, .LBB51_22
+; CHECK-NOV-NEXT: .LBB51_10: # %entry
+; CHECK-NOV-NEXT: mv a4, a2
+; CHECK-NOV-NEXT: bltz a1, .LBB51_23
; CHECK-NOV-NEXT: .LBB51_11: # %entry
-; CHECK-NOV-NEXT: bne s1, a0, .LBB51_26
+; CHECK-NOV-NEXT: bgeu a3, a2, .LBB51_24
; CHECK-NOV-NEXT: .LBB51_12: # %entry
-; CHECK-NOV-NEXT: mv a4, a2
-; CHECK-NOV-NEXT: bltz a1, .LBB51_27
+; CHECK-NOV-NEXT: beq a1, a0, .LBB51_14
; CHECK-NOV-NEXT: .LBB51_13: # %entry
-; CHECK-NOV-NEXT: bgeu a3, a2, .LBB51_28
-; CHECK-NOV-NEXT: .LBB51_14: # %entry
-; CHECK-NOV-NEXT: beq a1, a0, .LBB51_16
-; CHECK-NOV-NEXT: .LBB51_15: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: .LBB51_16: # %entry
+; CHECK-NOV-NEXT: .LBB51_14: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 32
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB51_17: # %entry
+; CHECK-NOV-NEXT: .LBB51_15: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bltu a2, a3, .LBB51_2
-; CHECK-NOV-NEXT: .LBB51_18: # %entry
+; CHECK-NOV-NEXT: .LBB51_16: # %entry
; CHECK-NOV-NEXT: mv a2, a3
; CHECK-NOV-NEXT: beqz a1, .LBB51_3
-; CHECK-NOV-NEXT: .LBB51_19: # %entry
+; CHECK-NOV-NEXT: .LBB51_17: # %entry
; CHECK-NOV-NEXT: mv a2, a4
; CHECK-NOV-NEXT: mv a4, s0
; CHECK-NOV-NEXT: bltz s1, .LBB51_4
-; CHECK-NOV-NEXT: .LBB51_20: # %entry
+; CHECK-NOV-NEXT: .LBB51_18: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bltu s0, a3, .LBB51_5
+; CHECK-NOV-NEXT: .LBB51_19: # %entry
+; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: bnez s1, .LBB51_6
+; CHECK-NOV-NEXT: j .LBB51_7
+; CHECK-NOV-NEXT: .LBB51_20: # %entry
+; CHECK-NOV-NEXT: mv a5, a3
+; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: bltu a3, s0, .LBB51_9
; CHECK-NOV-NEXT: .LBB51_21: # %entry
; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: beqz s1, .LBB51_6
+; CHECK-NOV-NEXT: and a1, a6, a1
+; CHECK-NOV-NEXT: beq a4, a0, .LBB51_10
; CHECK-NOV-NEXT: .LBB51_22: # %entry
-; CHECK-NOV-NEXT: mv s0, a4
-; CHECK-NOV-NEXT: bltz a1, .LBB51_7
-; CHECK-NOV-NEXT: .LBB51_23: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bgez s1, .LBB51_8
-; CHECK-NOV-NEXT: j .LBB51_9
-; CHECK-NOV-NEXT: .LBB51_24: # %entry
-; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: bltu a3, s0, .LBB51_11
-; CHECK-NOV-NEXT: .LBB51_25: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: beq s1, a0, .LBB51_12
-; CHECK-NOV-NEXT: .LBB51_26: # %entry
-; CHECK-NOV-NEXT: mv s0, a4
+; CHECK-NOV-NEXT: mv s0, a5
; CHECK-NOV-NEXT: mv a4, a2
-; CHECK-NOV-NEXT: bgez a1, .LBB51_13
-; CHECK-NOV-NEXT: .LBB51_27: # %entry
+; CHECK-NOV-NEXT: bgez a1, .LBB51_11
+; CHECK-NOV-NEXT: .LBB51_23: # %entry
; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: bltu a3, a2, .LBB51_14
-; CHECK-NOV-NEXT: .LBB51_28: # %entry
+; CHECK-NOV-NEXT: bltu a3, a2, .LBB51_12
+; CHECK-NOV-NEXT: .LBB51_24: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: bne a1, a0, .LBB51_15
-; CHECK-NOV-NEXT: j .LBB51_16
+; CHECK-NOV-NEXT: bne a1, a0, .LBB51_13
+; CHECK-NOV-NEXT: j .LBB51_14
;
; CHECK-V-LABEL: stest_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: bgez a1, .LBB51_17
+; CHECK-V-NEXT: bgez a1, .LBB51_15
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: bgeu a0, a3, .LBB51_18
+; CHECK-V-NEXT: bgeu a0, a3, .LBB51_16
; CHECK-V-NEXT: .LBB51_2: # %entry
-; CHECK-V-NEXT: bnez a1, .LBB51_19
+; CHECK-V-NEXT: bnez a1, .LBB51_17
; CHECK-V-NEXT: .LBB51_3: # %entry
; CHECK-V-NEXT: mv a4, s0
-; CHECK-V-NEXT: bgez s1, .LBB51_20
+; CHECK-V-NEXT: bgez s1, .LBB51_18
; CHECK-V-NEXT: .LBB51_4: # %entry
-; CHECK-V-NEXT: bgeu s0, a3, .LBB51_21
+; CHECK-V-NEXT: bgeu s0, a3, .LBB51_19
; CHECK-V-NEXT: .LBB51_5: # %entry
-; CHECK-V-NEXT: bnez s1, .LBB51_22
+; CHECK-V-NEXT: beqz s1, .LBB51_7
; CHECK-V-NEXT: .LBB51_6: # %entry
-; CHECK-V-NEXT: bgez a1, .LBB51_23
+; CHECK-V-NEXT: mv s0, a4
; CHECK-V-NEXT: .LBB51_7: # %entry
-; CHECK-V-NEXT: bltz s1, .LBB51_9
-; CHECK-V-NEXT: .LBB51_8: # %entry
-; CHECK-V-NEXT: li s1, 0
-; CHECK-V-NEXT: .LBB51_9: # %entry
+; CHECK-V-NEXT: slti a6, a1, 0
+; CHECK-V-NEXT: slti a3, s1, 0
+; CHECK-V-NEXT: neg a3, a3
+; CHECK-V-NEXT: and a4, a3, s1
; CHECK-V-NEXT: slli a3, a2, 63
-; CHECK-V-NEXT: mv a4, s0
-; CHECK-V-NEXT: bltz s1, .LBB51_24
-; CHECK-V-NEXT: # %bb.10: # %entry
-; CHECK-V-NEXT: bgeu a3, s0, .LBB51_25
+; CHECK-V-NEXT: mv a5, s0
+; CHECK-V-NEXT: bltz a4, .LBB51_20
+; CHECK-V-NEXT: # %bb.8: # %entry
+; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: bgeu a3, s0, .LBB51_21
+; CHECK-V-NEXT: .LBB51_9: # %entry
+; CHECK-V-NEXT: and a1, a6, a1
+; CHECK-V-NEXT: bne a4, a2, .LBB51_22
+; CHECK-V-NEXT: .LBB51_10: # %entry
+; CHECK-V-NEXT: mv a4, a0
+; CHECK-V-NEXT: bltz a1, .LBB51_23
; CHECK-V-NEXT: .LBB51_11: # %entry
-; CHECK-V-NEXT: bne s1, a2, .LBB51_26
+; CHECK-V-NEXT: bgeu a3, a0, .LBB51_24
; CHECK-V-NEXT: .LBB51_12: # %entry
-; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: bltz a1, .LBB51_27
+; CHECK-V-NEXT: beq a1, a2, .LBB51_14
; CHECK-V-NEXT: .LBB51_13: # %entry
-; CHECK-V-NEXT: bgeu a3, a0, .LBB51_28
-; CHECK-V-NEXT: .LBB51_14: # %entry
-; CHECK-V-NEXT: beq a1, a2, .LBB51_16
-; CHECK-V-NEXT: .LBB51_15: # %entry
; CHECK-V-NEXT: mv a0, a4
-; CHECK-V-NEXT: .LBB51_16: # %entry
+; CHECK-V-NEXT: .LBB51_14: # %entry
; CHECK-V-NEXT: sd a0, 8(sp)
; CHECK-V-NEXT: sd s0, 0(sp)
; CHECK-V-NEXT: addi a0, sp, 8
; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: addi sp, sp, 48
; CHECK-V-NEXT: ret
-; CHECK-V-NEXT: .LBB51_17: # %entry
+; CHECK-V-NEXT: .LBB51_15: # %entry
; CHECK-V-NEXT: mv a4, a3
; CHECK-V-NEXT: bltu a0, a3, .LBB51_2
-; CHECK-V-NEXT: .LBB51_18: # %entry
+; CHECK-V-NEXT: .LBB51_16: # %entry
; CHECK-V-NEXT: mv a0, a3
; CHECK-V-NEXT: beqz a1, .LBB51_3
-; CHECK-V-NEXT: .LBB51_19: # %entry
+; CHECK-V-NEXT: .LBB51_17: # %entry
; CHECK-V-NEXT: mv a0, a4
; CHECK-V-NEXT: mv a4, s0
; CHECK-V-NEXT: bltz s1, .LBB51_4
-; CHECK-V-NEXT: .LBB51_20: # %entry
+; CHECK-V-NEXT: .LBB51_18: # %entry
; CHECK-V-NEXT: mv a4, a3
; CHECK-V-NEXT: bltu s0, a3, .LBB51_5
+; CHECK-V-NEXT: .LBB51_19: # %entry
+; CHECK-V-NEXT: mv s0, a3
+; CHECK-V-NEXT: bnez s1, .LBB51_6
+; CHECK-V-NEXT: j .LBB51_7
+; CHECK-V-NEXT: .LBB51_20: # %entry
+; CHECK-V-NEXT: mv a5, a3
+; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: bltu a3, s0, .LBB51_9
; CHECK-V-NEXT: .LBB51_21: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: beqz s1, .LBB51_6
+; CHECK-V-NEXT: and a1, a6, a1
+; CHECK-V-NEXT: beq a4, a2, .LBB51_10
; CHECK-V-NEXT: .LBB51_22: # %entry
-; CHECK-V-NEXT: mv s0, a4
-; CHECK-V-NEXT: bltz a1, .LBB51_7
-; CHECK-V-NEXT: .LBB51_23: # %entry
-; CHECK-V-NEXT: li a1, 0
-; CHECK-V-NEXT: bgez s1, .LBB51_8
-; CHECK-V-NEXT: j .LBB51_9
-; CHECK-V-NEXT: .LBB51_24: # %entry
-; CHECK-V-NEXT: mv a4, a3
-; CHECK-V-NEXT: bltu a3, s0, .LBB51_11
-; CHECK-V-NEXT: .LBB51_25: # %entry
-; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: beq s1, a2, .LBB51_12
-; CHECK-V-NEXT: .LBB51_26: # %entry
-; CHECK-V-NEXT: mv s0, a4
+; CHECK-V-NEXT: mv s0, a5
; CHECK-V-NEXT: mv a4, a0
-; CHECK-V-NEXT: bgez a1, .LBB51_13
-; CHECK-V-NEXT: .LBB51_27: # %entry
+; CHECK-V-NEXT: bgez a1, .LBB51_11
+; CHECK-V-NEXT: .LBB51_23: # %entry
; CHECK-V-NEXT: mv a4, a3
-; CHECK-V-NEXT: bltu a3, a0, .LBB51_14
-; CHECK-V-NEXT: .LBB51_28: # %entry
+; CHECK-V-NEXT: bltu a3, a0, .LBB51_12
+; CHECK-V-NEXT: .LBB51_24: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: bne a1, a2, .LBB51_15
-; CHECK-V-NEXT: j .LBB51_16
+; CHECK-V-NEXT: bne a1, a2, .LBB51_13
+; CHECK-V-NEXT: j .LBB51_14
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
; CHECK-NOV-NEXT: .cfi_offset s0, -16
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
-; CHECK-NOV-NEXT: mv s2, a0
+; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv a0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: mv s1, a0
+; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: mv a3, a1
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: beqz a3, .LBB52_2
-; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: .LBB52_2: # %entry
-; CHECK-NOV-NEXT: li a4, 1
-; CHECK-NOV-NEXT: mv a0, a1
-; CHECK-NOV-NEXT: bne a3, a4, .LBB52_7
-; CHECK-NOV-NEXT: # %bb.3: # %entry
-; CHECK-NOV-NEXT: bnez s1, .LBB52_8
-; CHECK-NOV-NEXT: .LBB52_4: # %entry
-; CHECK-NOV-NEXT: beq s1, a4, .LBB52_6
-; CHECK-NOV-NEXT: .LBB52_5: # %entry
-; CHECK-NOV-NEXT: mv a1, s0
-; CHECK-NOV-NEXT: .LBB52_6: # %entry
+; CHECK-NOV-NEXT: snez a2, a1
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: snez a1, s2
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: and a1, a1, s1
+; CHECK-NOV-NEXT: addi a2, s2, -1
+; CHECK-NOV-NEXT: seqz a2, a2
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 32
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB52_7: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: beqz s1, .LBB52_4
-; CHECK-NOV-NEXT: .LBB52_8: # %entry
-; CHECK-NOV-NEXT: mv s0, a1
-; CHECK-NOV-NEXT: bne s1, a4, .LBB52_5
-; CHECK-NOV-NEXT: j .LBB52_6
;
; CHECK-V-LABEL: utesth_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
-; CHECK-V-NEXT: mv s2, a0
+; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv a0, a1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
-; CHECK-V-NEXT: mv s0, a0
-; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: mv s1, a0
+; CHECK-V-NEXT: mv s2, a1
+; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
-; CHECK-V-NEXT: li a2, 0
-; CHECK-V-NEXT: beqz a1, .LBB52_2
-; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: mv a0, a2
-; CHECK-V-NEXT: .LBB52_2: # %entry
-; CHECK-V-NEXT: li a4, 1
-; CHECK-V-NEXT: mv a3, a2
-; CHECK-V-NEXT: bne a1, a4, .LBB52_7
-; CHECK-V-NEXT: # %bb.3: # %entry
-; CHECK-V-NEXT: bnez s1, .LBB52_8
-; CHECK-V-NEXT: .LBB52_4: # %entry
-; CHECK-V-NEXT: beq s1, a4, .LBB52_6
-; CHECK-V-NEXT: .LBB52_5: # %entry
-; CHECK-V-NEXT: mv a2, s0
-; CHECK-V-NEXT: .LBB52_6: # %entry
-; CHECK-V-NEXT: sd a2, 8(sp)
-; CHECK-V-NEXT: sd a3, 0(sp)
+; CHECK-V-NEXT: snez a2, a1
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a0, a2, a0
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: snez a1, s2
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: and a1, a1, s1
+; CHECK-V-NEXT: addi a2, s2, -1
+; CHECK-V-NEXT: seqz a2, a2
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a1, a2, a1
+; CHECK-V-NEXT: sd a1, 8(sp)
+; CHECK-V-NEXT: sd a0, 0(sp)
; CHECK-V-NEXT: addi a0, sp, 8
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: addi sp, sp, 48
; CHECK-V-NEXT: ret
-; CHECK-V-NEXT: .LBB52_7: # %entry
-; CHECK-V-NEXT: mv a3, a0
-; CHECK-V-NEXT: beqz s1, .LBB52_4
-; CHECK-V-NEXT: .LBB52_8: # %entry
-; CHECK-V-NEXT: mv s0, a2
-; CHECK-V-NEXT: bne s1, a4, .LBB52_5
-; CHECK-V-NEXT: j .LBB52_6
entry:
%conv = fptoui <2 x half> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: li a5, 1
-; CHECK-NOV-NEXT: mv a3, a1
-; CHECK-NOV-NEXT: bgtz a1, .LBB53_12
+; CHECK-NOV-NEXT: blez a1, .LBB53_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a4, s1
-; CHECK-NOV-NEXT: bgtz s1, .LBB53_13
+; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB53_2: # %entry
-; CHECK-NOV-NEXT: bgtz a2, .LBB53_14
-; CHECK-NOV-NEXT: .LBB53_3: # %entry
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: bne a2, a5, .LBB53_15
+; CHECK-NOV-NEXT: mv a4, s1
+; CHECK-NOV-NEXT: blez s1, .LBB53_4
+; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: li a4, 1
; CHECK-NOV-NEXT: .LBB53_4: # %entry
-; CHECK-NOV-NEXT: bgtz s1, .LBB53_16
-; CHECK-NOV-NEXT: .LBB53_5: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: bne s1, a5, .LBB53_17
+; CHECK-NOV-NEXT: sgtz a3, a1
+; CHECK-NOV-NEXT: addi a3, a3, -1
+; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: addi a0, a1, -1
+; CHECK-NOV-NEXT: seqz a0, a0
+; CHECK-NOV-NEXT: addi a1, a0, -1
+; CHECK-NOV-NEXT: sgtz a0, s1
+; CHECK-NOV-NEXT: addi a0, a0, -1
+; CHECK-NOV-NEXT: and a0, a0, s0
+; CHECK-NOV-NEXT: addi a5, s1, -1
+; CHECK-NOV-NEXT: seqz a5, a5
+; CHECK-NOV-NEXT: addi a5, a5, -1
+; CHECK-NOV-NEXT: and a0, a5, a0
+; CHECK-NOV-NEXT: beqz a4, .LBB53_6
+; CHECK-NOV-NEXT: # %bb.5: # %entry
+; CHECK-NOV-NEXT: sgtz a4, a4
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: .LBB53_6: # %entry
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: blez a4, .LBB53_18
-; CHECK-NOV-NEXT: .LBB53_7: # %entry
-; CHECK-NOV-NEXT: bnez a4, .LBB53_19
+; CHECK-NOV-NEXT: and a1, a1, a3
+; CHECK-NOV-NEXT: beqz a2, .LBB53_8
+; CHECK-NOV-NEXT: # %bb.7: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: .LBB53_8: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: blez a3, .LBB53_20
-; CHECK-NOV-NEXT: .LBB53_9: # %entry
-; CHECK-NOV-NEXT: beqz a3, .LBB53_11
-; CHECK-NOV-NEXT: .LBB53_10: # %entry
-; CHECK-NOV-NEXT: mv a1, a2
-; CHECK-NOV-NEXT: .LBB53_11: # %entry
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: addi sp, sp, 32
; CHECK-NOV-NEXT: ret
-; CHECK-NOV-NEXT: .LBB53_12: # %entry
-; CHECK-NOV-NEXT: li a3, 1
-; CHECK-NOV-NEXT: mv a4, s1
-; CHECK-NOV-NEXT: blez s1, .LBB53_2
-; CHECK-NOV-NEXT: .LBB53_13: # %entry
-; CHECK-NOV-NEXT: li a4, 1
-; CHECK-NOV-NEXT: blez a2, .LBB53_3
-; CHECK-NOV-NEXT: .LBB53_14: # %entry
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: li a1, 0
-; CHECK-NOV-NEXT: beq a2, a5, .LBB53_4
-; CHECK-NOV-NEXT: .LBB53_15: # %entry
-; CHECK-NOV-NEXT: mv a1, a0
-; CHECK-NOV-NEXT: blez s1, .LBB53_5
-; CHECK-NOV-NEXT: .LBB53_16: # %entry
-; CHECK-NOV-NEXT: li s0, 0
-; CHECK-NOV-NEXT: li a0, 0
-; CHECK-NOV-NEXT: beq s1, a5, .LBB53_6
-; CHECK-NOV-NEXT: .LBB53_17: # %entry
-; CHECK-NOV-NEXT: mv a0, s0
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: bgtz a4, .LBB53_7
-; CHECK-NOV-NEXT: .LBB53_18: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: beqz a4, .LBB53_8
-; CHECK-NOV-NEXT: .LBB53_19: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: mv a2, a1
-; CHECK-NOV-NEXT: bgtz a3, .LBB53_9
-; CHECK-NOV-NEXT: .LBB53_20: # %entry
-; CHECK-NOV-NEXT: li a2, 0
-; CHECK-NOV-NEXT: bnez a3, .LBB53_10
-; CHECK-NOV-NEXT: j .LBB53_11
;
; CHECK-V-LABEL: ustest_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: mv a0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
-; CHECK-V-NEXT: li a5, 1
; CHECK-V-NEXT: mv a2, a1
-; CHECK-V-NEXT: bgtz a1, .LBB53_12
+; CHECK-V-NEXT: blez a1, .LBB53_2
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: mv a4, s1
-; CHECK-V-NEXT: bgtz s1, .LBB53_13
+; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB53_2: # %entry
-; CHECK-V-NEXT: bgtz a1, .LBB53_14
-; CHECK-V-NEXT: .LBB53_3: # %entry
-; CHECK-V-NEXT: li a3, 0
-; CHECK-V-NEXT: bne a1, a5, .LBB53_15
+; CHECK-V-NEXT: mv a4, s1
+; CHECK-V-NEXT: blez s1, .LBB53_4
+; CHECK-V-NEXT: # %bb.3: # %entry
+; CHECK-V-NEXT: li a4, 1
; CHECK-V-NEXT: .LBB53_4: # %entry
-; CHECK-V-NEXT: bgtz s1, .LBB53_16
-; CHECK-V-NEXT: .LBB53_5: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: bne s1, a5, .LBB53_17
+; CHECK-V-NEXT: sgtz a3, a1
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a3, a3, a0
+; CHECK-V-NEXT: addi a0, a1, -1
+; CHECK-V-NEXT: seqz a0, a0
+; CHECK-V-NEXT: addi a1, a0, -1
+; CHECK-V-NEXT: sgtz a0, s1
+; CHECK-V-NEXT: addi a0, a0, -1
+; CHECK-V-NEXT: and a0, a0, s0
+; CHECK-V-NEXT: addi a5, s1, -1
+; CHECK-V-NEXT: seqz a5, a5
+; CHECK-V-NEXT: addi a5, a5, -1
+; CHECK-V-NEXT: and a0, a5, a0
+; CHECK-V-NEXT: beqz a4, .LBB53_6
+; CHECK-V-NEXT: # %bb.5: # %entry
+; CHECK-V-NEXT: sgtz a4, a4
+; CHECK-V-NEXT: neg a4, a4
+; CHECK-V-NEXT: and a0, a4, a0
; CHECK-V-NEXT: .LBB53_6: # %entry
-; CHECK-V-NEXT: mv a1, a0
-; CHECK-V-NEXT: blez a4, .LBB53_18
-; CHECK-V-NEXT: .LBB53_7: # %entry
-; CHECK-V-NEXT: bnez a4, .LBB53_19
+; CHECK-V-NEXT: and a1, a1, a3
+; CHECK-V-NEXT: beqz a2, .LBB53_8
+; CHECK-V-NEXT: # %bb.7: # %entry
+; CHECK-V-NEXT: sgtz a2, a2
+; CHECK-V-NEXT: neg a2, a2
+; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB53_8: # %entry
-; CHECK-V-NEXT: mv a1, a3
-; CHECK-V-NEXT: blez a2, .LBB53_20
-; CHECK-V-NEXT: .LBB53_9: # %entry
-; CHECK-V-NEXT: beqz a2, .LBB53_11
-; CHECK-V-NEXT: .LBB53_10: # %entry
-; CHECK-V-NEXT: mv a3, a1
-; CHECK-V-NEXT: .LBB53_11: # %entry
-; CHECK-V-NEXT: sd a3, 8(sp)
+; CHECK-V-NEXT: sd a1, 8(sp)
; CHECK-V-NEXT: sd a0, 0(sp)
; CHECK-V-NEXT: addi a0, sp, 8
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: addi sp, sp, 48
; CHECK-V-NEXT: ret
-; CHECK-V-NEXT: .LBB53_12: # %entry
-; CHECK-V-NEXT: li a2, 1
-; CHECK-V-NEXT: mv a4, s1
-; CHECK-V-NEXT: blez s1, .LBB53_2
-; CHECK-V-NEXT: .LBB53_13: # %entry
-; CHECK-V-NEXT: li a4, 1
-; CHECK-V-NEXT: blez a1, .LBB53_3
-; CHECK-V-NEXT: .LBB53_14: # %entry
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: li a3, 0
-; CHECK-V-NEXT: beq a1, a5, .LBB53_4
-; CHECK-V-NEXT: .LBB53_15: # %entry
-; CHECK-V-NEXT: mv a3, a0
-; CHECK-V-NEXT: blez s1, .LBB53_5
-; CHECK-V-NEXT: .LBB53_16: # %entry
-; CHECK-V-NEXT: li s0, 0
-; CHECK-V-NEXT: li a0, 0
-; CHECK-V-NEXT: beq s1, a5, .LBB53_6
-; CHECK-V-NEXT: .LBB53_17: # %entry
-; CHECK-V-NEXT: mv a0, s0
-; CHECK-V-NEXT: mv a1, a0
-; CHECK-V-NEXT: bgtz a4, .LBB53_7
-; CHECK-V-NEXT: .LBB53_18: # %entry
-; CHECK-V-NEXT: li a1, 0
-; CHECK-V-NEXT: beqz a4, .LBB53_8
-; CHECK-V-NEXT: .LBB53_19: # %entry
-; CHECK-V-NEXT: mv a0, a1
-; CHECK-V-NEXT: mv a1, a3
-; CHECK-V-NEXT: bgtz a2, .LBB53_9
-; CHECK-V-NEXT: .LBB53_20: # %entry
-; CHECK-V-NEXT: li a1, 0
-; CHECK-V-NEXT: bnez a2, .LBB53_10
-; CHECK-V-NEXT: j .LBB53_11
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV32IZFH-LABEL: fcvt_si_h_sat:
; RV32IZFH: # %bb.0: # %start
-; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
-; RV32IZFH-NEXT: feq.s a0, ft0, ft0
-; RV32IZFH-NEXT: beqz a0, .LBB1_2
-; RV32IZFH-NEXT: # %bb.1:
; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0)
-; RV32IZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0)
+; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_1)
-; RV32IZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0)
-; RV32IZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV32IZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI1_1)(a0)
+; RV32IZFH-NEXT: fcvt.s.h ft2, fa0
+; RV32IZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV32IZFH-NEXT: fmin.s ft0, ft0, ft1
; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IZFH-NEXT: .LBB1_2: # %start
+; RV32IZFH-NEXT: feq.s a1, ft2, ft2
+; RV32IZFH-NEXT: seqz a1, a1
+; RV32IZFH-NEXT: addi a1, a1, -1
+; RV32IZFH-NEXT: and a0, a1, a0
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_si_h_sat:
; RV64IZFH: # %bb.0: # %start
-; RV64IZFH-NEXT: fcvt.s.h ft0, fa0
-; RV64IZFH-NEXT: feq.s a0, ft0, ft0
-; RV64IZFH-NEXT: beqz a0, .LBB1_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_0)
-; RV64IZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0)
+; RV64IZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_1)
-; RV64IZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0)
-; RV64IZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV64IZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV64IZFH-NEXT: flw ft1, %lo(.LCPI1_1)(a0)
+; RV64IZFH-NEXT: fcvt.s.h ft2, fa0
+; RV64IZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV64IZFH-NEXT: fmin.s ft0, ft0, ft1
; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IZFH-NEXT: .LBB1_2: # %start
+; RV64IZFH-NEXT: feq.s a1, ft2, ft2
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_si_h_sat:
; RV32IDZFH: # %bb.0: # %start
-; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0
-; RV32IDZFH-NEXT: feq.s a0, ft0, ft0
-; RV32IDZFH-NEXT: beqz a0, .LBB1_2
-; RV32IDZFH-NEXT: # %bb.1:
; RV32IDZFH-NEXT: lui a0, %hi(.LCPI1_0)
-; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0)
+; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
; RV32IDZFH-NEXT: lui a0, %hi(.LCPI1_1)
-; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0)
-; RV32IDZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI1_1)(a0)
+; RV32IDZFH-NEXT: fcvt.s.h ft2, fa0
+; RV32IDZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft1
; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IDZFH-NEXT: .LBB1_2: # %start
+; RV32IDZFH-NEXT: feq.s a1, ft2, ft2
+; RV32IDZFH-NEXT: seqz a1, a1
+; RV32IDZFH-NEXT: addi a1, a1, -1
+; RV32IDZFH-NEXT: and a0, a1, a0
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_si_h_sat:
; RV64IDZFH: # %bb.0: # %start
-; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0
-; RV64IDZFH-NEXT: feq.s a0, ft0, ft0
-; RV64IDZFH-NEXT: beqz a0, .LBB1_2
-; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: lui a0, %hi(.LCPI1_0)
-; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0)
+; RV64IDZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
; RV64IDZFH-NEXT: lui a0, %hi(.LCPI1_1)
-; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0)
-; RV64IDZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI1_1)(a0)
+; RV64IDZFH-NEXT: fcvt.s.h ft2, fa0
+; RV64IDZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft1
; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IDZFH-NEXT: .LBB1_2: # %start
+; RV64IDZFH-NEXT: feq.s a1, ft2, ft2
+; RV64IDZFH-NEXT: seqz a1, a1
+; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: and a0, a1, a0
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_si_h_sat:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bnez a1, .LBB1_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: .LBB1_6: # %start
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s2
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bnez a1, .LBB1_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: .LBB1_6: # %start
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV32I-NEXT: addi s0, a1, -1
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: call __extendhfsf2@plt
+; RV32I-NEXT: mv s3, a0
+; RV32I-NEXT: call __fixunssfsi@plt
; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2@plt
; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __fixunssfsi@plt
-; RV32I-NEXT: li s3, 0
-; RV32I-NEXT: bltz s2, .LBB3_2
-; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: .LBB3_2: # %start
; RV32I-NEXT: lui a0, 292864
; RV32I-NEXT: addi a1, a0, -256
-; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: call __gtsf2@plt
-; RV32I-NEXT: bgtz a0, .LBB3_4
-; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv s0, s3
-; RV32I-NEXT: .LBB3_4: # %start
+; RV32I-NEXT: bgtz a0, .LBB3_2
+; RV32I-NEXT: # %bb.1: # %start
+; RV32I-NEXT: slti a0, s2, 0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and s0, a0, s1
+; RV32I-NEXT: .LBB3_2: # %start
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV64I-NEXT: addiw s0, a1, -1
; RV64I-NEXT: and a0, a0, s0
; RV64I-NEXT: call __extendhfsf2@plt
+; RV64I-NEXT: mv s3, a0
+; RV64I-NEXT: call __fixunssfdi@plt
; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: mv a0, s3
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2@plt
; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __fixunssfdi@plt
-; RV64I-NEXT: li s3, 0
-; RV64I-NEXT: bltz s2, .LBB3_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: .LBB3_2: # %start
; RV64I-NEXT: lui a0, 292864
; RV64I-NEXT: addiw a1, a0, -256
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a0, s3
; RV64I-NEXT: call __gtsf2@plt
-; RV64I-NEXT: bgtz a0, .LBB3_4
-; RV64I-NEXT: # %bb.3: # %start
-; RV64I-NEXT: mv s0, s3
-; RV64I-NEXT: .LBB3_4: # %start
+; RV64I-NEXT: bgtz a0, .LBB3_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s2, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and s0, a0, s1
+; RV64I-NEXT: .LBB3_2: # %start
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
define i32 @fcvt_w_h_sat(half %a) nounwind {
; CHECKIZFH-LABEL: fcvt_w_h_sat:
; CHECKIZFH: # %bb.0: # %start
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB5_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rtz
-; CHECKIZFH-NEXT: .LBB5_2: # %start
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_w_h_sat:
; RV32IDZFH: # %bb.0: # %start
-; RV32IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV32IDZFH-NEXT: beqz a0, .LBB5_2
-; RV32IDZFH-NEXT: # %bb.1:
; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz
-; RV32IDZFH-NEXT: .LBB5_2: # %start
+; RV32IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV32IDZFH-NEXT: seqz a1, a1
+; RV32IDZFH-NEXT: addi a1, a1, -1
+; RV32IDZFH-NEXT: and a0, a1, a0
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_w_h_sat:
; RV64IDZFH: # %bb.0: # %start
-; RV64IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IDZFH-NEXT: beqz a0, .LBB5_2
-; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz
-; RV64IDZFH-NEXT: .LBB5_2: # %start
+; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IDZFH-NEXT: seqz a1, a1
+; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: and a0, a1, a0
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_w_h_sat:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bnez a1, .LBB5_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: .LBB5_6: # %start
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s2
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bnez a1, .LBB5_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: .LBB5_6: # %start
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
define i32 @fcvt_wu_h_sat(half %a) nounwind {
; RV32IZFH-LABEL: fcvt_wu_h_sat:
; RV32IZFH: # %bb.0: # %start
-; RV32IZFH-NEXT: feq.h a0, fa0, fa0
-; RV32IZFH-NEXT: beqz a0, .LBB8_2
-; RV32IZFH-NEXT: # %bb.1:
; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
-; RV32IZFH-NEXT: .LBB8_2: # %start
+; RV32IZFH-NEXT: feq.h a1, fa0, fa0
+; RV32IZFH-NEXT: seqz a1, a1
+; RV32IZFH-NEXT: addi a1, a1, -1
+; RV32IZFH-NEXT: and a0, a1, a0
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_wu_h_sat:
; RV64IZFH: # %bb.0: # %start
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB8_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a0, a1
; RV64IZFH-NEXT: slli a0, a0, 32
; RV64IZFH-NEXT: srli a0, a0, 32
-; RV64IZFH-NEXT: .LBB8_2: # %start
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_wu_h_sat:
; RV32IDZFH: # %bb.0: # %start
-; RV32IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV32IDZFH-NEXT: beqz a0, .LBB8_2
-; RV32IDZFH-NEXT: # %bb.1:
; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz
-; RV32IDZFH-NEXT: .LBB8_2: # %start
+; RV32IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV32IDZFH-NEXT: seqz a1, a1
+; RV32IDZFH-NEXT: addi a1, a1, -1
+; RV32IDZFH-NEXT: and a0, a1, a0
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_wu_h_sat:
; RV64IDZFH: # %bb.0: # %start
-; RV64IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IDZFH-NEXT: beqz a0, .LBB8_2
-; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz
+; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IDZFH-NEXT: seqz a1, a1
+; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: and a0, a0, a1
; RV64IDZFH-NEXT: slli a0, a0, 32
; RV64IDZFH-NEXT: srli a0, a0, 32
-; RV64IDZFH-NEXT: .LBB8_2: # %start
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_h_sat:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: call __extendhfsf2@plt
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2@plt
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunssfdi@plt
-; RV64I-NEXT: li s1, 0
-; RV64I-NEXT: bltz s2, .LBB8_2
-; RV64I-NEXT: # %bb.1: # %start
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: .LBB8_2: # %start
; RV64I-NEXT: lui a0, 325632
; RV64I-NEXT: addiw a1, a0, -1
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtsf2@plt
-; RV64I-NEXT: blez a0, .LBB8_4
-; RV64I-NEXT: # %bb.3:
+; RV64I-NEXT: bgtz a0, .LBB8_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: j .LBB8_3
+; RV64I-NEXT: .LBB8_2:
; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: srli s1, a0, 32
-; RV64I-NEXT: .LBB8_4: # %start
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: .LBB8_3: # %start
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV32IZFH-NEXT: addi a1, a3, -1
; RV32IZFH-NEXT: .LBB10_4: # %start
; RV32IZFH-NEXT: feq.s a3, fs0, fs0
-; RV32IZFH-NEXT: bnez a3, .LBB10_6
-; RV32IZFH-NEXT: # %bb.5: # %start
-; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: li a0, 0
-; RV32IZFH-NEXT: j .LBB10_7
-; RV32IZFH-NEXT: .LBB10_6:
-; RV32IZFH-NEXT: neg a3, s0
-; RV32IZFH-NEXT: and a0, a3, a0
+; RV32IZFH-NEXT: seqz a3, a3
+; RV32IZFH-NEXT: addi a3, a3, -1
+; RV32IZFH-NEXT: and a1, a3, a1
+; RV32IZFH-NEXT: seqz a4, s0
+; RV32IZFH-NEXT: addi a4, a4, -1
+; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
-; RV32IZFH-NEXT: .LBB10_7: # %start
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: and a0, a3, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: fcvt_l_h_sat:
; RV64IZFH: # %bb.0: # %start
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB10_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz
-; RV64IZFH-NEXT: .LBB10_2: # %start
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_l_h_sat:
; RV32IDZFH-NEXT: addi a1, a3, -1
; RV32IDZFH-NEXT: .LBB10_4: # %start
; RV32IDZFH-NEXT: feq.s a3, fs0, fs0
-; RV32IDZFH-NEXT: bnez a3, .LBB10_6
-; RV32IDZFH-NEXT: # %bb.5: # %start
-; RV32IDZFH-NEXT: li a1, 0
-; RV32IDZFH-NEXT: li a0, 0
-; RV32IDZFH-NEXT: j .LBB10_7
-; RV32IDZFH-NEXT: .LBB10_6:
-; RV32IDZFH-NEXT: neg a3, s0
-; RV32IDZFH-NEXT: and a0, a3, a0
+; RV32IDZFH-NEXT: seqz a3, a3
+; RV32IDZFH-NEXT: addi a3, a3, -1
+; RV32IDZFH-NEXT: and a1, a3, a1
+; RV32IDZFH-NEXT: seqz a4, s0
+; RV32IDZFH-NEXT: addi a4, a4, -1
+; RV32IDZFH-NEXT: and a0, a4, a0
; RV32IDZFH-NEXT: seqz a2, a2
; RV32IDZFH-NEXT: addi a2, a2, -1
-; RV32IDZFH-NEXT: or a0, a0, a2
-; RV32IDZFH-NEXT: .LBB10_7: # %start
+; RV32IDZFH-NEXT: or a0, a2, a0
+; RV32IDZFH-NEXT: and a0, a3, a0
; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IDZFH-LABEL: fcvt_l_h_sat:
; RV64IDZFH: # %bb.0: # %start
-; RV64IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IDZFH-NEXT: beqz a0, .LBB10_2
-; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz
-; RV64IDZFH-NEXT: .LBB10_2: # %start
+; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IDZFH-NEXT: seqz a1, a1
+; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: and a0, a1, a0
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_l_h_sat:
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: call __extendhfsf2@plt
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui a0, 389120
-; RV32I-NEXT: addi s2, a0, -1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: call __gtsf2@plt
-; RV32I-NEXT: li s0, 0
-; RV32I-NEXT: sgtz a0, a0
-; RV32I-NEXT: neg s5, a0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __fixsfdi@plt
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv s4, a1
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a1, 913408
-; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __gesf2@plt
-; RV32I-NEXT: bltz a0, .LBB10_2
+; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __fixsfdi@plt
+; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: call __unordsf2@plt
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi s1, a0, -1
+; RV32I-NEXT: lui a0, 389120
+; RV32I-NEXT: addi s3, a0, -1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __gtsf2@plt
+; RV32I-NEXT: bgtz a0, .LBB10_2
; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: or s5, s5, s3
+; RV32I-NEXT: slti a0, s4, 0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and s1, s1, a0
; RV32I-NEXT: .LBB10_2: # %start
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: mv s3, s0
-; RV32I-NEXT: bnez a0, .LBB10_4
-; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv s3, s5
-; RV32I-NEXT: .LBB10_4: # %start
; RV32I-NEXT: lui a1, 913408
-; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __gesf2@plt
-; RV32I-NEXT: lui s6, 524288
; RV32I-NEXT: lui s5, 524288
-; RV32I-NEXT: bltz a0, .LBB10_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv s5, s4
-; RV32I-NEXT: .LBB10_6: # %start
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s2
+; RV32I-NEXT: lui s4, 524288
+; RV32I-NEXT: bltz a0, .LBB10_4
+; RV32I-NEXT: # %bb.3: # %start
+; RV32I-NEXT: mv s4, s2
+; RV32I-NEXT: .LBB10_4: # %start
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __gtsf2@plt
-; RV32I-NEXT: bge s0, a0, .LBB10_8
-; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: addi s5, s6, -1
-; RV32I-NEXT: .LBB10_8: # %start
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: bnez a0, .LBB10_10
-; RV32I-NEXT: # %bb.9: # %start
-; RV32I-NEXT: mv s0, s5
-; RV32I-NEXT: .LBB10_10: # %start
-; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: blez a0, .LBB10_6
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: addi s4, s5, -1
+; RV32I-NEXT: .LBB10_6: # %start
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: call __unordsf2@plt
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a1, a0, s4
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bnez a1, .LBB10_7
-; RV64I-NEXT: # %bb.6: # %start
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: .LBB10_7: # %start
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IZFH-NEXT: fmv.w.x ft0, zero
; RV32IZFH-NEXT: fle.s a0, ft0, fs0
-; RV32IZFH-NEXT: neg s0, a0
+; RV32IZFH-NEXT: seqz a0, a0
+; RV32IZFH-NEXT: addi s0, a0, -1
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi@plt
; RV32IZFH-NEXT: lui a2, %hi(.LCPI12_0)
; RV32IZFH-NEXT: flt.s a2, ft0, fs0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
+; RV32IZFH-NEXT: or a0, a2, a0
; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a1, a2
+; RV32IZFH-NEXT: or a1, a2, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: fcvt_lu_h_sat:
; RV64IZFH: # %bb.0: # %start
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB12_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz
-; RV64IZFH-NEXT: .LBB12_2: # %start
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_lu_h_sat:
; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IDZFH-NEXT: fmv.w.x ft0, zero
; RV32IDZFH-NEXT: fle.s a0, ft0, fs0
-; RV32IDZFH-NEXT: neg s0, a0
+; RV32IDZFH-NEXT: seqz a0, a0
+; RV32IDZFH-NEXT: addi s0, a0, -1
; RV32IDZFH-NEXT: fmv.s fa0, fs0
; RV32IDZFH-NEXT: call __fixunssfdi@plt
; RV32IDZFH-NEXT: lui a2, %hi(.LCPI12_0)
; RV32IDZFH-NEXT: flt.s a2, ft0, fs0
; RV32IDZFH-NEXT: seqz a2, a2
; RV32IDZFH-NEXT: addi a2, a2, -1
-; RV32IDZFH-NEXT: or a0, a0, a2
+; RV32IDZFH-NEXT: or a0, a2, a0
; RV32IDZFH-NEXT: and a1, s0, a1
-; RV32IDZFH-NEXT: or a1, a1, a2
+; RV32IDZFH-NEXT: or a1, a2, a1
; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
;
; RV64IDZFH-LABEL: fcvt_lu_h_sat:
; RV64IDZFH: # %bb.0: # %start
-; RV64IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IDZFH-NEXT: beqz a0, .LBB12_2
-; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz
-; RV64IDZFH-NEXT: .LBB12_2: # %start
+; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IDZFH-NEXT: seqz a1, a1
+; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: and a0, a1, a0
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_lu_h_sat:
define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32IZFH-LABEL: fcvt_w_s_sat_i16:
; RV32IZFH: # %bb.0: # %start
-; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
-; RV32IZFH-NEXT: feq.s a0, ft0, ft0
-; RV32IZFH-NEXT: beqz a0, .LBB32_2
-; RV32IZFH-NEXT: # %bb.1:
; RV32IZFH-NEXT: lui a0, %hi(.LCPI32_0)
-; RV32IZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0)
+; RV32IZFH-NEXT: flw ft0, %lo(.LCPI32_0)(a0)
; RV32IZFH-NEXT: lui a0, %hi(.LCPI32_1)
-; RV32IZFH-NEXT: flw ft2, %lo(.LCPI32_1)(a0)
-; RV32IZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV32IZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI32_1)(a0)
+; RV32IZFH-NEXT: fcvt.s.h ft2, fa0
+; RV32IZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV32IZFH-NEXT: fmin.s ft0, ft0, ft1
; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IZFH-NEXT: .LBB32_2: # %start
+; RV32IZFH-NEXT: feq.s a1, ft2, ft2
+; RV32IZFH-NEXT: seqz a1, a1
+; RV32IZFH-NEXT: addi a1, a1, -1
+; RV32IZFH-NEXT: and a0, a1, a0
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_w_s_sat_i16:
; RV64IZFH: # %bb.0: # %start
-; RV64IZFH-NEXT: fcvt.s.h ft0, fa0
-; RV64IZFH-NEXT: feq.s a0, ft0, ft0
-; RV64IZFH-NEXT: beqz a0, .LBB32_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: lui a0, %hi(.LCPI32_0)
-; RV64IZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0)
+; RV64IZFH-NEXT: flw ft0, %lo(.LCPI32_0)(a0)
; RV64IZFH-NEXT: lui a0, %hi(.LCPI32_1)
-; RV64IZFH-NEXT: flw ft2, %lo(.LCPI32_1)(a0)
-; RV64IZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV64IZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV64IZFH-NEXT: flw ft1, %lo(.LCPI32_1)(a0)
+; RV64IZFH-NEXT: fcvt.s.h ft2, fa0
+; RV64IZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV64IZFH-NEXT: fmin.s ft0, ft0, ft1
; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IZFH-NEXT: .LBB32_2: # %start
+; RV64IZFH-NEXT: feq.s a1, ft2, ft2
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_w_s_sat_i16:
; RV32IDZFH: # %bb.0: # %start
-; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0
-; RV32IDZFH-NEXT: feq.s a0, ft0, ft0
-; RV32IDZFH-NEXT: beqz a0, .LBB32_2
-; RV32IDZFH-NEXT: # %bb.1:
; RV32IDZFH-NEXT: lui a0, %hi(.LCPI32_0)
-; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0)
+; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI32_0)(a0)
; RV32IDZFH-NEXT: lui a0, %hi(.LCPI32_1)
-; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI32_1)(a0)
-; RV32IDZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI32_1)(a0)
+; RV32IDZFH-NEXT: fcvt.s.h ft2, fa0
+; RV32IDZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft1
; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IDZFH-NEXT: .LBB32_2: # %start
+; RV32IDZFH-NEXT: feq.s a1, ft2, ft2
+; RV32IDZFH-NEXT: seqz a1, a1
+; RV32IDZFH-NEXT: addi a1, a1, -1
+; RV32IDZFH-NEXT: and a0, a1, a0
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_w_s_sat_i16:
; RV64IDZFH: # %bb.0: # %start
-; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0
-; RV64IDZFH-NEXT: feq.s a0, ft0, ft0
-; RV64IDZFH-NEXT: beqz a0, .LBB32_2
-; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: lui a0, %hi(.LCPI32_0)
-; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0)
+; RV64IDZFH-NEXT: flw ft0, %lo(.LCPI32_0)(a0)
; RV64IDZFH-NEXT: lui a0, %hi(.LCPI32_1)
-; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI32_1)(a0)
-; RV64IDZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI32_1)(a0)
+; RV64IDZFH-NEXT: fcvt.s.h ft2, fa0
+; RV64IDZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft1
; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IDZFH-NEXT: .LBB32_2: # %start
+; RV64IDZFH-NEXT: feq.s a1, ft2, ft2
+; RV64IDZFH-NEXT: seqz a1, a1
+; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: and a0, a1, a0
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i16:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: bnez a0, .LBB32_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: .LBB32_6: # %start
-; RV32I-NEXT: slli a0, a1, 16
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: bnez a0, .LBB32_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: .LBB32_6: # %start
-; RV64I-NEXT: slli a0, a1, 48
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
+; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a1, 16
-; RV32I-NEXT: addi s2, a1, -1
-; RV32I-NEXT: and a0, a0, s2
+; RV32I-NEXT: addi s3, a1, -1
+; RV32I-NEXT: and a0, a0, s3
; RV32I-NEXT: call __extendhfsf2@plt
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: call __fixunssfsi@plt
; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2@plt
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __fixunssfsi@plt
-; RV32I-NEXT: li s3, 0
-; RV32I-NEXT: bltz s1, .LBB34_2
-; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: .LBB34_2: # %start
; RV32I-NEXT: lui a0, 292864
; RV32I-NEXT: addi a1, a0, -256
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __gtsf2@plt
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: bgtz a0, .LBB34_4
-; RV32I-NEXT: # %bb.3: # %start
; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: .LBB34_4: # %start
-; RV32I-NEXT: and a0, a1, s2
+; RV32I-NEXT: bgtz a0, .LBB34_2
+; RV32I-NEXT: # %bb.1: # %start
+; RV32I-NEXT: slti a0, s1, 0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a1, a0, s0
+; RV32I-NEXT: .LBB34_2: # %start
+; RV32I-NEXT: and a0, a1, s3
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: addiw s2, a1, -1
-; RV64I-NEXT: and a0, a0, s2
+; RV64I-NEXT: addiw s3, a1, -1
+; RV64I-NEXT: and a0, a0, s3
; RV64I-NEXT: call __extendhfsf2@plt
+; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: call __fixunssfdi@plt
; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2@plt
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __fixunssfdi@plt
-; RV64I-NEXT: li s3, 0
-; RV64I-NEXT: bltz s1, .LBB34_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: .LBB34_2: # %start
; RV64I-NEXT: lui a0, 292864
; RV64I-NEXT: addiw a1, a0, -256
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtsf2@plt
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: bgtz a0, .LBB34_4
-; RV64I-NEXT: # %bb.3: # %start
; RV64I-NEXT: mv a1, s3
-; RV64I-NEXT: .LBB34_4: # %start
-; RV64I-NEXT: and a0, a1, s2
+; RV64I-NEXT: bgtz a0, .LBB34_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s1, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a1, a0, s0
+; RV64I-NEXT: .LBB34_2: # %start
+; RV64I-NEXT: and a0, a1, s3
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV32IZFH-LABEL: fcvt_w_s_sat_i8:
; RV32IZFH: # %bb.0: # %start
-; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
-; RV32IZFH-NEXT: feq.s a0, ft0, ft0
-; RV32IZFH-NEXT: beqz a0, .LBB36_2
-; RV32IZFH-NEXT: # %bb.1:
; RV32IZFH-NEXT: lui a0, %hi(.LCPI36_0)
-; RV32IZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0)
+; RV32IZFH-NEXT: flw ft0, %lo(.LCPI36_0)(a0)
; RV32IZFH-NEXT: lui a0, %hi(.LCPI36_1)
-; RV32IZFH-NEXT: flw ft2, %lo(.LCPI36_1)(a0)
-; RV32IZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV32IZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI36_1)(a0)
+; RV32IZFH-NEXT: fcvt.s.h ft2, fa0
+; RV32IZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV32IZFH-NEXT: fmin.s ft0, ft0, ft1
; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IZFH-NEXT: .LBB36_2: # %start
+; RV32IZFH-NEXT: feq.s a1, ft2, ft2
+; RV32IZFH-NEXT: seqz a1, a1
+; RV32IZFH-NEXT: addi a1, a1, -1
+; RV32IZFH-NEXT: and a0, a1, a0
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_w_s_sat_i8:
; RV64IZFH: # %bb.0: # %start
-; RV64IZFH-NEXT: fcvt.s.h ft0, fa0
-; RV64IZFH-NEXT: feq.s a0, ft0, ft0
-; RV64IZFH-NEXT: beqz a0, .LBB36_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: lui a0, %hi(.LCPI36_0)
-; RV64IZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0)
+; RV64IZFH-NEXT: flw ft0, %lo(.LCPI36_0)(a0)
; RV64IZFH-NEXT: lui a0, %hi(.LCPI36_1)
-; RV64IZFH-NEXT: flw ft2, %lo(.LCPI36_1)(a0)
-; RV64IZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV64IZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV64IZFH-NEXT: flw ft1, %lo(.LCPI36_1)(a0)
+; RV64IZFH-NEXT: fcvt.s.h ft2, fa0
+; RV64IZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV64IZFH-NEXT: fmin.s ft0, ft0, ft1
; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IZFH-NEXT: .LBB36_2: # %start
+; RV64IZFH-NEXT: feq.s a1, ft2, ft2
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_w_s_sat_i8:
; RV32IDZFH: # %bb.0: # %start
-; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0
-; RV32IDZFH-NEXT: feq.s a0, ft0, ft0
-; RV32IDZFH-NEXT: beqz a0, .LBB36_2
-; RV32IDZFH-NEXT: # %bb.1:
; RV32IDZFH-NEXT: lui a0, %hi(.LCPI36_0)
-; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0)
+; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI36_0)(a0)
; RV32IDZFH-NEXT: lui a0, %hi(.LCPI36_1)
-; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI36_1)(a0)
-; RV32IDZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI36_1)(a0)
+; RV32IDZFH-NEXT: fcvt.s.h ft2, fa0
+; RV32IDZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft1
; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IDZFH-NEXT: .LBB36_2: # %start
+; RV32IDZFH-NEXT: feq.s a1, ft2, ft2
+; RV32IDZFH-NEXT: seqz a1, a1
+; RV32IDZFH-NEXT: addi a1, a1, -1
+; RV32IDZFH-NEXT: and a0, a1, a0
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_w_s_sat_i8:
; RV64IDZFH: # %bb.0: # %start
-; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0
-; RV64IDZFH-NEXT: feq.s a0, ft0, ft0
-; RV64IDZFH-NEXT: beqz a0, .LBB36_2
-; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: lui a0, %hi(.LCPI36_0)
-; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0)
+; RV64IDZFH-NEXT: flw ft0, %lo(.LCPI36_0)(a0)
; RV64IDZFH-NEXT: lui a0, %hi(.LCPI36_1)
-; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI36_1)(a0)
-; RV64IDZFH-NEXT: fmax.s ft0, ft0, ft1
-; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft2
+; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI36_1)(a0)
+; RV64IDZFH-NEXT: fcvt.s.h ft2, fa0
+; RV64IDZFH-NEXT: fmax.s ft0, ft2, ft0
+; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft1
; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IDZFH-NEXT: .LBB36_2: # %start
+; RV64IDZFH-NEXT: feq.s a1, ft2, ft2
+; RV64IDZFH-NEXT: seqz a1, a1
+; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: and a0, a1, a0
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i8:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: bnez a0, .LBB36_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: .LBB36_6: # %start
-; RV32I-NEXT: slli a0, a1, 24
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: bnez a0, .LBB36_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: .LBB36_6: # %start
-; RV64I-NEXT: slli a0, a1, 56
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: call __extendhfsf2@plt
-; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2@plt
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __fixunssfsi@plt
-; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: bltz s1, .LBB38_2
-; RV32I-NEXT: # %bb.1: # %start
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: .LBB38_2: # %start
+; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a1, 276464
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __gtsf2@plt
; RV32I-NEXT: li a1, 255
-; RV32I-NEXT: bgtz a0, .LBB38_4
-; RV32I-NEXT: # %bb.3: # %start
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: .LBB38_4: # %start
+; RV32I-NEXT: bgtz a0, .LBB38_2
+; RV32I-NEXT: # %bb.1: # %start
+; RV32I-NEXT: slti a0, s0, 0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a1, a0, s1
+; RV32I-NEXT: .LBB38_2: # %start
; RV32I-NEXT: andi a0, a1, 255
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: call __extendhfsf2@plt
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2@plt
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunssfdi@plt
-; RV64I-NEXT: li s2, 0
-; RV64I-NEXT: bltz s1, .LBB38_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: .LBB38_2: # %start
+; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a1, 276464
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtsf2@plt
; RV64I-NEXT: li a1, 255
-; RV64I-NEXT: bgtz a0, .LBB38_4
-; RV64I-NEXT: # %bb.3: # %start
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: .LBB38_4: # %start
+; RV64I-NEXT: bgtz a0, .LBB38_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a1, a0, s1
+; RV64I-NEXT: .LBB38_2: # %start
; RV64I-NEXT: andi a0, a1, 255
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; RV32IZFH-LABEL: fcvt_wu_h_sat_zext:
; RV32IZFH: # %bb.0: # %start
-; RV32IZFH-NEXT: feq.h a0, fa0, fa0
-; RV32IZFH-NEXT: beqz a0, .LBB39_2
-; RV32IZFH-NEXT: # %bb.1:
; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
-; RV32IZFH-NEXT: .LBB39_2: # %start
+; RV32IZFH-NEXT: feq.h a1, fa0, fa0
+; RV32IZFH-NEXT: seqz a1, a1
+; RV32IZFH-NEXT: addi a1, a1, -1
+; RV32IZFH-NEXT: and a0, a1, a0
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_wu_h_sat_zext:
; RV64IZFH: # %bb.0: # %start
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB39_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a0, a1
; RV64IZFH-NEXT: slli a0, a0, 32
; RV64IZFH-NEXT: srli a0, a0, 32
-; RV64IZFH-NEXT: .LBB39_2: # %start
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_wu_h_sat_zext:
; RV32IDZFH: # %bb.0: # %start
-; RV32IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV32IDZFH-NEXT: beqz a0, .LBB39_2
-; RV32IDZFH-NEXT: # %bb.1:
; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz
-; RV32IDZFH-NEXT: .LBB39_2: # %start
+; RV32IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV32IDZFH-NEXT: seqz a1, a1
+; RV32IDZFH-NEXT: addi a1, a1, -1
+; RV32IDZFH-NEXT: and a0, a1, a0
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_wu_h_sat_zext:
; RV64IDZFH: # %bb.0: # %start
-; RV64IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IDZFH-NEXT: beqz a0, .LBB39_2
-; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz
+; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IDZFH-NEXT: seqz a1, a1
+; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: and a0, a0, a1
; RV64IDZFH-NEXT: slli a0, a0, 32
; RV64IDZFH-NEXT: srli a0, a0, 32
-; RV64IDZFH-NEXT: .LBB39_2: # %start
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_h_sat_zext:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: call __extendhfsf2@plt
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2@plt
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __fixunssfdi@plt
-; RV64I-NEXT: li s2, 0
-; RV64I-NEXT: bltz s1, .LBB39_2
-; RV64I-NEXT: # %bb.1: # %start
-; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: .LBB39_2: # %start
+; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a0, 325632
; RV64I-NEXT: addiw a1, a0, -1
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtsf2@plt
-; RV64I-NEXT: blez a0, .LBB39_4
-; RV64I-NEXT: # %bb.3:
+; RV64I-NEXT: bgtz a0, .LBB39_2
+; RV64I-NEXT: # %bb.1: # %start
+; RV64I-NEXT: slti a0, s0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: j .LBB39_3
+; RV64I-NEXT: .LBB39_2:
; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: srli s2, a0, 32
-; RV64I-NEXT: .LBB39_4: # %start
-; RV64I-NEXT: slli a0, s2, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: .LBB39_3: # %start
+; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
define signext i32 @fcvt_w_h_sat_sext(half %a) nounwind {
; CHECKIZFH-LABEL: fcvt_w_h_sat_sext:
; CHECKIZFH: # %bb.0: # %start
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB40_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rtz
-; CHECKIZFH-NEXT: .LBB40_2: # %start
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_w_h_sat_sext:
; RV32IDZFH: # %bb.0: # %start
-; RV32IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV32IDZFH-NEXT: beqz a0, .LBB40_2
-; RV32IDZFH-NEXT: # %bb.1:
; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz
-; RV32IDZFH-NEXT: .LBB40_2: # %start
+; RV32IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV32IDZFH-NEXT: seqz a1, a1
+; RV32IDZFH-NEXT: addi a1, a1, -1
+; RV32IDZFH-NEXT: and a0, a1, a0
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_w_h_sat_sext:
; RV64IDZFH: # %bb.0: # %start
-; RV64IDZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IDZFH-NEXT: beqz a0, .LBB40_2
-; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz
-; RV64IDZFH-NEXT: .LBB40_2: # %start
+; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IDZFH-NEXT: seqz a1, a1
+; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: and a0, a1, a0
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_w_h_sat_sext:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2@plt
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bnez a1, .LBB40_6
-; RV32I-NEXT: # %bb.5: # %start
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: .LBB40_6: # %start
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s2
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: bnez a0, .LBB40_6
-; RV64I-NEXT: # %bb.5: # %start
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: .LBB40_6: # %start
-; RV64I-NEXT: sext.w a0, a1
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s2
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
define signext i32 @test_floor_si32(half %x) {
; CHECKIZFH-LABEL: test_floor_si32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB0_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rdn
-; CHECKIZFH-NEXT: .LBB0_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.floor.f16(half %x)
%b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
; RV32IZFH-NEXT: addi a1, a3, -1
; RV32IZFH-NEXT: .LBB1_4:
; RV32IZFH-NEXT: feq.s a3, fs0, fs0
-; RV32IZFH-NEXT: bnez a3, .LBB1_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: li a0, 0
-; RV32IZFH-NEXT: j .LBB1_7
-; RV32IZFH-NEXT: .LBB1_6:
-; RV32IZFH-NEXT: neg a3, s0
-; RV32IZFH-NEXT: and a0, a3, a0
+; RV32IZFH-NEXT: seqz a3, a3
+; RV32IZFH-NEXT: addi a3, a3, -1
+; RV32IZFH-NEXT: and a1, a3, a1
+; RV32IZFH-NEXT: seqz a4, s0
+; RV32IZFH-NEXT: addi a4, a4, -1
+; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
-; RV32IZFH-NEXT: .LBB1_7:
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: and a0, a3, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_floor_si64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB1_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rdn
-; RV64IZFH-NEXT: .LBB1_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.floor.f16(half %x)
%b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
define signext i32 @test_floor_ui32(half %x) {
; CHECKIZFH-LABEL: test_floor_ui32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB2_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rdn
-; CHECKIZFH-NEXT: .LBB2_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.floor.f16(half %x)
%b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
; RV32IZFH-NEXT: fmv.w.x ft0, zero
; RV32IZFH-NEXT: fle.s a0, ft0, fs0
-; RV32IZFH-NEXT: neg s0, a0
+; RV32IZFH-NEXT: seqz a0, a0
+; RV32IZFH-NEXT: addi s0, a0, -1
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi@plt
; RV32IZFH-NEXT: lui a2, %hi(.LCPI3_0)
; RV32IZFH-NEXT: flt.s a2, ft0, fs0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
+; RV32IZFH-NEXT: or a0, a2, a0
; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a1, a2
+; RV32IZFH-NEXT: or a1, a2, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_floor_ui64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB3_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rdn
-; RV64IZFH-NEXT: .LBB3_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.floor.f16(half %x)
%b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
define signext i32 @test_ceil_si32(half %x) {
; CHECKIZFH-LABEL: test_ceil_si32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB4_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rup
-; CHECKIZFH-NEXT: .LBB4_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.ceil.f16(half %x)
%b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
; RV32IZFH-NEXT: addi a1, a3, -1
; RV32IZFH-NEXT: .LBB5_4:
; RV32IZFH-NEXT: feq.s a3, fs0, fs0
-; RV32IZFH-NEXT: bnez a3, .LBB5_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: li a0, 0
-; RV32IZFH-NEXT: j .LBB5_7
-; RV32IZFH-NEXT: .LBB5_6:
-; RV32IZFH-NEXT: neg a3, s0
-; RV32IZFH-NEXT: and a0, a3, a0
+; RV32IZFH-NEXT: seqz a3, a3
+; RV32IZFH-NEXT: addi a3, a3, -1
+; RV32IZFH-NEXT: and a1, a3, a1
+; RV32IZFH-NEXT: seqz a4, s0
+; RV32IZFH-NEXT: addi a4, a4, -1
+; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
-; RV32IZFH-NEXT: .LBB5_7:
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: and a0, a3, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_ceil_si64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB5_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rup
-; RV64IZFH-NEXT: .LBB5_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.ceil.f16(half %x)
%b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
define signext i32 @test_ceil_ui32(half %x) {
; CHECKIZFH-LABEL: test_ceil_ui32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB6_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rup
-; CHECKIZFH-NEXT: .LBB6_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.ceil.f16(half %x)
%b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
; RV32IZFH-NEXT: fmv.w.x ft0, zero
; RV32IZFH-NEXT: fle.s a0, ft0, fs0
-; RV32IZFH-NEXT: neg s0, a0
+; RV32IZFH-NEXT: seqz a0, a0
+; RV32IZFH-NEXT: addi s0, a0, -1
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi@plt
; RV32IZFH-NEXT: lui a2, %hi(.LCPI7_0)
; RV32IZFH-NEXT: flt.s a2, ft0, fs0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
+; RV32IZFH-NEXT: or a0, a2, a0
; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a1, a2
+; RV32IZFH-NEXT: or a1, a2, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_ceil_ui64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB7_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rup
-; RV64IZFH-NEXT: .LBB7_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.ceil.f16(half %x)
%b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
define signext i32 @test_trunc_si32(half %x) {
; CHECKIZFH-LABEL: test_trunc_si32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB8_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rtz
-; CHECKIZFH-NEXT: .LBB8_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.trunc.f16(half %x)
%b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
; RV32IZFH-NEXT: addi a1, a3, -1
; RV32IZFH-NEXT: .LBB9_4:
; RV32IZFH-NEXT: feq.s a3, fs0, fs0
-; RV32IZFH-NEXT: bnez a3, .LBB9_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: li a0, 0
-; RV32IZFH-NEXT: j .LBB9_7
-; RV32IZFH-NEXT: .LBB9_6:
-; RV32IZFH-NEXT: neg a3, s0
-; RV32IZFH-NEXT: and a0, a3, a0
+; RV32IZFH-NEXT: seqz a3, a3
+; RV32IZFH-NEXT: addi a3, a3, -1
+; RV32IZFH-NEXT: and a1, a3, a1
+; RV32IZFH-NEXT: seqz a4, s0
+; RV32IZFH-NEXT: addi a4, a4, -1
+; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
-; RV32IZFH-NEXT: .LBB9_7:
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: and a0, a3, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_trunc_si64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB9_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz
-; RV64IZFH-NEXT: .LBB9_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.trunc.f16(half %x)
%b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
define signext i32 @test_trunc_ui32(half %x) {
; CHECKIZFH-LABEL: test_trunc_ui32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB10_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rtz
-; CHECKIZFH-NEXT: .LBB10_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.trunc.f16(half %x)
%b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
; RV32IZFH-NEXT: fmv.w.x ft0, zero
; RV32IZFH-NEXT: fle.s a0, ft0, fs0
-; RV32IZFH-NEXT: neg s0, a0
+; RV32IZFH-NEXT: seqz a0, a0
+; RV32IZFH-NEXT: addi s0, a0, -1
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi@plt
; RV32IZFH-NEXT: lui a2, %hi(.LCPI11_0)
; RV32IZFH-NEXT: flt.s a2, ft0, fs0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
+; RV32IZFH-NEXT: or a0, a2, a0
; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a1, a2
+; RV32IZFH-NEXT: or a1, a2, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_trunc_ui64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB11_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz
-; RV64IZFH-NEXT: .LBB11_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.trunc.f16(half %x)
%b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
define signext i32 @test_round_si32(half %x) {
; CHECKIZFH-LABEL: test_round_si32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB12_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rmm
-; CHECKIZFH-NEXT: .LBB12_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.round.f16(half %x)
%b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
; RV32IZFH-NEXT: addi a1, a3, -1
; RV32IZFH-NEXT: .LBB13_4:
; RV32IZFH-NEXT: feq.s a3, fs0, fs0
-; RV32IZFH-NEXT: bnez a3, .LBB13_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: li a0, 0
-; RV32IZFH-NEXT: j .LBB13_7
-; RV32IZFH-NEXT: .LBB13_6:
-; RV32IZFH-NEXT: neg a3, s0
-; RV32IZFH-NEXT: and a0, a3, a0
+; RV32IZFH-NEXT: seqz a3, a3
+; RV32IZFH-NEXT: addi a3, a3, -1
+; RV32IZFH-NEXT: and a1, a3, a1
+; RV32IZFH-NEXT: seqz a4, s0
+; RV32IZFH-NEXT: addi a4, a4, -1
+; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
-; RV32IZFH-NEXT: .LBB13_7:
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: and a0, a3, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_round_si64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB13_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm
-; RV64IZFH-NEXT: .LBB13_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.round.f16(half %x)
%b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
define signext i32 @test_round_ui32(half %x) {
; CHECKIZFH-LABEL: test_round_ui32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB14_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rmm
-; CHECKIZFH-NEXT: .LBB14_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.round.f16(half %x)
%b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
; RV32IZFH-NEXT: fmv.w.x ft0, zero
; RV32IZFH-NEXT: fle.s a0, ft0, fs0
-; RV32IZFH-NEXT: neg s0, a0
+; RV32IZFH-NEXT: seqz a0, a0
+; RV32IZFH-NEXT: addi s0, a0, -1
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi@plt
; RV32IZFH-NEXT: lui a2, %hi(.LCPI15_0)
; RV32IZFH-NEXT: flt.s a2, ft0, fs0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
+; RV32IZFH-NEXT: or a0, a2, a0
; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a1, a2
+; RV32IZFH-NEXT: or a1, a2, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_round_ui64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB15_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rmm
-; RV64IZFH-NEXT: .LBB15_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.round.f16(half %x)
%b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
define signext i32 @test_roundeven_si32(half %x) {
; CHECKIZFH-LABEL: test_roundeven_si32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB16_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rne
-; CHECKIZFH-NEXT: .LBB16_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.roundeven.f16(half %x)
%b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
; RV32IZFH-NEXT: addi a1, a3, -1
; RV32IZFH-NEXT: .LBB17_4:
; RV32IZFH-NEXT: feq.s a3, fs0, fs0
-; RV32IZFH-NEXT: bnez a3, .LBB17_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: li a0, 0
-; RV32IZFH-NEXT: j .LBB17_7
-; RV32IZFH-NEXT: .LBB17_6:
-; RV32IZFH-NEXT: neg a3, s0
-; RV32IZFH-NEXT: and a0, a3, a0
+; RV32IZFH-NEXT: seqz a3, a3
+; RV32IZFH-NEXT: addi a3, a3, -1
+; RV32IZFH-NEXT: and a1, a3, a1
+; RV32IZFH-NEXT: seqz a4, s0
+; RV32IZFH-NEXT: addi a4, a4, -1
+; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
-; RV32IZFH-NEXT: .LBB17_7:
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: and a0, a3, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_roundeven_si64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB17_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rne
-; RV64IZFH-NEXT: .LBB17_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.roundeven.f16(half %x)
%b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
define signext i32 @test_roundeven_ui32(half %x) {
; CHECKIZFH-LABEL: test_roundeven_ui32:
; CHECKIZFH: # %bb.0:
-; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
-; CHECKIZFH-NEXT: beqz a0, .LBB18_2
-; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rne
-; CHECKIZFH-NEXT: .LBB18_2:
+; CHECKIZFH-NEXT: feq.h a1, fa0, fa0
+; CHECKIZFH-NEXT: seqz a1, a1
+; CHECKIZFH-NEXT: addi a1, a1, -1
+; CHECKIZFH-NEXT: and a0, a1, a0
; CHECKIZFH-NEXT: ret
%a = call half @llvm.roundeven.f16(half %x)
%b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
; RV32IZFH-NEXT: fmv.w.x ft0, zero
; RV32IZFH-NEXT: fle.s a0, ft0, fs0
-; RV32IZFH-NEXT: neg s0, a0
+; RV32IZFH-NEXT: seqz a0, a0
+; RV32IZFH-NEXT: addi s0, a0, -1
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi@plt
; RV32IZFH-NEXT: lui a2, %hi(.LCPI19_0)
; RV32IZFH-NEXT: flt.s a2, ft0, fs0
; RV32IZFH-NEXT: seqz a2, a2
; RV32IZFH-NEXT: addi a2, a2, -1
-; RV32IZFH-NEXT: or a0, a0, a2
+; RV32IZFH-NEXT: or a0, a2, a0
; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a1, a2
+; RV32IZFH-NEXT: or a1, a2, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
;
; RV64IZFH-LABEL: test_roundeven_ui64:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa0
-; RV64IZFH-NEXT: beqz a0, .LBB19_2
-; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rne
-; RV64IZFH-NEXT: .LBB19_2:
+; RV64IZFH-NEXT: feq.h a1, fa0, fa0
+; RV64IZFH-NEXT: seqz a1, a1
+; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: and a0, a1, a0
; RV64IZFH-NEXT: ret
%a = call half @llvm.roundeven.f16(half %x)
%b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
define i64 @rotl_64(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: rotl_64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi a5, a2, -32
-; RV32I-NEXT: mv a4, a1
-; RV32I-NEXT: bltz a5, .LBB2_2
+; RV32I-NEXT: sll a4, a0, a2
+; RV32I-NEXT: addi a3, a2, -32
+; RV32I-NEXT: slti a5, a3, 0
+; RV32I-NEXT: neg a5, a5
+; RV32I-NEXT: bltz a3, .LBB2_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll a1, a0, a5
+; RV32I-NEXT: sll a3, a0, a3
; RV32I-NEXT: j .LBB2_3
; RV32I-NEXT: .LBB2_2:
-; RV32I-NEXT: sll a1, a4, a2
-; RV32I-NEXT: xori a3, a2, 31
-; RV32I-NEXT: srli a6, a0, 1
-; RV32I-NEXT: srl a3, a6, a3
-; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: sll a3, a1, a2
+; RV32I-NEXT: xori a6, a2, 31
+; RV32I-NEXT: srli a7, a0, 1
+; RV32I-NEXT: srl a6, a7, a6
+; RV32I-NEXT: or a3, a3, a6
; RV32I-NEXT: .LBB2_3:
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: li a3, 32
-; RV32I-NEXT: sub a7, a3, a2
-; RV32I-NEXT: srl a3, a4, a6
-; RV32I-NEXT: bltz a7, .LBB2_6
+; RV32I-NEXT: and a4, a5, a4
+; RV32I-NEXT: neg a7, a2
+; RV32I-NEXT: li a5, 32
+; RV32I-NEXT: sub a6, a5, a2
+; RV32I-NEXT: srl a5, a1, a7
+; RV32I-NEXT: bltz a6, .LBB2_5
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: bltz a5, .LBB2_7
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: j .LBB2_6
; RV32I-NEXT: .LBB2_5:
-; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB2_6:
-; RV32I-NEXT: srl a6, a0, a6
+; RV32I-NEXT: srl a0, a0, a7
; RV32I-NEXT: li a7, 64
-; RV32I-NEXT: sub a7, a7, a2
-; RV32I-NEXT: xori a7, a7, 31
-; RV32I-NEXT: slli a4, a4, 1
-; RV32I-NEXT: sll a4, a4, a7
-; RV32I-NEXT: or a4, a6, a4
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: mv a3, a4
-; RV32I-NEXT: bgez a5, .LBB2_5
-; RV32I-NEXT: .LBB2_7:
-; RV32I-NEXT: sll a0, a0, a2
-; RV32I-NEXT: or a3, a3, a0
-; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: sub a2, a7, a2
+; RV32I-NEXT: xori a2, a2, 31
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: .LBB2_6:
+; RV32I-NEXT: slti a1, a6, 0
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: or a0, a4, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotl_64:
;
; RV32ZBB-LABEL: rotl_64:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: addi a5, a2, -32
-; RV32ZBB-NEXT: mv a4, a1
-; RV32ZBB-NEXT: bltz a5, .LBB2_2
+; RV32ZBB-NEXT: sll a4, a0, a2
+; RV32ZBB-NEXT: addi a3, a2, -32
+; RV32ZBB-NEXT: slti a5, a3, 0
+; RV32ZBB-NEXT: neg a5, a5
+; RV32ZBB-NEXT: bltz a3, .LBB2_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sll a1, a0, a5
+; RV32ZBB-NEXT: sll a3, a0, a3
; RV32ZBB-NEXT: j .LBB2_3
; RV32ZBB-NEXT: .LBB2_2:
-; RV32ZBB-NEXT: sll a1, a4, a2
-; RV32ZBB-NEXT: xori a3, a2, 31
-; RV32ZBB-NEXT: srli a6, a0, 1
-; RV32ZBB-NEXT: srl a3, a6, a3
-; RV32ZBB-NEXT: or a1, a1, a3
+; RV32ZBB-NEXT: sll a3, a1, a2
+; RV32ZBB-NEXT: xori a6, a2, 31
+; RV32ZBB-NEXT: srli a7, a0, 1
+; RV32ZBB-NEXT: srl a6, a7, a6
+; RV32ZBB-NEXT: or a3, a3, a6
; RV32ZBB-NEXT: .LBB2_3:
-; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: li a3, 32
-; RV32ZBB-NEXT: sub a7, a3, a2
-; RV32ZBB-NEXT: srl a3, a4, a6
-; RV32ZBB-NEXT: bltz a7, .LBB2_6
+; RV32ZBB-NEXT: and a4, a5, a4
+; RV32ZBB-NEXT: neg a7, a2
+; RV32ZBB-NEXT: li a5, 32
+; RV32ZBB-NEXT: sub a6, a5, a2
+; RV32ZBB-NEXT: srl a5, a1, a7
+; RV32ZBB-NEXT: bltz a6, .LBB2_5
; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: bltz a5, .LBB2_7
+; RV32ZBB-NEXT: mv a0, a5
+; RV32ZBB-NEXT: j .LBB2_6
; RV32ZBB-NEXT: .LBB2_5:
-; RV32ZBB-NEXT: mv a0, a3
-; RV32ZBB-NEXT: ret
-; RV32ZBB-NEXT: .LBB2_6:
-; RV32ZBB-NEXT: srl a6, a0, a6
+; RV32ZBB-NEXT: srl a0, a0, a7
; RV32ZBB-NEXT: li a7, 64
-; RV32ZBB-NEXT: sub a7, a7, a2
-; RV32ZBB-NEXT: xori a7, a7, 31
-; RV32ZBB-NEXT: slli a4, a4, 1
-; RV32ZBB-NEXT: sll a4, a4, a7
-; RV32ZBB-NEXT: or a4, a6, a4
-; RV32ZBB-NEXT: or a1, a1, a3
-; RV32ZBB-NEXT: mv a3, a4
-; RV32ZBB-NEXT: bgez a5, .LBB2_5
-; RV32ZBB-NEXT: .LBB2_7:
-; RV32ZBB-NEXT: sll a0, a0, a2
-; RV32ZBB-NEXT: or a3, a3, a0
-; RV32ZBB-NEXT: mv a0, a3
+; RV32ZBB-NEXT: sub a2, a7, a2
+; RV32ZBB-NEXT: xori a2, a2, 31
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: sll a1, a1, a2
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: .LBB2_6:
+; RV32ZBB-NEXT: slti a1, a6, 0
+; RV32ZBB-NEXT: neg a1, a1
+; RV32ZBB-NEXT: and a1, a1, a5
+; RV32ZBB-NEXT: or a1, a3, a1
+; RV32ZBB-NEXT: or a0, a4, a0
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotl_64:
define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: rotr_64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi a5, a2, -32
-; RV32I-NEXT: mv a4, a0
-; RV32I-NEXT: bltz a5, .LBB3_2
+; RV32I-NEXT: srl a4, a1, a2
+; RV32I-NEXT: addi a3, a2, -32
+; RV32I-NEXT: slti a5, a3, 0
+; RV32I-NEXT: neg a5, a5
+; RV32I-NEXT: bltz a3, .LBB3_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srl a0, a1, a5
+; RV32I-NEXT: srl a3, a1, a3
; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_2:
-; RV32I-NEXT: srl a0, a4, a2
-; RV32I-NEXT: xori a3, a2, 31
-; RV32I-NEXT: slli a6, a1, 1
-; RV32I-NEXT: sll a3, a6, a3
-; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: srl a3, a0, a2
+; RV32I-NEXT: xori a6, a2, 31
+; RV32I-NEXT: slli a7, a1, 1
+; RV32I-NEXT: sll a6, a7, a6
+; RV32I-NEXT: or a3, a3, a6
; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: li a3, 32
-; RV32I-NEXT: sub a7, a3, a2
-; RV32I-NEXT: sll a3, a4, a6
-; RV32I-NEXT: bltz a7, .LBB3_6
+; RV32I-NEXT: and a4, a5, a4
+; RV32I-NEXT: neg a7, a2
+; RV32I-NEXT: li a5, 32
+; RV32I-NEXT: sub a6, a5, a2
+; RV32I-NEXT: sll a5, a0, a7
+; RV32I-NEXT: bltz a6, .LBB3_5
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: bltz a5, .LBB3_7
+; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: j .LBB3_6
; RV32I-NEXT: .LBB3_5:
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB3_6:
-; RV32I-NEXT: sll a6, a1, a6
+; RV32I-NEXT: sll a1, a1, a7
; RV32I-NEXT: li a7, 64
-; RV32I-NEXT: sub a7, a7, a2
-; RV32I-NEXT: xori a7, a7, 31
-; RV32I-NEXT: srli a4, a4, 1
-; RV32I-NEXT: srl a4, a4, a7
-; RV32I-NEXT: or a4, a6, a4
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: mv a3, a4
-; RV32I-NEXT: bgez a5, .LBB3_5
-; RV32I-NEXT: .LBB3_7:
-; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: or a3, a3, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: sub a2, a7, a2
+; RV32I-NEXT: xori a2, a2, 31
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: or a1, a1, a0
+; RV32I-NEXT: .LBB3_6:
+; RV32I-NEXT: slti a0, a6, 0
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: and a0, a0, a5
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: or a1, a4, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotr_64:
;
; RV32ZBB-LABEL: rotr_64:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: addi a5, a2, -32
-; RV32ZBB-NEXT: mv a4, a0
-; RV32ZBB-NEXT: bltz a5, .LBB3_2
+; RV32ZBB-NEXT: srl a4, a1, a2
+; RV32ZBB-NEXT: addi a3, a2, -32
+; RV32ZBB-NEXT: slti a5, a3, 0
+; RV32ZBB-NEXT: neg a5, a5
+; RV32ZBB-NEXT: bltz a3, .LBB3_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: srl a0, a1, a5
+; RV32ZBB-NEXT: srl a3, a1, a3
; RV32ZBB-NEXT: j .LBB3_3
; RV32ZBB-NEXT: .LBB3_2:
-; RV32ZBB-NEXT: srl a0, a4, a2
-; RV32ZBB-NEXT: xori a3, a2, 31
-; RV32ZBB-NEXT: slli a6, a1, 1
-; RV32ZBB-NEXT: sll a3, a6, a3
-; RV32ZBB-NEXT: or a0, a0, a3
+; RV32ZBB-NEXT: srl a3, a0, a2
+; RV32ZBB-NEXT: xori a6, a2, 31
+; RV32ZBB-NEXT: slli a7, a1, 1
+; RV32ZBB-NEXT: sll a6, a7, a6
+; RV32ZBB-NEXT: or a3, a3, a6
; RV32ZBB-NEXT: .LBB3_3:
-; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: li a3, 32
-; RV32ZBB-NEXT: sub a7, a3, a2
-; RV32ZBB-NEXT: sll a3, a4, a6
-; RV32ZBB-NEXT: bltz a7, .LBB3_6
+; RV32ZBB-NEXT: and a4, a5, a4
+; RV32ZBB-NEXT: neg a7, a2
+; RV32ZBB-NEXT: li a5, 32
+; RV32ZBB-NEXT: sub a6, a5, a2
+; RV32ZBB-NEXT: sll a5, a0, a7
+; RV32ZBB-NEXT: bltz a6, .LBB3_5
; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: bltz a5, .LBB3_7
+; RV32ZBB-NEXT: mv a1, a5
+; RV32ZBB-NEXT: j .LBB3_6
; RV32ZBB-NEXT: .LBB3_5:
-; RV32ZBB-NEXT: mv a1, a3
-; RV32ZBB-NEXT: ret
-; RV32ZBB-NEXT: .LBB3_6:
-; RV32ZBB-NEXT: sll a6, a1, a6
+; RV32ZBB-NEXT: sll a1, a1, a7
; RV32ZBB-NEXT: li a7, 64
-; RV32ZBB-NEXT: sub a7, a7, a2
-; RV32ZBB-NEXT: xori a7, a7, 31
-; RV32ZBB-NEXT: srli a4, a4, 1
-; RV32ZBB-NEXT: srl a4, a4, a7
-; RV32ZBB-NEXT: or a4, a6, a4
-; RV32ZBB-NEXT: or a0, a0, a3
-; RV32ZBB-NEXT: mv a3, a4
-; RV32ZBB-NEXT: bgez a5, .LBB3_5
-; RV32ZBB-NEXT: .LBB3_7:
-; RV32ZBB-NEXT: srl a1, a1, a2
-; RV32ZBB-NEXT: or a3, a3, a1
-; RV32ZBB-NEXT: mv a1, a3
+; RV32ZBB-NEXT: sub a2, a7, a2
+; RV32ZBB-NEXT: xori a2, a2, 31
+; RV32ZBB-NEXT: srli a0, a0, 1
+; RV32ZBB-NEXT: srl a0, a0, a2
+; RV32ZBB-NEXT: or a1, a1, a0
+; RV32ZBB-NEXT: .LBB3_6:
+; RV32ZBB-NEXT: slti a0, a6, 0
+; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: and a0, a0, a5
+; RV32ZBB-NEXT: or a0, a3, a0
+; RV32ZBB-NEXT: or a1, a4, a1
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotr_64:
define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: rotl_64_mask:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi a5, a2, -32
-; RV32I-NEXT: mv a3, a1
-; RV32I-NEXT: bltz a5, .LBB10_2
+; RV32I-NEXT: addi a4, a2, -32
+; RV32I-NEXT: bltz a4, .LBB10_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll a1, a0, a5
+; RV32I-NEXT: sll a3, a0, a4
; RV32I-NEXT: j .LBB10_3
; RV32I-NEXT: .LBB10_2:
-; RV32I-NEXT: sll a1, a3, a2
-; RV32I-NEXT: xori a4, a2, 31
+; RV32I-NEXT: sll a3, a1, a2
+; RV32I-NEXT: xori a5, a2, 31
; RV32I-NEXT: srli a6, a0, 1
-; RV32I-NEXT: srl a4, a6, a4
-; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: srl a5, a6, a5
+; RV32I-NEXT: or a3, a3, a5
; RV32I-NEXT: .LBB10_3:
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: andi a4, a6, 63
-; RV32I-NEXT: addi a7, a4, -32
+; RV32I-NEXT: sll a5, a0, a2
+; RV32I-NEXT: slti a4, a4, 0
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: neg a5, a2
+; RV32I-NEXT: srl a2, a1, a5
+; RV32I-NEXT: andi a6, a5, 63
+; RV32I-NEXT: addi a7, a6, -32
+; RV32I-NEXT: slti t0, a7, 0
+; RV32I-NEXT: neg t0, t0
+; RV32I-NEXT: and a2, t0, a2
; RV32I-NEXT: bltz a7, .LBB10_5
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: srl a4, a3, a7
-; RV32I-NEXT: bltz a5, .LBB10_6
-; RV32I-NEXT: j .LBB10_7
+; RV32I-NEXT: srl a0, a1, a7
+; RV32I-NEXT: j .LBB10_6
; RV32I-NEXT: .LBB10_5:
-; RV32I-NEXT: srl a7, a0, a6
-; RV32I-NEXT: xori a4, a4, 31
-; RV32I-NEXT: slli t0, a3, 1
-; RV32I-NEXT: sll a4, t0, a4
-; RV32I-NEXT: or a4, a7, a4
-; RV32I-NEXT: srl a3, a3, a6
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: bgez a5, .LBB10_7
+; RV32I-NEXT: srl a0, a0, a5
+; RV32I-NEXT: xori a5, a6, 31
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: sll a1, a1, a5
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: .LBB10_6:
-; RV32I-NEXT: sll a0, a0, a2
-; RV32I-NEXT: or a4, a4, a0
-; RV32I-NEXT: .LBB10_7:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: or a0, a4, a0
+; RV32I-NEXT: or a1, a3, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotl_64_mask:
;
; RV32ZBB-LABEL: rotl_64_mask:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: addi a5, a2, -32
-; RV32ZBB-NEXT: mv a3, a1
-; RV32ZBB-NEXT: bltz a5, .LBB10_2
+; RV32ZBB-NEXT: addi a4, a2, -32
+; RV32ZBB-NEXT: bltz a4, .LBB10_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sll a1, a0, a5
+; RV32ZBB-NEXT: sll a3, a0, a4
; RV32ZBB-NEXT: j .LBB10_3
; RV32ZBB-NEXT: .LBB10_2:
-; RV32ZBB-NEXT: sll a1, a3, a2
-; RV32ZBB-NEXT: xori a4, a2, 31
+; RV32ZBB-NEXT: sll a3, a1, a2
+; RV32ZBB-NEXT: xori a5, a2, 31
; RV32ZBB-NEXT: srli a6, a0, 1
-; RV32ZBB-NEXT: srl a4, a6, a4
-; RV32ZBB-NEXT: or a1, a1, a4
+; RV32ZBB-NEXT: srl a5, a6, a5
+; RV32ZBB-NEXT: or a3, a3, a5
; RV32ZBB-NEXT: .LBB10_3:
-; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: andi a4, a6, 63
-; RV32ZBB-NEXT: addi a7, a4, -32
+; RV32ZBB-NEXT: sll a5, a0, a2
+; RV32ZBB-NEXT: slti a4, a4, 0
+; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: and a4, a4, a5
+; RV32ZBB-NEXT: neg a5, a2
+; RV32ZBB-NEXT: srl a2, a1, a5
+; RV32ZBB-NEXT: andi a6, a5, 63
+; RV32ZBB-NEXT: addi a7, a6, -32
+; RV32ZBB-NEXT: slti t0, a7, 0
+; RV32ZBB-NEXT: neg t0, t0
+; RV32ZBB-NEXT: and a2, t0, a2
; RV32ZBB-NEXT: bltz a7, .LBB10_5
; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: srl a4, a3, a7
-; RV32ZBB-NEXT: bltz a5, .LBB10_6
-; RV32ZBB-NEXT: j .LBB10_7
+; RV32ZBB-NEXT: srl a0, a1, a7
+; RV32ZBB-NEXT: j .LBB10_6
; RV32ZBB-NEXT: .LBB10_5:
-; RV32ZBB-NEXT: srl a7, a0, a6
-; RV32ZBB-NEXT: xori a4, a4, 31
-; RV32ZBB-NEXT: slli t0, a3, 1
-; RV32ZBB-NEXT: sll a4, t0, a4
-; RV32ZBB-NEXT: or a4, a7, a4
-; RV32ZBB-NEXT: srl a3, a3, a6
-; RV32ZBB-NEXT: or a1, a1, a3
-; RV32ZBB-NEXT: bgez a5, .LBB10_7
+; RV32ZBB-NEXT: srl a0, a0, a5
+; RV32ZBB-NEXT: xori a5, a6, 31
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: sll a1, a1, a5
+; RV32ZBB-NEXT: or a0, a0, a1
; RV32ZBB-NEXT: .LBB10_6:
-; RV32ZBB-NEXT: sll a0, a0, a2
-; RV32ZBB-NEXT: or a4, a4, a0
-; RV32ZBB-NEXT: .LBB10_7:
-; RV32ZBB-NEXT: mv a0, a4
+; RV32ZBB-NEXT: or a0, a4, a0
+; RV32ZBB-NEXT: or a1, a3, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotl_64_mask:
define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: rotl_64_mask_and_127_and_63:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a4, a2, 127
-; RV32I-NEXT: addi a5, a4, -32
-; RV32I-NEXT: mv a3, a1
-; RV32I-NEXT: bltz a5, .LBB11_2
+; RV32I-NEXT: andi a3, a2, 127
+; RV32I-NEXT: addi a4, a3, -32
+; RV32I-NEXT: bltz a4, .LBB11_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll a1, a0, a5
+; RV32I-NEXT: sll a3, a0, a4
; RV32I-NEXT: j .LBB11_3
; RV32I-NEXT: .LBB11_2:
-; RV32I-NEXT: sll a1, a3, a2
+; RV32I-NEXT: sll a5, a1, a2
; RV32I-NEXT: srli a6, a0, 1
-; RV32I-NEXT: xori a4, a4, 31
-; RV32I-NEXT: srl a4, a6, a4
-; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: srl a3, a6, a3
+; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: .LBB11_3:
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: andi a4, a6, 63
-; RV32I-NEXT: addi a7, a4, -32
+; RV32I-NEXT: sll a5, a0, a2
+; RV32I-NEXT: slti a4, a4, 0
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: neg a5, a2
+; RV32I-NEXT: srl a2, a1, a5
+; RV32I-NEXT: andi a6, a5, 63
+; RV32I-NEXT: addi a7, a6, -32
+; RV32I-NEXT: slti t0, a7, 0
+; RV32I-NEXT: neg t0, t0
+; RV32I-NEXT: and a2, t0, a2
; RV32I-NEXT: bltz a7, .LBB11_5
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: srl a4, a3, a7
-; RV32I-NEXT: bltz a5, .LBB11_6
-; RV32I-NEXT: j .LBB11_7
+; RV32I-NEXT: srl a0, a1, a7
+; RV32I-NEXT: j .LBB11_6
; RV32I-NEXT: .LBB11_5:
-; RV32I-NEXT: srl a7, a0, a6
-; RV32I-NEXT: xori a4, a4, 31
-; RV32I-NEXT: slli t0, a3, 1
-; RV32I-NEXT: sll a4, t0, a4
-; RV32I-NEXT: or a4, a7, a4
-; RV32I-NEXT: srl a3, a3, a6
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: bgez a5, .LBB11_7
+; RV32I-NEXT: srl a0, a0, a5
+; RV32I-NEXT: xori a5, a6, 31
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: sll a1, a1, a5
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: .LBB11_6:
-; RV32I-NEXT: sll a0, a0, a2
-; RV32I-NEXT: or a4, a4, a0
-; RV32I-NEXT: .LBB11_7:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: or a0, a4, a0
+; RV32I-NEXT: or a1, a3, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotl_64_mask_and_127_and_63:
;
; RV32ZBB-LABEL: rotl_64_mask_and_127_and_63:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: andi a4, a2, 127
-; RV32ZBB-NEXT: addi a5, a4, -32
-; RV32ZBB-NEXT: mv a3, a1
-; RV32ZBB-NEXT: bltz a5, .LBB11_2
+; RV32ZBB-NEXT: andi a3, a2, 127
+; RV32ZBB-NEXT: addi a4, a3, -32
+; RV32ZBB-NEXT: bltz a4, .LBB11_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sll a1, a0, a5
+; RV32ZBB-NEXT: sll a3, a0, a4
; RV32ZBB-NEXT: j .LBB11_3
; RV32ZBB-NEXT: .LBB11_2:
-; RV32ZBB-NEXT: sll a1, a3, a2
+; RV32ZBB-NEXT: sll a5, a1, a2
; RV32ZBB-NEXT: srli a6, a0, 1
-; RV32ZBB-NEXT: xori a4, a4, 31
-; RV32ZBB-NEXT: srl a4, a6, a4
-; RV32ZBB-NEXT: or a1, a1, a4
+; RV32ZBB-NEXT: xori a3, a3, 31
+; RV32ZBB-NEXT: srl a3, a6, a3
+; RV32ZBB-NEXT: or a3, a5, a3
; RV32ZBB-NEXT: .LBB11_3:
-; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: andi a4, a6, 63
-; RV32ZBB-NEXT: addi a7, a4, -32
+; RV32ZBB-NEXT: sll a5, a0, a2
+; RV32ZBB-NEXT: slti a4, a4, 0
+; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: and a4, a4, a5
+; RV32ZBB-NEXT: neg a5, a2
+; RV32ZBB-NEXT: srl a2, a1, a5
+; RV32ZBB-NEXT: andi a6, a5, 63
+; RV32ZBB-NEXT: addi a7, a6, -32
+; RV32ZBB-NEXT: slti t0, a7, 0
+; RV32ZBB-NEXT: neg t0, t0
+; RV32ZBB-NEXT: and a2, t0, a2
; RV32ZBB-NEXT: bltz a7, .LBB11_5
; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: srl a4, a3, a7
-; RV32ZBB-NEXT: bltz a5, .LBB11_6
-; RV32ZBB-NEXT: j .LBB11_7
+; RV32ZBB-NEXT: srl a0, a1, a7
+; RV32ZBB-NEXT: j .LBB11_6
; RV32ZBB-NEXT: .LBB11_5:
-; RV32ZBB-NEXT: srl a7, a0, a6
-; RV32ZBB-NEXT: xori a4, a4, 31
-; RV32ZBB-NEXT: slli t0, a3, 1
-; RV32ZBB-NEXT: sll a4, t0, a4
-; RV32ZBB-NEXT: or a4, a7, a4
-; RV32ZBB-NEXT: srl a3, a3, a6
-; RV32ZBB-NEXT: or a1, a1, a3
-; RV32ZBB-NEXT: bgez a5, .LBB11_7
+; RV32ZBB-NEXT: srl a0, a0, a5
+; RV32ZBB-NEXT: xori a5, a6, 31
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: sll a1, a1, a5
+; RV32ZBB-NEXT: or a0, a0, a1
; RV32ZBB-NEXT: .LBB11_6:
-; RV32ZBB-NEXT: sll a0, a0, a2
-; RV32ZBB-NEXT: or a4, a4, a0
-; RV32ZBB-NEXT: .LBB11_7:
-; RV32ZBB-NEXT: mv a0, a4
+; RV32ZBB-NEXT: or a0, a4, a0
+; RV32ZBB-NEXT: or a1, a3, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotl_64_mask_and_127_and_63:
define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: rotr_64_mask:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi a5, a2, -32
-; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: bltz a5, .LBB13_2
+; RV32I-NEXT: srl a4, a1, a2
+; RV32I-NEXT: addi a3, a2, -32
+; RV32I-NEXT: slti a5, a3, 0
+; RV32I-NEXT: neg a5, a5
+; RV32I-NEXT: bltz a3, .LBB13_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srl a0, a1, a5
+; RV32I-NEXT: srl a3, a1, a3
; RV32I-NEXT: j .LBB13_3
; RV32I-NEXT: .LBB13_2:
-; RV32I-NEXT: srl a0, a3, a2
-; RV32I-NEXT: xori a4, a2, 31
-; RV32I-NEXT: slli a6, a1, 1
-; RV32I-NEXT: sll a4, a6, a4
-; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: srl a3, a0, a2
+; RV32I-NEXT: xori a6, a2, 31
+; RV32I-NEXT: slli a7, a1, 1
+; RV32I-NEXT: sll a6, a7, a6
+; RV32I-NEXT: or a3, a3, a6
; RV32I-NEXT: .LBB13_3:
; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: andi a4, a6, 63
-; RV32I-NEXT: addi a7, a4, -32
+; RV32I-NEXT: andi t0, a6, 63
+; RV32I-NEXT: addi a7, t0, -32
+; RV32I-NEXT: and a2, a5, a4
; RV32I-NEXT: bltz a7, .LBB13_5
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: sll a4, a3, a7
-; RV32I-NEXT: bltz a5, .LBB13_6
-; RV32I-NEXT: j .LBB13_7
+; RV32I-NEXT: sll a1, a0, a7
+; RV32I-NEXT: j .LBB13_6
; RV32I-NEXT: .LBB13_5:
-; RV32I-NEXT: sll a7, a1, a6
-; RV32I-NEXT: xori a4, a4, 31
-; RV32I-NEXT: srli t0, a3, 1
-; RV32I-NEXT: srl a4, t0, a4
-; RV32I-NEXT: or a4, a7, a4
-; RV32I-NEXT: sll a3, a3, a6
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: bgez a5, .LBB13_7
+; RV32I-NEXT: sll a1, a1, a6
+; RV32I-NEXT: xori a4, t0, 31
+; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: srl a4, a5, a4
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: .LBB13_6:
-; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: or a4, a4, a1
-; RV32I-NEXT: .LBB13_7:
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: sll a0, a0, a6
+; RV32I-NEXT: slti a4, a7, 0
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: and a0, a4, a0
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotr_64_mask:
;
; RV32ZBB-LABEL: rotr_64_mask:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: addi a5, a2, -32
-; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: bltz a5, .LBB13_2
+; RV32ZBB-NEXT: srl a4, a1, a2
+; RV32ZBB-NEXT: addi a3, a2, -32
+; RV32ZBB-NEXT: slti a5, a3, 0
+; RV32ZBB-NEXT: neg a5, a5
+; RV32ZBB-NEXT: bltz a3, .LBB13_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: srl a0, a1, a5
+; RV32ZBB-NEXT: srl a3, a1, a3
; RV32ZBB-NEXT: j .LBB13_3
; RV32ZBB-NEXT: .LBB13_2:
-; RV32ZBB-NEXT: srl a0, a3, a2
-; RV32ZBB-NEXT: xori a4, a2, 31
-; RV32ZBB-NEXT: slli a6, a1, 1
-; RV32ZBB-NEXT: sll a4, a6, a4
-; RV32ZBB-NEXT: or a0, a0, a4
+; RV32ZBB-NEXT: srl a3, a0, a2
+; RV32ZBB-NEXT: xori a6, a2, 31
+; RV32ZBB-NEXT: slli a7, a1, 1
+; RV32ZBB-NEXT: sll a6, a7, a6
+; RV32ZBB-NEXT: or a3, a3, a6
; RV32ZBB-NEXT: .LBB13_3:
; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: andi a4, a6, 63
-; RV32ZBB-NEXT: addi a7, a4, -32
+; RV32ZBB-NEXT: andi t0, a6, 63
+; RV32ZBB-NEXT: addi a7, t0, -32
+; RV32ZBB-NEXT: and a2, a5, a4
; RV32ZBB-NEXT: bltz a7, .LBB13_5
; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: sll a4, a3, a7
-; RV32ZBB-NEXT: bltz a5, .LBB13_6
-; RV32ZBB-NEXT: j .LBB13_7
+; RV32ZBB-NEXT: sll a1, a0, a7
+; RV32ZBB-NEXT: j .LBB13_6
; RV32ZBB-NEXT: .LBB13_5:
-; RV32ZBB-NEXT: sll a7, a1, a6
-; RV32ZBB-NEXT: xori a4, a4, 31
-; RV32ZBB-NEXT: srli t0, a3, 1
-; RV32ZBB-NEXT: srl a4, t0, a4
-; RV32ZBB-NEXT: or a4, a7, a4
-; RV32ZBB-NEXT: sll a3, a3, a6
-; RV32ZBB-NEXT: or a0, a0, a3
-; RV32ZBB-NEXT: bgez a5, .LBB13_7
+; RV32ZBB-NEXT: sll a1, a1, a6
+; RV32ZBB-NEXT: xori a4, t0, 31
+; RV32ZBB-NEXT: srli a5, a0, 1
+; RV32ZBB-NEXT: srl a4, a5, a4
+; RV32ZBB-NEXT: or a1, a1, a4
; RV32ZBB-NEXT: .LBB13_6:
-; RV32ZBB-NEXT: srl a1, a1, a2
-; RV32ZBB-NEXT: or a4, a4, a1
-; RV32ZBB-NEXT: .LBB13_7:
-; RV32ZBB-NEXT: mv a1, a4
+; RV32ZBB-NEXT: sll a0, a0, a6
+; RV32ZBB-NEXT: slti a4, a7, 0
+; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: and a0, a4, a0
+; RV32ZBB-NEXT: or a0, a3, a0
+; RV32ZBB-NEXT: or a1, a2, a1
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotr_64_mask:
define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: rotr_64_mask_and_127_and_63:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a4, a2, 127
-; RV32I-NEXT: addi a5, a4, -32
-; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: bltz a5, .LBB14_2
+; RV32I-NEXT: srl a4, a1, a2
+; RV32I-NEXT: andi a3, a2, 127
+; RV32I-NEXT: addi a6, a3, -32
+; RV32I-NEXT: slti a5, a6, 0
+; RV32I-NEXT: neg a5, a5
+; RV32I-NEXT: bltz a6, .LBB14_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srl a0, a1, a5
+; RV32I-NEXT: srl a3, a1, a6
; RV32I-NEXT: j .LBB14_3
; RV32I-NEXT: .LBB14_2:
-; RV32I-NEXT: srl a0, a3, a2
-; RV32I-NEXT: slli a6, a1, 1
-; RV32I-NEXT: xori a4, a4, 31
-; RV32I-NEXT: sll a4, a6, a4
-; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: srl a6, a0, a2
+; RV32I-NEXT: slli a7, a1, 1
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: sll a3, a7, a3
+; RV32I-NEXT: or a3, a6, a3
; RV32I-NEXT: .LBB14_3:
; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: andi a4, a6, 63
-; RV32I-NEXT: addi a7, a4, -32
+; RV32I-NEXT: andi t0, a6, 63
+; RV32I-NEXT: addi a7, t0, -32
+; RV32I-NEXT: and a2, a5, a4
; RV32I-NEXT: bltz a7, .LBB14_5
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: sll a4, a3, a7
-; RV32I-NEXT: bltz a5, .LBB14_6
-; RV32I-NEXT: j .LBB14_7
+; RV32I-NEXT: sll a1, a0, a7
+; RV32I-NEXT: j .LBB14_6
; RV32I-NEXT: .LBB14_5:
-; RV32I-NEXT: sll a7, a1, a6
-; RV32I-NEXT: xori a4, a4, 31
-; RV32I-NEXT: srli t0, a3, 1
-; RV32I-NEXT: srl a4, t0, a4
-; RV32I-NEXT: or a4, a7, a4
-; RV32I-NEXT: sll a3, a3, a6
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: bgez a5, .LBB14_7
+; RV32I-NEXT: sll a1, a1, a6
+; RV32I-NEXT: xori a4, t0, 31
+; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: srl a4, a5, a4
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: .LBB14_6:
-; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: or a4, a4, a1
-; RV32I-NEXT: .LBB14_7:
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: sll a0, a0, a6
+; RV32I-NEXT: slti a4, a7, 0
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: and a0, a4, a0
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotr_64_mask_and_127_and_63:
;
; RV32ZBB-LABEL: rotr_64_mask_and_127_and_63:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: andi a4, a2, 127
-; RV32ZBB-NEXT: addi a5, a4, -32
-; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: bltz a5, .LBB14_2
+; RV32ZBB-NEXT: srl a4, a1, a2
+; RV32ZBB-NEXT: andi a3, a2, 127
+; RV32ZBB-NEXT: addi a6, a3, -32
+; RV32ZBB-NEXT: slti a5, a6, 0
+; RV32ZBB-NEXT: neg a5, a5
+; RV32ZBB-NEXT: bltz a6, .LBB14_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: srl a0, a1, a5
+; RV32ZBB-NEXT: srl a3, a1, a6
; RV32ZBB-NEXT: j .LBB14_3
; RV32ZBB-NEXT: .LBB14_2:
-; RV32ZBB-NEXT: srl a0, a3, a2
-; RV32ZBB-NEXT: slli a6, a1, 1
-; RV32ZBB-NEXT: xori a4, a4, 31
-; RV32ZBB-NEXT: sll a4, a6, a4
-; RV32ZBB-NEXT: or a0, a0, a4
+; RV32ZBB-NEXT: srl a6, a0, a2
+; RV32ZBB-NEXT: slli a7, a1, 1
+; RV32ZBB-NEXT: xori a3, a3, 31
+; RV32ZBB-NEXT: sll a3, a7, a3
+; RV32ZBB-NEXT: or a3, a6, a3
; RV32ZBB-NEXT: .LBB14_3:
; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: andi a4, a6, 63
-; RV32ZBB-NEXT: addi a7, a4, -32
+; RV32ZBB-NEXT: andi t0, a6, 63
+; RV32ZBB-NEXT: addi a7, t0, -32
+; RV32ZBB-NEXT: and a2, a5, a4
; RV32ZBB-NEXT: bltz a7, .LBB14_5
; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: sll a4, a3, a7
-; RV32ZBB-NEXT: bltz a5, .LBB14_6
-; RV32ZBB-NEXT: j .LBB14_7
+; RV32ZBB-NEXT: sll a1, a0, a7
+; RV32ZBB-NEXT: j .LBB14_6
; RV32ZBB-NEXT: .LBB14_5:
-; RV32ZBB-NEXT: sll a7, a1, a6
-; RV32ZBB-NEXT: xori a4, a4, 31
-; RV32ZBB-NEXT: srli t0, a3, 1
-; RV32ZBB-NEXT: srl a4, t0, a4
-; RV32ZBB-NEXT: or a4, a7, a4
-; RV32ZBB-NEXT: sll a3, a3, a6
-; RV32ZBB-NEXT: or a0, a0, a3
-; RV32ZBB-NEXT: bgez a5, .LBB14_7
+; RV32ZBB-NEXT: sll a1, a1, a6
+; RV32ZBB-NEXT: xori a4, t0, 31
+; RV32ZBB-NEXT: srli a5, a0, 1
+; RV32ZBB-NEXT: srl a4, a5, a4
+; RV32ZBB-NEXT: or a1, a1, a4
; RV32ZBB-NEXT: .LBB14_6:
-; RV32ZBB-NEXT: srl a1, a1, a2
-; RV32ZBB-NEXT: or a4, a4, a1
-; RV32ZBB-NEXT: .LBB14_7:
-; RV32ZBB-NEXT: mv a1, a4
+; RV32ZBB-NEXT: sll a0, a0, a6
+; RV32ZBB-NEXT: slti a4, a7, 0
+; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: and a0, a4, a0
+; RV32ZBB-NEXT: or a0, a3, a0
+; RV32ZBB-NEXT: or a1, a2, a1
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotr_64_mask_and_127_and_63:
; RV32I-NEXT: bltz a0, .LBB17_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: sll a3, a2, a0
-; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: j .LBB17_7
; RV32I-NEXT: .LBB17_6:
-; RV32I-NEXT: sll a0, a3, a4
-; RV32I-NEXT: srli a3, a2, 1
+; RV32I-NEXT: sll a3, a3, a4
+; RV32I-NEXT: srli a7, a2, 1
; RV32I-NEXT: xori a6, a6, 31
-; RV32I-NEXT: srl a3, a3, a6
-; RV32I-NEXT: or a3, a0, a3
-; RV32I-NEXT: sll a0, a2, a4
-; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srl a6, a7, a6
+; RV32I-NEXT: or a3, a3, a6
; RV32I-NEXT: .LBB17_7:
+; RV32I-NEXT: sll a2, a2, a4
+; RV32I-NEXT: slti a0, a0, 0
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: sltu a1, a0, a1
; RV32I-NEXT: add a2, a5, a3
; RV32I-NEXT: add a1, a2, a1
; RV32ZBB-NEXT: bltz a0, .LBB17_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: sll a3, a2, a0
-; RV32ZBB-NEXT: mv a0, a1
; RV32ZBB-NEXT: j .LBB17_7
; RV32ZBB-NEXT: .LBB17_6:
-; RV32ZBB-NEXT: sll a0, a3, a4
-; RV32ZBB-NEXT: srli a3, a2, 1
+; RV32ZBB-NEXT: sll a3, a3, a4
+; RV32ZBB-NEXT: srli a7, a2, 1
; RV32ZBB-NEXT: xori a6, a6, 31
-; RV32ZBB-NEXT: srl a3, a3, a6
-; RV32ZBB-NEXT: or a3, a0, a3
-; RV32ZBB-NEXT: sll a0, a2, a4
-; RV32ZBB-NEXT: add a0, a1, a0
+; RV32ZBB-NEXT: srl a6, a7, a6
+; RV32ZBB-NEXT: or a3, a3, a6
; RV32ZBB-NEXT: .LBB17_7:
+; RV32ZBB-NEXT: sll a2, a2, a4
+; RV32ZBB-NEXT: slti a0, a0, 0
+; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: add a0, a1, a0
; RV32ZBB-NEXT: sltu a1, a0, a1
; RV32ZBB-NEXT: add a2, a5, a3
; RV32ZBB-NEXT: add a1, a2, a1
; RV32I-NEXT: bltz a0, .LBB19_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: sll a3, a2, a0
-; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: j .LBB19_7
; RV32I-NEXT: .LBB19_6:
-; RV32I-NEXT: sll a0, a3, a4
-; RV32I-NEXT: srli a3, a2, 1
+; RV32I-NEXT: sll a3, a3, a4
+; RV32I-NEXT: srli a7, a2, 1
; RV32I-NEXT: xori a5, a5, 31
-; RV32I-NEXT: srl a3, a3, a5
-; RV32I-NEXT: or a3, a0, a3
-; RV32I-NEXT: sll a0, a2, a4
-; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: srl a5, a7, a5
+; RV32I-NEXT: or a3, a3, a5
; RV32I-NEXT: .LBB19_7:
+; RV32I-NEXT: sll a2, a2, a4
+; RV32I-NEXT: slti a0, a0, 0
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: add a0, a6, a0
; RV32I-NEXT: sltu a2, a0, a6
; RV32I-NEXT: add a1, a1, a3
; RV32I-NEXT: add a1, a1, a2
; RV32ZBB-NEXT: bltz a0, .LBB19_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: sll a3, a2, a0
-; RV32ZBB-NEXT: mv a0, a6
; RV32ZBB-NEXT: j .LBB19_7
; RV32ZBB-NEXT: .LBB19_6:
-; RV32ZBB-NEXT: sll a0, a3, a4
-; RV32ZBB-NEXT: srli a3, a2, 1
+; RV32ZBB-NEXT: sll a3, a3, a4
+; RV32ZBB-NEXT: srli a7, a2, 1
; RV32ZBB-NEXT: xori a5, a5, 31
-; RV32ZBB-NEXT: srl a3, a3, a5
-; RV32ZBB-NEXT: or a3, a0, a3
-; RV32ZBB-NEXT: sll a0, a2, a4
-; RV32ZBB-NEXT: add a0, a6, a0
+; RV32ZBB-NEXT: srl a5, a7, a5
+; RV32ZBB-NEXT: or a3, a3, a5
; RV32ZBB-NEXT: .LBB19_7:
+; RV32ZBB-NEXT: sll a2, a2, a4
+; RV32ZBB-NEXT: slti a0, a0, 0
+; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: add a0, a6, a0
; RV32ZBB-NEXT: sltu a2, a0, a6
; RV32ZBB-NEXT: add a1, a1, a3
; RV32ZBB-NEXT: add a1, a1, a2
}
define i64 @not_shl_one_i64(i64 %x) {
-; RV32I-LABEL: not_shl_one_i64:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: sll a2, a1, a0
-; RV32I-NEXT: addi a0, a0, -32
-; RV32I-NEXT: sll a1, a1, a0
-; RV32I-NEXT: slti a0, a0, 0
-; RV32I-NEXT: neg a3, a0
-; RV32I-NEXT: not a1, a1
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: not a2, a2
-; RV32I-NEXT: addi a0, a0, -1
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: ret
-;
-; RV32ZBB-ZBKB-LABEL: not_shl_one_i64:
-; RV32ZBB-ZBKB: # %bb.0:
-; RV32ZBB-ZBKB-NEXT: addi a1, a0, -32
-; RV32ZBB-ZBKB-NEXT: li a2, -2
-; RV32ZBB-ZBKB-NEXT: rol a3, a2, a1
-; RV32ZBB-ZBKB-NEXT: slti a4, a1, 0
-; RV32ZBB-ZBKB-NEXT: neg a1, a4
-; RV32ZBB-ZBKB-NEXT: or a1, a1, a3
-; RV32ZBB-ZBKB-NEXT: rol a0, a2, a0
-; RV32ZBB-ZBKB-NEXT: addi a2, a4, -1
-; RV32ZBB-ZBKB-NEXT: or a0, a2, a0
-; RV32ZBB-ZBKB-NEXT: ret
+; CHECK-LABEL: not_shl_one_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: sll a2, a1, a0
+; CHECK-NEXT: addi a0, a0, -32
+; CHECK-NEXT: slti a3, a0, 0
+; CHECK-NEXT: neg a4, a3
+; CHECK-NEXT: and a2, a4, a2
+; CHECK-NEXT: sll a0, a1, a0
+; CHECK-NEXT: addi a1, a3, -1
+; CHECK-NEXT: and a1, a1, a0
+; CHECK-NEXT: not a0, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: ret
%1 = shl i64 1, %x
%2 = xor i64 %1, -1
ret i64 %2
define i64 @bclr_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: bclr_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a3, a2, 63
-; RV32I-NEXT: addi a4, a3, -32
; RV32I-NEXT: li a3, 1
-; RV32I-NEXT: bltz a4, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll a2, a3, a4
-; RV32I-NEXT: not a2, a2
-; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: sll a4, a3, a2
+; RV32I-NEXT: andi a2, a2, 63
+; RV32I-NEXT: addi a2, a2, -32
+; RV32I-NEXT: slti a5, a2, 0
+; RV32I-NEXT: neg a6, a5
+; RV32I-NEXT: and a4, a6, a4
; RV32I-NEXT: sll a2, a3, a2
+; RV32I-NEXT: addi a3, a5, -1
+; RV32I-NEXT: and a2, a3, a2
+; RV32I-NEXT: not a3, a4
; RV32I-NEXT: not a2, a2
-; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: and a0, a3, a0
+; RV32I-NEXT: and a1, a2, a1
; RV32I-NEXT: ret
;
; RV32ZBS-LABEL: bclr_i64:
; RV32ZBS: # %bb.0:
; RV32ZBS-NEXT: andi a3, a2, 63
; RV32ZBS-NEXT: addi a3, a3, -32
-; RV32ZBS-NEXT: bltz a3, .LBB2_2
-; RV32ZBS-NEXT: # %bb.1:
-; RV32ZBS-NEXT: bclr a1, a1, a3
-; RV32ZBS-NEXT: ret
-; RV32ZBS-NEXT: .LBB2_2:
-; RV32ZBS-NEXT: bclr a0, a0, a2
+; RV32ZBS-NEXT: slti a4, a3, 0
+; RV32ZBS-NEXT: neg a5, a4
+; RV32ZBS-NEXT: bset a2, zero, a2
+; RV32ZBS-NEXT: and a2, a5, a2
+; RV32ZBS-NEXT: bset a3, zero, a3
+; RV32ZBS-NEXT: addi a4, a4, -1
+; RV32ZBS-NEXT: and a3, a4, a3
+; RV32ZBS-NEXT: not a3, a3
+; RV32ZBS-NEXT: not a2, a2
+; RV32ZBS-NEXT: and a0, a2, a0
+; RV32ZBS-NEXT: and a1, a3, a1
; RV32ZBS-NEXT: ret
%and = and i64 %b, 63
%shl = shl nuw i64 1, %and
define signext i64 @bset_i64_zero(i64 signext %a) nounwind {
; RV32I-LABEL: bset_i64_zero:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi a1, a0, -32
-; RV32I-NEXT: li a2, 1
-; RV32I-NEXT: bltz a1, .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: sll a1, a2, a1
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB7_2:
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: sll a0, a2, a0
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: sll a2, a1, a0
+; RV32I-NEXT: addi a3, a0, -32
+; RV32I-NEXT: slti a4, a3, 0
+; RV32I-NEXT: neg a0, a4
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: sll a1, a1, a3
+; RV32I-NEXT: addi a2, a4, -1
+; RV32I-NEXT: and a1, a2, a1
; RV32I-NEXT: ret
;
; RV32ZBS-LABEL: bset_i64_zero:
; RV32ZBS: # %bb.0:
; RV32ZBS-NEXT: addi a1, a0, -32
-; RV32ZBS-NEXT: bltz a1, .LBB7_2
-; RV32ZBS-NEXT: # %bb.1:
-; RV32ZBS-NEXT: li a0, 0
-; RV32ZBS-NEXT: bset a1, zero, a1
-; RV32ZBS-NEXT: ret
-; RV32ZBS-NEXT: .LBB7_2:
-; RV32ZBS-NEXT: li a1, 0
+; RV32ZBS-NEXT: slti a2, a1, 0
+; RV32ZBS-NEXT: neg a3, a2
; RV32ZBS-NEXT: bset a0, zero, a0
+; RV32ZBS-NEXT: and a0, a3, a0
+; RV32ZBS-NEXT: bset a1, zero, a1
+; RV32ZBS-NEXT: addi a2, a2, -1
+; RV32ZBS-NEXT: and a1, a2, a1
; RV32ZBS-NEXT: ret
%shl = shl i64 1, %a
ret i64 %shl
define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ffs_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: li s0, 0
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s1, a0
+; RV64I-NEXT: and a0, s0, a0
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: beqz s1, .LBB9_2
+; RV64I-NEXT: beqz s0, .LBB9_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI9_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a1, 0(a0)
; RV64I-NEXT: .LBB9_2:
-; RV64I-NEXT: beqz s1, .LBB9_4
-; RV64I-NEXT: # %bb.3:
-; RV64I-NEXT: addi s0, a1, 1
-; RV64I-NEXT: .LBB9_4:
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: addi a0, a1, 1
+; RV64I-NEXT: seqz a1, s0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ffs_i32:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: mv a1, a0
-; RV64ZBB-NEXT: li a0, 0
-; RV64ZBB-NEXT: beqz a1, .LBB9_2
-; RV64ZBB-NEXT: # %bb.1:
-; RV64ZBB-NEXT: ctzw a0, a1
-; RV64ZBB-NEXT: addi a0, a0, 1
-; RV64ZBB-NEXT: .LBB9_2:
+; RV64ZBB-NEXT: ctzw a1, a0
+; RV64ZBB-NEXT: addi a1, a1, 1
+; RV64ZBB-NEXT: seqz a0, a0
+; RV64ZBB-NEXT: addi a0, a0, -1
+; RV64ZBB-NEXT: and a0, a0, a1
; RV64ZBB-NEXT: ret
%1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
%2 = add i32 %1, 1
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v25, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB32_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vslidedown.vx v2, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a2, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB32_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v8
+; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a2, 3
-; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB33_4:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-LABEL: vfpext_v32f32_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB7_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v8, 16
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfwcvt.f.f.v v16, v24, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB7_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB7_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB7_4:
+; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB7_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB7_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB7_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB7_4:
+; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: addi a2, a0, -64
; CHECK-NEXT: vslidedown.vi v0, v0, 8
-; CHECK-NEXT: bltu a0, a2, .LBB4_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: addi a1, a0, -64
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB4_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB4_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 64
-; CHECK-NEXT: .LBB4_4:
+; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v3, v0, 8
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v2, v0, 4
+; CHECK-NEXT: vslidedown.vi v27, v3, 4
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v27, 2
+; CHECK-NEXT: addi a2, a1, 512
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: addi a3, a1, 640
+; CHECK-NEXT: vle64.v v8, (a3)
; CHECK-NEXT: addi a3, a7, -64
-; CHECK-NEXT: vslidedown.vi v2, v0, 8
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a7, a3, .LBB16_2
+; CHECK-NEXT: sltu a4, a7, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a4, a4, a3
+; CHECK-NEXT: addi a3, a4, -32
+; CHECK-NEXT: sltu a5, a4, a3
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a3, a5, a3
+; CHECK-NEXT: addi a5, a3, -16
+; CHECK-NEXT: sltu a6, a3, a5
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a5, a6, a5
+; CHECK-NEXT: vle64.v v16, (a2)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a6, 40
+; CHECK-NEXT: mul a2, a2, a6
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: addi a5, a1, 128
+; CHECK-NEXT: bltu a3, a2, .LBB16_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: li a3, 16
; CHECK-NEXT: .LBB16_2:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v3, v2, 4
-; CHECK-NEXT: addi a6, a4, -32
-; CHECK-NEXT: addi a3, a1, 640
-; CHECK-NEXT: mv a5, a2
-; CHECK-NEXT: bltu a4, a6, .LBB16_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a5, a6
-; CHECK-NEXT: .LBB16_4:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v3, 2
+; CHECK-NEXT: vslidedown.vi v4, v2, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a3)
-; CHECK-NEXT: addi t0, a5, -16
-; CHECK-NEXT: addi a6, a1, 512
-; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: bltu a5, t0, .LBB16_6
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: mv a3, t0
-; CHECK-NEXT: .LBB16_6:
-; CHECK-NEXT: vle64.v v8, (a6)
+; CHECK-NEXT: vle64.v v8, (a5)
; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma
-; CHECK-NEXT: li a3, 16
+; CHECK-NEXT: li a3, 64
+; CHECK-NEXT: vmv1r.v v0, v27
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: li a6, 40
+; CHECK-NEXT: mul a5, a5, a6
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload
; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: li a6, 48
+; CHECK-NEXT: mul a5, a5, a6
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a7, a3, .LBB16_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: li a7, 64
+; CHECK-NEXT: .LBB16_4:
+; CHECK-NEXT: li a3, 32
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: addi a5, a7, -32
+; CHECK-NEXT: sltu a6, a7, a5
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a5, a6, a5
+; CHECK-NEXT: addi a6, a5, -16
+; CHECK-NEXT: sltu t0, a5, a6
+; CHECK-NEXT: addi t0, t0, -1
+; CHECK-NEXT: and a6, t0, a6
+; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v4
+; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t
; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 4
+; CHECK-NEXT: slli a6, a6, 3
; CHECK-NEXT: add a6, sp, a6
; CHECK-NEXT: addi a6, a6, 16
; CHECK-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a5, a3, .LBB16_8
-; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: bltu a5, a2, .LBB16_6
+; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: li a5, 16
-; CHECK-NEXT: .LBB16_8:
+; CHECK-NEXT: .LBB16_6:
+; CHECK-NEXT: addi a6, a1, 384
+; CHECK-NEXT: addi a1, a1, 256
; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
-; CHECK-NEXT: li a5, 64
-; CHECK-NEXT: vmv1r.v v0, v3
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: li t0, 48
-; CHECK-NEXT: mul a6, a6, t0
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a7, a5, .LBB16_10
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: li t0, 40
+; CHECK-NEXT: mul a5, a5, t0
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a4, a3, .LBB16_8
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: li a4, 32
+; CHECK-NEXT: .LBB16_8:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v4, v3, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a6)
+; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: mv a1, a4
+; CHECK-NEXT: bltu a4, a2, .LBB16_10
; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: li a7, 64
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB16_10:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v3, v1, 4
-; CHECK-NEXT: addi t0, a7, -32
-; CHECK-NEXT: addi a5, a1, 128
-; CHECK-NEXT: mv a6, a2
-; CHECK-NEXT: bltu a7, t0, .LBB16_12
-; CHECK-NEXT: # %bb.11:
-; CHECK-NEXT: mv a6, t0
-; CHECK-NEXT: .LBB16_12:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v3, 2
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a5)
-; CHECK-NEXT: addi a5, a6, -16
-; CHECK-NEXT: mv t0, a2
-; CHECK-NEXT: bltu a6, a5, .LBB16_14
-; CHECK-NEXT: # %bb.13:
-; CHECK-NEXT: mv t0, a5
-; CHECK-NEXT: .LBB16_14:
-; CHECK-NEXT: vle64.v v8, (a1)
-; CHECK-NEXT: li a5, 32
-; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
-; CHECK-NEXT: csrr t0, vlenb
-; CHECK-NEXT: slli t0, t0, 3
-; CHECK-NEXT: add t0, sp, t0
-; CHECK-NEXT: addi t0, t0, 16
-; CHECK-NEXT: vs8r.v v24, (t0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a6, a3, .LBB16_16
-; CHECK-NEXT: # %bb.15:
-; CHECK-NEXT: li a6, 16
-; CHECK-NEXT: .LBB16_16:
-; CHECK-NEXT: addi t0, a1, 384
-; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
+; CHECK-NEXT: vslidedown.vi v2, v1, 2
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v3
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: li t1, 40
-; CHECK-NEXT: mul a6, a6, t1
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a5, .LBB16_18
-; CHECK-NEXT: # %bb.17:
-; CHECK-NEXT: li a4, 32
-; CHECK-NEXT: .LBB16_18:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v2, 2
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (t0)
-; CHECK-NEXT: addi t0, a4, -16
-; CHECK-NEXT: addi a6, a1, 256
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: bltu a4, t0, .LBB16_20
-; CHECK-NEXT: # %bb.19:
-; CHECK-NEXT: mv a1, t0
-; CHECK-NEXT: .LBB16_20:
-; CHECK-NEXT: vle64.v v8, (a6)
+; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t
+; CHECK-NEXT: addi a1, a4, -16
+; CHECK-NEXT: sltu a4, a4, a1
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v4
+; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a3, .LBB16_22
-; CHECK-NEXT: # %bb.21:
-; CHECK-NEXT: li a4, 16
-; CHECK-NEXT: .LBB16_22:
-; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v2
-; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t
-; CHECK-NEXT: bltu a7, a5, .LBB16_24
-; CHECK-NEXT: # %bb.23:
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a7, a3, .LBB16_12
+; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: li a7, 32
-; CHECK-NEXT: .LBB16_24:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a1, a7, -16
-; CHECK-NEXT: vslidedown.vi v0, v1, 2
-; CHECK-NEXT: bltu a7, a1, .LBB16_26
-; CHECK-NEXT: # %bb.25:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB16_26:
-; CHECK-NEXT: vsetvli zero, a5, e32, m8, tu, ma
+; CHECK-NEXT: .LBB16_12:
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a4, 48
; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vslideup.vi v8, v16, 16
+; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vslideup.vi v16, v24, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a4, 48
; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a4, 40
; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vslideup.vi v8, v16, 16
+; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vslideup.vi v16, v24, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a4, 40
; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vslideup.vi v24, v8, 16
-; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vslideup.vi v8, v16, 16
+; CHECK-NEXT: addi a1, a7, -16
+; CHECK-NEXT: sltu a4, a7, a1
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a1, a4, a1
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 24
-; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: li a4, 24
+; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a7, a3, .LBB16_28
-; CHECK-NEXT: # %bb.27:
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a7, a2, .LBB16_14
+; CHECK-NEXT: # %bb.13:
; CHECK-NEXT: li a7, 16
-; CHECK-NEXT: .LBB16_28:
+; CHECK-NEXT: .LBB16_14:
; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT: vsetvli zero, a5, e32, m8, tu, ma
+; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vslideup.vi v16, v8, 16
-; CHECK-NEXT: vse32.v v16, (a0)
+; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vslideup.vi v24, v16, 16
+; CHECK-NEXT: vse32.v v24, (a0)
; CHECK-NEXT: addi a1, a0, 256
-; CHECK-NEXT: vse32.v v24, (a1)
+; CHECK-NEXT: vse32.v v8, (a1)
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: li a3, 40
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB17_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB17_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB17_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB17_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB17_4:
+; CHECK-NEXT: .LBB17_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: vmv1r.v v25, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vslidedown.vi v25, v0, 2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vslidedown.vi v1, v0, 2
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
-; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vmset.m v1
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmv1r.v v25, v24
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmv1r.v v2, v1
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_4:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: vmv1r.v v25, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vslidedown.vi v25, v0, 2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vslidedown.vi v1, v0, 2
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
-; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vmset.m v1
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmv1r.v v25, v24
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmv1r.v v2, v1
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_4:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
; RV32-NEXT: lui a0, %hi(.LCPI10_0)
; RV32-NEXT: fld ft0, %lo(.LCPI10_0)(a0)
; RV32-NEXT: lui a0, %hi(.LCPI10_1)
; RV32-NEXT: fld ft1, %lo(.LCPI10_1)(a0)
+; RV32-NEXT: vslidedown.vi v9, v8, 1
; RV32-NEXT: vfmv.f.s ft2, v9
-; RV32-NEXT: feq.d a0, ft2, ft2
-; RV32-NEXT: beqz a0, .LBB10_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fmax.d ft2, ft2, ft0
-; RV32-NEXT: fmin.d ft2, ft2, ft1
-; RV32-NEXT: fcvt.w.d a0, ft2, rtz
-; RV32-NEXT: .LBB10_2:
+; RV32-NEXT: fmax.d ft3, ft2, ft0
+; RV32-NEXT: fmin.d ft3, ft3, ft1
+; RV32-NEXT: fcvt.w.d a0, ft3, rtz
+; RV32-NEXT: feq.d a2, ft2, ft2
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT: vmv.v.x v9, a0
; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32-NEXT: vfmv.f.s ft2, v8
-; RV32-NEXT: feq.d a0, ft2, ft2
-; RV32-NEXT: beqz a0, .LBB10_4
-; RV32-NEXT: # %bb.3:
; RV32-NEXT: fmax.d ft0, ft2, ft0
; RV32-NEXT: fmin.d ft0, ft0, ft1
; RV32-NEXT: fcvt.w.d a0, ft0, rtz
-; RV32-NEXT: .LBB10_4:
+; RV32-NEXT: feq.d a2, ft2, ft2
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
; RV32-NEXT: vsetivli zero, 2, e8, mf8, tu, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vse8.v v9, (a1)
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: lui a0, %hi(.LCPI10_0)
; RV64-NEXT: fld ft0, %lo(.LCPI10_0)(a0)
; RV64-NEXT: lui a0, %hi(.LCPI10_1)
; RV64-NEXT: fld ft1, %lo(.LCPI10_1)(a0)
+; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: vfmv.f.s ft2, v9
-; RV64-NEXT: feq.d a0, ft2, ft2
-; RV64-NEXT: beqz a0, .LBB10_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: fmax.d ft2, ft2, ft0
-; RV64-NEXT: fmin.d ft2, ft2, ft1
-; RV64-NEXT: fcvt.l.d a0, ft2, rtz
-; RV64-NEXT: .LBB10_2:
+; RV64-NEXT: fmax.d ft3, ft2, ft0
+; RV64-NEXT: fmin.d ft3, ft3, ft1
+; RV64-NEXT: fcvt.l.d a0, ft3, rtz
+; RV64-NEXT: feq.d a2, ft2, ft2
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT: vmv.v.x v9, a0
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vfmv.f.s ft2, v8
-; RV64-NEXT: feq.d a0, ft2, ft2
-; RV64-NEXT: beqz a0, .LBB10_4
-; RV64-NEXT: # %bb.3:
; RV64-NEXT: fmax.d ft0, ft2, ft0
; RV64-NEXT: fmin.d ft0, ft0, ft1
; RV64-NEXT: fcvt.l.d a0, ft0, rtz
-; RV64-NEXT: .LBB10_4:
+; RV64-NEXT: feq.d a2, ft2, ft2
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: vsetivli zero, 2, e8, mf8, tu, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vse8.v v9, (a1)
; RV32-NEXT: lui a0, %hi(.LCPI12_1)
; RV32-NEXT: fld ft1, %lo(.LCPI12_1)(a0)
; RV32-NEXT: vfmv.f.s ft2, v8
-; RV32-NEXT: feq.d a0, ft2, ft2
-; RV32-NEXT: beqz a0, .LBB12_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fmax.d ft2, ft2, ft0
-; RV32-NEXT: fmin.d ft2, ft2, ft1
-; RV32-NEXT: fcvt.w.d a0, ft2, rtz
-; RV32-NEXT: .LBB12_2:
+; RV32-NEXT: fmax.d ft3, ft2, ft0
+; RV32-NEXT: fmin.d ft3, ft3, ft1
+; RV32-NEXT: fcvt.w.d a0, ft3, rtz
+; RV32-NEXT: feq.d a2, ft2, ft2
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
; RV32-NEXT: sb a0, 8(sp)
; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 7
; RV32-NEXT: vfmv.f.s ft2, v12
-; RV32-NEXT: feq.d a0, ft2, ft2
-; RV32-NEXT: beqz a0, .LBB12_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: fmax.d ft2, ft2, ft0
-; RV32-NEXT: fmin.d ft2, ft2, ft1
-; RV32-NEXT: fcvt.w.d a0, ft2, rtz
-; RV32-NEXT: .LBB12_4:
+; RV32-NEXT: fmax.d ft3, ft2, ft0
+; RV32-NEXT: fmin.d ft3, ft3, ft1
+; RV32-NEXT: fcvt.w.d a0, ft3, rtz
+; RV32-NEXT: feq.d a2, ft2, ft2
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: sb a0, 15(sp)
; RV32-NEXT: vslidedown.vi v12, v8, 6
; RV32-NEXT: vfmv.f.s ft2, v12
+; RV32-NEXT: fmax.d ft3, ft2, ft0
+; RV32-NEXT: fmin.d ft3, ft3, ft1
+; RV32-NEXT: fcvt.w.d a0, ft3, rtz
; RV32-NEXT: feq.d a2, ft2, ft2
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: bnez a2, .LBB12_6
-; RV32-NEXT: # %bb.5:
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: j .LBB12_7
-; RV32-NEXT: .LBB12_6:
-; RV32-NEXT: fmax.d ft2, ft2, ft0
-; RV32-NEXT: fmin.d ft2, ft2, ft1
-; RV32-NEXT: fcvt.w.d a0, ft2, rtz
-; RV32-NEXT: .LBB12_7:
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: sb a0, 14(sp)
; RV32-NEXT: vslidedown.vi v12, v8, 5
; RV32-NEXT: vfmv.f.s ft2, v12
+; RV32-NEXT: fmax.d ft3, ft2, ft0
+; RV32-NEXT: fmin.d ft3, ft3, ft1
+; RV32-NEXT: fcvt.w.d a0, ft3, rtz
; RV32-NEXT: feq.d a2, ft2, ft2
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: bnez a2, .LBB12_9
-; RV32-NEXT: # %bb.8:
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: j .LBB12_10
-; RV32-NEXT: .LBB12_9:
-; RV32-NEXT: fmax.d ft2, ft2, ft0
-; RV32-NEXT: fmin.d ft2, ft2, ft1
-; RV32-NEXT: fcvt.w.d a0, ft2, rtz
-; RV32-NEXT: .LBB12_10:
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: sb a0, 13(sp)
; RV32-NEXT: vslidedown.vi v12, v8, 4
; RV32-NEXT: vfmv.f.s ft2, v12
+; RV32-NEXT: fmax.d ft3, ft2, ft0
+; RV32-NEXT: fmin.d ft3, ft3, ft1
+; RV32-NEXT: fcvt.w.d a0, ft3, rtz
; RV32-NEXT: feq.d a2, ft2, ft2
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: bnez a2, .LBB12_12
-; RV32-NEXT: # %bb.11:
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: j .LBB12_13
-; RV32-NEXT: .LBB12_12:
-; RV32-NEXT: fmax.d ft2, ft2, ft0
-; RV32-NEXT: fmin.d ft2, ft2, ft1
-; RV32-NEXT: fcvt.w.d a0, ft2, rtz
-; RV32-NEXT: .LBB12_13:
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: sb a0, 12(sp)
; RV32-NEXT: vslidedown.vi v12, v8, 3
; RV32-NEXT: vfmv.f.s ft2, v12
+; RV32-NEXT: fmax.d ft3, ft2, ft0
+; RV32-NEXT: fmin.d ft3, ft3, ft1
+; RV32-NEXT: fcvt.w.d a0, ft3, rtz
; RV32-NEXT: feq.d a2, ft2, ft2
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: bnez a2, .LBB12_15
-; RV32-NEXT: # %bb.14:
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: j .LBB12_16
-; RV32-NEXT: .LBB12_15:
-; RV32-NEXT: fmax.d ft2, ft2, ft0
-; RV32-NEXT: fmin.d ft2, ft2, ft1
-; RV32-NEXT: fcvt.w.d a0, ft2, rtz
-; RV32-NEXT: .LBB12_16:
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: sb a0, 11(sp)
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vfmv.f.s ft2, v12
+; RV32-NEXT: fmax.d ft3, ft2, ft0
+; RV32-NEXT: fmin.d ft3, ft3, ft1
+; RV32-NEXT: fcvt.w.d a0, ft3, rtz
; RV32-NEXT: feq.d a2, ft2, ft2
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: bnez a2, .LBB12_18
-; RV32-NEXT: # %bb.17:
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: j .LBB12_19
-; RV32-NEXT: .LBB12_18:
-; RV32-NEXT: fmax.d ft2, ft2, ft0
-; RV32-NEXT: fmin.d ft2, ft2, ft1
-; RV32-NEXT: fcvt.w.d a0, ft2, rtz
-; RV32-NEXT: .LBB12_19:
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: sb a0, 10(sp)
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s ft2, v8
-; RV32-NEXT: feq.d a2, ft2, ft2
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: bnez a2, .LBB12_21
-; RV32-NEXT: # %bb.20:
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: j .LBB12_22
-; RV32-NEXT: .LBB12_21:
; RV32-NEXT: fmax.d ft0, ft2, ft0
; RV32-NEXT: fmin.d ft0, ft0, ft1
; RV32-NEXT: fcvt.w.d a0, ft0, rtz
-; RV32-NEXT: .LBB12_22:
+; RV32-NEXT: feq.d a2, ft2, ft2
+; RV32-NEXT: seqz a2, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a2, a0
; RV32-NEXT: sb a0, 9(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: lui a0, %hi(.LCPI12_1)
; RV64-NEXT: fld ft1, %lo(.LCPI12_1)(a0)
; RV64-NEXT: vfmv.f.s ft2, v8
-; RV64-NEXT: feq.d a0, ft2, ft2
-; RV64-NEXT: beqz a0, .LBB12_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: fmax.d ft2, ft2, ft0
-; RV64-NEXT: fmin.d ft2, ft2, ft1
-; RV64-NEXT: fcvt.l.d a0, ft2, rtz
-; RV64-NEXT: .LBB12_2:
+; RV64-NEXT: fmax.d ft3, ft2, ft0
+; RV64-NEXT: fmin.d ft3, ft3, ft1
+; RV64-NEXT: fcvt.l.d a0, ft3, rtz
+; RV64-NEXT: feq.d a2, ft2, ft2
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 8(sp)
; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 7
; RV64-NEXT: vfmv.f.s ft2, v12
-; RV64-NEXT: feq.d a0, ft2, ft2
-; RV64-NEXT: beqz a0, .LBB12_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: fmax.d ft2, ft2, ft0
-; RV64-NEXT: fmin.d ft2, ft2, ft1
-; RV64-NEXT: fcvt.l.d a0, ft2, rtz
-; RV64-NEXT: .LBB12_4:
+; RV64-NEXT: fmax.d ft3, ft2, ft0
+; RV64-NEXT: fmin.d ft3, ft3, ft1
+; RV64-NEXT: fcvt.l.d a0, ft3, rtz
+; RV64-NEXT: feq.d a2, ft2, ft2
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: sb a0, 15(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 6
; RV64-NEXT: vfmv.f.s ft2, v12
+; RV64-NEXT: fmax.d ft3, ft2, ft0
+; RV64-NEXT: fmin.d ft3, ft3, ft1
+; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
-; RV64-NEXT: sb a0, 15(sp)
-; RV64-NEXT: bnez a2, .LBB12_6
-; RV64-NEXT: # %bb.5:
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: j .LBB12_7
-; RV64-NEXT: .LBB12_6:
-; RV64-NEXT: fmax.d ft2, ft2, ft0
-; RV64-NEXT: fmin.d ft2, ft2, ft1
-; RV64-NEXT: fcvt.l.d a0, ft2, rtz
-; RV64-NEXT: .LBB12_7:
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: sb a0, 14(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 5
; RV64-NEXT: vfmv.f.s ft2, v12
+; RV64-NEXT: fmax.d ft3, ft2, ft0
+; RV64-NEXT: fmin.d ft3, ft3, ft1
+; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
-; RV64-NEXT: sb a0, 14(sp)
-; RV64-NEXT: bnez a2, .LBB12_9
-; RV64-NEXT: # %bb.8:
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: j .LBB12_10
-; RV64-NEXT: .LBB12_9:
-; RV64-NEXT: fmax.d ft2, ft2, ft0
-; RV64-NEXT: fmin.d ft2, ft2, ft1
-; RV64-NEXT: fcvt.l.d a0, ft2, rtz
-; RV64-NEXT: .LBB12_10:
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: sb a0, 13(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 4
; RV64-NEXT: vfmv.f.s ft2, v12
+; RV64-NEXT: fmax.d ft3, ft2, ft0
+; RV64-NEXT: fmin.d ft3, ft3, ft1
+; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
-; RV64-NEXT: sb a0, 13(sp)
-; RV64-NEXT: bnez a2, .LBB12_12
-; RV64-NEXT: # %bb.11:
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: j .LBB12_13
-; RV64-NEXT: .LBB12_12:
-; RV64-NEXT: fmax.d ft2, ft2, ft0
-; RV64-NEXT: fmin.d ft2, ft2, ft1
-; RV64-NEXT: fcvt.l.d a0, ft2, rtz
-; RV64-NEXT: .LBB12_13:
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: sb a0, 12(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 3
; RV64-NEXT: vfmv.f.s ft2, v12
+; RV64-NEXT: fmax.d ft3, ft2, ft0
+; RV64-NEXT: fmin.d ft3, ft3, ft1
+; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
-; RV64-NEXT: sb a0, 12(sp)
-; RV64-NEXT: bnez a2, .LBB12_15
-; RV64-NEXT: # %bb.14:
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: j .LBB12_16
-; RV64-NEXT: .LBB12_15:
-; RV64-NEXT: fmax.d ft2, ft2, ft0
-; RV64-NEXT: fmin.d ft2, ft2, ft1
-; RV64-NEXT: fcvt.l.d a0, ft2, rtz
-; RV64-NEXT: .LBB12_16:
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: sb a0, 11(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vfmv.f.s ft2, v12
+; RV64-NEXT: fmax.d ft3, ft2, ft0
+; RV64-NEXT: fmin.d ft3, ft3, ft1
+; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: bnez a2, .LBB12_18
-; RV64-NEXT: # %bb.17:
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: j .LBB12_19
-; RV64-NEXT: .LBB12_18:
-; RV64-NEXT: fmax.d ft2, ft2, ft0
-; RV64-NEXT: fmin.d ft2, ft2, ft1
-; RV64-NEXT: fcvt.l.d a0, ft2, rtz
-; RV64-NEXT: .LBB12_19:
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: sb a0, 10(sp)
; RV64-NEXT: vslidedown.vi v8, v8, 1
; RV64-NEXT: vfmv.f.s ft2, v8
-; RV64-NEXT: feq.d a2, ft2, ft2
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: bnez a2, .LBB12_21
-; RV64-NEXT: # %bb.20:
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: j .LBB12_22
-; RV64-NEXT: .LBB12_21:
; RV64-NEXT: fmax.d ft0, ft2, ft0
; RV64-NEXT: fmin.d ft0, ft0, ft1
; RV64-NEXT: fcvt.l.d a0, ft0, rtz
-; RV64-NEXT: .LBB12_22:
+; RV64-NEXT: feq.d a2, ft2, ft2
+; RV64-NEXT: seqz a2, a2
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 9(sp)
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-LABEL: vfptosi_v32i64_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB25_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB25_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t
define <32 x i64> @vfptosi_v32i64_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfptosi_v32i64_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, -16
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16
; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
ret <32 x i64> %v
; CHECK-LABEL: vfptoui_v32i64_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB25_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB25_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t
define <32 x i64> @vfptoui_v32i64_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfptoui_v32i64_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, -16
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16
; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
ret <32 x i64> %v
define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_v64f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, -32
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a0, a2, .LBB8_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB8_2:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vslidedown.vi v24, v0, 4
-; CHECK-NEXT: bltu a0, a2, .LBB8_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: .LBB8_4:
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB8_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: .LBB8_2:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, ft0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: addi a1, a0, -32
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_v64f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, -32
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a0, a2, .LBB9_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vslidedown.vi v24, v0, 4
-; CHECK-NEXT: bltu a0, a2, .LBB9_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: .LBB9_4:
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB9_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, ft0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: addi a1, a0, -32
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v8
define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_xor_v64i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a3, a1, -32
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a1, a3, .LBB49_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB49_2:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vslidedown.vi v24, v0, 4
-; CHECK-NEXT: bltu a1, a3, .LBB49_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: .LBB49_4:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bltu a1, a3, .LBB49_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: .LBB49_2:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, tu, ma
; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, tu, ma
+; CHECK-NEXT: addi a0, a1, -32
+; CHECK-NEXT: sltu a1, a1, a0
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vredxor.vs v8, v16, v8, v0.t
; CHECK-NEXT: vmv.x.s a0, v8
define signext i1 @vpreduce_and_v256i1(i1 signext %s, <256 x i1> %v, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_and_v256i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a1, -128
-; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: bltu a1, a2, .LBB14_2
+; CHECK-NEXT: li a3, 128
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bltu a1, a3, .LBB14_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: li a2, 128
; CHECK-NEXT: .LBB14_2:
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: vmnot.m v8, v8
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vcpop.m a2, v8, v0.t
-; CHECK-NEXT: li a3, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vmnot.m v11, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vcpop.m a2, v11, v0.t
; CHECK-NEXT: seqz a2, a2
-; CHECK-NEXT: bltu a1, a3, .LBB14_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a1, 128
-; CHECK-NEXT: .LBB14_4:
+; CHECK-NEXT: and a0, a2, a0
+; CHECK-NEXT: addi a2, a1, -128
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vmnot.m v8, v11
-; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vcpop.m a1, v8, v0.t
; CHECK-NEXT: seqz a1, a1
; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: and a0, a2, a0
; CHECK-NEXT: neg a0, a0
; CHECK-NEXT: ret
%r = call i1 @llvm.vp.reduce.and.v256i1(i1 %s, <256 x i1> %v, <256 x i1> %m, i32 %evl)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: vmv1r.v v25, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vslidedown.vi v25, v0, 2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vslidedown.vi v1, v0, 2
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
-; CHECK-NEXT: fsrmi a0, 4
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vmset.m v1
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmv1r.v v25, v24
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmv1r.v v2, v1
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_4:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: vmv1r.v v25, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vslidedown.vi v25, v0, 2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vslidedown.vi v1, v0, 2
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
-; CHECK-NEXT: fsrmi a0, 0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vmset.m v1
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmv1r.v v25, v24
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmv1r.v v2, v1
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_4:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: vmv1r.v v25, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vslidedown.vi v25, v0, 2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vslidedown.vi v1, v0, 2
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
-; CHECK-NEXT: fsrmi a0, 1
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vmset.m v1
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmv1r.v v25, v24
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmv1r.v v2, v1
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_4:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: li a1, 64
-; CHECK-NEXT: addi a4, a0, 128
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vle16.v v24, (a4)
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: addi a4, a2, -64
+; CHECK-NEXT: addi a0, a2, -64
+; CHECK-NEXT: sltu a3, a2, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a0, a3, a0
; CHECK-NEXT: vslidedown.vi v0, v0, 8
-; CHECK-NEXT: bltu a2, a4, .LBB43_2
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v1, v16, v8, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB43_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB43_2:
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v2, v16, v24, v0.t
-; CHECK-NEXT: bltu a2, a1, .LBB43_4
-; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a2, 64
-; CHECK-NEXT: .LBB43_4:
+; CHECK-NEXT: .LBB43_2:
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t
; CHECK-NEXT: vsetivli zero, 16, e8, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v16, v2, 8
+; CHECK-NEXT: vslideup.vi v16, v1, 8
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v2, v0
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a3, a2, -16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a3, .LBB87_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB87_2:
-; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle64.v v8, (a1)
+; CHECK-NEXT: addi a1, a2, -16
+; CHECK-NEXT: sltu a3, a2, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v1, v16, v8, v0.t
-; CHECK-NEXT: bltu a2, a0, .LBB87_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a0, .LBB87_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: .LBB87_4:
+; CHECK-NEXT: .LBB87_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
; CHECK-NEXT: vslideup.vi v16, v1, 2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 24
-; CHECK-NEXT: mul a1, a1, a4
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: li a4, 128
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: vle8.v v24, (a0)
+; CHECK-NEXT: vmv1r.v v2, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: bltu a3, a4, .LBB51_2
-; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 128
-; CHECK-NEXT: .LBB51_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vlm.v v24, (a2)
-; CHECK-NEXT: vle8.v v16, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: addi a0, a3, -128
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vmseq.vv v1, v8, v16, v0.t
-; CHECK-NEXT: bltu a3, a0, .LBB51_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a0
-; CHECK-NEXT: .LBB51_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vlm.v v0, (a2)
+; CHECK-NEXT: addi a2, a0, 128
+; CHECK-NEXT: vle8.v v8, (a2)
+; CHECK-NEXT: addi a2, a3, -128
+; CHECK-NEXT: sltu a4, a3, a2
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: vle8.v v24, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: and a0, a4, a2
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t
+; CHECK-NEXT: bltu a3, a1, .LBB51_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a3, 128
+; CHECK-NEXT: .LBB51_2:
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmseq.vv v8, v16, v24, v0.t
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vmv1r.v v8, v1
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_v256i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v25, (a1)
-; CHECK-NEXT: addi a4, a2, -128
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a4, .LBB52_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a4
-; CHECK-NEXT: .LBB52_2:
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: addi a1, a2, -128
+; CHECK-NEXT: sltu a4, a2, a1
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
-; CHECK-NEXT: bltu a2, a3, .LBB52_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a3, .LBB52_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: .LBB52_4:
+; CHECK-NEXT: .LBB52_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_swap_v256i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v25, (a1)
-; CHECK-NEXT: addi a4, a2, -128
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a4, .LBB53_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a4
-; CHECK-NEXT: .LBB53_2:
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: addi a1, a2, -128
+; CHECK-NEXT: sltu a4, a2, a1
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
-; CHECK-NEXT: bltu a2, a3, .LBB53_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a3, .LBB53_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: .LBB53_4:
+; CHECK-NEXT: .LBB53_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: addi a4, a0, 128
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v24, (a4)
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: addi a4, a2, -32
+; CHECK-NEXT: addi a0, a2, -32
+; CHECK-NEXT: sltu a3, a2, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a0, a3, a0
; CHECK-NEXT: vslidedown.vi v0, v0, 4
-; CHECK-NEXT: bltu a2, a4, .LBB99_2
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB99_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB99_2:
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmseq.vv v2, v16, v24, v0.t
-; CHECK-NEXT: bltu a2, a1, .LBB99_4
-; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a2, 32
-; CHECK-NEXT: .LBB99_4:
+; CHECK-NEXT: .LBB99_2:
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v16, v2, 4
+; CHECK-NEXT: vslideup.vi v16, v1, 4
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-LABEL: icmp_eq_vx_v64i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: addi a3, a1, -32
; CHECK-NEXT: vslidedown.vi v0, v0, 4
-; CHECK-NEXT: bltu a1, a3, .LBB100_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB100_2:
+; CHECK-NEXT: addi a2, a1, -32
+; CHECK-NEXT: sltu a3, a1, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB100_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB100_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: .LBB100_4:
+; CHECK-NEXT: .LBB100_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-LABEL: icmp_eq_vx_swap_v64i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: addi a3, a1, -32
; CHECK-NEXT: vslidedown.vi v0, v0, 4
-; CHECK-NEXT: bltu a1, a3, .LBB101_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB101_2:
+; CHECK-NEXT: addi a2, a1, -32
+; CHECK-NEXT: sltu a3, a1, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB101_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB101_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: .LBB101_4:
+; CHECK-NEXT: .LBB101_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-LABEL: vsext_v32i64_v32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB12_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v8, 16
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsext.vf2 v16, v24, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB12_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB12_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB12_4:
+; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vsext.vf2 v24, v8, v0.t
define <32 x i64> @vsext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vsext_v32i64_v32i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a0, a2, .LBB13_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB13_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v8, 16
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsext.vf2 v16, v24
-; CHECK-NEXT: bltu a0, a1, .LBB13_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB13_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB13_4:
+; CHECK-NEXT: .LBB13_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vsext.vf2 v24, v8
; CHECK-NEXT: vmv.v.v v8, v24
; CHECK-LABEL: vsitofp_v32f64_v32i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB25_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB25_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
define <32 x double> @vsitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; CHECK-LABEL: vsitofp_v32f64_v32i64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, -16
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfcvt.f.x.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v16, v16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
ret <32 x double> %v
define <32 x double> @strided_vpload_v32f64(double* %ptr, i32 signext %stride, <32 x i1> %m, i32 zeroext %evl) nounwind {
; CHECK-RV32-LABEL: strided_vpload_v32f64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi a4, a2, -16
+; CHECK-RV32-NEXT: li a4, 16
; CHECK-RV32-NEXT: vmv1r.v v8, v0
-; CHECK-RV32-NEXT: li a3, 0
+; CHECK-RV32-NEXT: mv a3, a2
; CHECK-RV32-NEXT: bltu a2, a4, .LBB33_2
; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a3, a4
+; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB33_2:
-; CHECK-RV32-NEXT: li a4, 16
-; CHECK-RV32-NEXT: bltu a2, a4, .LBB33_4
-; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: li a2, 16
-; CHECK-RV32-NEXT: .LBB33_4:
-; CHECK-RV32-NEXT: mul a4, a2, a1
+; CHECK-RV32-NEXT: mul a4, a3, a1
; CHECK-RV32-NEXT: add a4, a0, a4
+; CHECK-RV32-NEXT: addi a5, a2, -16
+; CHECK-RV32-NEXT: sltu a2, a2, a5
+; CHECK-RV32-NEXT: addi a2, a2, -1
+; CHECK-RV32-NEXT: and a2, a2, a5
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1, v0.t
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1, v0.t
+; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v0, v8
; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_vpload_v32f64:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: addi a4, a2, -16
+; CHECK-RV64-NEXT: li a4, 16
; CHECK-RV64-NEXT: vmv1r.v v8, v0
-; CHECK-RV64-NEXT: li a3, 0
+; CHECK-RV64-NEXT: mv a3, a2
; CHECK-RV64-NEXT: bltu a2, a4, .LBB33_2
; CHECK-RV64-NEXT: # %bb.1:
-; CHECK-RV64-NEXT: mv a3, a4
+; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB33_2:
-; CHECK-RV64-NEXT: li a4, 16
-; CHECK-RV64-NEXT: bltu a2, a4, .LBB33_4
-; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: li a2, 16
-; CHECK-RV64-NEXT: .LBB33_4:
-; CHECK-RV64-NEXT: mul a4, a2, a1
+; CHECK-RV64-NEXT: mul a4, a3, a1
; CHECK-RV64-NEXT: add a4, a0, a4
+; CHECK-RV64-NEXT: addi a5, a2, -16
+; CHECK-RV64-NEXT: sltu a2, a2, a5
+; CHECK-RV64-NEXT: addi a2, a2, -1
+; CHECK-RV64-NEXT: and a2, a2, a5
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1, v0.t
; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1, v0.t
+; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vmv1r.v v0, v8
; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT: ret
define <32 x double> @strided_vpload_v32f64_allones_mask(double* %ptr, i32 signext %stride, i32 zeroext %evl) nounwind {
; CHECK-RV32-LABEL: strided_vpload_v32f64_allones_mask:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi a4, a2, -16
-; CHECK-RV32-NEXT: li a3, 0
+; CHECK-RV32-NEXT: li a4, 16
+; CHECK-RV32-NEXT: mv a3, a2
; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_2
; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a3, a4
+; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB34_2:
-; CHECK-RV32-NEXT: li a4, 16
-; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_4
-; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: li a2, 16
-; CHECK-RV32-NEXT: .LBB34_4:
-; CHECK-RV32-NEXT: mul a4, a2, a1
+; CHECK-RV32-NEXT: mul a4, a3, a1
; CHECK-RV32-NEXT: add a4, a0, a4
-; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1
+; CHECK-RV32-NEXT: addi a5, a2, -16
+; CHECK-RV32-NEXT: sltu a2, a2, a5
+; CHECK-RV32-NEXT: addi a2, a2, -1
+; CHECK-RV32-NEXT: and a2, a2, a5
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1
+; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_vpload_v32f64_allones_mask:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: addi a4, a2, -16
-; CHECK-RV64-NEXT: li a3, 0
+; CHECK-RV64-NEXT: li a4, 16
+; CHECK-RV64-NEXT: mv a3, a2
; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_2
; CHECK-RV64-NEXT: # %bb.1:
-; CHECK-RV64-NEXT: mv a3, a4
+; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB34_2:
-; CHECK-RV64-NEXT: li a4, 16
-; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_4
-; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: li a2, 16
-; CHECK-RV64-NEXT: .LBB34_4:
-; CHECK-RV64-NEXT: mul a4, a2, a1
+; CHECK-RV64-NEXT: mul a4, a3, a1
; CHECK-RV64-NEXT: add a4, a0, a4
-; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1
+; CHECK-RV64-NEXT: addi a5, a2, -16
+; CHECK-RV64-NEXT: sltu a2, a2, a5
+; CHECK-RV64-NEXT: addi a2, a2, -1
+; CHECK-RV64-NEXT: and a2, a2, a5
; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1
+; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1
; CHECK-RV64-NEXT: ret
%one = insertelement <32 x i1> poison, i1 true, i32 0
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 32
; CHECK-RV32-NEXT: .LBB35_2:
-; CHECK-RV32-NEXT: addi a5, a3, -16
-; CHECK-RV32-NEXT: li a7, 0
-; CHECK-RV32-NEXT: bltu a3, a5, .LBB35_4
+; CHECK-RV32-NEXT: mul a5, a3, a2
+; CHECK-RV32-NEXT: addi a6, a4, -32
+; CHECK-RV32-NEXT: sltu a4, a4, a6
+; CHECK-RV32-NEXT: addi a4, a4, -1
+; CHECK-RV32-NEXT: and a6, a4, a6
+; CHECK-RV32-NEXT: li a4, 16
+; CHECK-RV32-NEXT: add a5, a1, a5
+; CHECK-RV32-NEXT: bltu a6, a4, .LBB35_4
; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: mv a7, a5
-; CHECK-RV32-NEXT: .LBB35_4:
; CHECK-RV32-NEXT: li a6, 16
-; CHECK-RV32-NEXT: mv a5, a3
-; CHECK-RV32-NEXT: bltu a3, a6, .LBB35_6
+; CHECK-RV32-NEXT: .LBB35_4:
+; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4
+; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t
+; CHECK-RV32-NEXT: addi a5, a3, -16
+; CHECK-RV32-NEXT: sltu a6, a3, a5
+; CHECK-RV32-NEXT: addi a6, a6, -1
+; CHECK-RV32-NEXT: and a5, a6, a5
+; CHECK-RV32-NEXT: bltu a3, a4, .LBB35_6
; CHECK-RV32-NEXT: # %bb.5:
-; CHECK-RV32-NEXT: li a5, 16
+; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB35_6:
-; CHECK-RV32-NEXT: mul t0, a5, a2
-; CHECK-RV32-NEXT: add t0, a1, t0
+; CHECK-RV32-NEXT: mul a4, a3, a2
+; CHECK-RV32-NEXT: add a4, a1, a4
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v16, (t0), a2, v0.t
-; CHECK-RV32-NEXT: addi t0, a4, -32
-; CHECK-RV32-NEXT: li a7, 0
-; CHECK-RV32-NEXT: bltu a4, t0, .LBB35_8
-; CHECK-RV32-NEXT: # %bb.7:
-; CHECK-RV32-NEXT: mv a7, t0
-; CHECK-RV32-NEXT: .LBB35_8:
-; CHECK-RV32-NEXT: bltu a7, a6, .LBB35_10
-; CHECK-RV32-NEXT: # %bb.9:
-; CHECK-RV32-NEXT: li a7, 16
-; CHECK-RV32-NEXT: .LBB35_10:
-; CHECK-RV32-NEXT: mul a3, a3, a2
-; CHECK-RV32-NEXT: add a3, a1, a3
-; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4
-; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v24, (a3), a2, v0.t
; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v24, (a4), a2, v0.t
+; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v0, v8
; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t
; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV32-NEXT: vse64.v v8, (a0)
; CHECK-RV32-NEXT: addi a1, a0, 256
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vse64.v v24, (a1)
+; CHECK-RV32-NEXT: vse64.v v16, (a1)
; CHECK-RV32-NEXT: addi a0, a0, 128
; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vse64.v v16, (a0)
+; CHECK-RV32-NEXT: vse64.v v24, (a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_load_v33f64:
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a4, 32
; CHECK-RV64-NEXT: .LBB35_2:
-; CHECK-RV64-NEXT: addi a5, a4, -16
-; CHECK-RV64-NEXT: li a7, 0
-; CHECK-RV64-NEXT: bltu a4, a5, .LBB35_4
+; CHECK-RV64-NEXT: mul a5, a4, a2
+; CHECK-RV64-NEXT: addi a6, a3, -32
+; CHECK-RV64-NEXT: sltu a3, a3, a6
+; CHECK-RV64-NEXT: addi a3, a3, -1
+; CHECK-RV64-NEXT: and a6, a3, a6
+; CHECK-RV64-NEXT: li a3, 16
+; CHECK-RV64-NEXT: add a5, a1, a5
+; CHECK-RV64-NEXT: bltu a6, a3, .LBB35_4
; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: mv a7, a5
-; CHECK-RV64-NEXT: .LBB35_4:
; CHECK-RV64-NEXT: li a6, 16
-; CHECK-RV64-NEXT: mv a5, a4
-; CHECK-RV64-NEXT: bltu a4, a6, .LBB35_6
+; CHECK-RV64-NEXT: .LBB35_4:
+; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4
+; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t
+; CHECK-RV64-NEXT: addi a5, a4, -16
+; CHECK-RV64-NEXT: sltu a6, a4, a5
+; CHECK-RV64-NEXT: addi a6, a6, -1
+; CHECK-RV64-NEXT: and a5, a6, a5
+; CHECK-RV64-NEXT: bltu a4, a3, .LBB35_6
; CHECK-RV64-NEXT: # %bb.5:
-; CHECK-RV64-NEXT: li a5, 16
+; CHECK-RV64-NEXT: li a4, 16
; CHECK-RV64-NEXT: .LBB35_6:
-; CHECK-RV64-NEXT: mul t0, a5, a2
-; CHECK-RV64-NEXT: add t0, a1, t0
-; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vlse64.v v16, (t0), a2, v0.t
-; CHECK-RV64-NEXT: addi t0, a3, -32
-; CHECK-RV64-NEXT: li a7, 0
-; CHECK-RV64-NEXT: bltu a3, t0, .LBB35_8
-; CHECK-RV64-NEXT: # %bb.7:
-; CHECK-RV64-NEXT: mv a7, t0
-; CHECK-RV64-NEXT: .LBB35_8:
-; CHECK-RV64-NEXT: bltu a7, a6, .LBB35_10
-; CHECK-RV64-NEXT: # %bb.9:
-; CHECK-RV64-NEXT: li a7, 16
-; CHECK-RV64-NEXT: .LBB35_10:
; CHECK-RV64-NEXT: mul a3, a4, a2
; CHECK-RV64-NEXT: add a3, a1, a3
-; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4
-; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t
+; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t
+; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV64-NEXT: vmv1r.v v0, v8
; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t
; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV64-NEXT: vse64.v v8, (a0)
; CHECK-RV64-NEXT: addi a1, a0, 256
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vse64.v v24, (a1)
+; CHECK-RV64-NEXT: vse64.v v16, (a1)
; CHECK-RV64-NEXT: addi a0, a0, 128
; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vse64.v v16, (a0)
+; CHECK-RV64-NEXT: vse64.v v24, (a0)
; CHECK-RV64-NEXT: ret
%v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0f64.i64(double* %ptr, i64 %stride, <33 x i1> %mask, i32 %evl)
ret <33 x double> %v
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB27_2:
-; CHECK-RV32-NEXT: li a4, 0
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT: addi a5, a2, -16
; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1, v0.t
-; CHECK-RV32-NEXT: bltu a2, a5, .LBB27_4
-; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: mv a4, a5
-; CHECK-RV32-NEXT: .LBB27_4:
-; CHECK-RV32-NEXT: mul a2, a3, a1
-; CHECK-RV32-NEXT: add a0, a0, a2
+; CHECK-RV32-NEXT: mul a3, a3, a1
+; CHECK-RV32-NEXT: add a0, a0, a3
+; CHECK-RV32-NEXT: addi a3, a2, -16
+; CHECK-RV32-NEXT: sltu a2, a2, a3
+; CHECK-RV32-NEXT: addi a2, a2, -1
+; CHECK-RV32-NEXT: and a2, a2, a3
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1, v0.t
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB27_2:
-; CHECK-RV64-NEXT: li a4, 0
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV64-NEXT: addi a5, a2, -16
; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1, v0.t
-; CHECK-RV64-NEXT: bltu a2, a5, .LBB27_4
-; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: mv a4, a5
-; CHECK-RV64-NEXT: .LBB27_4:
-; CHECK-RV64-NEXT: mul a2, a3, a1
-; CHECK-RV64-NEXT: add a0, a0, a2
+; CHECK-RV64-NEXT: mul a3, a3, a1
+; CHECK-RV64-NEXT: add a0, a0, a3
+; CHECK-RV64-NEXT: addi a3, a2, -16
+; CHECK-RV64-NEXT: sltu a2, a2, a3
+; CHECK-RV64-NEXT: addi a2, a2, -1
+; CHECK-RV64-NEXT: and a2, a2, a3
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1, v0.t
; CHECK-RV64-NEXT: ret
call void @llvm.experimental.vp.strided.store.v32f64.p0f64.i32(<32 x double> %v, double* %ptr, i32 %stride, <32 x i1> %mask, i32 %evl)
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB28_2:
-; CHECK-RV32-NEXT: li a4, 0
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT: addi a5, a2, -16
; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1
-; CHECK-RV32-NEXT: bltu a2, a5, .LBB28_4
-; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: mv a4, a5
-; CHECK-RV32-NEXT: .LBB28_4:
-; CHECK-RV32-NEXT: mul a2, a3, a1
-; CHECK-RV32-NEXT: add a0, a0, a2
-; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-RV32-NEXT: mul a3, a3, a1
+; CHECK-RV32-NEXT: add a0, a0, a3
+; CHECK-RV32-NEXT: addi a3, a2, -16
+; CHECK-RV32-NEXT: sltu a2, a2, a3
+; CHECK-RV32-NEXT: addi a2, a2, -1
+; CHECK-RV32-NEXT: and a2, a2, a3
+; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB28_2:
-; CHECK-RV64-NEXT: li a4, 0
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV64-NEXT: addi a5, a2, -16
; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1
-; CHECK-RV64-NEXT: bltu a2, a5, .LBB28_4
-; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: mv a4, a5
-; CHECK-RV64-NEXT: .LBB28_4:
-; CHECK-RV64-NEXT: mul a2, a3, a1
-; CHECK-RV64-NEXT: add a0, a0, a2
-; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-RV64-NEXT: mul a3, a3, a1
+; CHECK-RV64-NEXT: add a0, a0, a3
+; CHECK-RV64-NEXT: addi a3, a2, -16
+; CHECK-RV64-NEXT: sltu a2, a2, a3
+; CHECK-RV64-NEXT: addi a2, a2, -1
+; CHECK-RV64-NEXT: and a2, a2, a3
+; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1
; CHECK-RV64-NEXT: ret
%one = insertelement <32 x i1> poison, i1 true, i32 0
; CHECK-LABEL: vuitofp_v32f64_v32i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB25_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB25_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
define <32 x double> @vuitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; CHECK-LABEL: vuitofp_v32f64_v32i64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, -16
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfcvt.f.xu.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
ret <32 x double> %v
define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vadd_vi_v258i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v25, (a0)
-; CHECK-NEXT: addi a3, a1, -128
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: bltu a1, a3, .LBB32_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a0, a3
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: addi a0, a1, -128
+; CHECK-NEXT: sltu a3, a1, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a0, a3, a0
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB32_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB32_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 128
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
define <256 x i8> @vadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vadd_vi_v258i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, -128
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB33_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB33_2:
-; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: li a1, 128
-; CHECK-NEXT: vadd.vi v16, v16, -1
-; CHECK-NEXT: bltu a0, a1, .LBB33_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 128
-; CHECK-NEXT: .LBB33_4:
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: addi a1, a0, -128
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v16, -1
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
; RV32-LABEL: vadd_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: addi a2, a0, -16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: bltu a0, a2, .LBB108_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB108_2:
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a2, a0, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
-; RV32-NEXT: bltu a0, a1, .LBB108_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a0, a1, .LBB108_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
-; RV32-NEXT: .LBB108_4:
+; RV32-NEXT: .LBB108_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
; RV64-LABEL: vadd_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: li a1, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: addi a2, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: bltu a0, a2, .LBB108_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: .LBB108_2:
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a2, a0, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: li a1, 16
; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
-; RV64-NEXT: bltu a0, a1, .LBB108_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a0, a1, .LBB108_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
-; RV64-NEXT: .LBB108_4:
+; RV64-NEXT: .LBB108_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vadd_vi_v32i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: addi a2, a0, -16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: li a2, 16
; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB109_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a2
+; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB109_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: li a1, 16
-; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: bltu a0, a1, .LBB109_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: li a0, 16
-; RV32-NEXT: .LBB109_4:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v8, v8, v24
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vadd.vv v16, v16, v24
; RV32-NEXT: ret
;
; RV64-LABEL: vadd_vi_v32i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: addi a1, a0, -16
-; RV64-NEXT: li a2, 0
-; RV64-NEXT: bltu a0, a1, .LBB109_2
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB109_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a1
-; RV64-NEXT: .LBB109_2:
-; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a1, 16
-; RV64-NEXT: vadd.vi v16, v16, -1
-; RV64-NEXT: bltu a0, a1, .LBB109_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: .LBB109_4:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: .LBB109_2:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vadd.vi v8, v8, -1
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vadd.vi v16, v16, -1
; RV64-NEXT: ret
%elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
%vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a3, a2, -16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a3, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle64.v v8, (a1)
+; CHECK-NEXT: addi a1, a2, -16
+; CHECK-NEXT: sltu a3, a2, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v16, v8, v0.t
-; CHECK-NEXT: bltu a2, a0, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a0, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: addi a3, a2, -16
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a3, .LBB27_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vle64.v v0, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bltu a2, a1, .LBB27_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: vfsgnj.vv v16, v16, v24
-; CHECK-NEXT: bltu a2, a0, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v0
+; CHECK-NEXT: addi a0, a2, -16
+; CHECK-NEXT: sltu a1, a2, a0
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfsgnj.vv v16, v16, v24
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
; CHECK-LABEL: vfabs_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfabs.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfabs.v v8, v8, v0.t
define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, -16
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a0, a1, .LBB27_2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB27_2:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfabs.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfabs.v v16, v16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 48
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: addi a1, a2, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 40
-; CHECK-NEXT: mul a1, a1, a3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a3, a4, -16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a4, a3, .LBB50_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB50_2:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a2)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 24
-; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a2, 128
+; CHECK-NEXT: addi a2, a4, -16
+; CHECK-NEXT: sltu a3, a4, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: addi a3, a0, 128
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: vle64.v v8, (a3)
+; CHECK-NEXT: vle64.v v24, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
+; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 40
-; CHECK-NEXT: mul a1, a1, a2
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 40
-; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a0, .LBB50_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a4, a0, .LBB50_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a4, 16
-; CHECK-NEXT: .LBB50_4:
+; CHECK-NEXT: .LBB50_2:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 48
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: addi a3, a4, -16
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v24, (a2)
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a4, a3, .LBB51_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB51_2:
-; CHECK-NEXT: vle64.v v8, (a2)
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v0, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: bltu a4, a1, .LBB51_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v24, v16, v8
-; CHECK-NEXT: bltu a4, a0, .LBB51_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a4, 16
-; CHECK-NEXT: .LBB51_4:
-; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-NEXT: .LBB51_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v0, v8, v24
+; CHECK-NEXT: addi a0, a4, -16
+; CHECK-NEXT: sltu a1, a4, a0
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v0, v16, v8
-; CHECK-NEXT: vmv.v.v v8, v0
-; CHECK-NEXT: vmv8r.v v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v0
+; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a3, a2, -16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a3, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle64.v v8, (a1)
+; CHECK-NEXT: addi a1, a2, -16
+; CHECK-NEXT: sltu a3, a2, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t
-; CHECK-NEXT: bltu a2, a0, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a0, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmax.vv v8, v24, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: addi a3, a2, -16
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a3, .LBB27_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vle64.v v0, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bltu a2, a1, .LBB27_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: vfmax.vv v16, v16, v24
-; CHECK-NEXT: bltu a2, a0, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmax.vv v8, v8, v0
+; CHECK-NEXT: addi a0, a2, -16
+; CHECK-NEXT: sltu a1, a2, a0
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v16, v24
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a3, a2, -16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a3, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle64.v v8, (a1)
+; CHECK-NEXT: addi a1, a2, -16
+; CHECK-NEXT: sltu a3, a2, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t
-; CHECK-NEXT: bltu a2, a0, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a0, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmin.vv v8, v24, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: addi a3, a2, -16
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a3, .LBB27_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vle64.v v0, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bltu a2, a1, .LBB27_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: vfmin.vv v16, v16, v24
-; CHECK-NEXT: bltu a2, a0, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmin.vv v8, v8, v0
+; CHECK-NEXT: addi a0, a2, -16
+; CHECK-NEXT: sltu a1, a2, a0
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v16, v24
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 48
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: addi a1, a2, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 40
-; CHECK-NEXT: mul a1, a1, a3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a3, a4, -16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a4, a3, .LBB50_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB50_2:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a2)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 24
-; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a2, 128
+; CHECK-NEXT: addi a2, a4, -16
+; CHECK-NEXT: sltu a3, a4, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: addi a3, a0, 128
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: vle64.v v8, (a3)
+; CHECK-NEXT: vle64.v v24, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
+; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 40
-; CHECK-NEXT: mul a1, a1, a2
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 40
-; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a0, .LBB50_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a4, a0, .LBB50_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a4, 16
-; CHECK-NEXT: .LBB50_4:
+; CHECK-NEXT: .LBB50_2:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 48
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: addi a3, a4, -16
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v24, (a2)
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a4, a3, .LBB51_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB51_2:
-; CHECK-NEXT: vle64.v v8, (a2)
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v0, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: bltu a4, a1, .LBB51_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v24, v16, v8
-; CHECK-NEXT: bltu a4, a0, .LBB51_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a4, 16
-; CHECK-NEXT: .LBB51_4:
-; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-NEXT: .LBB51_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v0, v8, v24
+; CHECK-NEXT: addi a0, a4, -16
+; CHECK-NEXT: sltu a1, a4, a0
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v0, v16, v8
-; CHECK-NEXT: vmv.v.v v8, v0
-; CHECK-NEXT: vmv8r.v v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v0
+; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-LABEL: vfneg_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfneg.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfneg.v v8, v8, v0.t
define <32 x double> @vfneg_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfneg_vv_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, -16
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a0, a1, .LBB27_2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB27_2:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfneg.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfneg.v v16, v16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
; CHECK-LABEL: vfsqrt_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfsqrt.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
define <32 x double> @vfsqrt_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v32f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, -16
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a0, a1, .LBB27_2
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB27_2:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vfsqrt.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfsqrt.v v8, v8
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmax_vx_v258i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v25, (a1)
-; CHECK-NEXT: addi a4, a2, -128
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a4, .LBB22_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a4
-; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: addi a1, a2, -128
+; CHECK-NEXT: sltu a4, a2, a1
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a2, a3, .LBB22_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a3, .LBB22_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: .LBB22_4:
+; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
define <256 x i8> @vmax_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) {
; CHECK-LABEL: vmax_vx_v258i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a1, -128
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: bltu a1, a2, .LBB23_2
+; CHECK-NEXT: li a3, 128
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bltu a1, a3, .LBB23_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: .LBB23_2:
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: vmax.vx v16, v16, a0
-; CHECK-NEXT: bltu a1, a2, .LBB23_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a1, 128
-; CHECK-NEXT: .LBB23_4:
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: addi a2, a1, -128
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmax.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
; RV32-LABEL: vmax_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: addi a2, a0, -16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: bltu a0, a2, .LBB74_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB74_2:
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a2, a0, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vmax.vv v16, v16, v24, v0.t
-; RV32-NEXT: bltu a0, a1, .LBB74_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a0, a1, .LBB74_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
-; RV32-NEXT: .LBB74_4:
+; RV32-NEXT: .LBB74_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vmax.vv v8, v8, v24, v0.t
; RV64-LABEL: vmax_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: bltu a0, a1, .LBB74_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a1
-; RV64-NEXT: .LBB74_2:
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a2, a0, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a2, a2, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a2, 16
; RV64-NEXT: vmax.vx v16, v16, a1, v0.t
-; RV64-NEXT: bltu a0, a2, .LBB74_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a0, a2, .LBB74_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
-; RV64-NEXT: .LBB74_4:
+; RV64-NEXT: .LBB74_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vmax.vx v8, v8, a1, v0.t
define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmaxu_vx_v258i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v25, (a1)
-; CHECK-NEXT: addi a4, a2, -128
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a4, .LBB22_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a4
-; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: addi a1, a2, -128
+; CHECK-NEXT: sltu a4, a2, a1
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a2, a3, .LBB22_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a3, .LBB22_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: .LBB22_4:
+; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
define <256 x i8> @vmaxu_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) {
; CHECK-LABEL: vmaxu_vx_v258i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a1, -128
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: bltu a1, a2, .LBB23_2
+; CHECK-NEXT: li a3, 128
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bltu a1, a3, .LBB23_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: .LBB23_2:
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: vmaxu.vx v16, v16, a0
-; CHECK-NEXT: bltu a1, a2, .LBB23_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a1, 128
-; CHECK-NEXT: .LBB23_4:
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: addi a2, a1, -128
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmaxu.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
; RV32-LABEL: vmaxu_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: addi a2, a0, -16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: bltu a0, a2, .LBB74_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB74_2:
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a2, a0, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t
-; RV32-NEXT: bltu a0, a1, .LBB74_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a0, a1, .LBB74_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
-; RV32-NEXT: .LBB74_4:
+; RV32-NEXT: .LBB74_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t
; RV64-LABEL: vmaxu_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: bltu a0, a1, .LBB74_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a1
-; RV64-NEXT: .LBB74_2:
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a2, a0, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a2, a2, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a2, 16
; RV64-NEXT: vmaxu.vx v16, v16, a1, v0.t
-; RV64-NEXT: bltu a0, a2, .LBB74_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a0, a2, .LBB74_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
-; RV64-NEXT: .LBB74_4:
+; RV64-NEXT: .LBB74_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vmaxu.vx v8, v8, a1, v0.t
define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmin_vx_v258i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v25, (a1)
-; CHECK-NEXT: addi a4, a2, -128
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a4, .LBB22_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a4
-; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: addi a1, a2, -128
+; CHECK-NEXT: sltu a4, a2, a1
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a2, a3, .LBB22_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a3, .LBB22_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: .LBB22_4:
+; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
define <256 x i8> @vmin_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) {
; CHECK-LABEL: vmin_vx_v258i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a1, -128
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: bltu a1, a2, .LBB23_2
+; CHECK-NEXT: li a3, 128
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bltu a1, a3, .LBB23_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: .LBB23_2:
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: vmin.vx v16, v16, a0
-; CHECK-NEXT: bltu a1, a2, .LBB23_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a1, 128
-; CHECK-NEXT: .LBB23_4:
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: addi a2, a1, -128
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmin.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
; RV32-LABEL: vmin_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: addi a2, a0, -16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: bltu a0, a2, .LBB74_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB74_2:
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a2, a0, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vmin.vv v16, v16, v24, v0.t
-; RV32-NEXT: bltu a0, a1, .LBB74_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a0, a1, .LBB74_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
-; RV32-NEXT: .LBB74_4:
+; RV32-NEXT: .LBB74_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vmin.vv v8, v8, v24, v0.t
; RV64-LABEL: vmin_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: bltu a0, a1, .LBB74_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a1
-; RV64-NEXT: .LBB74_2:
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a2, a0, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a2, a2, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a2, 16
; RV64-NEXT: vmin.vx v16, v16, a1, v0.t
-; RV64-NEXT: bltu a0, a2, .LBB74_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a0, a2, .LBB74_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
-; RV64-NEXT: .LBB74_4:
+; RV64-NEXT: .LBB74_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vmin.vx v8, v8, a1, v0.t
define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vminu_vx_v258i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v25, (a1)
-; CHECK-NEXT: addi a4, a2, -128
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a4, .LBB22_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a4
-; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: addi a1, a2, -128
+; CHECK-NEXT: sltu a4, a2, a1
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a2, a3, .LBB22_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a3, .LBB22_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: .LBB22_4:
+; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
define <256 x i8> @vminu_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) {
; CHECK-LABEL: vminu_vx_v258i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a1, -128
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: bltu a1, a2, .LBB23_2
+; CHECK-NEXT: li a3, 128
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bltu a1, a3, .LBB23_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: .LBB23_2:
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: vminu.vx v16, v16, a0
-; CHECK-NEXT: bltu a1, a2, .LBB23_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a1, 128
-; CHECK-NEXT: .LBB23_4:
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: addi a2, a1, -128
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vminu.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
; RV32-LABEL: vminu_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: addi a2, a0, -16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: bltu a0, a2, .LBB74_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB74_2:
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a2, a0, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vminu.vv v16, v16, v24, v0.t
-; RV32-NEXT: bltu a0, a1, .LBB74_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a0, a1, .LBB74_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
-; RV32-NEXT: .LBB74_4:
+; RV32-NEXT: .LBB74_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vminu.vv v8, v8, v24, v0.t
; RV64-LABEL: vminu_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: bltu a0, a1, .LBB74_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a1
-; RV64-NEXT: .LBB74_2:
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a2, a0, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a2, a2, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a2, 16
; RV64-NEXT: vminu.vx v16, v16, a1, v0.t
-; RV64-NEXT: bltu a0, a2, .LBB74_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a0, a2, .LBB74_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
-; RV64-NEXT: .LBB74_4:
+; RV64-NEXT: .LBB74_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vminu.vx v8, v8, a1, v0.t
;
; RV64-LABEL: vpgather_baseidx_v32i8:
; RV64: # %bb.0:
-; RV64-NEXT: addi a3, a1, -16
; RV64-NEXT: vmv1r.v v10, v0
-; RV64-NEXT: li a2, 0
-; RV64-NEXT: bltu a1, a3, .LBB13_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB13_2:
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v10, 2
+; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB13_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB13_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB13_4:
+; RV64-NEXT: .LBB13_2:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
define <32 x double> @vpgather_v32f64(<32 x double*> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v32f64:
; RV32: # %bb.0:
-; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: bltu a0, a2, .LBB86_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB86_2:
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a2, a0, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v24, v8, 16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v0, v1, 2
+; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (zero), v24, v0.t
; RV32-NEXT: li a1, 16
-; RV32-NEXT: bltu a0, a1, .LBB86_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a0, a1, .LBB86_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
-; RV32-NEXT: .LBB86_4:
+; RV32-NEXT: .LBB86_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t
;
; RV64-LABEL: vpgather_v32f64:
; RV64: # %bb.0:
-; RV64-NEXT: addi a2, a0, -16
; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: li a1, 0
-; RV64-NEXT: bltu a0, a2, .LBB86_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: .LBB86_2:
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a2, a0, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v24, 2
+; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t
; RV64-NEXT: li a1, 16
-; RV64-NEXT: bltu a0, a1, .LBB86_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a0, a1, .LBB86_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
-; RV64-NEXT: .LBB86_4:
+; RV64-NEXT: .LBB86_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
define <32 x double> @vpgather_baseidx_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32i8_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v16, v8
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: vsll.vi v16, v16, 3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a3, .LBB87_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB87_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a1, a3, .LBB87_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB87_4:
+; RV32-NEXT: vsll.vi v24, v16, 3
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a3, a1, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: bltu a1, a2, .LBB87_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB87_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_v32i8_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v10, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 3
-; RV64-NEXT: addi a3, a1, -16
; RV64-NEXT: vsext.vf8 v24, v8
-; RV64-NEXT: bltu a1, a3, .LBB87_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB87_2:
; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v10, 2
+; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB87_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB87_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB87_4:
+; RV64-NEXT: .LBB87_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v10
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v16, v8
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: vsll.vi v16, v16, 3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a3, .LBB88_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB88_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a1, a3, .LBB88_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB88_4:
+; RV32-NEXT: vsll.vi v24, v16, 3
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a3, a1, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: bltu a1, a2, .LBB88_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB88_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v10, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsext.vf8 v24, v8
-; RV64-NEXT: addi a3, a1, -16
; RV64-NEXT: vsll.vi v16, v16, 3
-; RV64-NEXT: bltu a1, a3, .LBB88_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB88_2:
; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v10, 2
+; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB88_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB88_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB88_4:
+; RV64-NEXT: .LBB88_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v10
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vzext.vf4 v16, v8
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: vsll.vi v16, v16, 3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a3, .LBB89_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB89_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a1, a3, .LBB89_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB89_4:
+; RV32-NEXT: vsll.vi v24, v16, 3
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a3, a1, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: bltu a1, a2, .LBB89_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB89_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v10, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vzext.vf8 v16, v12
; RV64-NEXT: vzext.vf8 v24, v8
-; RV64-NEXT: addi a3, a1, -16
; RV64-NEXT: vsll.vi v16, v16, 3
-; RV64-NEXT: bltu a1, a3, .LBB89_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB89_2:
; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v10, 2
+; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB89_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB89_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB89_4:
+; RV64-NEXT: .LBB89_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v10
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
define <32 x double> @vpgather_baseidx_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32i16_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: vsll.vi v16, v16, 3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a3, .LBB90_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB90_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a1, a3, .LBB90_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB90_4:
+; RV32-NEXT: vsll.vi v24, v16, 3
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a3, a1, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: bltu a1, a2, .LBB90_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB90_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_v32i16_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v12, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
-; RV64-NEXT: addi a3, a1, -16
; RV64-NEXT: vsext.vf4 v24, v8
-; RV64-NEXT: bltu a1, a3, .LBB90_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB90_2:
; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v12, 2
+; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB90_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB90_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB90_4:
+; RV64-NEXT: .LBB90_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: vsll.vi v16, v16, 3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a3, .LBB91_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB91_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a1, a3, .LBB91_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB91_4:
+; RV32-NEXT: vsll.vi v24, v16, 3
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a3, a1, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: bltu a1, a2, .LBB91_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB91_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v12, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf4 v0, v16
-; RV64-NEXT: vsext.vf4 v24, v8
-; RV64-NEXT: addi a3, a1, -16
-; RV64-NEXT: vsll.vi v16, v0, 3
-; RV64-NEXT: bltu a1, a3, .LBB91_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB91_2:
-; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: vsext.vf4 v24, v16
+; RV64-NEXT: vsext.vf4 v0, v8
+; RV64-NEXT: vsll.vi v16, v24, 3
+; RV64-NEXT: vsll.vi v24, v0, 3
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v12, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB91_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB91_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB91_4:
+; RV64-NEXT: .LBB91_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vzext.vf2 v16, v8
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: vsll.vi v16, v16, 3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a3, .LBB92_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB92_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a1, a3, .LBB92_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB92_4:
+; RV32-NEXT: vsll.vi v24, v16, 3
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a3, a1, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: bltu a1, a2, .LBB92_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB92_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v12, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vzext.vf4 v0, v16
-; RV64-NEXT: vzext.vf4 v24, v8
-; RV64-NEXT: addi a3, a1, -16
-; RV64-NEXT: vsll.vi v16, v0, 3
-; RV64-NEXT: bltu a1, a3, .LBB92_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB92_2:
-; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: vzext.vf4 v24, v16
+; RV64-NEXT: vzext.vf4 v0, v8
+; RV64-NEXT: vsll.vi v16, v24, 3
+; RV64-NEXT: vsll.vi v24, v0, 3
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v12, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB92_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB92_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB92_4:
+; RV64-NEXT: .LBB92_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
define <32 x double> @vpgather_baseidx_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32i32_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: vsll.vi v16, v8, 3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a3, .LBB93_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB93_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a1, a3, .LBB93_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB93_4:
+; RV32-NEXT: vsll.vi v24, v8, 3
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a3, a1, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: bltu a1, a2, .LBB93_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB93_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v0, v16
; RV64-NEXT: vsll.vi v16, v0, 3
-; RV64-NEXT: addi a3, a1, -16
; RV64-NEXT: vsext.vf2 v0, v8
-; RV64-NEXT: bltu a1, a3, .LBB93_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB93_2:
; RV64-NEXT: vsll.vi v8, v0, 3
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v24, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB93_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB93_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB93_4:
+; RV64-NEXT: .LBB93_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: vsll.vi v16, v8, 3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a3, .LBB94_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB94_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a1, a3, .LBB94_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB94_4:
+; RV32-NEXT: vsll.vi v24, v8, 3
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a3, a1, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: bltu a1, a2, .LBB94_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB94_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
; RV64-NEXT: sub sp, sp, a2
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v24, v16
-; RV64-NEXT: vsext.vf2 v0, v8
-; RV64-NEXT: addi a3, a1, -16
-; RV64-NEXT: vsll.vi v16, v24, 3
-; RV64-NEXT: bltu a1, a3, .LBB94_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB94_2:
-; RV64-NEXT: vsll.vi v8, v0, 3
+; RV64-NEXT: vsext.vf2 v0, v16
+; RV64-NEXT: vsext.vf2 v24, v8
+; RV64-NEXT: vsll.vi v16, v0, 3
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vl1r.v v24, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: addi a3, sp, 16
-; RV64-NEXT: vl1r.v v24, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vslidedown.vi v0, v24, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB94_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB94_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB94_4:
+; RV64-NEXT: .LBB94_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: vsll.vi v16, v8, 3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a3, .LBB95_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB95_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a1, a3, .LBB95_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB95_4:
+; RV32-NEXT: vsll.vi v24, v8, 3
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a3, a1, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: bltu a1, a2, .LBB95_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB95_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
; RV64-NEXT: sub sp, sp, a2
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vzext.vf2 v24, v16
-; RV64-NEXT: vzext.vf2 v0, v8
-; RV64-NEXT: addi a3, a1, -16
-; RV64-NEXT: vsll.vi v16, v24, 3
-; RV64-NEXT: bltu a1, a3, .LBB95_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB95_2:
-; RV64-NEXT: vsll.vi v8, v0, 3
+; RV64-NEXT: vzext.vf2 v0, v16
+; RV64-NEXT: vzext.vf2 v24, v8
+; RV64-NEXT: vsll.vi v16, v0, 3
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vl1r.v v24, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: addi a3, sp, 16
-; RV64-NEXT: vl1r.v v24, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vslidedown.vi v0, v24, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB95_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB95_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB95_4:
+; RV64-NEXT: .LBB95_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
define <32 x double> @vpgather_baseidx_v32f64(double* %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32f64:
; RV32: # %bb.0:
-; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: li a2, 0
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: li a3, 32
-; RV32-NEXT: vsetvli zero, a3, e32, m8, tu, ma
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, tu, ma
; RV32-NEXT: vslideup.vi v16, v24, 16
; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; RV32-NEXT: addi a3, a1, -16
-; RV32-NEXT: vsll.vi v24, v16, 3
+; RV32-NEXT: li a3, 16
+; RV32-NEXT: vsll.vi v16, v16, 3
+; RV32-NEXT: mv a2, a1
; RV32-NEXT: bltu a1, a3, .LBB96_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a2, a3
+; RV32-NEXT: li a2, 16
; RV32-NEXT: .LBB96_2:
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v24, 16
+; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: sltu a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v0, v1, 2
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: bltu a1, a2, .LBB96_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: li a1, 16
-; RV32-NEXT: .LBB96_4:
+; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: addi a3, a1, -16
-; RV64-NEXT: vsll.vi v16, v16, 3
-; RV64-NEXT: bltu a1, a3, .LBB96_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB96_2:
; RV64-NEXT: vsll.vi v8, v8, 3
+; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: addi a2, a1, -16
+; RV64-NEXT: sltu a3, a1, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v24, 2
+; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: li a2, 16
-; RV64-NEXT: bltu a1, a2, .LBB96_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB96_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB96_4:
+; RV64-NEXT: .LBB96_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
define <32 x double> @vpload_v32f64(<32 x double>* %ptr, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v32f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a3, a1, -16
; CHECK-NEXT: vmv1r.v v8, v0
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a1, a3, .LBB31_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB31_2:
+; CHECK-NEXT: addi a2, a1, -16
+; CHECK-NEXT: sltu a3, a1, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v8, 2
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: addi a3, a0, 128
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v16, (a3), v0.t
; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: bltu a1, a2, .LBB31_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB31_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB31_4:
+; CHECK-NEXT: .LBB31_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: .LBB32_2:
-; CHECK-NEXT: addi a5, a3, -16
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: bltu a3, a5, .LBB32_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a5
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: addi a4, a3, -16
+; CHECK-NEXT: sltu a5, a3, a4
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a4, a5, a4
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v8, 2
; CHECK-NEXT: addi a5, a1, 128
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v16, (a5), v0.t
-; CHECK-NEXT: addi a5, a2, -32
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: bltu a2, a5, .LBB32_6
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: mv a4, a5
-; CHECK-NEXT: .LBB32_6:
+; CHECK-NEXT: addi a4, a2, -32
+; CHECK-NEXT: sltu a2, a2, a4
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a4, a2, a4
; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: bltu a4, a2, .LBB32_8
-; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: bltu a4, a2, .LBB32_4
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a4, 16
-; CHECK-NEXT: .LBB32_8:
+; CHECK-NEXT: .LBB32_4:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v8, 4
; CHECK-NEXT: addi a5, a1, 256
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a5), v0.t
-; CHECK-NEXT: bltu a3, a2, .LBB32_10
-; CHECK-NEXT: # %bb.9:
+; CHECK-NEXT: bltu a3, a2, .LBB32_6
+; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: li a3, 16
-; CHECK-NEXT: .LBB32_10:
+; CHECK-NEXT: .LBB32_6:
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vle64.v v8, (a1), v0.t
declare <32 x double> @llvm.vp.merge.v32f64(<32 x i1>, <32 x double>, <32 x double>, i32)
define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpmerge_vv_v32f64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
-; RV32-NEXT: mul a1, a1, a3
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: addi a1, a0, 128
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vle64.v v24, (a1)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: addi a3, a2, -16
-; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: bltu a2, a3, .LBB79_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB79_2:
-; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v0, v1, 2
-; RV32-NEXT: vsetvli zero, a1, e64, m8, tu, ma
-; RV32-NEXT: li a0, 16
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT: bltu a2, a0, .LBB79_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB79_4:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpmerge_vv_v32f64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: addi a1, a0, 128
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v24, (a1)
-; RV64-NEXT: addi a3, a2, -16
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv1r.v v1, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: li a1, 0
-; RV64-NEXT: bltu a2, a3, .LBB79_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a1, a3
-; RV64-NEXT: .LBB79_2:
-; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v1, 2
-; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT: bltu a2, a0, .LBB79_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: li a2, 16
-; RV64-NEXT: .LBB79_4:
-; RV64-NEXT: vsetvli zero, a2, e64, m8, tu, ma
-; RV64-NEXT: vmv1r.v v0, v1
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
-; RV64-NEXT: vmv8r.v v16, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: vpmerge_vv_v32f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: vmv1r.v v1, v0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a2, -16
+; CHECK-NEXT: sltu a3, a2, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: vle64.v v16, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT: bltu a2, a0, .LBB79_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: .LBB79_2:
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, tu, ma
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
ret <32 x double> %v
}
define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v32f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a0, a2, .LBB80_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB80_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v24, 2
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0
-; CHECK-NEXT: bltu a0, a1, .LBB80_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB80_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB80_4:
+; CHECK-NEXT: .LBB80_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a0)
-; RV32-NEXT: li a0, 16
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a0, .LBB79_2
-; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a2, 16
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: bltu a1, a2, .LBB79_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a0, 16
; RV32-NEXT: .LBB79_2:
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: addi a2, a1, -16
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
-; RV32-NEXT: bltu a1, a2, .LBB79_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a0, a2
-; RV32-NEXT: .LBB79_4:
+; RV32-NEXT: addi a0, a1, -16
+; RV32-NEXT: sltu a1, a1, a0
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: addi a1, a0, 128
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v24, (a0)
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vle64.v v24, (a1)
; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: li a3, 16
-; RV64-NEXT: addi a0, a0, 128
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: bltu a2, a3, .LBB79_2
-; RV64-NEXT: # %bb.1:
+; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: li a1, 16
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: bltu a2, a1, .LBB79_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a0, 16
; RV64-NEXT: .LBB79_2:
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: vle64.v v16, (a0)
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: addi a0, a2, -16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
-; RV64-NEXT: bltu a2, a0, .LBB79_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a3, a0
-; RV64-NEXT: .LBB79_4:
+; RV64-NEXT: addi a0, a2, -16
+; RV64-NEXT: sltu a1, a2, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
+; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a1)
-; RV32-NEXT: li a1, 16
+; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v24, v24, 3
-; RV32-NEXT: mv a3, a2
-; RV32-NEXT: bltu a2, a1, .LBB80_2
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: bltu a2, a3, .LBB80_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a3, 16
+; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB80_2:
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: addi a3, a2, -16
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
-; RV32-NEXT: bltu a2, a3, .LBB80_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB80_4:
+; RV32-NEXT: addi a1, a2, -16
+; RV32-NEXT: sltu a2, a2, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 4
+; RV64-NEXT: li a4, 10
+; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vle32.v v24, (a1)
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV64-NEXT: vslidedown.vi v16, v24, 16
+; RV64-NEXT: vslidedown.vi v0, v24, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v8, v16
-; RV64-NEXT: vsext.vf2 v16, v24
+; RV64-NEXT: vsext.vf2 v16, v0
+; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: vsext.vf2 v0, v24
; RV64-NEXT: li a3, 16
-; RV64-NEXT: vsll.vi v24, v16, 3
+; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB80_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB80_2:
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: vsll.vi v16, v8, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: addi a1, a2, -16
-; RV64-NEXT: addi a4, sp, 16
-; RV64-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
-; RV64-NEXT: bltu a2, a1, .LBB80_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a3, a1
-; RV64-NEXT: .LBB80_4:
+; RV64-NEXT: addi a1, a2, -16
+; RV64-NEXT: sltu a2, a2, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a1)
-; RV32-NEXT: li a1, 16
+; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v24, v24, 3
-; RV32-NEXT: mv a3, a2
-; RV32-NEXT: bltu a2, a1, .LBB81_2
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: bltu a2, a3, .LBB81_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a3, 16
+; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB81_2:
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: addi a3, a2, -16
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
-; RV32-NEXT: bltu a2, a3, .LBB81_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB81_4:
+; RV32-NEXT: addi a1, a2, -16
+; RV32-NEXT: sltu a2, a2, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 4
+; RV64-NEXT: li a4, 10
+; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vle32.v v24, (a1)
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v24
+; RV64-NEXT: vsext.vf2 v0, v24
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v24, v24, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v8, v24
+; RV64-NEXT: vsext.vf2 v16, v24
+; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: li a3, 16
-; RV64-NEXT: vsll.vi v24, v16, 3
+; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB81_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB81_2:
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: vsll.vi v16, v8, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: addi a1, a2, -16
-; RV64-NEXT: addi a4, sp, 16
-; RV64-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
-; RV64-NEXT: bltu a2, a1, .LBB81_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a3, a1
-; RV64-NEXT: .LBB81_4:
+; RV64-NEXT: addi a1, a2, -16
+; RV64-NEXT: sltu a2, a2, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a1)
-; RV32-NEXT: li a1, 16
+; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v24, v24, 3
-; RV32-NEXT: mv a3, a2
-; RV32-NEXT: bltu a2, a1, .LBB82_2
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: bltu a2, a3, .LBB82_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a3, 16
+; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB82_2:
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: addi a3, a2, -16
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
-; RV32-NEXT: bltu a2, a3, .LBB82_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB82_4:
+; RV32-NEXT: addi a1, a2, -16
+; RV32-NEXT: sltu a2, a2, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 4
+; RV64-NEXT: li a4, 10
+; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vle32.v v24, (a1)
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vzext.vf2 v16, v24
+; RV64-NEXT: vzext.vf2 v0, v24
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v24, v24, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vzext.vf2 v8, v24
+; RV64-NEXT: vzext.vf2 v16, v24
+; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: li a3, 16
-; RV64-NEXT: vsll.vi v24, v16, 3
+; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB82_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB82_2:
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: vsll.vi v16, v8, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: addi a1, a2, -16
-; RV64-NEXT: addi a4, sp, 16
-; RV64-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
-; RV64-NEXT: bltu a2, a1, .LBB82_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a3, a1
-; RV64-NEXT: .LBB82_4:
+; RV64-NEXT: addi a1, a2, -16
+; RV64-NEXT: sltu a2, a2, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
define void @vpstore_v32f64(<32 x double> %val, <32 x double>* %ptr, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpstore_v32f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: bltu a1, a2, .LBB23_2
-; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 16
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bltu a1, a3, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a2, 16
; CHECK-NEXT: .LBB23_2:
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: addi a3, a1, -16
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vse64.v v8, (a0), v0.t
-; CHECK-NEXT: bltu a1, a3, .LBB23_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB23_4:
+; CHECK-NEXT: addi a2, a1, -16
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vse64.v v16, (a0), v0.t
; CHECK-NEXT: ret
call void @llvm.vp.store.v32f64.p0v32f64(<32 x double> %val, <32 x double>* %ptr, <32 x i1> %m, i32 %evl)
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a4, 24
-; CHECK-NEXT: mul a2, a2, a4
-; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: li a4, 128
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: vle8.v v24, (a1)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v1, v8
-; CHECK-NEXT: addi a1, a1, 128
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: bltu a3, a4, .LBB11_2
-; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vmv1r.v v2, v8
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: .LBB11_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vle8.v v24, (a1)
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vle8.v v24, (a0)
+; CHECK-NEXT: addi a0, a1, 128
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a3, -128
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT: bltu a3, a0, .LBB11_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a0
-; CHECK-NEXT: .LBB11_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; CHECK-NEXT: sltu a4, a3, a0
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: vle8.v v16, (a1)
+; CHECK-NEXT: and a0, a4, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0
+; CHECK-NEXT: bltu a3, a2, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a3, 128
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a3, a2, -16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a2, a3, .LBB25_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: addi a1, a2, -16
+; CHECK-NEXT: sltu a3, a2, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v24, 2
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
-; CHECK-NEXT: bltu a2, a0, .LBB25_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a0, .LBB25_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: li a3, 32
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v24, (a0)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: addi a1, a2, -32
+; CHECK-NEXT: sltu a3, a2, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: li a3, 32
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 4
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
; CHECK-NEXT: bltu a2, a3, .LBB35_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: li a2, 32
; CHECK-NEXT: .LBB35_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: addi a0, a2, -32
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
-; CHECK-NEXT: bltu a2, a0, .LBB35_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a0
-; CHECK-NEXT: .LBB35_4:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v0, 4
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-LABEL: vzext_v32i64_v32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
-; CHECK-NEXT: bltu a0, a2, .LBB12_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v8, 16
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vzext.vf2 v16, v24, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB12_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB12_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB12_4:
+; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vzext.vf2 v24, v8, v0.t
define <32 x i64> @vzext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vzext_v32i64_v32i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a0, a2, .LBB13_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB13_2:
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v8, 16
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vzext.vf2 v16, v24
-; CHECK-NEXT: bltu a0, a1, .LBB13_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB13_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB13_4:
+; CHECK-NEXT: .LBB13_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vzext.vf2 v24, v8
; CHECK-NEXT: vmv.v.v v8, v24
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v25, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB32_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vslidedown.vx v2, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a2, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB32_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-LABEL: vp_floor_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v8
+; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a2, 2
-; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB33_4:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v25, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB32_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vslidedown.vx v2, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a2, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB32_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-LABEL: vp_round_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v8
+; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a2, 4
-; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB33_4:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a0, 4
-; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v25, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB32_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vslidedown.vx v2, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a2, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB32_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-LABEL: vp_roundeven_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v8
+; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a2, 0
-; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB33_4:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a0, 0
-; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v25, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB32_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vslidedown.vx v2, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a2, 1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a0, a1, .LBB32_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-LABEL: vp_roundtozero_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v8
+; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a2, 1
-; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB33_4:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a0, 1
-; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: srli a1, a3, 1
-; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma
-; CHECK-NEXT: slli a5, a3, 3
-; CHECK-NEXT: add a5, a0, a5
-; CHECK-NEXT: vl8re16.v v24, (a5)
-; CHECK-NEXT: slli a3, a3, 2
-; CHECK-NEXT: sub a5, a2, a3
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: bltu a2, a5, .LBB85_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a5
-; CHECK-NEXT: .LBB85_2:
-; CHECK-NEXT: vl8re16.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a3, a0, a3
+; CHECK-NEXT: vl8re16.v v8, (a3)
+; CHECK-NEXT: slli a3, a1, 2
+; CHECK-NEXT: sub a4, a2, a3
+; CHECK-NEXT: sltu a5, a2, a4
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a4, a5, a4
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: vl8re16.v v0, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a1
; CHECK-NEXT: vsetvli zero, a4, e16, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v2, v16, v24, v0.t
-; CHECK-NEXT: bltu a2, a3, .LBB85_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: vmfeq.vv v1, v16, v8, v0.t
+; CHECK-NEXT: bltu a2, a3, .LBB85_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB85_4:
+; CHECK-NEXT: .LBB85_2:
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t
; CHECK-NEXT: add a0, a1, a1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
-; CHECK-NEXT: vslideup.vx v16, v2, a1
+; CHECK-NEXT: vslideup.vx v16, v1, a1
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: li a3, 48
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: li a3, 40
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: srli a4, a3, 2
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v1, v0, a4
; CHECK-NEXT: srli a1, a3, 3
-; CHECK-NEXT: slli a5, a3, 3
-; CHECK-NEXT: slli a7, a3, 1
-; CHECK-NEXT: add a4, a2, a5
-; CHECK-NEXT: mv t0, a6
-; CHECK-NEXT: bltu a6, a7, .LBB171_2
+; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
+; CHECK-NEXT: li a5, 24
+; CHECK-NEXT: slli t1, a3, 3
+; CHECK-NEXT: add a7, a2, t1
+; CHECK-NEXT: vl8re64.v v8, (a7)
+; CHECK-NEXT: mul t0, a3, a5
+; CHECK-NEXT: slli a5, a3, 1
+; CHECK-NEXT: slli t2, a3, 4
+; CHECK-NEXT: mv a7, a6
+; CHECK-NEXT: bltu a6, a5, .LBB171_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv t0, a7
+; CHECK-NEXT: mv a7, a5
; CHECK-NEXT: .LBB171_2:
-; CHECK-NEXT: li t1, 0
-; CHECK-NEXT: vsetvli t2, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vl8re64.v v16, (a4)
-; CHECK-NEXT: srli a4, a3, 2
-; CHECK-NEXT: sub t2, t0, a3
-; CHECK-NEXT: vslidedown.vx v0, v24, a1
-; CHECK-NEXT: bltu t0, t2, .LBB171_4
+; CHECK-NEXT: add t0, a2, t0
+; CHECK-NEXT: add t1, a0, t1
+; CHECK-NEXT: add t2, a2, t2
+; CHECK-NEXT: vl8re64.v v16, (a2)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: sub a2, a7, a3
+; CHECK-NEXT: sltu t3, a7, a2
+; CHECK-NEXT: addi t3, t3, -1
+; CHECK-NEXT: and a2, t3, a2
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 5
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vmfeq.vv v2, v16, v8, v0.t
+; CHECK-NEXT: bltu a7, a3, .LBB171_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv t1, t2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB171_4:
-; CHECK-NEXT: li t2, 24
-; CHECK-NEXT: vsetvli t3, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v1, v24, a4
-; CHECK-NEXT: vl8re64.v v8, (a2)
-; CHECK-NEXT: csrr t3, vlenb
-; CHECK-NEXT: slli t3, t3, 3
-; CHECK-NEXT: add t3, sp, t3
-; CHECK-NEXT: addi t3, t3, 16
-; CHECK-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill
-; CHECK-NEXT: slli t3, a3, 4
-; CHECK-NEXT: vsetvli zero, t1, e64, m8, ta, ma
-; CHECK-NEXT: csrr t1, vlenb
-; CHECK-NEXT: slli t1, t1, 4
-; CHECK-NEXT: add t1, sp, t1
-; CHECK-NEXT: addi t1, t1, 16
-; CHECK-NEXT: vl8re8.v v8, (t1) # Unknown-size Folded Reload
-; CHECK-NEXT: vmfeq.vv v2, v8, v16, v0.t
-; CHECK-NEXT: bltu t0, a3, .LBB171_6
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: mv t0, a3
-; CHECK-NEXT: .LBB171_6:
-; CHECK-NEXT: li t1, 0
-; CHECK-NEXT: mul t4, a3, t2
-; CHECK-NEXT: add t2, a2, t3
-; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: csrr t0, vlenb
-; CHECK-NEXT: li t3, 24
-; CHECK-NEXT: mul t0, t0, t3
-; CHECK-NEXT: add t0, sp, t0
-; CHECK-NEXT: addi t0, t0, 16
-; CHECK-NEXT: vl8re8.v v24, (t0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr t0, vlenb
-; CHECK-NEXT: slli t0, t0, 3
-; CHECK-NEXT: add t0, sp, t0
-; CHECK-NEXT: addi t0, t0, 16
-; CHECK-NEXT: vl8re8.v v8, (t0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmfeq.vv v17, v24, v8, v0.t
-; CHECK-NEXT: sub t0, a6, a7
-; CHECK-NEXT: add a7, a1, a1
-; CHECK-NEXT: bltu a6, t0, .LBB171_8
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mv t1, t0
-; CHECK-NEXT: .LBB171_8:
-; CHECK-NEXT: add a2, a2, t4
-; CHECK-NEXT: vl8re64.v v8, (t2)
-; CHECK-NEXT: csrr a6, vlenb
+; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v17, v1, a1
+; CHECK-NEXT: vl8re64.v v8, (t0)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 5
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v8, (t1)
+; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: li t0, 24
-; CHECK-NEXT: mul a6, a6, t0
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v8, (a0)
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 4
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
-; CHECK-NEXT: add a0, a0, a5
-; CHECK-NEXT: vsetvli zero, a7, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v17, v2, a1
-; CHECK-NEXT: mv a5, t1
-; CHECK-NEXT: bltu t1, a3, .LBB171_10
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: mv a5, a3
-; CHECK-NEXT: .LBB171_10:
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v16, v1, a1
-; CHECK-NEXT: vl8re64.v v8, (a2)
+; CHECK-NEXT: mul a2, a2, t0
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v8, (t2)
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmfeq.vv v18, v8, v24, v0.t
-; CHECK-NEXT: add a0, a4, a1
+; CHECK-NEXT: vmfeq.vv v18, v24, v8, v0.t
+; CHECK-NEXT: add a0, a1, a1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
-; CHECK-NEXT: sub a0, t1, a3
-; CHECK-NEXT: vslideup.vx v17, v18, a4
-; CHECK-NEXT: bltu t1, a0, .LBB171_12
-; CHECK-NEXT: # %bb.11:
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: .LBB171_12:
-; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: sub a0, a6, a5
+; CHECK-NEXT: sltu a2, a6, a0
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a0, a2, a0
+; CHECK-NEXT: vslideup.vx v18, v2, a1
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bltu a0, a3, .LBB171_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB171_6:
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t
+; CHECK-NEXT: add a2, a4, a1
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v18, v16, a4
+; CHECK-NEXT: sub a2, a0, a3
+; CHECK-NEXT: sltu a0, a0, a2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t
; CHECK-NEXT: slli a0, a1, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v17, v16, a0
-; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vslideup.vx v18, v16, a0
+; CHECK-NEXT: vmv1r.v v0, v18
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: li a1, 48
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 24
-; CHECK-NEXT: mul a1, a1, a4
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a4, a0, a1
-; CHECK-NEXT: vl8r.v v24, (a4)
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a4, a4, 3
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a4, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v25, (a2)
-; CHECK-NEXT: sub a4, a3, a1
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a3, a4, .LBB96_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a4
-; CHECK-NEXT: .LBB96_2:
-; CHECK-NEXT: vl8r.v v8, (a0)
+; CHECK-NEXT: vl8r.v v8, (a4)
+; CHECK-NEXT: vl8r.v v0, (a0)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT: sub a0, a3, a1
+; CHECK-NEXT: vlm.v v0, (a2)
+; CHECK-NEXT: sltu a2, a3, a0
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a0, a2, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t
-; CHECK-NEXT: bltu a3, a1, .LBB96_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a3, a1, .LBB96_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB96_4:
+; CHECK-NEXT: .LBB96_2:
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmv1r.v v8, v1
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
define <vscale x 128 x i1> @icmp_eq_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 3
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a2, a3, .LBB97_2
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a3, a2, a1
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB97_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB97_2:
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v24, (a1)
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: sub a1, a2, a3
-; CHECK-NEXT: vmseq.vx v25, v8, a0, v0.t
-; CHECK-NEXT: bltu a2, a1, .LBB97_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB97_4:
-; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
define <vscale x 128 x i1> @icmp_eq_vx_swap_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_swap_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 3
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a2, a3, .LBB98_2
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a3, a2, a1
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB98_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB98_2:
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v24, (a1)
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: sub a1, a2, a3
-; CHECK-NEXT: vmseq.vx v25, v8, a0, v0.t
-; CHECK-NEXT: bltu a2, a1, .LBB98_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB98_4:
-; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: srli a1, a3, 2
-; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
-; CHECK-NEXT: slli a5, a3, 3
-; CHECK-NEXT: add a5, a0, a5
-; CHECK-NEXT: vl8re32.v v24, (a5)
-; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: sub a5, a2, a3
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: bltu a2, a5, .LBB189_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a5
-; CHECK-NEXT: .LBB189_2:
-; CHECK-NEXT: vl8re32.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a3, a0, a3
+; CHECK-NEXT: vl8re32.v v8, (a3)
+; CHECK-NEXT: slli a3, a1, 1
+; CHECK-NEXT: sub a4, a2, a3
+; CHECK-NEXT: sltu a5, a2, a4
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a4, a5, a4
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vl8re32.v v0, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a1
; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT: vmseq.vv v2, v16, v24, v0.t
-; CHECK-NEXT: bltu a2, a3, .LBB189_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t
+; CHECK-NEXT: bltu a2, a3, .LBB189_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB189_4:
+; CHECK-NEXT: .LBB189_2:
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t
; CHECK-NEXT: add a0, a1, a1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v16, v2, a1
+; CHECK-NEXT: vslideup.vx v16, v1, a1
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-LABEL: icmp_eq_vx_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a4, 0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: srli a2, a3, 2
-; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
-; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: sub a5, a1, a3
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: bltu a1, a5, .LBB190_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a5
-; CHECK-NEXT: .LBB190_2:
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub a4, a1, a3
+; CHECK-NEXT: sltu a5, a1, a4
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a4, a5, a4
; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a3, .LBB190_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a3, .LBB190_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB190_4:
+; CHECK-NEXT: .LBB190_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-LABEL: icmp_eq_vx_swap_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a4, 0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: srli a2, a3, 2
-; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
-; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: sub a5, a1, a3
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: bltu a1, a5, .LBB191_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a5
-; CHECK-NEXT: .LBB191_2:
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub a4, a1, a3
+; CHECK-NEXT: sltu a5, a1, a4
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a4, a5, a4
; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a3, .LBB191_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a3, .LBB191_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB191_4:
+; CHECK-NEXT: .LBB191_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-RV32-LABEL: strided_load_nxv16f64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vmv1r.v v8, v0
-; CHECK-RV32-NEXT: li a2, 0
; CHECK-RV32-NEXT: csrr a4, vlenb
-; CHECK-RV32-NEXT: sub a6, a3, a4
+; CHECK-RV32-NEXT: sub a2, a3, a4
+; CHECK-RV32-NEXT: sltu a5, a3, a2
+; CHECK-RV32-NEXT: addi a5, a5, -1
+; CHECK-RV32-NEXT: and a2, a5, a2
; CHECK-RV32-NEXT: srli a5, a4, 3
-; CHECK-RV32-NEXT: bltu a3, a6, .LBB42_2
-; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a2, a6
-; CHECK-RV32-NEXT: .LBB42_2:
; CHECK-RV32-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a5
-; CHECK-RV32-NEXT: bltu a3, a4, .LBB42_4
-; CHECK-RV32-NEXT: # %bb.3:
+; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a5
+; CHECK-RV32-NEXT: bltu a3, a4, .LBB42_2
+; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: mv a3, a4
-; CHECK-RV32-NEXT: .LBB42_4:
+; CHECK-RV32-NEXT: .LBB42_2:
; CHECK-RV32-NEXT: mul a4, a3, a1
; CHECK-RV32-NEXT: add a4, a0, a4
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-LABEL: strided_load_nxv16f64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: vmv1r.v v8, v0
-; CHECK-RV64-NEXT: li a3, 0
; CHECK-RV64-NEXT: csrr a4, vlenb
-; CHECK-RV64-NEXT: sub a6, a2, a4
+; CHECK-RV64-NEXT: sub a3, a2, a4
+; CHECK-RV64-NEXT: sltu a5, a2, a3
+; CHECK-RV64-NEXT: addi a5, a5, -1
+; CHECK-RV64-NEXT: and a3, a5, a3
; CHECK-RV64-NEXT: srli a5, a4, 3
-; CHECK-RV64-NEXT: bltu a2, a6, .LBB42_2
-; CHECK-RV64-NEXT: # %bb.1:
-; CHECK-RV64-NEXT: mv a3, a6
-; CHECK-RV64-NEXT: .LBB42_2:
; CHECK-RV64-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a5
-; CHECK-RV64-NEXT: bltu a2, a4, .LBB42_4
-; CHECK-RV64-NEXT: # %bb.3:
+; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a5
+; CHECK-RV64-NEXT: bltu a2, a4, .LBB42_2
+; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: mv a2, a4
-; CHECK-RV64-NEXT: .LBB42_4:
+; CHECK-RV64-NEXT: .LBB42_2:
; CHECK-RV64-NEXT: mul a4, a2, a1
; CHECK-RV64-NEXT: add a4, a0, a4
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-LABEL: strided_load_nxv16f64_allones_mask:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: csrr a4, vlenb
-; CHECK-RV32-NEXT: sub a5, a3, a4
-; CHECK-RV32-NEXT: li a2, 0
-; CHECK-RV32-NEXT: bltu a3, a5, .LBB43_2
+; CHECK-RV32-NEXT: sub a2, a3, a4
+; CHECK-RV32-NEXT: sltu a5, a3, a2
+; CHECK-RV32-NEXT: addi a5, a5, -1
+; CHECK-RV32-NEXT: and a2, a5, a2
+; CHECK-RV32-NEXT: bltu a3, a4, .LBB43_2
; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a2, a5
-; CHECK-RV32-NEXT: .LBB43_2:
-; CHECK-RV32-NEXT: bltu a3, a4, .LBB43_4
-; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: mv a3, a4
-; CHECK-RV32-NEXT: .LBB43_4:
+; CHECK-RV32-NEXT: .LBB43_2:
; CHECK-RV32-NEXT: mul a4, a3, a1
; CHECK-RV32-NEXT: add a4, a0, a4
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-LABEL: strided_load_nxv16f64_allones_mask:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: csrr a4, vlenb
-; CHECK-RV64-NEXT: sub a5, a2, a4
-; CHECK-RV64-NEXT: li a3, 0
-; CHECK-RV64-NEXT: bltu a2, a5, .LBB43_2
+; CHECK-RV64-NEXT: sub a3, a2, a4
+; CHECK-RV64-NEXT: sltu a5, a2, a3
+; CHECK-RV64-NEXT: addi a5, a5, -1
+; CHECK-RV64-NEXT: and a3, a5, a3
+; CHECK-RV64-NEXT: bltu a2, a4, .LBB43_2
; CHECK-RV64-NEXT: # %bb.1:
-; CHECK-RV64-NEXT: mv a3, a5
-; CHECK-RV64-NEXT: .LBB43_2:
-; CHECK-RV64-NEXT: bltu a2, a4, .LBB43_4
-; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: mv a2, a4
-; CHECK-RV64-NEXT: .LBB43_4:
+; CHECK-RV64-NEXT: .LBB43_2:
; CHECK-RV64-NEXT: mul a4, a2, a1
; CHECK-RV64-NEXT: add a4, a0, a4
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
define <vscale x 16 x double> @strided_load_nxv17f64(double* %ptr, i64 %stride, <vscale x 17 x i1> %mask, i32 zeroext %evl, <vscale x 1 x double>* %hi_ptr) {
; CHECK-RV32-LABEL: strided_load_nxv17f64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: csrr a2, vlenb
-; CHECK-RV32-NEXT: slli a7, a2, 1
+; CHECK-RV32-NEXT: csrr a5, vlenb
+; CHECK-RV32-NEXT: slli a7, a5, 1
; CHECK-RV32-NEXT: vmv1r.v v8, v0
-; CHECK-RV32-NEXT: mv a5, a3
+; CHECK-RV32-NEXT: mv a2, a3
; CHECK-RV32-NEXT: bltu a3, a7, .LBB44_2
; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a5, a7
+; CHECK-RV32-NEXT: mv a2, a7
; CHECK-RV32-NEXT: .LBB44_2:
-; CHECK-RV32-NEXT: sub a6, a5, a2
-; CHECK-RV32-NEXT: li t0, 0
-; CHECK-RV32-NEXT: bltu a5, a6, .LBB44_4
-; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: mv t0, a6
-; CHECK-RV32-NEXT: .LBB44_4:
-; CHECK-RV32-NEXT: srli a6, a2, 3
+; CHECK-RV32-NEXT: sub a6, a2, a5
+; CHECK-RV32-NEXT: sltu t0, a2, a6
+; CHECK-RV32-NEXT: addi t0, t0, -1
+; CHECK-RV32-NEXT: and t0, t0, a6
+; CHECK-RV32-NEXT: srli a6, a5, 3
; CHECK-RV32-NEXT: vsetvli t1, zero, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a6
-; CHECK-RV32-NEXT: mv a6, a5
-; CHECK-RV32-NEXT: bltu a5, a2, .LBB44_6
-; CHECK-RV32-NEXT: # %bb.5:
; CHECK-RV32-NEXT: mv a6, a2
-; CHECK-RV32-NEXT: .LBB44_6:
+; CHECK-RV32-NEXT: bltu a2, a5, .LBB44_4
+; CHECK-RV32-NEXT: # %bb.3:
+; CHECK-RV32-NEXT: mv a6, a5
+; CHECK-RV32-NEXT: .LBB44_4:
; CHECK-RV32-NEXT: mul t1, a6, a1
; CHECK-RV32-NEXT: add t1, a0, t1
; CHECK-RV32-NEXT: vsetvli zero, t0, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (t1), a1, v0.t
-; CHECK-RV32-NEXT: li t0, 0
-; CHECK-RV32-NEXT: sub t1, a3, a7
-; CHECK-RV32-NEXT: srli a7, a2, 2
-; CHECK-RV32-NEXT: bltu a3, t1, .LBB44_8
-; CHECK-RV32-NEXT: # %bb.7:
-; CHECK-RV32-NEXT: mv t0, t1
-; CHECK-RV32-NEXT: .LBB44_8:
-; CHECK-RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a7
-; CHECK-RV32-NEXT: bltu t0, a2, .LBB44_10
-; CHECK-RV32-NEXT: # %bb.9:
-; CHECK-RV32-NEXT: mv t0, a2
-; CHECK-RV32-NEXT: .LBB44_10:
-; CHECK-RV32-NEXT: mul a2, a5, a1
+; CHECK-RV32-NEXT: sub a7, a3, a7
+; CHECK-RV32-NEXT: sltu a3, a3, a7
+; CHECK-RV32-NEXT: addi a3, a3, -1
+; CHECK-RV32-NEXT: and a3, a3, a7
+; CHECK-RV32-NEXT: bltu a3, a5, .LBB44_6
+; CHECK-RV32-NEXT: # %bb.5:
+; CHECK-RV32-NEXT: mv a3, a5
+; CHECK-RV32-NEXT: .LBB44_6:
+; CHECK-RV32-NEXT: srli a5, a5, 2
+; CHECK-RV32-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
+; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a5
+; CHECK-RV32-NEXT: mul a2, a2, a1
; CHECK-RV32-NEXT: add a2, a0, a2
-; CHECK-RV32-NEXT: vsetvli zero, t0, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v24, (a2), a1, v0.t
; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v0, v8
;
; CHECK-RV64-LABEL: strided_load_nxv17f64:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: csrr a4, vlenb
-; CHECK-RV64-NEXT: slli a7, a4, 1
+; CHECK-RV64-NEXT: csrr a5, vlenb
+; CHECK-RV64-NEXT: slli a7, a5, 1
; CHECK-RV64-NEXT: vmv1r.v v8, v0
-; CHECK-RV64-NEXT: mv a5, a2
+; CHECK-RV64-NEXT: mv a4, a2
; CHECK-RV64-NEXT: bltu a2, a7, .LBB44_2
; CHECK-RV64-NEXT: # %bb.1:
-; CHECK-RV64-NEXT: mv a5, a7
+; CHECK-RV64-NEXT: mv a4, a7
; CHECK-RV64-NEXT: .LBB44_2:
-; CHECK-RV64-NEXT: sub a6, a5, a4
-; CHECK-RV64-NEXT: li t0, 0
-; CHECK-RV64-NEXT: bltu a5, a6, .LBB44_4
-; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: mv t0, a6
-; CHECK-RV64-NEXT: .LBB44_4:
-; CHECK-RV64-NEXT: srli a6, a4, 3
+; CHECK-RV64-NEXT: sub a6, a4, a5
+; CHECK-RV64-NEXT: sltu t0, a4, a6
+; CHECK-RV64-NEXT: addi t0, t0, -1
+; CHECK-RV64-NEXT: and t0, t0, a6
+; CHECK-RV64-NEXT: srli a6, a5, 3
; CHECK-RV64-NEXT: vsetvli t1, zero, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a6
-; CHECK-RV64-NEXT: mv a6, a5
-; CHECK-RV64-NEXT: bltu a5, a4, .LBB44_6
-; CHECK-RV64-NEXT: # %bb.5:
; CHECK-RV64-NEXT: mv a6, a4
-; CHECK-RV64-NEXT: .LBB44_6:
+; CHECK-RV64-NEXT: bltu a4, a5, .LBB44_4
+; CHECK-RV64-NEXT: # %bb.3:
+; CHECK-RV64-NEXT: mv a6, a5
+; CHECK-RV64-NEXT: .LBB44_4:
; CHECK-RV64-NEXT: mul t1, a6, a1
; CHECK-RV64-NEXT: add t1, a0, t1
; CHECK-RV64-NEXT: vsetvli zero, t0, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (t1), a1, v0.t
-; CHECK-RV64-NEXT: li t0, 0
-; CHECK-RV64-NEXT: sub t1, a2, a7
-; CHECK-RV64-NEXT: srli a7, a4, 2
-; CHECK-RV64-NEXT: bltu a2, t1, .LBB44_8
-; CHECK-RV64-NEXT: # %bb.7:
-; CHECK-RV64-NEXT: mv t0, t1
-; CHECK-RV64-NEXT: .LBB44_8:
-; CHECK-RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a7
-; CHECK-RV64-NEXT: bltu t0, a4, .LBB44_10
-; CHECK-RV64-NEXT: # %bb.9:
-; CHECK-RV64-NEXT: mv t0, a4
-; CHECK-RV64-NEXT: .LBB44_10:
-; CHECK-RV64-NEXT: mul a2, a5, a1
-; CHECK-RV64-NEXT: add a2, a0, a2
-; CHECK-RV64-NEXT: vsetvli zero, t0, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vlse64.v v24, (a2), a1, v0.t
+; CHECK-RV64-NEXT: sub a7, a2, a7
+; CHECK-RV64-NEXT: sltu a2, a2, a7
+; CHECK-RV64-NEXT: addi a2, a2, -1
+; CHECK-RV64-NEXT: and a2, a2, a7
+; CHECK-RV64-NEXT: bltu a2, a5, .LBB44_6
+; CHECK-RV64-NEXT: # %bb.5:
+; CHECK-RV64-NEXT: mv a2, a5
+; CHECK-RV64-NEXT: .LBB44_6:
+; CHECK-RV64-NEXT: srli a5, a5, 2
+; CHECK-RV64-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
+; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a5
+; CHECK-RV64-NEXT: mul a4, a4, a1
+; CHECK-RV64-NEXT: add a4, a0, a4
+; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vlse64.v v24, (a4), a1, v0.t
; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; CHECK-RV64-NEXT: vmv1r.v v0, v8
; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t
define void @strided_store_nxv16f64(<vscale x 16 x double> %v, double* %ptr, i32 signext %stride, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_store_nxv16f64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: csrr a3, vlenb
-; CHECK-RV32-NEXT: mv a4, a2
-; CHECK-RV32-NEXT: bltu a2, a3, .LBB34_2
+; CHECK-RV32-NEXT: csrr a4, vlenb
+; CHECK-RV32-NEXT: mv a3, a2
+; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_2
; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a4, a3
+; CHECK-RV32-NEXT: mv a3, a4
; CHECK-RV32-NEXT: .LBB34_2:
-; CHECK-RV32-NEXT: li a5, 0
-; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1, v0.t
-; CHECK-RV32-NEXT: srli a6, a3, 3
-; CHECK-RV32-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
-; CHECK-RV32-NEXT: sub a3, a2, a3
-; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a6
-; CHECK-RV32-NEXT: bltu a2, a3, .LBB34_4
-; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: mv a5, a3
-; CHECK-RV32-NEXT: .LBB34_4:
-; CHECK-RV32-NEXT: mul a2, a4, a1
-; CHECK-RV32-NEXT: add a0, a0, a2
-; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-RV32-NEXT: sub a5, a2, a4
+; CHECK-RV32-NEXT: sltu a2, a2, a5
+; CHECK-RV32-NEXT: addi a2, a2, -1
+; CHECK-RV32-NEXT: and a2, a2, a5
+; CHECK-RV32-NEXT: srli a4, a4, 3
+; CHECK-RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a4
+; CHECK-RV32-NEXT: mul a3, a3, a1
+; CHECK-RV32-NEXT: add a0, a0, a3
+; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1, v0.t
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_store_nxv16f64:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: csrr a3, vlenb
-; CHECK-RV64-NEXT: mv a4, a2
-; CHECK-RV64-NEXT: bltu a2, a3, .LBB34_2
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: mv a3, a2
+; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_2
; CHECK-RV64-NEXT: # %bb.1:
-; CHECK-RV64-NEXT: mv a4, a3
+; CHECK-RV64-NEXT: mv a3, a4
; CHECK-RV64-NEXT: .LBB34_2:
-; CHECK-RV64-NEXT: li a5, 0
-; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1, v0.t
-; CHECK-RV64-NEXT: srli a6, a3, 3
-; CHECK-RV64-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
-; CHECK-RV64-NEXT: sub a3, a2, a3
-; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a6
-; CHECK-RV64-NEXT: bltu a2, a3, .LBB34_4
-; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: mv a5, a3
-; CHECK-RV64-NEXT: .LBB34_4:
-; CHECK-RV64-NEXT: mul a2, a4, a1
-; CHECK-RV64-NEXT: add a0, a0, a2
-; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-RV64-NEXT: sub a5, a2, a4
+; CHECK-RV64-NEXT: sltu a2, a2, a5
+; CHECK-RV64-NEXT: addi a2, a2, -1
+; CHECK-RV64-NEXT: and a2, a2, a5
+; CHECK-RV64-NEXT: srli a4, a4, 3
+; CHECK-RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a4
+; CHECK-RV64-NEXT: mul a3, a3, a1
+; CHECK-RV64-NEXT: add a0, a0, a3
+; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1, v0.t
; CHECK-RV64-NEXT: ret
call void @llvm.experimental.vp.strided.store.nxv16f64.p0f64.i32(<vscale x 16 x double> %v, double* %ptr, i32 %stride, <vscale x 16 x i1> %mask, i32 %evl)
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: mv a3, a4
; CHECK-RV32-NEXT: .LBB35_2:
-; CHECK-RV32-NEXT: li a5, 0
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT: sub a4, a2, a4
; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1
-; CHECK-RV32-NEXT: bltu a2, a4, .LBB35_4
-; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: mv a5, a4
-; CHECK-RV32-NEXT: .LBB35_4:
-; CHECK-RV32-NEXT: mul a2, a3, a1
-; CHECK-RV32-NEXT: add a0, a0, a2
-; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-RV32-NEXT: sub a4, a2, a4
+; CHECK-RV32-NEXT: sltu a2, a2, a4
+; CHECK-RV32-NEXT: addi a2, a2, -1
+; CHECK-RV32-NEXT: and a2, a2, a4
+; CHECK-RV32-NEXT: mul a3, a3, a1
+; CHECK-RV32-NEXT: add a0, a0, a3
+; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: mv a3, a4
; CHECK-RV64-NEXT: .LBB35_2:
-; CHECK-RV64-NEXT: li a5, 0
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV64-NEXT: sub a4, a2, a4
; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1
-; CHECK-RV64-NEXT: bltu a2, a4, .LBB35_4
-; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: mv a5, a4
-; CHECK-RV64-NEXT: .LBB35_4:
-; CHECK-RV64-NEXT: mul a2, a3, a1
-; CHECK-RV64-NEXT: add a0, a0, a2
-; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-RV64-NEXT: sub a4, a2, a4
+; CHECK-RV64-NEXT: sltu a2, a2, a4
+; CHECK-RV64-NEXT: addi a2, a2, -1
+; CHECK-RV64-NEXT: and a2, a2, a4
+; CHECK-RV64-NEXT: mul a3, a3, a1
+; CHECK-RV64-NEXT: add a0, a0, a3
+; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1
; CHECK-RV64-NEXT: ret
%one = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
define void @strided_store_nxv17f64(<vscale x 17 x double> %v, double* %ptr, i32 signext %stride, <vscale x 17 x i1> %mask, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_store_nxv17f64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: csrr a4, vlenb
-; CHECK-RV32-NEXT: slli a4, a4, 3
-; CHECK-RV32-NEXT: sub sp, sp, a4
; CHECK-RV32-NEXT: csrr a4, vlenb
-; CHECK-RV32-NEXT: slli a7, a4, 1
+; CHECK-RV32-NEXT: slli a6, a4, 1
; CHECK-RV32-NEXT: vmv1r.v v24, v0
-; CHECK-RV32-NEXT: addi a5, sp, 16
-; CHECK-RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
-; CHECK-RV32-NEXT: mv a6, a3
-; CHECK-RV32-NEXT: bltu a3, a7, .LBB36_2
+; CHECK-RV32-NEXT: mv a5, a3
+; CHECK-RV32-NEXT: bltu a3, a6, .LBB36_2
; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a6, a7
-; CHECK-RV32-NEXT: .LBB36_2:
; CHECK-RV32-NEXT: mv a5, a6
-; CHECK-RV32-NEXT: bltu a6, a4, .LBB36_4
+; CHECK-RV32-NEXT: .LBB36_2:
+; CHECK-RV32-NEXT: mv a7, a5
+; CHECK-RV32-NEXT: bltu a5, a4, .LBB36_4
; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: mv a5, a4
+; CHECK-RV32-NEXT: mv a7, a4
; CHECK-RV32-NEXT: .LBB36_4:
-; CHECK-RV32-NEXT: li t0, 0
-; CHECK-RV32-NEXT: vl8re64.v v16, (a0)
-; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-RV32-NEXT: addi sp, sp, -16
+; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT: csrr t0, vlenb
+; CHECK-RV32-NEXT: slli t0, t0, 3
+; CHECK-RV32-NEXT: sub sp, sp, t0
+; CHECK-RV32-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV32-NEXT: addi a0, sp, 16
+; CHECK-RV32-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v0, v24
; CHECK-RV32-NEXT: vsse64.v v8, (a1), a2, v0.t
-; CHECK-RV32-NEXT: sub a7, a3, a7
-; CHECK-RV32-NEXT: srli a0, a4, 2
-; CHECK-RV32-NEXT: bltu a3, a7, .LBB36_6
+; CHECK-RV32-NEXT: sub a0, a5, a4
+; CHECK-RV32-NEXT: sltu t0, a5, a0
+; CHECK-RV32-NEXT: addi t0, t0, -1
+; CHECK-RV32-NEXT: and a0, t0, a0
+; CHECK-RV32-NEXT: srli t0, a4, 3
+; CHECK-RV32-NEXT: vsetvli t1, zero, e8, mf4, ta, ma
+; CHECK-RV32-NEXT: vslidedown.vx v0, v24, t0
+; CHECK-RV32-NEXT: mul a7, a7, a2
+; CHECK-RV32-NEXT: add a7, a1, a7
+; CHECK-RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32-NEXT: sub a0, a3, a6
+; CHECK-RV32-NEXT: sltu a3, a3, a0
+; CHECK-RV32-NEXT: addi a3, a3, -1
+; CHECK-RV32-NEXT: and a0, a3, a0
+; CHECK-RV32-NEXT: vsse64.v v16, (a7), a2, v0.t
+; CHECK-RV32-NEXT: bltu a0, a4, .LBB36_6
; CHECK-RV32-NEXT: # %bb.5:
-; CHECK-RV32-NEXT: mv t0, a7
+; CHECK-RV32-NEXT: mv a0, a4
; CHECK-RV32-NEXT: .LBB36_6:
-; CHECK-RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vx v0, v24, a0
-; CHECK-RV32-NEXT: bltu t0, a4, .LBB36_8
-; CHECK-RV32-NEXT: # %bb.7:
-; CHECK-RV32-NEXT: mv t0, a4
-; CHECK-RV32-NEXT: .LBB36_8:
-; CHECK-RV32-NEXT: li a0, 0
-; CHECK-RV32-NEXT: mul a3, a6, a2
-; CHECK-RV32-NEXT: add a7, a1, a3
-; CHECK-RV32-NEXT: vsetvli zero, t0, e64, m8, ta, ma
-; CHECK-RV32-NEXT: sub a3, a6, a4
-; CHECK-RV32-NEXT: vsse64.v v16, (a7), a2, v0.t
-; CHECK-RV32-NEXT: bltu a6, a3, .LBB36_10
-; CHECK-RV32-NEXT: # %bb.9:
-; CHECK-RV32-NEXT: mv a0, a3
-; CHECK-RV32-NEXT: .LBB36_10:
-; CHECK-RV32-NEXT: srli a3, a4, 3
-; CHECK-RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; CHECK-RV32-NEXT: srli a3, a4, 2
+; CHECK-RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-RV32-NEXT: vslidedown.vx v0, v24, a3
; CHECK-RV32-NEXT: mul a3, a5, a2
; CHECK-RV32-NEXT: add a1, a1, a3
;
; CHECK-RV64-LABEL: strided_store_nxv17f64:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: addi sp, sp, -16
-; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV64-NEXT: csrr a4, vlenb
-; CHECK-RV64-NEXT: slli a4, a4, 3
-; CHECK-RV64-NEXT: sub sp, sp, a4
; CHECK-RV64-NEXT: csrr a4, vlenb
-; CHECK-RV64-NEXT: slli a7, a4, 1
+; CHECK-RV64-NEXT: slli a6, a4, 1
; CHECK-RV64-NEXT: vmv1r.v v24, v0
-; CHECK-RV64-NEXT: addi a5, sp, 16
-; CHECK-RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
-; CHECK-RV64-NEXT: mv a6, a3
-; CHECK-RV64-NEXT: bltu a3, a7, .LBB36_2
+; CHECK-RV64-NEXT: mv a5, a3
+; CHECK-RV64-NEXT: bltu a3, a6, .LBB36_2
; CHECK-RV64-NEXT: # %bb.1:
-; CHECK-RV64-NEXT: mv a6, a7
-; CHECK-RV64-NEXT: .LBB36_2:
; CHECK-RV64-NEXT: mv a5, a6
-; CHECK-RV64-NEXT: bltu a6, a4, .LBB36_4
+; CHECK-RV64-NEXT: .LBB36_2:
+; CHECK-RV64-NEXT: mv a7, a5
+; CHECK-RV64-NEXT: bltu a5, a4, .LBB36_4
; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: mv a5, a4
+; CHECK-RV64-NEXT: mv a7, a4
; CHECK-RV64-NEXT: .LBB36_4:
-; CHECK-RV64-NEXT: li t0, 0
-; CHECK-RV64-NEXT: vl8re64.v v16, (a0)
-; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-RV64-NEXT: addi sp, sp, -16
+; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64-NEXT: csrr t0, vlenb
+; CHECK-RV64-NEXT: slli t0, t0, 3
+; CHECK-RV64-NEXT: sub sp, sp, t0
+; CHECK-RV64-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64-NEXT: addi a0, sp, 16
+; CHECK-RV64-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV64-NEXT: vmv1r.v v0, v24
; CHECK-RV64-NEXT: vsse64.v v8, (a1), a2, v0.t
-; CHECK-RV64-NEXT: sub a7, a3, a7
-; CHECK-RV64-NEXT: srli a0, a4, 2
-; CHECK-RV64-NEXT: bltu a3, a7, .LBB36_6
+; CHECK-RV64-NEXT: sub a0, a5, a4
+; CHECK-RV64-NEXT: sltu t0, a5, a0
+; CHECK-RV64-NEXT: addi t0, t0, -1
+; CHECK-RV64-NEXT: and a0, t0, a0
+; CHECK-RV64-NEXT: srli t0, a4, 3
+; CHECK-RV64-NEXT: vsetvli t1, zero, e8, mf4, ta, ma
+; CHECK-RV64-NEXT: vslidedown.vx v0, v24, t0
+; CHECK-RV64-NEXT: mul a7, a7, a2
+; CHECK-RV64-NEXT: add a7, a1, a7
+; CHECK-RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64-NEXT: sub a0, a3, a6
+; CHECK-RV64-NEXT: sltu a3, a3, a0
+; CHECK-RV64-NEXT: addi a3, a3, -1
+; CHECK-RV64-NEXT: and a0, a3, a0
+; CHECK-RV64-NEXT: vsse64.v v16, (a7), a2, v0.t
+; CHECK-RV64-NEXT: bltu a0, a4, .LBB36_6
; CHECK-RV64-NEXT: # %bb.5:
-; CHECK-RV64-NEXT: mv t0, a7
+; CHECK-RV64-NEXT: mv a0, a4
; CHECK-RV64-NEXT: .LBB36_6:
-; CHECK-RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vx v0, v24, a0
-; CHECK-RV64-NEXT: bltu t0, a4, .LBB36_8
-; CHECK-RV64-NEXT: # %bb.7:
-; CHECK-RV64-NEXT: mv t0, a4
-; CHECK-RV64-NEXT: .LBB36_8:
-; CHECK-RV64-NEXT: li a0, 0
-; CHECK-RV64-NEXT: mul a3, a6, a2
-; CHECK-RV64-NEXT: add a7, a1, a3
-; CHECK-RV64-NEXT: vsetvli zero, t0, e64, m8, ta, ma
-; CHECK-RV64-NEXT: sub a3, a6, a4
-; CHECK-RV64-NEXT: vsse64.v v16, (a7), a2, v0.t
-; CHECK-RV64-NEXT: bltu a6, a3, .LBB36_10
-; CHECK-RV64-NEXT: # %bb.9:
-; CHECK-RV64-NEXT: mv a0, a3
-; CHECK-RV64-NEXT: .LBB36_10:
-; CHECK-RV64-NEXT: srli a3, a4, 3
-; CHECK-RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; CHECK-RV64-NEXT: srli a3, a4, 2
+; CHECK-RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-RV64-NEXT: vslidedown.vx v0, v24, a3
; CHECK-RV64-NEXT: mul a3, a5, a2
; CHECK-RV64-NEXT: add a1, a1, a3
define <vscale x 128 x i8> @vadd_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vadd_vi_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: bltu a1, a2, .LBB50_2
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub a2, a1, a0
+; CHECK-NEXT: sltu a3, a1, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a1, a0, .LBB50_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: .LBB50_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli a5, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v24, (a0)
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: sub a0, a1, a2
-; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
-; CHECK-NEXT: bltu a1, a0, .LBB50_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a0
-; CHECK-NEXT: .LBB50_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v16, -1
; CHECK-NEXT: bltu a0, a1, .LBB51_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB51_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vadd.vi v8, v8, -1
-; CHECK-NEXT: bltu a0, a1, .LBB51_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB51_4:
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: vadd.vi v16, v16, -1
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK-LABEL: vadd_vi_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB118_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB118_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB118_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB118_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB118_4:
+; CHECK-NEXT: .LBB118_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v16, -1
; CHECK-NEXT: bltu a0, a1, .LBB119_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB119_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vadd.vi v8, v8, -1
-; CHECK-NEXT: bltu a0, a1, .LBB119_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB119_4:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vadd.vi v16, v16, -1
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a4, a0, 2
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB120_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB120_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB120_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB120_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB120_4:
+; CHECK-NEXT: .LBB120_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
; CHECK-LABEL: vfabs_vv_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB32_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfabs.v v8, v8, v0.t
; CHECK-LABEL: vfabs_vv_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vfabs.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB33_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: bltu a0, a1, .LBB33_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB33_4:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 48
+; CHECK-NEXT: li a3, 40
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; CHECK-NEXT: slli a5, a1, 3
-; CHECK-NEXT: add a6, a2, a5
-; CHECK-NEXT: vl8re64.v v8, (a6)
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 3
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
-; CHECK-NEXT: add a5, a0, a5
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a5, a2, a3
; CHECK-NEXT: vl8re64.v v8, (a5)
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 40
+; CHECK-NEXT: li a6, 24
; CHECK-NEXT: mul a5, a5, a6
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: srli a6, a1, 3
; CHECK-NEXT: sub a5, a4, a1
+; CHECK-NEXT: sltu a6, a4, a5
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a5, a6, a5
+; CHECK-NEXT: srli a6, a1, 3
+; CHECK-NEXT: add a3, a0, a3
+; CHECK-NEXT: vl8re64.v v8, (a3)
+; CHECK-NEXT: vl8re64.v v16, (a2)
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vslidedown.vx v0, v0, a6
-; CHECK-NEXT: bltu a4, a5, .LBB92_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a5
-; CHECK-NEXT: .LBB92_2:
-; CHECK-NEXT: vl8re64.v v8, (a2)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu
+; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, mu
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a2, 24
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 40
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 40
+; CHECK-NEXT: li a2, 24
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a1, .LBB92_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a4, a1, .LBB92_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: .LBB92_4:
+; CHECK-NEXT: .LBB92_2:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
+; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 48
+; CHECK-NEXT: li a1, 40
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a3, a1, 3
; CHECK-NEXT: add a5, a2, a3
; CHECK-NEXT: vl8re64.v v24, (a5)
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 3
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: add a3, a0, a3
-; CHECK-NEXT: vl8re64.v v24, (a3)
-; CHECK-NEXT: sub a5, a4, a1
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 4
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: bltu a4, a5, .LBB93_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a5
-; CHECK-NEXT: .LBB93_2:
+; CHECK-NEXT: vl8re64.v v16, (a3)
+; CHECK-NEXT: sub a3, a4, a1
+; CHECK-NEXT: sltu a5, a4, a3
+; CHECK-NEXT: addi a5, a5, -1
; CHECK-NEXT: vl8re64.v v8, (a2)
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v0, (a0)
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-NEXT: and a0, a5, a3
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v24, v16, v8
-; CHECK-NEXT: bltu a4, a1, .LBB93_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: vfmadd.vv v16, v8, v24
+; CHECK-NEXT: bltu a4, a1, .LBB93_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: .LBB93_4:
+; CHECK-NEXT: .LBB93_2:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v0, v16, v8
+; CHECK-NEXT: vfmadd.vv v0, v24, v8
; CHECK-NEXT: vmv.v.v v8, v0
-; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 48
+; CHECK-NEXT: li a3, 40
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; CHECK-NEXT: slli a5, a1, 3
-; CHECK-NEXT: add a6, a2, a5
-; CHECK-NEXT: vl8re64.v v8, (a6)
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 3
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
-; CHECK-NEXT: add a5, a0, a5
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a5, a2, a3
; CHECK-NEXT: vl8re64.v v8, (a5)
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 40
+; CHECK-NEXT: li a6, 24
; CHECK-NEXT: mul a5, a5, a6
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: srli a6, a1, 3
; CHECK-NEXT: sub a5, a4, a1
+; CHECK-NEXT: sltu a6, a4, a5
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a5, a6, a5
+; CHECK-NEXT: srli a6, a1, 3
+; CHECK-NEXT: add a3, a0, a3
+; CHECK-NEXT: vl8re64.v v8, (a3)
+; CHECK-NEXT: vl8re64.v v16, (a2)
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vslidedown.vx v0, v0, a6
-; CHECK-NEXT: bltu a4, a5, .LBB92_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a5
-; CHECK-NEXT: .LBB92_2:
-; CHECK-NEXT: vl8re64.v v8, (a2)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu
+; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, mu
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a2, 24
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 40
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 40
+; CHECK-NEXT: li a2, 24
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a1, .LBB92_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a4, a1, .LBB92_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: .LBB92_4:
+; CHECK-NEXT: .LBB92_2:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
+; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 48
+; CHECK-NEXT: li a1, 40
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a3, a1, 3
; CHECK-NEXT: add a5, a2, a3
; CHECK-NEXT: vl8re64.v v24, (a5)
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 3
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: add a3, a0, a3
-; CHECK-NEXT: vl8re64.v v24, (a3)
-; CHECK-NEXT: sub a5, a4, a1
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 4
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: bltu a4, a5, .LBB93_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a5
-; CHECK-NEXT: .LBB93_2:
+; CHECK-NEXT: vl8re64.v v16, (a3)
+; CHECK-NEXT: sub a3, a4, a1
+; CHECK-NEXT: sltu a5, a4, a3
+; CHECK-NEXT: addi a5, a5, -1
; CHECK-NEXT: vl8re64.v v8, (a2)
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v0, (a0)
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-NEXT: and a0, a5, a3
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v24, v16, v8
-; CHECK-NEXT: bltu a4, a1, .LBB93_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: vfmadd.vv v16, v8, v24
+; CHECK-NEXT: bltu a4, a1, .LBB93_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: .LBB93_4:
+; CHECK-NEXT: .LBB93_2:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmadd.vv v0, v16, v8
+; CHECK-NEXT: vfmadd.vv v0, v24, v8
; CHECK-NEXT: vmv.v.v v8, v0
-; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-LABEL: vfneg_vv_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB32_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfneg.v v8, v8, v0.t
; CHECK-LABEL: vfneg_vv_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vfneg.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB33_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: bltu a0, a1, .LBB33_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB33_4:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vfneg.v v16, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-LABEL: vfpext_nxv32f16_nxv32f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB7_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v16, v12, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB7_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB7_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB7_4:
+; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB25_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB25_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-LABEL: vfptosi_nxv32i32_nxv32f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB27_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16
; CHECK-NEXT: ret
%v = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %evl)
ret <vscale x 32 x i32> %v
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB25_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB25_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-LABEL: vfptoui_nxv32i32_nxv32f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB27_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16
; CHECK-NEXT: ret
%v = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %evl)
ret <vscale x 32 x i32> %v
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB7_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v12, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB7_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB7_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB7_4:
+; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a4, a1, 1
-; CHECK-NEXT: srli a3, a1, 3
-; CHECK-NEXT: mv a5, a2
-; CHECK-NEXT: bltu a2, a4, .LBB8_2
+; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v25, v0, a3
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a3, a0, a3
+; CHECK-NEXT: vl8re64.v v8, (a3)
+; CHECK-NEXT: slli a3, a1, 1
+; CHECK-NEXT: sub a4, a2, a3
+; CHECK-NEXT: sltu a5, a2, a4
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a4, a5, a4
+; CHECK-NEXT: sub a5, a4, a1
+; CHECK-NEXT: sltu a6, a4, a5
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a6, a6, a5
+; CHECK-NEXT: srli a5, a1, 3
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v25, a5
+; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
+; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t
+; CHECK-NEXT: bltu a4, a1, .LBB8_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: mv a4, a1
; CHECK-NEXT: .LBB8_2:
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a7, a5, a1
-; CHECK-NEXT: vslidedown.vx v0, v24, a3
-; CHECK-NEXT: bltu a5, a7, .LBB8_4
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v26, v1, a5
+; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t
+; CHECK-NEXT: bltu a2, a3, .LBB8_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a6, a7
+; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB8_4:
-; CHECK-NEXT: srli a7, a1, 2
-; CHECK-NEXT: slli t0, a1, 3
-; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v12, v16, v0.t
-; CHECK-NEXT: bltu a5, a1, .LBB8_6
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB8_6:
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: vsetvli t1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v1, v24, a7
-; CHECK-NEXT: add a7, a0, t0
-; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
-; CHECK-NEXT: sub a4, a2, a4
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 3
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t
-; CHECK-NEXT: bltu a2, a4, .LBB8_8
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mv a6, a4
-; CHECK-NEXT: .LBB8_8:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vl8re64.v v16, (a7)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: sub a4, a6, a1
-; CHECK-NEXT: vslidedown.vx v0, v1, a3
-; CHECK-NEXT: bltu a6, a4, .LBB8_10
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: mv a2, a4
-; CHECK-NEXT: .LBB8_10:
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: sub a0, a2, a1
+; CHECK-NEXT: sltu a3, a2, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a0, a3, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t
-; CHECK-NEXT: bltu a6, a1, .LBB8_12
-; CHECK-NEXT: # %bb.11:
-; CHECK-NEXT: mv a6, a1
-; CHECK-NEXT: .LBB8_12:
-; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
+; CHECK-NEXT: vfncvt.f.f.w v12, v24, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB8_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB8_6:
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfncvt.f.f.w v8, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-LABEL: vfsqrt_vv_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB32_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfsqrt.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
; CHECK-LABEL: vfsqrt_vv_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB33_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsqrt.v v8, v8
-; CHECK-NEXT: bltu a0, a1, .LBB33_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB33_4:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vfsqrt.v v16, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
define <vscale x 128 x i8> @vmax_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmax_vx_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 3
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a2, a3, .LBB34_2
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a3, a2, a1
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB34_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB34_2:
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v24, (a1)
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: sub a1, a2, a3
-; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
-; CHECK-NEXT: bltu a2, a1, .LBB34_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB34_4:
-; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmax.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB35_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: bltu a1, a2, .LBB35_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB35_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: vmax.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK-LABEL: vmax_vx_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a5, a2, 2
+; CHECK-NEXT: srli a3, a2, 2
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a3
; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB80_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB80_2:
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB80_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB80_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB80_4:
+; CHECK-NEXT: .LBB80_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmax.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB81_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB81_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: bltu a1, a2, .LBB81_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB81_4:
-; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT: vmax.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a5, a1, 2
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB82_2:
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB82_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_4:
+; CHECK-NEXT: .LBB82_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
define <vscale x 128 x i8> @vmaxu_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmaxu_vx_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 3
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a2, a3, .LBB34_2
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a3, a2, a1
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB34_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB34_2:
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v24, (a1)
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: sub a1, a2, a3
-; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
-; CHECK-NEXT: bltu a2, a1, .LBB34_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB34_4:
-; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmaxu.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB35_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmaxu.vx v8, v8, a0
-; CHECK-NEXT: bltu a1, a2, .LBB35_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB35_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: vmaxu.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK-LABEL: vmaxu_vx_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a5, a2, 2
+; CHECK-NEXT: srli a3, a2, 2
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a3
; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB80_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB80_2:
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB80_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB80_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB80_4:
+; CHECK-NEXT: .LBB80_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmaxu.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB81_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB81_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmaxu.vx v8, v8, a0
-; CHECK-NEXT: bltu a1, a2, .LBB81_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB81_4:
-; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT: vmaxu.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a5, a1, 2
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB82_2:
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB82_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_4:
+; CHECK-NEXT: .LBB82_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
define <vscale x 128 x i8> @vmin_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmin_vx_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 3
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a2, a3, .LBB34_2
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a3, a2, a1
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB34_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB34_2:
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v24, (a1)
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: sub a1, a2, a3
-; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
-; CHECK-NEXT: bltu a2, a1, .LBB34_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB34_4:
-; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmin.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB35_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: bltu a1, a2, .LBB35_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB35_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: vmin.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK-LABEL: vmin_vx_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a5, a2, 2
+; CHECK-NEXT: srli a3, a2, 2
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a3
; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB80_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB80_2:
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB80_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB80_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB80_4:
+; CHECK-NEXT: .LBB80_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmin.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB81_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB81_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: bltu a1, a2, .LBB81_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB81_4:
-; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT: vmin.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a5, a1, 2
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB82_2:
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB82_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_4:
+; CHECK-NEXT: .LBB82_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
define <vscale x 128 x i8> @vminu_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vminu_vx_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 3
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a2, a3, .LBB34_2
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a3, a2, a1
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB34_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB34_2:
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v24, (a1)
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: sub a1, a2, a3
-; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
-; CHECK-NEXT: bltu a2, a1, .LBB34_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB34_4:
-; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vminu.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB35_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: bltu a1, a2, .LBB35_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB35_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: vminu.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; CHECK-LABEL: vminu_vx_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a5, a2, 2
+; CHECK-NEXT: srli a3, a2, 2
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a3
; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB80_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB80_2:
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB80_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB80_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB80_4:
+; CHECK-NEXT: .LBB80_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vminu.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB81_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB81_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: bltu a1, a2, .LBB81_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB81_4:
-; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT: vminu.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a5, a1, 2
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB82_2:
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB82_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_4:
+; CHECK-NEXT: .LBB82_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
; RV32-LABEL: vpgather_baseidx_nxv32i8:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v12, v0
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: srli a5, a2, 2
-; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a2, a3, 1
; RV32-NEXT: sub a4, a1, a2
-; RV32-NEXT: vslidedown.vx v0, v0, a5
-; RV32-NEXT: bltu a1, a4, .LBB12_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a3, a4
-; RV32-NEXT: .LBB12_2:
-; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
+; RV32-NEXT: sltu a5, a1, a4
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
+; RV32-NEXT: srli a3, a3, 2
+; RV32-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a3
+; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v24, v10
-; RV32-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; RV32-NEXT: vsetvli zero, a4, e8, m2, ta, ma
; RV32-NEXT: vluxei32.v v18, (a0), v24, v0.t
-; RV32-NEXT: bltu a1, a2, .LBB12_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a1, a2, .LBB12_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB12_4:
+; RV32-NEXT: .LBB12_2:
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v24, v8
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
;
; RV64-LABEL: vpgather_baseidx_nxv32i8:
; RV64: # %bb.0:
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a5, a3, 1
-; RV64-NEXT: sub a6, a1, a5
-; RV64-NEXT: vmv1r.v v12, v0
-; RV64-NEXT: li a4, 0
-; RV64-NEXT: li a2, 0
-; RV64-NEXT: bltu a1, a6, .LBB12_2
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a4, a2, 1
+; RV64-NEXT: sub a3, a1, a4
+; RV64-NEXT: sltu a5, a1, a3
+; RV64-NEXT: addi a5, a5, -1
+; RV64-NEXT: and a3, a5, a3
+; RV64-NEXT: vmv1r.v v17, v0
+; RV64-NEXT: mv a5, a3
+; RV64-NEXT: bltu a3, a2, .LBB12_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a6
+; RV64-NEXT: mv a5, a2
; RV64-NEXT: .LBB12_2:
-; RV64-NEXT: sub a6, a2, a3
-; RV64-NEXT: mv a7, a4
-; RV64-NEXT: bltu a2, a6, .LBB12_4
+; RV64-NEXT: srli a6, a2, 2
+; RV64-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vx v16, v17, a6
+; RV64-NEXT: vsetvli a6, zero, e64, m8, ta, ma
+; RV64-NEXT: vsext.vf8 v24, v10
+; RV64-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; RV64-NEXT: vmv1r.v v0, v16
+; RV64-NEXT: vluxei64.v v14, (a0), v24, v0.t
+; RV64-NEXT: bltu a1, a4, .LBB12_4
; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a7, a6
+; RV64-NEXT: mv a1, a4
; RV64-NEXT: .LBB12_4:
-; RV64-NEXT: srli a6, a3, 2
-; RV64-NEXT: vsetvli t0, zero, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vx v13, v12, a6
-; RV64-NEXT: srli a6, a3, 3
-; RV64-NEXT: vsetvli t0, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v13, a6
-; RV64-NEXT: vsetvli t0, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf8 v24, v11
-; RV64-NEXT: vsetvli zero, a7, e8, m1, ta, ma
-; RV64-NEXT: vluxei64.v v19, (a0), v24, v0.t
-; RV64-NEXT: bltu a1, a5, .LBB12_6
+; RV64-NEXT: sub a4, a1, a2
+; RV64-NEXT: sltu a5, a1, a4
+; RV64-NEXT: addi a5, a5, -1
+; RV64-NEXT: and a5, a5, a4
+; RV64-NEXT: srli a4, a2, 3
+; RV64-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v17, a4
+; RV64-NEXT: vsetvli a6, zero, e64, m8, ta, ma
+; RV64-NEXT: vsext.vf8 v24, v9
+; RV64-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t
+; RV64-NEXT: bltu a1, a2, .LBB12_6
; RV64-NEXT: # %bb.5:
-; RV64-NEXT: mv a1, a5
+; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB12_6:
-; RV64-NEXT: sub a5, a1, a3
-; RV64-NEXT: bltu a1, a5, .LBB12_8
-; RV64-NEXT: # %bb.7:
-; RV64-NEXT: mv a4, a5
-; RV64-NEXT: .LBB12_8:
-; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v12, a6
; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf8 v24, v9
-; RV64-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; RV64-NEXT: vluxei64.v v17, (a0), v24, v0.t
-; RV64-NEXT: bltu a1, a3, .LBB12_10
-; RV64-NEXT: # %bb.9:
-; RV64-NEXT: mv a1, a3
-; RV64-NEXT: .LBB12_10:
-; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v8
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; RV64-NEXT: vmv1r.v v0, v12
-; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
-; RV64-NEXT: bltu a2, a3, .LBB12_12
-; RV64-NEXT: # %bb.11:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB12_12:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf8 v24, v10
-; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma
-; RV64-NEXT: vmv1r.v v0, v13
-; RV64-NEXT: vluxei64.v v18, (a0), v24, v0.t
-; RV64-NEXT: vmv4r.v v8, v16
+; RV64-NEXT: vmv1r.v v0, v17
+; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t
+; RV64-NEXT: sub a1, a3, a2
+; RV64-NEXT: sltu a2, a3, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a1, a2, a1
+; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v16, a4
+; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT: vsext.vf8 v16, v11
+; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT: vluxei64.v v15, (a0), v16, v0.t
+; RV64-NEXT: vmv4r.v v8, v12
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i8, i8* %base, <vscale x 32 x i8> %idxs
%v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
; RV32-LABEL: vpgather_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v24, v0
-; RV32-NEXT: li a2, 0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a4, a1, 3
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: sub a3, a0, a1
-; RV32-NEXT: vslidedown.vx v0, v0, a4
-; RV32-NEXT: bltu a0, a3, .LBB102_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: .LBB102_2:
+; RV32-NEXT: sub a2, a0, a1
+; RV32-NEXT: sltu a3, a0, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a3, a2
+; RV32-NEXT: srli a3, a1, 3
+; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (zero), v12, v0.t
-; RV32-NEXT: bltu a0, a1, .LBB102_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a0, a1, .LBB102_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB102_4:
+; RV32-NEXT: .LBB102_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t
; RV64-LABEL: vpgather_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: li a2, 0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a4, a1, 3
-; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT: sub a3, a0, a1
-; RV64-NEXT: vslidedown.vx v0, v0, a4
-; RV64-NEXT: bltu a0, a3, .LBB102_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a3
-; RV64-NEXT: .LBB102_2:
+; RV64-NEXT: sub a2, a0, a1
+; RV64-NEXT: sltu a3, a0, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
+; RV64-NEXT: srli a3, a1, 3
+; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a3
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t
-; RV64-NEXT: bltu a0, a1, .LBB102_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a0, a1, .LBB102_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB102_4:
+; RV64-NEXT: .LBB102_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v12, v0
-; RV32-NEXT: li a3, 0
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v16, v8
; RV32-NEXT: vsll.vi v24, v16, 3
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: srli a5, a2, 3
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: sub a4, a1, a2
-; RV32-NEXT: vslidedown.vx v0, v0, a5
-; RV32-NEXT: bltu a1, a4, .LBB103_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a3, a4
-; RV32-NEXT: .LBB103_2:
+; RV32-NEXT: sub a3, a1, a2
+; RV32-NEXT: sltu a4, a1, a3
+; RV32-NEXT: addi a4, a4, -1
+; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: srli a4, a2, 3
+; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a4
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
-; RV32-NEXT: bltu a1, a2, .LBB103_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a1, a2, .LBB103_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB103_4:
+; RV32-NEXT: .LBB103_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v12, v0
-; RV64-NEXT: li a3, 0
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf4 v24, v8
+; RV64-NEXT: vsext.vf4 v16, v8
+; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: srli a5, a2, 3
-; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV64-NEXT: sub a4, a1, a2
-; RV64-NEXT: vslidedown.vx v0, v0, a5
-; RV64-NEXT: bltu a1, a4, .LBB103_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a3, a4
-; RV64-NEXT: .LBB103_2:
-; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, ma
-; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: srli a4, a2, 3
+; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
-; RV64-NEXT: bltu a1, a2, .LBB103_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB103_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a2
-; RV64-NEXT: .LBB103_4:
+; RV64-NEXT: .LBB103_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v12, v0
-; RV32-NEXT: li a3, 0
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v16, v8
; RV32-NEXT: vsll.vi v24, v16, 3
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: srli a5, a2, 3
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: sub a4, a1, a2
-; RV32-NEXT: vslidedown.vx v0, v0, a5
-; RV32-NEXT: bltu a1, a4, .LBB104_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a3, a4
-; RV32-NEXT: .LBB104_2:
+; RV32-NEXT: sub a3, a1, a2
+; RV32-NEXT: sltu a4, a1, a3
+; RV32-NEXT: addi a4, a4, -1
+; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: srli a4, a2, 3
+; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a4
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
-; RV32-NEXT: bltu a1, a2, .LBB104_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a1, a2, .LBB104_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB104_4:
+; RV32-NEXT: .LBB104_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v12, v0
-; RV64-NEXT: li a3, 0
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: vsext.vf4 v24, v8
+; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: srli a5, a2, 3
-; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV64-NEXT: sub a4, a1, a2
-; RV64-NEXT: vslidedown.vx v0, v0, a5
-; RV64-NEXT: bltu a1, a4, .LBB104_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a3, a4
-; RV64-NEXT: .LBB104_2:
-; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, ma
-; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: srli a4, a2, 3
+; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
-; RV64-NEXT: bltu a1, a2, .LBB104_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB104_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a2
-; RV64-NEXT: .LBB104_4:
+; RV64-NEXT: .LBB104_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v12, v0
-; RV32-NEXT: li a3, 0
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vzext.vf2 v16, v8
; RV32-NEXT: vsll.vi v24, v16, 3
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: srli a5, a2, 3
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: sub a4, a1, a2
-; RV32-NEXT: vslidedown.vx v0, v0, a5
-; RV32-NEXT: bltu a1, a4, .LBB105_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a3, a4
-; RV32-NEXT: .LBB105_2:
+; RV32-NEXT: sub a3, a1, a2
+; RV32-NEXT: sltu a4, a1, a3
+; RV32-NEXT: addi a4, a4, -1
+; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: srli a4, a2, 3
+; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a4
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
-; RV32-NEXT: bltu a1, a2, .LBB105_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: bltu a1, a2, .LBB105_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
-; RV32-NEXT: .LBB105_4:
+; RV32-NEXT: .LBB105_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v12, v0
-; RV64-NEXT: li a3, 0
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vzext.vf4 v16, v10
; RV64-NEXT: vzext.vf4 v24, v8
+; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: srli a5, a2, 3
-; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV64-NEXT: sub a4, a1, a2
-; RV64-NEXT: vslidedown.vx v0, v0, a5
-; RV64-NEXT: bltu a1, a4, .LBB105_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a3, a4
-; RV64-NEXT: .LBB105_2:
-; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, ma
-; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: srli a4, a2, 3
+; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
-; RV64-NEXT: bltu a1, a2, .LBB105_4
-; RV64-NEXT: # %bb.3:
+; RV64-NEXT: bltu a1, a2, .LBB105_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a2
-; RV64-NEXT: .LBB105_4:
+; RV64-NEXT: .LBB105_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; CHECK-LABEL: vpload_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v8, v0
-; CHECK-NEXT: li a3, 0
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a5, a2, 3
-; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB37_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB37_2:
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a4, a2, 3
+; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a4
; CHECK-NEXT: slli a4, a2, 3
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v16, (a4), v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB37_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB37_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB37_4:
+; CHECK-NEXT: .LBB37_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a5
; CHECK-NEXT: .LBB38_2:
-; CHECK-NEXT: sub a7, a4, a3
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: bltu a4, a7, .LBB38_4
+; CHECK-NEXT: sub a6, a4, a3
+; CHECK-NEXT: sltu a7, a4, a6
+; CHECK-NEXT: addi a7, a7, -1
+; CHECK-NEXT: and a6, a7, a6
+; CHECK-NEXT: srli a7, a3, 3
+; CHECK-NEXT: vsetvli t0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a7
+; CHECK-NEXT: slli a7, a3, 3
+; CHECK-NEXT: add a7, a0, a7
+; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a7), v0.t
+; CHECK-NEXT: sub a5, a2, a5
+; CHECK-NEXT: sltu a2, a2, a5
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a2, a2, a5
+; CHECK-NEXT: bltu a2, a3, .LBB38_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a6, a7
+; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB38_4:
-; CHECK-NEXT: li a7, 0
-; CHECK-NEXT: srli t0, a3, 3
-; CHECK-NEXT: vsetvli t1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, t0
-; CHECK-NEXT: slli t0, a3, 3
-; CHECK-NEXT: add t0, a0, t0
-; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (t0), v0.t
-; CHECK-NEXT: srli a6, a3, 2
-; CHECK-NEXT: sub t0, a2, a5
+; CHECK-NEXT: srli a5, a3, 2
+; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a5
; CHECK-NEXT: slli a5, a3, 4
-; CHECK-NEXT: bltu a2, t0, .LBB38_6
+; CHECK-NEXT: add a5, a0, a5
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v24, (a5), v0.t
+; CHECK-NEXT: bltu a4, a3, .LBB38_6
; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: mv a7, t0
-; CHECK-NEXT: .LBB38_6:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a6
-; CHECK-NEXT: add a2, a0, a5
-; CHECK-NEXT: bltu a7, a3, .LBB38_8
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: .LBB38_8:
-; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a2), v0.t
-; CHECK-NEXT: bltu a4, a3, .LBB38_10
-; CHECK-NEXT: # %bb.9:
; CHECK-NEXT: mv a4, a3
-; CHECK-NEXT: .LBB38_10:
+; CHECK-NEXT: .LBB38_6:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vle64.v v8, (a0), v0.t
declare <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1>, <vscale x 128 x i8>, <vscale x 128 x i8>, i32)
define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpmerge_vv_nxv128i8:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 24
-; RV32-NEXT: mul a1, a1, a4
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: add a4, a0, a1
-; RV32-NEXT: vl8r.v v24, (a4)
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a4, zero, e8, m8, ta, ma
-; RV32-NEXT: vlm.v v2, (a2)
-; RV32-NEXT: sub a4, a3, a1
-; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 4
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a3, a4, .LBB28_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a2, a4
-; RV32-NEXT: .LBB28_2:
-; RV32-NEXT: vl8r.v v8, (a0)
-; RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; RV32-NEXT: vmv1r.v v0, v2
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT: bltu a3, a1, .LBB28_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a3, a1
-; RV32-NEXT: .LBB28_4:
-; RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpmerge_vv_nxv128i8:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a4, a0, a1
-; RV64-NEXT: vl8r.v v24, (a4)
-; RV64-NEXT: vsetvli a4, zero, e8, m8, ta, ma
-; RV64-NEXT: vlm.v v2, (a2)
-; RV64-NEXT: sub a4, a3, a1
-; RV64-NEXT: vmv1r.v v1, v0
-; RV64-NEXT: addi a2, sp, 16
-; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: li a2, 0
-; RV64-NEXT: bltu a3, a4, .LBB28_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a2, a4
-; RV64-NEXT: .LBB28_2:
-; RV64-NEXT: vl8r.v v8, (a0)
-; RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; RV64-NEXT: vmv1r.v v0, v2
-; RV64-NEXT: vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT: bltu a3, a1, .LBB28_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a3, a1
-; RV64-NEXT: .LBB28_4:
-; RV64-NEXT: vsetvli zero, a3, e8, m8, tu, ma
-; RV64-NEXT: vmv1r.v v0, v1
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
-; RV64-NEXT: vmv8r.v v16, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: vpmerge_vv_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: vmv1r.v v1, v0
+; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a4, a0, a1
+; CHECK-NEXT: vl8r.v v16, (a4)
+; CHECK-NEXT: vl8r.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT: sub a0, a3, a1
+; CHECK-NEXT: vlm.v v0, (a2)
+; CHECK-NEXT: sltu a2, a3, a0
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a0, a2, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT: bltu a3, a1, .LBB28_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: .LBB28_2:
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
%v = call <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1> %m, <vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, i32 %evl)
ret <vscale x 128 x i8> %v
}
define <vscale x 128 x i8> @vpmerge_vx_nxv128i8(i8 %a, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 3
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a2, a3, .LBB29_2
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a3, a2, a1
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma
+; CHECK-NEXT: vmerge.vxm v16, v16, a0, v0
+; CHECK-NEXT: bltu a2, a1, .LBB29_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB29_2:
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v24, (a1)
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, tu, ma
-; CHECK-NEXT: sub a1, a2, a3
-; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT: bltu a2, a1, .LBB29_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB29_4:
-; CHECK-NEXT: vsetvli zero, a5, e8, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vmerge.vxm v16, v16, a0, v0
+; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 %a, i32 0
%va = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
define <vscale x 128 x i8> @vpmerge_vi_nxv128i8(<vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: bltu a1, a2, .LBB30_2
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub a2, a1, a0
+; CHECK-NEXT: sltu a3, a1, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma
+; CHECK-NEXT: vmerge.vim v16, v16, 2, v0
+; CHECK-NEXT: bltu a1, a0, .LBB30_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: .LBB30_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli a5, zero, e8, m8, ta, ma
-; CHECK-NEXT: vlm.v v24, (a0)
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma
-; CHECK-NEXT: sub a0, a1, a2
-; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
-; CHECK-NEXT: bltu a1, a0, .LBB30_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a0
-; CHECK-NEXT: .LBB30_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vmerge.vim v16, v16, 2, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 128 x i8> poison, i8 2, i32 0
%va = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a2, a0
; RV32-NEXT: .LBB95_2:
-; RV32-NEXT: li a3, 0
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
-; RV32-NEXT: srli a2, a0, 3
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: vslidedown.vx v0, v0, a2
-; RV32-NEXT: bltu a1, a0, .LBB95_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a3, a0
-; RV32-NEXT: .LBB95_4:
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; RV32-NEXT: sub a2, a1, a0
+; RV32-NEXT: sltu a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: srli a0, a0, 3
+; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a0
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT: ret
;
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vl8re64.v v16, (a0)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a3, a1, 3
-; RV64-NEXT: add a0, a0, a3
-; RV64-NEXT: mv a3, a2
+; RV64-NEXT: add a3, a0, a3
+; RV64-NEXT: vl8re64.v v24, (a3)
+; RV64-NEXT: addi a3, sp, 16
+; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: vl8re64.v v24, (a0)
+; RV64-NEXT: mv a0, a2
; RV64-NEXT: bltu a2, a1, .LBB95_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a3, a1
+; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB95_2:
-; RV64-NEXT: li a4, 0
-; RV64-NEXT: vl8re64.v v24, (a0)
-; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
-; RV64-NEXT: srli a3, a1, 3
-; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT: sub a0, a2, a1
-; RV64-NEXT: vslidedown.vx v0, v0, a3
-; RV64-NEXT: bltu a2, a0, .LBB95_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a4, a0
-; RV64-NEXT: .LBB95_4:
-; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; RV64-NEXT: sltu a2, a2, a0
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: srli a1, a1, 3
+; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
+; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB96_2:
-; RV32-NEXT: li a4, 0
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
-; RV32-NEXT: srli a3, a1, 3
-; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV32-NEXT: sub a1, a2, a1
-; RV32-NEXT: vslidedown.vx v0, v0, a3
-; RV32-NEXT: bltu a2, a1, .LBB96_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a4, a1
-; RV32-NEXT: .LBB96_4:
-; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; RV32-NEXT: sub a3, a2, a1
+; RV32-NEXT: sltu a2, a2, a3
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: srli a1, a1, 3
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a1
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: vl4re16.v v4, (a1)
-; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl4re16.v v24, (a1)
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf4 v16, v4
+; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: vsext.vf4 v16, v24
+; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
-; RV64-NEXT: vsext.vf4 v24, v6
; RV64-NEXT: bltu a2, a1, .LBB96_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB96_2:
-; RV64-NEXT: li a4, 0
-; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
-; RV64-NEXT: srli a3, a1, 3
-; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV64-NEXT: sub a1, a2, a1
-; RV64-NEXT: vslidedown.vx v0, v0, a3
-; RV64-NEXT: bltu a2, a1, .LBB96_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a4, a1
-; RV64-NEXT: .LBB96_4:
-; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT: sub a3, a2, a1
+; RV64-NEXT: sltu a2, a2, a3
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a2, a2, a3
+; RV64-NEXT: srli a1, a1, 3
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a1
+; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB97_2:
-; RV32-NEXT: li a4, 0
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
-; RV32-NEXT: srli a3, a1, 3
-; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV32-NEXT: sub a1, a2, a1
-; RV32-NEXT: vslidedown.vx v0, v0, a3
-; RV32-NEXT: bltu a2, a1, .LBB97_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a4, a1
-; RV32-NEXT: .LBB97_4:
-; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; RV32-NEXT: sub a3, a2, a1
+; RV32-NEXT: sltu a2, a2, a3
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: srli a1, a1, 3
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a1
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: vl4re16.v v4, (a1)
-; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl4re16.v v24, (a1)
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf4 v16, v4
+; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: vsext.vf4 v16, v24
+; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
-; RV64-NEXT: vsext.vf4 v24, v6
; RV64-NEXT: bltu a2, a1, .LBB97_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB97_2:
-; RV64-NEXT: li a4, 0
-; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
-; RV64-NEXT: srli a3, a1, 3
-; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV64-NEXT: sub a1, a2, a1
-; RV64-NEXT: vslidedown.vx v0, v0, a3
-; RV64-NEXT: bltu a2, a1, .LBB97_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a4, a1
-; RV64-NEXT: .LBB97_4:
-; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT: sub a3, a2, a1
+; RV64-NEXT: sltu a2, a2, a3
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a2, a2, a3
+; RV64-NEXT: srli a1, a1, 3
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a1
+; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB98_2:
-; RV32-NEXT: li a4, 0
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
-; RV32-NEXT: srli a3, a1, 3
-; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV32-NEXT: sub a1, a2, a1
-; RV32-NEXT: vslidedown.vx v0, v0, a3
-; RV32-NEXT: bltu a2, a1, .LBB98_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a4, a1
-; RV32-NEXT: .LBB98_4:
-; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; RV32-NEXT: sub a3, a2, a1
+; RV32-NEXT: sltu a2, a2, a3
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: srli a1, a1, 3
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a1
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: vl4re16.v v4, (a1)
-; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl4re16.v v24, (a1)
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vzext.vf4 v16, v4
+; RV64-NEXT: vzext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: vzext.vf4 v16, v24
+; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
-; RV64-NEXT: vzext.vf4 v24, v6
; RV64-NEXT: bltu a2, a1, .LBB98_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB98_2:
-; RV64-NEXT: li a4, 0
-; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
-; RV64-NEXT: srli a3, a1, 3
-; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV64-NEXT: sub a1, a2, a1
-; RV64-NEXT: vslidedown.vx v0, v0, a3
-; RV64-NEXT: bltu a2, a1, .LBB98_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a4, a1
-; RV64-NEXT: .LBB98_4:
-; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT: sub a3, a2, a1
+; RV64-NEXT: sltu a2, a2, a3
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a2, a2, a3
+; RV64-NEXT: srli a1, a1, 3
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a1
+; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: .LBB30_2:
-; CHECK-NEXT: li a4, 0
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vse64.v v8, (a0), v0.t
-; CHECK-NEXT: srli a5, a2, 3
-; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a3, .LBB30_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a3
-; CHECK-NEXT: .LBB30_4:
-; CHECK-NEXT: slli a1, a2, 3
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-NEXT: sltu a1, a1, a3
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: srli a3, a2, 3
+; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a3
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vse64.v v16, (a0), v0.t
; CHECK-NEXT: ret
call void @llvm.vp.store.nxv16f64.p0nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x double>* %ptr, <vscale x 16 x i1> %m, i32 %evl)
define void @vpstore_nxv17f64(<vscale x 17 x double> %val, <vscale x 17 x double>* %ptr, <vscale x 17 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpstore_nxv17f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 3
-; CHECK-NEXT: sub sp, sp, a3
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a4, a3, 1
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: addi a5, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: mv a5, a2
; CHECK-NEXT: bltu a2, a4, .LBB31_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a5, a4
; CHECK-NEXT: .LBB31_2:
-; CHECK-NEXT: mv a7, a5
+; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: bltu a5, a3, .LBB31_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a6, a3
; CHECK-NEXT: .LBB31_4:
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma
-; CHECK-NEXT: sub a0, a5, a3
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a7, vlenb
+; CHECK-NEXT: slli a7, a7, 3
+; CHECK-NEXT: sub sp, sp, a7
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vse64.v v8, (a1), v0.t
-; CHECK-NEXT: bltu a5, a0, .LBB31_6
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: .LBB31_6:
-; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: sub a0, a5, a3
+; CHECK-NEXT: sltu a5, a5, a0
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a0, a5, a0
; CHECK-NEXT: srli a5, a3, 3
-; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v24, a5
; CHECK-NEXT: slli a5, a3, 3
; CHECK-NEXT: add a5, a1, a5
-; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-NEXT: addi a6, sp, 16
-; CHECK-NEXT: vl8re8.v v8, (a6) # Unknown-size Folded Reload
-; CHECK-NEXT: vse64.v v8, (a5), v0.t
-; CHECK-NEXT: srli a5, a3, 2
-; CHECK-NEXT: sub a6, a2, a4
-; CHECK-NEXT: slli a4, a3, 4
-; CHECK-NEXT: bltu a2, a6, .LBB31_8
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mv a0, a6
-; CHECK-NEXT: .LBB31_8:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v24, a5
-; CHECK-NEXT: add a1, a1, a4
-; CHECK-NEXT: bltu a0, a3, .LBB31_10
-; CHECK-NEXT: # %bb.9:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: sub a0, a2, a4
+; CHECK-NEXT: sltu a2, a2, a0
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a0, a2, a0
+; CHECK-NEXT: vse64.v v16, (a5), v0.t
+; CHECK-NEXT: bltu a0, a3, .LBB31_6
+; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a0, a3
-; CHECK-NEXT: .LBB31_10:
+; CHECK-NEXT: .LBB31_6:
+; CHECK-NEXT: srli a2, a3, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: slli a2, a3, 4
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v16, (a1), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vse64.v v8, (a1), v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 1
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v24, v0, a2
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: sub a1, a0, a2
+; CHECK-NEXT: sltu a3, a0, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a1, a3, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: bltu a0, a2, .LBB6_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v0, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: sub a1, a0, a2
; CHECK-NEXT: vfmv.s.f v8, ft0
-; CHECK-NEXT: bltu a0, a1, .LBB6_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: .LBB6_4:
-; CHECK-NEXT: vsetvli zero, a4, e16, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v8
define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 1
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v24, v0, a2
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: sub a1, a0, a2
+; CHECK-NEXT: sltu a3, a0, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a1, a3, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: bltu a0, a2, .LBB7_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v0, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: sub a1, a0, a2
; CHECK-NEXT: vfmv.s.f v8, ft0
-; CHECK-NEXT: bltu a0, a1, .LBB7_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: .LBB7_4:
-; CHECK-NEXT: vsetvli zero, a4, e16, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v8
define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %v, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_umax_nxv32i32:
; RV32: # %bb.0:
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: srli a2, a3, 2
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: srli a3, a2, 2
+; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v24, v0, a3
+; RV32-NEXT: slli a3, a2, 1
+; RV32-NEXT: sub a2, a1, a3
+; RV32-NEXT: sltu a4, a1, a2
+; RV32-NEXT: addi a4, a4, -1
+; RV32-NEXT: and a2, a4, a2
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: vmv.s.x v25, a0
-; RV32-NEXT: mv a0, a1
; RV32-NEXT: bltu a1, a3, .LBB67_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a3
+; RV32-NEXT: mv a1, a3
; RV32-NEXT: .LBB67_2:
-; RV32-NEXT: li a4, 0
-; RV32-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v24, v0, a2
-; RV32-NEXT: vsetvli zero, a0, e32, m8, tu, ma
+; RV32-NEXT: vsetvli zero, a1, e32, m8, tu, ma
; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t
-; RV32-NEXT: vmv.x.s a2, v25
+; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: sub a0, a1, a3
-; RV32-NEXT: vmv.s.x v8, a2
-; RV32-NEXT: bltu a1, a0, .LBB67_4
-; RV32-NEXT: # %bb.3:
-; RV32-NEXT: mv a4, a0
-; RV32-NEXT: .LBB67_4:
-; RV32-NEXT: vsetvli zero, a4, e32, m8, tu, ma
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetvli zero, a2, e32, m8, tu, ma
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: vredmaxu.vs v8, v16, v8, v0.t
; RV32-NEXT: vmv.x.s a0, v8
; RV64: # %bb.0:
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: srli a2, a3, 2
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: slli a0, a3, 1
-; RV64-NEXT: srli a3, a4, 32
-; RV64-NEXT: mv a4, a1
-; RV64-NEXT: bltu a1, a0, .LBB67_2
+; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vx v24, v0, a2
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: srli a2, a0, 32
+; RV64-NEXT: slli a3, a3, 1
+; RV64-NEXT: sub a0, a1, a3
+; RV64-NEXT: sltu a4, a1, a0
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a0, a4, a0
+; RV64-NEXT: bltu a1, a3, .LBB67_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a4, a0
+; RV64-NEXT: mv a1, a3
; RV64-NEXT: .LBB67_2:
-; RV64-NEXT: li a5, 0
-; RV64-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vx v24, v0, a2
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v25, a3
-; RV64-NEXT: vsetvli zero, a4, e32, m8, tu, ma
+; RV64-NEXT: vmv.s.x v25, a2
+; RV64-NEXT: vsetvli zero, a1, e32, m8, tu, ma
; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t
-; RV64-NEXT: vmv.x.s a2, v25
+; RV64-NEXT: vmv.x.s a1, v25
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: vmv.s.x v8, a2
-; RV64-NEXT: bltu a1, a0, .LBB67_4
-; RV64-NEXT: # %bb.3:
-; RV64-NEXT: mv a5, a0
-; RV64-NEXT: .LBB67_4:
-; RV64-NEXT: vsetvli zero, a5, e32, m8, tu, ma
+; RV64-NEXT: vmv.s.x v8, a1
+; RV64-NEXT: vsetvli zero, a0, e32, m8, tu, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vredmaxu.vs v8, v16, v8, v0.t
; RV64-NEXT: vmv.x.s a0, v8
define signext i1 @vpreduce_or_nxv128i1(i1 signext %s, <vscale x 128 x i1> %v, <vscale x 128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_or_nxv128i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vcpop.m a3, v8, v0.t
+; CHECK-NEXT: snez a3, a3
; CHECK-NEXT: bltu a1, a2, .LBB22_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB22_2:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vcpop.m a3, v11, v0.t
-; CHECK-NEXT: snez a3, a3
-; CHECK-NEXT: sub a2, a1, a2
-; CHECK-NEXT: or a0, a3, a0
-; CHECK-NEXT: bltu a1, a2, .LBB22_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB22_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vcpop.m a1, v8, v0.t
+; CHECK-NEXT: vcpop.m a1, v11, v0.t
; CHECK-NEXT: snez a1, a1
; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: or a0, a3, a0
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: neg a0, a0
; CHECK-NEXT: ret
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a4, a1, 3
-; CHECK-NEXT: add a4, a0, a4
-; CHECK-NEXT: vl8re32.v v8, (a4)
-; CHECK-NEXT: srli a5, a1, 2
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a1, a3, 3
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: vl8re32.v v8, (a1)
+; CHECK-NEXT: slli a1, a3, 1
; CHECK-NEXT: sub a4, a2, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a2, a4, .LBB27_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB27_2:
-; CHECK-NEXT: vl8re32.v v24, (a0)
+; CHECK-NEXT: sltu a5, a2, a4
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a4, a5, a4
+; CHECK-NEXT: srli a3, a3, 2
+; CHECK-NEXT: vl8re32.v v0, (a0)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a3
+; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
-; CHECK-NEXT: bltu a2, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a1, .LBB27_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB27_4:
+; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a2, a1, 3
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vl8re32.v v8, (a2)
-; CHECK-NEXT: srli a5, a1, 2
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a4, a1, a2
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a1, a4, .LBB28_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB28_2:
-; CHECK-NEXT: vl8re32.v v24, (a0)
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: vl8re32.v v0, (a0)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a4
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
-; CHECK-NEXT: bltu a1, a2, .LBB28_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a1, a2, .LBB28_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB28_4:
+; CHECK-NEXT: .LBB28_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a4, a1, 3
-; CHECK-NEXT: add a4, a0, a4
-; CHECK-NEXT: vl8re64.v v8, (a4)
-; CHECK-NEXT: srli a5, a1, 3
-; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a4, a2, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
-; CHECK-NEXT: bltu a2, a4, .LBB48_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: .LBB48_2:
-; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a3, a0, a3
+; CHECK-NEXT: vl8re64.v v8, (a3)
+; CHECK-NEXT: sub a3, a2, a1
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: vl8re64.v v0, (a0)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a4
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
-; CHECK-NEXT: bltu a2, a1, .LBB48_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a2, a1, .LBB48_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB48_4:
+; CHECK-NEXT: .LBB48_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-LABEL: vsext_nxv32i8_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB12_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vsext.vf4 v16, v10, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB12_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB12_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB12_4:
+; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsext.vf4 v24, v8, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsext.vf4 v16, v10
; CHECK-NEXT: bltu a0, a1, .LBB13_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB13_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vsext.vf4 v24, v8
-; CHECK-NEXT: bltu a0, a1, .LBB13_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB13_4:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vsext.vf4 v16, v10
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv.v.v v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i32.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %vl)
ret <vscale x 32 x i32> %v
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB25_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfncvt.f.x.w v12, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB25_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-LABEL: vsitofp_nxv32f32_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB27_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.f.x.v v8, v8
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vfcvt.f.x.v v16, v16
; CHECK-NEXT: ret
%v = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %evl)
ret <vscale x 32 x float> %v
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 2 x i7> @llvm.vp.trunc.nxv2i7.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
; CHECK-LABEL: vtrunc_nxv15i16_nxv15i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 3
+; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB12_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v28, v16, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vnsrl.wi v18, v28, 0, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB12_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB12_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB12_4:
+; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t
; CHECK-LABEL: vtrunc_nxv32i7_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB15_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB15_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v28, v16, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT: vnsrl.wi v18, v28, 0, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB15_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB15_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB15_4:
+; CHECK-NEXT: .LBB15_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t
; CHECK-LABEL: vtrunc_nxv32i8_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB16_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB16_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v28, v16, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT: vnsrl.wi v18, v28, 0, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB16_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB16_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB16_4:
+; CHECK-NEXT: .LBB16_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a4, a1, 1
-; CHECK-NEXT: srli a3, a1, 3
-; CHECK-NEXT: mv a5, a2
-; CHECK-NEXT: bltu a2, a4, .LBB17_2
+; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v25, v0, a3
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a3, a0, a3
+; CHECK-NEXT: vl8re64.v v8, (a3)
+; CHECK-NEXT: slli a3, a1, 1
+; CHECK-NEXT: sub a4, a2, a3
+; CHECK-NEXT: sltu a5, a2, a4
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a4, a5, a4
+; CHECK-NEXT: sub a5, a4, a1
+; CHECK-NEXT: sltu a6, a4, a5
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a6, a6, a5
+; CHECK-NEXT: srli a5, a1, 3
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v25, a5
+; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t
+; CHECK-NEXT: bltu a4, a1, .LBB17_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: mv a4, a1
; CHECK-NEXT: .LBB17_2:
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
-; CHECK-NEXT: sub a7, a5, a1
-; CHECK-NEXT: vslidedown.vx v0, v24, a3
-; CHECK-NEXT: bltu a5, a7, .LBB17_4
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v26, v1, a5
+; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: bltu a2, a3, .LBB17_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a6, a7
+; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB17_4:
-; CHECK-NEXT: srli a7, a1, 2
-; CHECK-NEXT: slli t0, a1, 3
-; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v16, 0, v0.t
-; CHECK-NEXT: bltu a5, a1, .LBB17_6
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB17_6:
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: vsetvli t1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v1, v24, a7
-; CHECK-NEXT: add a7, a0, t0
-; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
-; CHECK-NEXT: sub a4, a2, a4
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 3
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT: bltu a2, a4, .LBB17_8
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mv a6, a4
-; CHECK-NEXT: .LBB17_8:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vl8re64.v v16, (a7)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: sub a4, a6, a1
-; CHECK-NEXT: vslidedown.vx v0, v1, a3
-; CHECK-NEXT: bltu a6, a4, .LBB17_10
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: mv a2, a4
-; CHECK-NEXT: .LBB17_10:
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: sub a0, a2, a1
+; CHECK-NEXT: sltu a3, a2, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a0, a3, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vnsrl.wi v20, v24, 0, v0.t
-; CHECK-NEXT: bltu a6, a1, .LBB17_12
-; CHECK-NEXT: # %bb.11:
-; CHECK-NEXT: mv a6, a1
-; CHECK-NEXT: .LBB17_12:
-; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v24, 0, v0.t
+; CHECK-NEXT: bltu a2, a1, .LBB17_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB17_6:
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB25_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfncvt.f.xu.w v12, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB25_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-LABEL: vuitofp_nxv32f32_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB26_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB26_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB26_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB26_4:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB27_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.f.xu.v v8, v8
-; CHECK-NEXT: bltu a0, a1, .LBB27_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB27_4:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vfcvt.f.xu.v v16, v16
; CHECK-NEXT: ret
%v = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %evl)
ret <vscale x 32 x float> %v
; CHECK-LABEL: vzext_nxv32i8_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: bltu a0, a3, .LBB12_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vzext.vf4 v16, v10, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB12_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: bltu a0, a1, .LBB12_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB12_4:
+; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vzext.vf4 v24, v8, v0.t
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vzext.vf4 v16, v10
; CHECK-NEXT: bltu a0, a1, .LBB13_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB13_2:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vzext.vf4 v24, v8
-; CHECK-NEXT: bltu a0, a1, .LBB13_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB13_4:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v10
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv.v.v v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i32.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %vl)
ret <vscale x 32 x i32> %v
; Compare if positive and select of constants where one constant is zero.
define i32 @pos_sel_constants(i32 signext %a) {
-; CHECK-LABEL: pos_sel_constants:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: li a0, 5
-; CHECK-NEXT: bgez a1, .LBB4_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: ret
+; RV32-LABEL: pos_sel_constants:
+; RV32: # %bb.0:
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: andi a0, a0, 5
+; RV32-NEXT: ret
+;
+; RV64-LABEL: pos_sel_constants:
+; RV64: # %bb.0:
+; RV64-NEXT: slti a0, a0, 0
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: andi a0, a0, 5
+; RV64-NEXT: ret
%tmp.1 = icmp sgt i32 %a, -1
%retval = select i1 %tmp.1, i32 5, i32 0
ret i32 %retval
define i32 @pos_sel_variable_and_zero(i32 signext %a, i32 signext %b) {
; RV32I-LABEL: pos_sel_variable_and_zero:
; RV32I: # %bb.0:
-; RV32I-NEXT: bgez a0, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: .LBB6_2:
-; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: slti a0, a0, 0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: pos_sel_variable_and_zero:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgez a0, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: .LBB6_2:
-; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: slti a0, a0, 0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: pos_sel_variable_and_zero:
define i32 @not_neg_sel_same_variable(i32 signext %a) {
; RV32I-LABEL: not_neg_sel_same_variable:
; RV32I: # %bb.0:
-; RV32I-NEXT: bgtz a0, .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: sgtz a1, a0
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: not_neg_sel_same_variable:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgtz a0, .LBB7_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: .LBB7_2:
+; RV64I-NEXT: sgtz a1, a0
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: not_neg_sel_same_variable:
; RV32I-LABEL: sub_clamp_zero:
; RV32I: # %bb.0:
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: bgtz a0, .LBB8_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: .LBB8_2:
+; RV32I-NEXT: sgtz a1, a0
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: sub_clamp_zero:
; RV64I: # %bb.0:
; RV64I-NEXT: subw a0, a0, a1
-; RV64I-NEXT: bgtz a0, .LBB8_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: .LBB8_2:
+; RV64I-NEXT: sgtz a1, a0
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: sub_clamp_zero:
define i8 @sel_shift_bool_i8(i1 %t) {
; CHECK-LABEL: sel_shift_bool_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a1, a0, 1
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: bnez a1, .LBB9_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: .LBB9_2:
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: andi a0, a0, -128
; CHECK-NEXT: ret
%shl = select i1 %t, i8 128, i8 0
ret i8 %shl
; RV32I-NEXT: bltz a3, .LBB9_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sll a1, a0, a3
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: ret
+; RV32I-NEXT: j .LBB9_3
; RV32I-NEXT: .LBB9_2:
; RV32I-NEXT: sll a1, a1, a2
-; RV32I-NEXT: srli a3, a0, 1
+; RV32I-NEXT: srli a5, a0, 1
; RV32I-NEXT: xori a4, a4, 31
-; RV32I-NEXT: srl a3, a3, a4
-; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: srl a4, a5, a4
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: .LBB9_3:
; RV32I-NEXT: sll a0, a0, a2
+; RV32I-NEXT: slti a2, a3, 0
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: and a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_redundant_mask_zeros_i64:
; RV32I-NEXT: bltz a3, .LBB10_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a0, a1, a3
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: ret
+; RV32I-NEXT: j .LBB10_3
; RV32I-NEXT: .LBB10_2:
; RV32I-NEXT: srl a0, a0, a2
-; RV32I-NEXT: slli a3, a1, 1
+; RV32I-NEXT: slli a5, a1, 1
; RV32I-NEXT: xori a4, a4, 31
-; RV32I-NEXT: sll a3, a3, a4
-; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: sll a4, a5, a4
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: .LBB10_3:
; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: slti a2, a3, 0
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: and a1, a2, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: srl_redundant_mask_zeros_i64:
; RV32I-NEXT: bltz a3, .LBB0_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a0, a1, a3
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: ret
+; RV32I-NEXT: j .LBB0_3
; RV32I-NEXT: .LBB0_2:
; RV32I-NEXT: srl a0, a0, a2
-; RV32I-NEXT: xori a3, a2, 31
-; RV32I-NEXT: slli a4, a1, 1
-; RV32I-NEXT: sll a3, a4, a3
-; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: xori a4, a2, 31
+; RV32I-NEXT: slli a5, a1, 1
+; RV32I-NEXT: sll a4, a5, a4
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: .LBB0_3:
; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: slti a2, a3, 0
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: and a1, a2, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: lshr64:
; RV32I-NEXT: bltz a3, .LBB4_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sll a1, a0, a3
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: ret
+; RV32I-NEXT: j .LBB4_3
; RV32I-NEXT: .LBB4_2:
; RV32I-NEXT: sll a1, a1, a2
-; RV32I-NEXT: xori a3, a2, 31
-; RV32I-NEXT: srli a4, a0, 1
-; RV32I-NEXT: srl a3, a4, a3
-; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: xori a4, a2, 31
+; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: srl a4, a5, a4
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: sll a0, a0, a2
+; RV32I-NEXT: slti a2, a3, 0
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: and a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: shl64:
define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: lshr128:
; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a2, 0(a2)
-; RV32I-NEXT: lw a5, 8(a1)
-; RV32I-NEXT: lw a4, 12(a1)
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: li a7, 32
-; RV32I-NEXT: sub t1, a7, a2
-; RV32I-NEXT: sll t0, a5, a6
-; RV32I-NEXT: bltz t1, .LBB6_2
+; RV32I-NEXT: lw a4, 8(a1)
+; RV32I-NEXT: lw a3, 12(a1)
+; RV32I-NEXT: neg a5, a2
+; RV32I-NEXT: li t1, 64
+; RV32I-NEXT: li a6, 32
+; RV32I-NEXT: sub t0, a6, a2
+; RV32I-NEXT: sll a7, a4, a5
+; RV32I-NEXT: bltz t0, .LBB6_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv t2, t0
+; RV32I-NEXT: mv t2, a7
; RV32I-NEXT: j .LBB6_3
; RV32I-NEXT: .LBB6_2:
-; RV32I-NEXT: sll a6, a4, a6
-; RV32I-NEXT: sub a7, a3, a2
-; RV32I-NEXT: xori a7, a7, 31
-; RV32I-NEXT: srli t2, a5, 1
-; RV32I-NEXT: srl a7, t2, a7
-; RV32I-NEXT: or t2, a6, a7
+; RV32I-NEXT: sll a5, a3, a5
+; RV32I-NEXT: sub a6, t1, a2
+; RV32I-NEXT: xori a6, a6, 31
+; RV32I-NEXT: srli t2, a4, 1
+; RV32I-NEXT: srl a6, t2, a6
+; RV32I-NEXT: or t2, a5, a6
; RV32I-NEXT: .LBB6_3:
-; RV32I-NEXT: lw t5, 4(a1)
+; RV32I-NEXT: lw t6, 4(a1)
; RV32I-NEXT: addi a6, a2, -32
-; RV32I-NEXT: bgez a6, .LBB6_5
+; RV32I-NEXT: slti a5, a6, 0
+; RV32I-NEXT: neg a5, a5
+; RV32I-NEXT: addi t4, a2, -64
+; RV32I-NEXT: addi t5, a2, -96
+; RV32I-NEXT: bltu a2, t1, .LBB6_5
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: srl a7, t5, a2
-; RV32I-NEXT: or t2, t2, a7
+; RV32I-NEXT: srl t2, a3, t4
+; RV32I-NEXT: slti t3, t5, 0
+; RV32I-NEXT: neg t3, t3
+; RV32I-NEXT: and t3, t3, t2
+; RV32I-NEXT: mv t2, t6
+; RV32I-NEXT: bnez a2, .LBB6_6
+; RV32I-NEXT: j .LBB6_7
; RV32I-NEXT: .LBB6_5:
-; RV32I-NEXT: addi t4, a2, -96
-; RV32I-NEXT: addi t3, a2, -64
-; RV32I-NEXT: bltz t4, .LBB6_7
-; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: li a7, 0
-; RV32I-NEXT: bgeu a2, a3, .LBB6_8
-; RV32I-NEXT: j .LBB6_9
+; RV32I-NEXT: srl t3, t6, a2
+; RV32I-NEXT: and t3, a5, t3
+; RV32I-NEXT: or t3, t3, t2
+; RV32I-NEXT: mv t2, t6
+; RV32I-NEXT: beqz a2, .LBB6_7
+; RV32I-NEXT: .LBB6_6:
+; RV32I-NEXT: mv t2, t3
; RV32I-NEXT: .LBB6_7:
-; RV32I-NEXT: srl a7, a4, t3
-; RV32I-NEXT: bltu a2, a3, .LBB6_9
-; RV32I-NEXT: .LBB6_8:
-; RV32I-NEXT: mv t2, a7
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: xori t3, a2, 31
+; RV32I-NEXT: bltz a6, .LBB6_10
+; RV32I-NEXT: # %bb.8:
+; RV32I-NEXT: srl s0, t6, a6
+; RV32I-NEXT: slli t6, a3, 1
+; RV32I-NEXT: bgez t5, .LBB6_11
; RV32I-NEXT: .LBB6_9:
-; RV32I-NEXT: mv a7, t5
-; RV32I-NEXT: beqz a2, .LBB6_11
-; RV32I-NEXT: # %bb.10:
-; RV32I-NEXT: mv a7, t2
+; RV32I-NEXT: srl t5, a4, t4
+; RV32I-NEXT: xori t4, t4, 31
+; RV32I-NEXT: sll t4, t6, t4
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: bltu a2, t1, .LBB6_12
+; RV32I-NEXT: j .LBB6_13
+; RV32I-NEXT: .LBB6_10:
+; RV32I-NEXT: srl s0, a1, a2
+; RV32I-NEXT: slli t6, t6, 1
+; RV32I-NEXT: sll t6, t6, t3
+; RV32I-NEXT: or s0, s0, t6
+; RV32I-NEXT: slli t6, a3, 1
+; RV32I-NEXT: bltz t5, .LBB6_9
; RV32I-NEXT: .LBB6_11:
-; RV32I-NEXT: lw a1, 0(a1)
-; RV32I-NEXT: xori t2, a2, 31
-; RV32I-NEXT: bltz a6, .LBB6_13
-; RV32I-NEXT: # %bb.12:
-; RV32I-NEXT: srl t5, t5, a6
-; RV32I-NEXT: bltz t1, .LBB6_14
-; RV32I-NEXT: j .LBB6_15
+; RV32I-NEXT: srl t4, a3, t5
+; RV32I-NEXT: bgeu a2, t1, .LBB6_13
+; RV32I-NEXT: .LBB6_12:
+; RV32I-NEXT: slti t0, t0, 0
+; RV32I-NEXT: neg t0, t0
+; RV32I-NEXT: and a7, t0, a7
+; RV32I-NEXT: or t4, s0, a7
; RV32I-NEXT: .LBB6_13:
-; RV32I-NEXT: srl t6, a1, a2
-; RV32I-NEXT: slli t5, t5, 1
-; RV32I-NEXT: sll t5, t5, t2
-; RV32I-NEXT: or t5, t6, t5
-; RV32I-NEXT: bgez t1, .LBB6_15
-; RV32I-NEXT: .LBB6_14:
-; RV32I-NEXT: or t5, t5, t0
+; RV32I-NEXT: bnez a2, .LBB6_16
+; RV32I-NEXT: # %bb.14:
+; RV32I-NEXT: bltz a6, .LBB6_17
; RV32I-NEXT: .LBB6_15:
-; RV32I-NEXT: slli t0, a4, 1
-; RV32I-NEXT: bltz t4, .LBB6_17
-; RV32I-NEXT: # %bb.16:
-; RV32I-NEXT: srl t1, a4, t4
-; RV32I-NEXT: bgeu a2, a3, .LBB6_18
-; RV32I-NEXT: j .LBB6_19
+; RV32I-NEXT: srl a4, a3, a6
+; RV32I-NEXT: j .LBB6_18
+; RV32I-NEXT: .LBB6_16:
+; RV32I-NEXT: mv a1, t4
+; RV32I-NEXT: bgez a6, .LBB6_15
; RV32I-NEXT: .LBB6_17:
-; RV32I-NEXT: srl t1, a5, t3
-; RV32I-NEXT: xori t3, t3, 31
-; RV32I-NEXT: sll t3, t0, t3
-; RV32I-NEXT: or t1, t1, t3
-; RV32I-NEXT: bltu a2, a3, .LBB6_19
-; RV32I-NEXT: .LBB6_18:
-; RV32I-NEXT: mv t5, t1
-; RV32I-NEXT: .LBB6_19:
-; RV32I-NEXT: bnez a2, .LBB6_22
-; RV32I-NEXT: # %bb.20:
-; RV32I-NEXT: bltz a6, .LBB6_23
-; RV32I-NEXT: .LBB6_21:
-; RV32I-NEXT: srl a5, a4, a6
-; RV32I-NEXT: bgeu a2, a3, .LBB6_24
-; RV32I-NEXT: j .LBB6_25
-; RV32I-NEXT: .LBB6_22:
-; RV32I-NEXT: mv a1, t5
-; RV32I-NEXT: bgez a6, .LBB6_21
-; RV32I-NEXT: .LBB6_23:
-; RV32I-NEXT: srl a5, a5, a2
-; RV32I-NEXT: sll t0, t0, t2
-; RV32I-NEXT: or a5, a5, t0
-; RV32I-NEXT: bltu a2, a3, .LBB6_25
-; RV32I-NEXT: .LBB6_24:
-; RV32I-NEXT: li a5, 0
-; RV32I-NEXT: .LBB6_25:
-; RV32I-NEXT: bltz a6, .LBB6_27
-; RV32I-NEXT: # %bb.26:
-; RV32I-NEXT: li a4, 0
-; RV32I-NEXT: bgeu a2, a3, .LBB6_28
-; RV32I-NEXT: j .LBB6_29
-; RV32I-NEXT: .LBB6_27:
; RV32I-NEXT: srl a4, a4, a2
-; RV32I-NEXT: bltu a2, a3, .LBB6_29
-; RV32I-NEXT: .LBB6_28:
-; RV32I-NEXT: li a4, 0
-; RV32I-NEXT: .LBB6_29:
-; RV32I-NEXT: sw a4, 12(a0)
-; RV32I-NEXT: sw a5, 8(a0)
+; RV32I-NEXT: sll a6, t6, t3
+; RV32I-NEXT: or a4, a4, a6
+; RV32I-NEXT: .LBB6_18:
+; RV32I-NEXT: sltiu a6, a2, 64
+; RV32I-NEXT: neg a6, a6
+; RV32I-NEXT: and a4, a6, a4
+; RV32I-NEXT: srl a2, a3, a2
+; RV32I-NEXT: and a2, a5, a2
+; RV32I-NEXT: and a2, a6, a2
+; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a1, 0(a0)
-; RV32I-NEXT: sw a7, 4(a0)
+; RV32I-NEXT: sw t2, 4(a0)
+; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: lshr128:
; RV64I-NEXT: bltz a3, .LBB6_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: srl a0, a1, a3
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: ret
+; RV64I-NEXT: j .LBB6_3
; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: srl a0, a0, a2
-; RV64I-NEXT: xori a3, a2, 63
-; RV64I-NEXT: slli a4, a1, 1
-; RV64I-NEXT: sll a3, a4, a3
-; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: xori a4, a2, 63
+; RV64I-NEXT: slli a5, a1, 1
+; RV64I-NEXT: sll a4, a5, a4
+; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: .LBB6_3:
; RV64I-NEXT: srl a1, a1, a2
+; RV64I-NEXT: slti a2, a3, 0
+; RV64I-NEXT: neg a2, a2
+; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: ret
%1 = lshr i128 %a, %b
ret i128 %1
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a2, 0(a2)
-; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw a4, 12(a1)
-; RV32I-NEXT: neg a6, a2
+; RV32I-NEXT: neg a5, a2
; RV32I-NEXT: li a3, 64
; RV32I-NEXT: li a7, 32
; RV32I-NEXT: sub t2, a7, a2
-; RV32I-NEXT: sll t1, a5, a6
+; RV32I-NEXT: sll t1, a6, a5
; RV32I-NEXT: bltz t2, .LBB7_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv t4, t1
+; RV32I-NEXT: mv a7, t1
; RV32I-NEXT: j .LBB7_3
; RV32I-NEXT: .LBB7_2:
-; RV32I-NEXT: sll a6, a4, a6
+; RV32I-NEXT: sll a5, a4, a5
; RV32I-NEXT: sub a7, a3, a2
; RV32I-NEXT: xori a7, a7, 31
-; RV32I-NEXT: srli t0, a5, 1
+; RV32I-NEXT: srli t0, a6, 1
; RV32I-NEXT: srl a7, t0, a7
-; RV32I-NEXT: or t4, a6, a7
+; RV32I-NEXT: or a7, a5, a7
; RV32I-NEXT: .LBB7_3:
-; RV32I-NEXT: lw t6, 4(a1)
-; RV32I-NEXT: addi a6, a2, -32
-; RV32I-NEXT: bgez a6, .LBB7_5
+; RV32I-NEXT: addi t3, a2, -64
+; RV32I-NEXT: addi t4, a2, -96
+; RV32I-NEXT: srai a5, a4, 31
+; RV32I-NEXT: bltz t4, .LBB7_5
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: srl a7, t6, a2
-; RV32I-NEXT: or t4, t4, a7
+; RV32I-NEXT: mv t5, a5
+; RV32I-NEXT: j .LBB7_6
; RV32I-NEXT: .LBB7_5:
-; RV32I-NEXT: addi t3, a2, -64
-; RV32I-NEXT: addi t5, a2, -96
-; RV32I-NEXT: srai a7, a4, 31
-; RV32I-NEXT: bltz t5, .LBB7_7
-; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: mv t0, a7
+; RV32I-NEXT: sra t5, a4, t3
+; RV32I-NEXT: .LBB7_6:
+; RV32I-NEXT: lw t6, 4(a1)
+; RV32I-NEXT: addi t0, a2, -32
; RV32I-NEXT: bgeu a2, a3, .LBB7_8
-; RV32I-NEXT: j .LBB7_9
-; RV32I-NEXT: .LBB7_7:
-; RV32I-NEXT: sra t0, a4, t3
-; RV32I-NEXT: bltu a2, a3, .LBB7_9
+; RV32I-NEXT: # %bb.7:
+; RV32I-NEXT: slti t5, t0, 0
+; RV32I-NEXT: srl s0, t6, a2
+; RV32I-NEXT: neg t5, t5
+; RV32I-NEXT: and t5, t5, s0
+; RV32I-NEXT: or t5, t5, a7
; RV32I-NEXT: .LBB7_8:
-; RV32I-NEXT: mv t4, t0
-; RV32I-NEXT: .LBB7_9:
-; RV32I-NEXT: mv t0, t6
-; RV32I-NEXT: beqz a2, .LBB7_11
-; RV32I-NEXT: # %bb.10:
-; RV32I-NEXT: mv t0, t4
-; RV32I-NEXT: .LBB7_11:
+; RV32I-NEXT: mv a7, t6
+; RV32I-NEXT: beqz a2, .LBB7_10
+; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: mv a7, t5
+; RV32I-NEXT: .LBB7_10:
; RV32I-NEXT: lw a1, 0(a1)
-; RV32I-NEXT: xori t4, a2, 31
-; RV32I-NEXT: bltz a6, .LBB7_13
-; RV32I-NEXT: # %bb.12:
-; RV32I-NEXT: srl t6, t6, a6
-; RV32I-NEXT: bltz t2, .LBB7_14
-; RV32I-NEXT: j .LBB7_15
+; RV32I-NEXT: xori t5, a2, 31
+; RV32I-NEXT: bltz t0, .LBB7_13
+; RV32I-NEXT: # %bb.11:
+; RV32I-NEXT: srl s0, t6, t0
+; RV32I-NEXT: slli t6, a4, 1
+; RV32I-NEXT: bgez t4, .LBB7_14
+; RV32I-NEXT: .LBB7_12:
+; RV32I-NEXT: srl t4, a6, t3
+; RV32I-NEXT: xori t3, t3, 31
+; RV32I-NEXT: sll t3, t6, t3
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: bltu a2, a3, .LBB7_15
+; RV32I-NEXT: j .LBB7_16
; RV32I-NEXT: .LBB7_13:
; RV32I-NEXT: srl s0, a1, a2
; RV32I-NEXT: slli t6, t6, 1
-; RV32I-NEXT: sll t6, t6, t4
-; RV32I-NEXT: or t6, s0, t6
-; RV32I-NEXT: bgez t2, .LBB7_15
+; RV32I-NEXT: sll t6, t6, t5
+; RV32I-NEXT: or s0, s0, t6
+; RV32I-NEXT: slli t6, a4, 1
+; RV32I-NEXT: bltz t4, .LBB7_12
; RV32I-NEXT: .LBB7_14:
-; RV32I-NEXT: or t6, t6, t1
+; RV32I-NEXT: sra t3, a4, t4
+; RV32I-NEXT: bgeu a2, a3, .LBB7_16
; RV32I-NEXT: .LBB7_15:
-; RV32I-NEXT: slli t1, a4, 1
-; RV32I-NEXT: bltz t5, .LBB7_17
-; RV32I-NEXT: # %bb.16:
-; RV32I-NEXT: sra t2, a4, t5
-; RV32I-NEXT: bgeu a2, a3, .LBB7_18
-; RV32I-NEXT: j .LBB7_19
-; RV32I-NEXT: .LBB7_17:
-; RV32I-NEXT: srl t2, a5, t3
-; RV32I-NEXT: xori t3, t3, 31
-; RV32I-NEXT: sll t3, t1, t3
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: bltu a2, a3, .LBB7_19
+; RV32I-NEXT: slti t2, t2, 0
+; RV32I-NEXT: neg t2, t2
+; RV32I-NEXT: and t1, t2, t1
+; RV32I-NEXT: or t3, s0, t1
+; RV32I-NEXT: .LBB7_16:
+; RV32I-NEXT: bnez a2, .LBB7_19
+; RV32I-NEXT: # %bb.17:
+; RV32I-NEXT: bltz t0, .LBB7_20
; RV32I-NEXT: .LBB7_18:
-; RV32I-NEXT: mv t6, t2
+; RV32I-NEXT: sra a6, a4, t0
+; RV32I-NEXT: bgeu a2, a3, .LBB7_21
+; RV32I-NEXT: j .LBB7_22
; RV32I-NEXT: .LBB7_19:
-; RV32I-NEXT: bnez a2, .LBB7_22
-; RV32I-NEXT: # %bb.20:
-; RV32I-NEXT: bltz a6, .LBB7_23
+; RV32I-NEXT: mv a1, t3
+; RV32I-NEXT: bgez t0, .LBB7_18
+; RV32I-NEXT: .LBB7_20:
+; RV32I-NEXT: srl a6, a6, a2
+; RV32I-NEXT: sll t1, t6, t5
+; RV32I-NEXT: or a6, a6, t1
+; RV32I-NEXT: bltu a2, a3, .LBB7_22
; RV32I-NEXT: .LBB7_21:
-; RV32I-NEXT: sra a5, a4, a6
-; RV32I-NEXT: bgeu a2, a3, .LBB7_24
-; RV32I-NEXT: j .LBB7_25
+; RV32I-NEXT: mv a6, a5
; RV32I-NEXT: .LBB7_22:
-; RV32I-NEXT: mv a1, t6
-; RV32I-NEXT: bgez a6, .LBB7_21
-; RV32I-NEXT: .LBB7_23:
-; RV32I-NEXT: srl a5, a5, a2
-; RV32I-NEXT: sll t1, t1, t4
-; RV32I-NEXT: or a5, a5, t1
-; RV32I-NEXT: bltu a2, a3, .LBB7_25
+; RV32I-NEXT: bltz t0, .LBB7_24
+; RV32I-NEXT: # %bb.23:
+; RV32I-NEXT: mv a4, a5
+; RV32I-NEXT: bgeu a2, a3, .LBB7_25
+; RV32I-NEXT: j .LBB7_26
; RV32I-NEXT: .LBB7_24:
-; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB7_25:
-; RV32I-NEXT: bltz a6, .LBB7_27
-; RV32I-NEXT: # %bb.26:
-; RV32I-NEXT: mv a4, a7
-; RV32I-NEXT: bgeu a2, a3, .LBB7_28
-; RV32I-NEXT: j .LBB7_29
-; RV32I-NEXT: .LBB7_27:
; RV32I-NEXT: sra a4, a4, a2
-; RV32I-NEXT: bltu a2, a3, .LBB7_29
-; RV32I-NEXT: .LBB7_28:
-; RV32I-NEXT: mv a4, a7
-; RV32I-NEXT: .LBB7_29:
+; RV32I-NEXT: bltu a2, a3, .LBB7_26
+; RV32I-NEXT: .LBB7_25:
+; RV32I-NEXT: mv a4, a5
+; RV32I-NEXT: .LBB7_26:
; RV32I-NEXT: sw a4, 12(a0)
-; RV32I-NEXT: sw a5, 8(a0)
+; RV32I-NEXT: sw a6, 8(a0)
; RV32I-NEXT: sw a1, 0(a0)
-; RV32I-NEXT: sw t0, 4(a0)
+; RV32I-NEXT: sw a7, 4(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: shl128:
; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a2, 0(a2)
-; RV32I-NEXT: lw a5, 4(a1)
+; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a4, 0(a1)
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: li a7, 32
-; RV32I-NEXT: sub t1, a7, a2
-; RV32I-NEXT: srl t0, a5, a6
-; RV32I-NEXT: bltz t1, .LBB8_2
+; RV32I-NEXT: neg a5, a2
+; RV32I-NEXT: li t0, 64
+; RV32I-NEXT: li a6, 32
+; RV32I-NEXT: sub a7, a6, a2
+; RV32I-NEXT: srl a6, a3, a5
+; RV32I-NEXT: bltz a7, .LBB8_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv t2, t0
+; RV32I-NEXT: mv t1, a6
; RV32I-NEXT: j .LBB8_3
; RV32I-NEXT: .LBB8_2:
-; RV32I-NEXT: srl a6, a4, a6
-; RV32I-NEXT: sub a7, a3, a2
-; RV32I-NEXT: xori a7, a7, 31
-; RV32I-NEXT: slli t2, a5, 1
-; RV32I-NEXT: sll a7, t2, a7
-; RV32I-NEXT: or t2, a6, a7
+; RV32I-NEXT: srl a5, a4, a5
+; RV32I-NEXT: sub t1, t0, a2
+; RV32I-NEXT: xori t1, t1, 31
+; RV32I-NEXT: slli t2, a3, 1
+; RV32I-NEXT: sll t1, t2, t1
+; RV32I-NEXT: or t1, a5, t1
; RV32I-NEXT: .LBB8_3:
; RV32I-NEXT: lw t5, 8(a1)
-; RV32I-NEXT: addi a6, a2, -32
-; RV32I-NEXT: bgez a6, .LBB8_5
+; RV32I-NEXT: addi a5, a2, -32
+; RV32I-NEXT: slti t2, a5, 0
+; RV32I-NEXT: neg t2, t2
+; RV32I-NEXT: addi t4, a2, -64
+; RV32I-NEXT: addi t6, a2, -96
+; RV32I-NEXT: bltu a2, t0, .LBB8_5
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: sll a7, t5, a2
-; RV32I-NEXT: or t2, t2, a7
+; RV32I-NEXT: sll t1, a4, t4
+; RV32I-NEXT: slti t3, t6, 0
+; RV32I-NEXT: neg t3, t3
+; RV32I-NEXT: and t3, t3, t1
+; RV32I-NEXT: mv t1, t5
+; RV32I-NEXT: bnez a2, .LBB8_6
+; RV32I-NEXT: j .LBB8_7
; RV32I-NEXT: .LBB8_5:
-; RV32I-NEXT: addi t4, a2, -96
-; RV32I-NEXT: addi t3, a2, -64
-; RV32I-NEXT: bltz t4, .LBB8_7
-; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: li a7, 0
-; RV32I-NEXT: bgeu a2, a3, .LBB8_8
-; RV32I-NEXT: j .LBB8_9
+; RV32I-NEXT: sll t3, t5, a2
+; RV32I-NEXT: and t3, t2, t3
+; RV32I-NEXT: or t3, t3, t1
+; RV32I-NEXT: mv t1, t5
+; RV32I-NEXT: beqz a2, .LBB8_7
+; RV32I-NEXT: .LBB8_6:
+; RV32I-NEXT: mv t1, t3
; RV32I-NEXT: .LBB8_7:
-; RV32I-NEXT: sll a7, a4, t3
-; RV32I-NEXT: bltu a2, a3, .LBB8_9
-; RV32I-NEXT: .LBB8_8:
-; RV32I-NEXT: mv t2, a7
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: xori t3, a2, 31
+; RV32I-NEXT: bltz a5, .LBB8_10
+; RV32I-NEXT: # %bb.8:
+; RV32I-NEXT: sll s0, t5, a5
+; RV32I-NEXT: srli t5, a4, 1
+; RV32I-NEXT: bgez t6, .LBB8_11
; RV32I-NEXT: .LBB8_9:
-; RV32I-NEXT: mv a7, t5
-; RV32I-NEXT: beqz a2, .LBB8_11
-; RV32I-NEXT: # %bb.10:
-; RV32I-NEXT: mv a7, t2
+; RV32I-NEXT: sll t6, a3, t4
+; RV32I-NEXT: xori t4, t4, 31
+; RV32I-NEXT: srl t4, t5, t4
+; RV32I-NEXT: or t4, t6, t4
+; RV32I-NEXT: bltu a2, t0, .LBB8_12
+; RV32I-NEXT: j .LBB8_13
+; RV32I-NEXT: .LBB8_10:
+; RV32I-NEXT: sll s0, a1, a2
+; RV32I-NEXT: srli t5, t5, 1
+; RV32I-NEXT: srl t5, t5, t3
+; RV32I-NEXT: or s0, s0, t5
+; RV32I-NEXT: srli t5, a4, 1
+; RV32I-NEXT: bltz t6, .LBB8_9
; RV32I-NEXT: .LBB8_11:
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: xori t2, a2, 31
-; RV32I-NEXT: bltz a6, .LBB8_13
-; RV32I-NEXT: # %bb.12:
-; RV32I-NEXT: sll t5, t5, a6
-; RV32I-NEXT: bltz t1, .LBB8_14
-; RV32I-NEXT: j .LBB8_15
+; RV32I-NEXT: sll t4, a4, t6
+; RV32I-NEXT: bgeu a2, t0, .LBB8_13
+; RV32I-NEXT: .LBB8_12:
+; RV32I-NEXT: slti a7, a7, 0
+; RV32I-NEXT: neg a7, a7
+; RV32I-NEXT: and a6, a7, a6
+; RV32I-NEXT: or t4, s0, a6
; RV32I-NEXT: .LBB8_13:
-; RV32I-NEXT: sll t6, a1, a2
-; RV32I-NEXT: srli t5, t5, 1
-; RV32I-NEXT: srl t5, t5, t2
-; RV32I-NEXT: or t5, t6, t5
-; RV32I-NEXT: bgez t1, .LBB8_15
-; RV32I-NEXT: .LBB8_14:
-; RV32I-NEXT: or t5, t5, t0
+; RV32I-NEXT: beqz a2, .LBB8_15
+; RV32I-NEXT: # %bb.14:
+; RV32I-NEXT: mv a1, t4
; RV32I-NEXT: .LBB8_15:
-; RV32I-NEXT: srli t0, a4, 1
-; RV32I-NEXT: bltz t4, .LBB8_17
+; RV32I-NEXT: sll a6, a4, a2
+; RV32I-NEXT: and a6, t2, a6
+; RV32I-NEXT: sltiu a7, a2, 64
+; RV32I-NEXT: neg a7, a7
+; RV32I-NEXT: and a6, a7, a6
+; RV32I-NEXT: bltz a5, .LBB8_17
; RV32I-NEXT: # %bb.16:
-; RV32I-NEXT: sll t1, a4, t4
-; RV32I-NEXT: bgeu a2, a3, .LBB8_18
-; RV32I-NEXT: j .LBB8_19
+; RV32I-NEXT: sll a2, a4, a5
+; RV32I-NEXT: j .LBB8_18
; RV32I-NEXT: .LBB8_17:
-; RV32I-NEXT: sll t1, a5, t3
-; RV32I-NEXT: xori t3, t3, 31
-; RV32I-NEXT: srl t3, t0, t3
-; RV32I-NEXT: or t1, t1, t3
-; RV32I-NEXT: bltu a2, a3, .LBB8_19
+; RV32I-NEXT: sll a2, a3, a2
+; RV32I-NEXT: srl a3, t5, t3
+; RV32I-NEXT: or a2, a2, a3
; RV32I-NEXT: .LBB8_18:
-; RV32I-NEXT: mv t5, t1
-; RV32I-NEXT: .LBB8_19:
-; RV32I-NEXT: bnez a2, .LBB8_22
-; RV32I-NEXT: # %bb.20:
-; RV32I-NEXT: bltz a6, .LBB8_23
-; RV32I-NEXT: .LBB8_21:
-; RV32I-NEXT: sll a5, a4, a6
-; RV32I-NEXT: bgeu a2, a3, .LBB8_24
-; RV32I-NEXT: j .LBB8_25
-; RV32I-NEXT: .LBB8_22:
-; RV32I-NEXT: mv a1, t5
-; RV32I-NEXT: bgez a6, .LBB8_21
-; RV32I-NEXT: .LBB8_23:
-; RV32I-NEXT: sll a5, a5, a2
-; RV32I-NEXT: srl t0, t0, t2
-; RV32I-NEXT: or a5, a5, t0
-; RV32I-NEXT: bltu a2, a3, .LBB8_25
-; RV32I-NEXT: .LBB8_24:
-; RV32I-NEXT: li a5, 0
-; RV32I-NEXT: .LBB8_25:
-; RV32I-NEXT: bltz a6, .LBB8_27
-; RV32I-NEXT: # %bb.26:
-; RV32I-NEXT: li a4, 0
-; RV32I-NEXT: bgeu a2, a3, .LBB8_28
-; RV32I-NEXT: j .LBB8_29
-; RV32I-NEXT: .LBB8_27:
-; RV32I-NEXT: sll a4, a4, a2
-; RV32I-NEXT: bltu a2, a3, .LBB8_29
-; RV32I-NEXT: .LBB8_28:
-; RV32I-NEXT: li a4, 0
-; RV32I-NEXT: .LBB8_29:
-; RV32I-NEXT: sw a4, 0(a0)
-; RV32I-NEXT: sw a5, 4(a0)
+; RV32I-NEXT: and a2, a7, a2
+; RV32I-NEXT: sw a2, 4(a0)
+; RV32I-NEXT: sw a6, 0(a0)
; RV32I-NEXT: sw a1, 12(a0)
-; RV32I-NEXT: sw a7, 8(a0)
+; RV32I-NEXT: sw t1, 8(a0)
+; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: shl128:
; RV64I-NEXT: bltz a3, .LBB8_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sll a1, a0, a3
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: ret
+; RV64I-NEXT: j .LBB8_3
; RV64I-NEXT: .LBB8_2:
; RV64I-NEXT: sll a1, a1, a2
-; RV64I-NEXT: xori a3, a2, 63
-; RV64I-NEXT: srli a4, a0, 1
-; RV64I-NEXT: srl a3, a4, a3
-; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: xori a4, a2, 63
+; RV64I-NEXT: srli a5, a0, 1
+; RV64I-NEXT: srl a4, a5, a4
+; RV64I-NEXT: or a1, a1, a4
+; RV64I-NEXT: .LBB8_3:
; RV64I-NEXT: sll a0, a0, a2
+; RV64I-NEXT: slti a2, a3, 0
+; RV64I-NEXT: neg a2, a2
+; RV64I-NEXT: and a0, a2, a0
; RV64I-NEXT: ret
%1 = shl i128 %a, %b
ret i128 %1
define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
; RV32I-LABEL: func:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB0_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB0_2:
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: subw a1, a0, a1
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB0_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB0_2:
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func:
; RV32I-NEXT: sub a2, a0, a2
; RV32I-NEXT: beq a3, a1, .LBB1_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a4, a1, a3
+; RV32I-NEXT: sltu a0, a1, a3
; RV32I-NEXT: j .LBB1_3
; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sltu a0, a0, a2
; RV32I-NEXT: .LBB1_3:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: bnez a4, .LBB1_5
-; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: .LBB1_5:
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a1, a0, -1
+; RV32I-NEXT: and a0, a1, a2
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: func2:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: sub a1, a0, a1
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB1_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB1_2:
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func2:
; RV32IZbb-NEXT: sub a2, a0, a2
; RV32IZbb-NEXT: beq a3, a1, .LBB1_2
; RV32IZbb-NEXT: # %bb.1:
-; RV32IZbb-NEXT: sltu a4, a1, a3
+; RV32IZbb-NEXT: sltu a0, a1, a3
; RV32IZbb-NEXT: j .LBB1_3
; RV32IZbb-NEXT: .LBB1_2:
-; RV32IZbb-NEXT: sltu a4, a0, a2
+; RV32IZbb-NEXT: sltu a0, a0, a2
; RV32IZbb-NEXT: .LBB1_3:
-; RV32IZbb-NEXT: li a0, 0
-; RV32IZbb-NEXT: li a1, 0
-; RV32IZbb-NEXT: bnez a4, .LBB1_5
-; RV32IZbb-NEXT: # %bb.4:
-; RV32IZbb-NEXT: mv a0, a2
-; RV32IZbb-NEXT: mv a1, a3
-; RV32IZbb-NEXT: .LBB1_5:
+; RV32IZbb-NEXT: snez a0, a0
+; RV32IZbb-NEXT: addi a1, a0, -1
+; RV32IZbb-NEXT: and a0, a1, a2
+; RV32IZbb-NEXT: and a1, a1, a3
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func2:
define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind {
; RV32I-LABEL: func16:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func16:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: sub a1, a0, a1
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB2_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB2_2:
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func16:
define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
; RV32I-LABEL: func8:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB3_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func8:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: sub a1, a0, a1
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB3_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB3_2:
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func8:
define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
; RV32I-LABEL: func3:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB4_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB4_2:
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func3:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: sub a1, a0, a1
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB4_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func3:
define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
; RV32I-LABEL: func32:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: mul a0, a1, a2
-; RV32I-NEXT: sub a1, a3, a0
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a3, a1, .LBB0_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB0_2:
+; RV32I-NEXT: mul a1, a1, a2
+; RV32I-NEXT: sub a1, a0, a1
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func32:
; RV64I: # %bb.0:
; RV64I-NEXT: mulw a1, a1, a2
; RV64I-NEXT: subw a1, a0, a1
-; RV64I-NEXT: sext.w a2, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB0_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB0_2:
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func32:
; RV32I-NEXT: sub a3, a0, a4
; RV32I-NEXT: beq a2, a1, .LBB1_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a4, a1, a2
+; RV32I-NEXT: sltu a0, a1, a2
; RV32I-NEXT: j .LBB1_3
; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: sltu a4, a0, a3
+; RV32I-NEXT: sltu a0, a0, a3
; RV32I-NEXT: .LBB1_3:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: bnez a4, .LBB1_5
-; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: mv a1, a2
-; RV32I-NEXT: .LBB1_5:
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a1, a0, -1
+; RV32I-NEXT: and a0, a1, a3
+; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: func64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: sub a2, a0, a2
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a1, a2, .LBB1_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB1_2:
+; RV64I-NEXT: sub a1, a0, a2
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func64:
; RV32IZbb-NEXT: sub a3, a0, a4
; RV32IZbb-NEXT: beq a2, a1, .LBB1_2
; RV32IZbb-NEXT: # %bb.1:
-; RV32IZbb-NEXT: sltu a4, a1, a2
+; RV32IZbb-NEXT: sltu a0, a1, a2
; RV32IZbb-NEXT: j .LBB1_3
; RV32IZbb-NEXT: .LBB1_2:
-; RV32IZbb-NEXT: sltu a4, a0, a3
+; RV32IZbb-NEXT: sltu a0, a0, a3
; RV32IZbb-NEXT: .LBB1_3:
-; RV32IZbb-NEXT: li a0, 0
-; RV32IZbb-NEXT: li a1, 0
-; RV32IZbb-NEXT: bnez a4, .LBB1_5
-; RV32IZbb-NEXT: # %bb.4:
-; RV32IZbb-NEXT: mv a0, a3
-; RV32IZbb-NEXT: mv a1, a2
-; RV32IZbb-NEXT: .LBB1_5:
+; RV32IZbb-NEXT: snez a0, a0
+; RV32IZbb-NEXT: addi a1, a0, -1
+; RV32IZbb-NEXT: and a0, a1, a3
+; RV32IZbb-NEXT: and a1, a1, a2
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func64:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a3, 16
; RV32I-NEXT: addi a3, a3, -1
-; RV32I-NEXT: and a4, a0, a3
-; RV32I-NEXT: mul a0, a1, a2
; RV32I-NEXT: and a0, a0, a3
-; RV32I-NEXT: sub a1, a4, a0
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a4, a1, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: mul a1, a1, a2
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: sub a1, a0, a1
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func16:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a3, 16
; RV64I-NEXT: addiw a3, a3, -1
-; RV64I-NEXT: and a4, a0, a3
-; RV64I-NEXT: mul a0, a1, a2
; RV64I-NEXT: and a0, a0, a3
-; RV64I-NEXT: sub a1, a4, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a4, a1, .LBB2_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB2_2:
+; RV64I-NEXT: mul a1, a1, a2
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: sub a1, a0, a1
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func16:
define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV32I-LABEL: func8:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a3, a0, 255
-; RV32I-NEXT: mul a0, a1, a2
; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sub a1, a3, a0
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a3, a1, .LBB3_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: mul a1, a1, a2
+; RV32I-NEXT: andi a1, a1, 255
+; RV32I-NEXT: sub a1, a0, a1
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func8:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a3, a0, 255
-; RV64I-NEXT: mulw a0, a1, a2
; RV64I-NEXT: andi a0, a0, 255
-; RV64I-NEXT: sub a1, a3, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a3, a1, .LBB3_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB3_2:
+; RV64I-NEXT: mulw a1, a1, a2
+; RV64I-NEXT: andi a1, a1, 255
+; RV64I-NEXT: sub a1, a0, a1
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func8:
define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV32I-LABEL: func4:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a3, a0, 15
-; RV32I-NEXT: mul a0, a1, a2
; RV32I-NEXT: andi a0, a0, 15
-; RV32I-NEXT: sub a1, a3, a0
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a3, a1, .LBB4_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB4_2:
+; RV32I-NEXT: mul a1, a1, a2
+; RV32I-NEXT: andi a1, a1, 15
+; RV32I-NEXT: sub a1, a0, a1
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func4:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a3, a0, 15
-; RV64I-NEXT: mulw a0, a1, a2
; RV64I-NEXT: andi a0, a0, 15
-; RV64I-NEXT: sub a1, a3, a0
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a3, a1, .LBB4_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: mulw a1, a1, a2
+; RV64I-NEXT: andi a1, a1, 15
+; RV64I-NEXT: sub a1, a0, a1
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func4:
; RV32-LABEL: vec3_setcc_crash:
; RV32: # %bb.0:
; RV32-NEXT: lw a0, 0(a0)
-; RV32-NEXT: slli a2, a0, 8
-; RV32-NEXT: slli a3, a0, 24
-; RV32-NEXT: slli a4, a0, 16
-; RV32-NEXT: srai a5, a4, 24
+; RV32-NEXT: srli a2, a0, 16
+; RV32-NEXT: slli a3, a0, 8
; RV32-NEXT: srai a3, a3, 24
-; RV32-NEXT: bgtz a5, .LBB0_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a5, 0
-; RV32-NEXT: j .LBB0_3
-; RV32-NEXT: .LBB0_2:
-; RV32-NEXT: srli a5, a4, 24
-; RV32-NEXT: .LBB0_3:
-; RV32-NEXT: srai a4, a2, 24
-; RV32-NEXT: slli a2, a5, 8
-; RV32-NEXT: mv a5, a0
-; RV32-NEXT: bgtz a3, .LBB0_5
-; RV32-NEXT: # %bb.4:
-; RV32-NEXT: li a5, 0
-; RV32-NEXT: .LBB0_5:
-; RV32-NEXT: andi a3, a5, 255
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: bgtz a4, .LBB0_7
-; RV32-NEXT: # %bb.6:
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: j .LBB0_8
-; RV32-NEXT: .LBB0_7:
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: .LBB0_8:
-; RV32-NEXT: sb a0, 2(a1)
-; RV32-NEXT: sh a2, 0(a1)
+; RV32-NEXT: slli a4, a0, 24
+; RV32-NEXT: srai a4, a4, 24
+; RV32-NEXT: srli a5, a0, 8
+; RV32-NEXT: slli a6, a0, 16
+; RV32-NEXT: srai a6, a6, 24
+; RV32-NEXT: sgtz a6, a6
+; RV32-NEXT: neg a6, a6
+; RV32-NEXT: and a5, a6, a5
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: sgtz a4, a4
+; RV32-NEXT: neg a4, a4
+; RV32-NEXT: and a0, a4, a0
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: or a0, a0, a5
+; RV32-NEXT: sgtz a3, a3
+; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a2, a3, a2
+; RV32-NEXT: sb a2, 2(a1)
+; RV32-NEXT: sh a0, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: vec3_setcc_crash:
; RV64: # %bb.0:
; RV64-NEXT: lw a0, 0(a0)
-; RV64-NEXT: slli a2, a0, 40
-; RV64-NEXT: slli a3, a0, 56
-; RV64-NEXT: slli a4, a0, 48
-; RV64-NEXT: srai a5, a4, 56
+; RV64-NEXT: srli a2, a0, 16
+; RV64-NEXT: slli a3, a0, 40
; RV64-NEXT: srai a3, a3, 56
-; RV64-NEXT: bgtz a5, .LBB0_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: li a5, 0
-; RV64-NEXT: j .LBB0_3
-; RV64-NEXT: .LBB0_2:
-; RV64-NEXT: srli a5, a4, 56
-; RV64-NEXT: .LBB0_3:
-; RV64-NEXT: srai a4, a2, 56
-; RV64-NEXT: slli a2, a5, 8
-; RV64-NEXT: mv a5, a0
-; RV64-NEXT: bgtz a3, .LBB0_5
-; RV64-NEXT: # %bb.4:
-; RV64-NEXT: li a5, 0
-; RV64-NEXT: .LBB0_5:
-; RV64-NEXT: andi a3, a5, 255
-; RV64-NEXT: or a2, a3, a2
-; RV64-NEXT: bgtz a4, .LBB0_7
-; RV64-NEXT: # %bb.6:
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: j .LBB0_8
-; RV64-NEXT: .LBB0_7:
-; RV64-NEXT: srliw a0, a0, 16
-; RV64-NEXT: .LBB0_8:
-; RV64-NEXT: sb a0, 2(a1)
-; RV64-NEXT: sh a2, 0(a1)
+; RV64-NEXT: slli a4, a0, 56
+; RV64-NEXT: srai a4, a4, 56
+; RV64-NEXT: srli a5, a0, 8
+; RV64-NEXT: slli a6, a0, 48
+; RV64-NEXT: srai a6, a6, 56
+; RV64-NEXT: sgtz a6, a6
+; RV64-NEXT: neg a6, a6
+; RV64-NEXT: and a5, a6, a5
+; RV64-NEXT: slli a5, a5, 8
+; RV64-NEXT: sgtz a4, a4
+; RV64-NEXT: neg a4, a4
+; RV64-NEXT: and a0, a4, a0
+; RV64-NEXT: andi a0, a0, 255
+; RV64-NEXT: or a0, a0, a5
+; RV64-NEXT: sgtz a3, a3
+; RV64-NEXT: neg a3, a3
+; RV64-NEXT: and a2, a3, a2
+; RV64-NEXT: sb a2, 2(a1)
+; RV64-NEXT: sh a0, 0(a1)
; RV64-NEXT: ret
%a = load <3 x i8>, <3 x i8>* %in
%cmp = icmp sgt <3 x i8> %a, zeroinitializer