From: Craig Topper
Date: Thu, 22 Apr 2021 22:10:47 +0000 (-0700)
Subject: [RISCV] Fix crash with fptosi.sat/fptoui.sat intrinsics on RV64. Add test cases.
X-Git-Tag: llvmorg-14-init~8713
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5185b52988c5874dd46b2cc17685b78cd64609c1;p=platform%2Fupstream%2Fllvm.git

[RISCV] Fix crash with fptosi.sat/fptoui.sat intrinsics on RV64. Add test cases.

Add PromoteIntOp_FP_TO_XINT_SAT to type legalize the bit width operand
from i32 to i64 for RV64.

Add test cases for the saturating intrinsics for half/float/double
and i32/i64.

CodeGen is definitely not optimal. We can probably make use of
the native behavior of fcvt instructions in many cases.

Fixes PR50083
---

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 2b075a0..4e10294 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1547,6 +1547,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
 
   case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
 
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT: Res = PromoteIntOp_FP_TO_XINT_SAT(N); break;
+
   case ISD::VECREDUCE_ADD:
   case ISD::VECREDUCE_MUL:
   case ISD::VECREDUCE_AND:
@@ -1967,6 +1970,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) {
       DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntOp_FP_TO_XINT_SAT(SDNode *N) {
+  // Promote the bit width operand (operand 1) to a supported integer width;
+  // operand 0 is passed through unchanged.
+  SDValue Op1 = ZExtPromotedInteger(N->getOperand(1));
+  return SDValue(
+      DAG.UpdateNodeOperands(N, N->getOperand(0), Op1), 0);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
   // Promote the RETURNADDR/FRAMEADDR argument to a supported integer width.
SDValue Op = ZExtPromotedInteger(N->getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index a40dd88..3784fe0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -391,6 +391,7 @@ private: SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_FIX(SDNode *N); + SDValue PromoteIntOp_FP_TO_XINT_SAT(SDNode *N); SDValue PromoteIntOp_FPOWI(SDNode *N); SDValue PromoteIntOp_VECREDUCE(SDNode *N); SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index a2e53a5..f793a7f 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -70,6 +70,53 @@ define i32 @fcvt_w_d(double %a) nounwind { ret i32 %1 } +define i32 @fcvt_w_d_sat(double %a) nounwind { +; RV32IFD-LABEL: fcvt_w_d_sat: +; RV32IFD: # %bb.0: # %start +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: feq.d a0, ft0, ft0 +; RV32IFD-NEXT: bnez a0, .LBB3_2 +; RV32IFD-NEXT: # %bb.1: # %start +; RV32IFD-NEXT: mv a0, zero +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB3_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IFD-NEXT: fld ft1, %lo(.LCPI3_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI3_1) +; RV32IFD-NEXT: fld ft2, %lo(.LCPI3_1)(a0) +; RV32IFD-NEXT: fmax.d ft0, ft0, ft1 +; RV32IFD-NEXT: fmin.d ft0, ft0, ft2 +; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_w_d_sat: +; RV64IFD: # %bb.0: # %start +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: feq.d a0, ft0, ft0 +; RV64IFD-NEXT: bnez a0, .LBB3_2 +; RV64IFD-NEXT: # %bb.1: # %start +; RV64IFD-NEXT: mv a0, zero +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: 
.LBB3_2: +; RV64IFD-NEXT: lui a0, %hi(.LCPI3_0) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI3_0)(a0) +; RV64IFD-NEXT: lui a0, %hi(.LCPI3_1) +; RV64IFD-NEXT: fld ft2, %lo(.LCPI3_1)(a0) +; RV64IFD-NEXT: fmax.d ft0, ft0, ft1 +; RV64IFD-NEXT: fmin.d ft0, ft0, ft2 +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: ret +start: + %0 = tail call i32 @llvm.fptosi.sat.i32.f64(double %a) + ret i32 %0 +} +declare i32 @llvm.fptosi.sat.i32.f64(double) + ; For RV64D, fcvt.lu.d is semantically equivalent to fcvt.wu.d in this case ; because fptosi will produce poison if the result doesn't fit into an i32. define i32 @fcvt_wu_d(double %a) nounwind { @@ -92,6 +139,38 @@ define i32 @fcvt_wu_d(double %a) nounwind { ret i32 %1 } +define i32 @fcvt_wu_d_sat(double %a) nounwind { +; RV32IFD-LABEL: fcvt_wu_d_sat: +; RV32IFD: # %bb.0: # %start +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: lui a0, %hi(.LCPI5_0) +; RV32IFD-NEXT: fld ft1, %lo(.LCPI5_0)(a0) +; RV32IFD-NEXT: fcvt.d.w ft2, zero +; RV32IFD-NEXT: fmax.d ft0, ft0, ft2 +; RV32IFD-NEXT: fmin.d ft0, ft0, ft1 +; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_wu_d_sat: +; RV64IFD: # %bb.0: # %start +; RV64IFD-NEXT: lui a1, %hi(.LCPI5_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI5_0)(a1) +; RV64IFD-NEXT: fmv.d.x ft1, a0 +; RV64IFD-NEXT: fmv.d.x ft2, zero +; RV64IFD-NEXT: fmax.d ft1, ft1, ft2 +; RV64IFD-NEXT: fmin.d ft0, ft1, ft0 +; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64IFD-NEXT: ret +start: + %0 = tail call i32 @llvm.fptoui.sat.i32.f64(double %a) + ret i32 %0 +} +declare i32 @llvm.fptoui.sat.i32.f64(double) + define double @fcvt_d_w(i32 %a) nounwind { ; RV32IFD-LABEL: fcvt_d_w: ; RV32IFD: # %bb.0: @@ -151,6 +230,98 @@ define i64 @fcvt_l_d(double %a) nounwind { ret i64 %1 } +define i64 @fcvt_l_d_sat(double %a) nounwind { +; RV32IFD-LABEL: fcvt_l_d_sat: +; RV32IFD: # %bb.0: # 
%start +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw a0, 16(sp) +; RV32IFD-NEXT: sw a1, 20(sp) +; RV32IFD-NEXT: fld ft0, 16(sp) +; RV32IFD-NEXT: fsd ft0, 8(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call __fixdfdi@plt +; RV32IFD-NEXT: fld ft1, 8(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lui a2, %hi(.LCPI9_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI9_0)(a2) +; RV32IFD-NEXT: fle.d a3, ft0, ft1 +; RV32IFD-NEXT: mv a2, a0 +; RV32IFD-NEXT: bnez a3, .LBB9_2 +; RV32IFD-NEXT: # %bb.1: # %start +; RV32IFD-NEXT: mv a2, zero +; RV32IFD-NEXT: .LBB9_2: # %start +; RV32IFD-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI9_1)(a0) +; RV32IFD-NEXT: flt.d a4, ft0, ft1 +; RV32IFD-NEXT: addi a0, zero, -1 +; RV32IFD-NEXT: beqz a4, .LBB9_9 +; RV32IFD-NEXT: # %bb.3: # %start +; RV32IFD-NEXT: feq.d a2, ft1, ft1 +; RV32IFD-NEXT: beqz a2, .LBB9_10 +; RV32IFD-NEXT: .LBB9_4: # %start +; RV32IFD-NEXT: lui a5, 524288 +; RV32IFD-NEXT: beqz a3, .LBB9_11 +; RV32IFD-NEXT: .LBB9_5: # %start +; RV32IFD-NEXT: bnez a4, .LBB9_12 +; RV32IFD-NEXT: .LBB9_6: # %start +; RV32IFD-NEXT: bnez a2, .LBB9_8 +; RV32IFD-NEXT: .LBB9_7: # %start +; RV32IFD-NEXT: mv a1, zero +; RV32IFD-NEXT: .LBB9_8: # %start +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 32 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB9_9: # %start +; RV32IFD-NEXT: mv a0, a2 +; RV32IFD-NEXT: feq.d a2, ft1, ft1 +; RV32IFD-NEXT: bnez a2, .LBB9_4 +; RV32IFD-NEXT: .LBB9_10: # %start +; RV32IFD-NEXT: mv a0, zero +; RV32IFD-NEXT: lui a5, 524288 +; RV32IFD-NEXT: bnez a3, .LBB9_5 +; RV32IFD-NEXT: .LBB9_11: # %start +; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: beqz a4, .LBB9_6 +; RV32IFD-NEXT: .LBB9_12: +; RV32IFD-NEXT: addi a1, a5, -1 +; RV32IFD-NEXT: beqz a2, .LBB9_7 +; RV32IFD-NEXT: j .LBB9_8 +; +; RV64IFD-LABEL: fcvt_l_d_sat: +; RV64IFD: # %bb.0: # %start +; RV64IFD-NEXT: lui a1, %hi(.LCPI9_0) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI9_0)(a1) +; 
RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fle.d a0, ft1, ft0 +; RV64IFD-NEXT: addi a1, zero, -1 +; RV64IFD-NEXT: bnez a0, .LBB9_2 +; RV64IFD-NEXT: # %bb.1: # %start +; RV64IFD-NEXT: slli a0, a1, 63 +; RV64IFD-NEXT: j .LBB9_3 +; RV64IFD-NEXT: .LBB9_2: +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: .LBB9_3: # %start +; RV64IFD-NEXT: lui a2, %hi(.LCPI9_1) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI9_1)(a2) +; RV64IFD-NEXT: flt.d a2, ft1, ft0 +; RV64IFD-NEXT: bnez a2, .LBB9_6 +; RV64IFD-NEXT: # %bb.4: # %start +; RV64IFD-NEXT: feq.d a1, ft0, ft0 +; RV64IFD-NEXT: beqz a1, .LBB9_7 +; RV64IFD-NEXT: .LBB9_5: # %start +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB9_6: +; RV64IFD-NEXT: srli a0, a1, 1 +; RV64IFD-NEXT: feq.d a1, ft0, ft0 +; RV64IFD-NEXT: bnez a1, .LBB9_5 +; RV64IFD-NEXT: .LBB9_7: # %start +; RV64IFD-NEXT: mv a0, zero +; RV64IFD-NEXT: ret +start: + %0 = tail call i64 @llvm.fptosi.sat.i64.f64(double %a) + ret i64 %0 +} +declare i64 @llvm.fptosi.sat.i64.f64(double) + define i64 @fcvt_lu_d(double %a) nounwind { ; RV32IFD-LABEL: fcvt_lu_d: ; RV32IFD: # %bb.0: @@ -170,6 +341,76 @@ define i64 @fcvt_lu_d(double %a) nounwind { ret i64 %1 } +define i64 @fcvt_lu_d_sat(double %a) nounwind { +; RV32IFD-LABEL: fcvt_lu_d_sat: +; RV32IFD: # %bb.0: # %start +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw a0, 16(sp) +; RV32IFD-NEXT: sw a1, 20(sp) +; RV32IFD-NEXT: fld ft0, 16(sp) +; RV32IFD-NEXT: fsd ft0, 8(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: fld ft1, 8(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: fcvt.d.w ft0, zero +; RV32IFD-NEXT: fle.d a4, ft0, ft1 +; RV32IFD-NEXT: mv a3, a0 +; RV32IFD-NEXT: bnez a4, .LBB11_2 +; RV32IFD-NEXT: # %bb.1: # %start +; RV32IFD-NEXT: mv a3, zero +; RV32IFD-NEXT: .LBB11_2: # %start +; RV32IFD-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; RV32IFD-NEXT: flt.d a5, ft0, ft1 +; RV32IFD-NEXT: addi a2, zero, -1 +; 
RV32IFD-NEXT: addi a0, zero, -1 +; RV32IFD-NEXT: beqz a5, .LBB11_7 +; RV32IFD-NEXT: # %bb.3: # %start +; RV32IFD-NEXT: beqz a4, .LBB11_8 +; RV32IFD-NEXT: .LBB11_4: # %start +; RV32IFD-NEXT: bnez a5, .LBB11_6 +; RV32IFD-NEXT: .LBB11_5: # %start +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: .LBB11_6: # %start +; RV32IFD-NEXT: mv a1, a2 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 32 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB11_7: # %start +; RV32IFD-NEXT: mv a0, a3 +; RV32IFD-NEXT: bnez a4, .LBB11_4 +; RV32IFD-NEXT: .LBB11_8: # %start +; RV32IFD-NEXT: mv a1, zero +; RV32IFD-NEXT: beqz a5, .LBB11_5 +; RV32IFD-NEXT: j .LBB11_6 +; +; RV64IFD-LABEL: fcvt_lu_d_sat: +; RV64IFD: # %bb.0: # %start +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fmv.d.x ft1, zero +; RV64IFD-NEXT: fle.d a0, ft1, ft0 +; RV64IFD-NEXT: bnez a0, .LBB11_2 +; RV64IFD-NEXT: # %bb.1: # %start +; RV64IFD-NEXT: mv a1, zero +; RV64IFD-NEXT: j .LBB11_3 +; RV64IFD-NEXT: .LBB11_2: +; RV64IFD-NEXT: fcvt.lu.d a1, ft0, rtz +; RV64IFD-NEXT: .LBB11_3: # %start +; RV64IFD-NEXT: lui a0, %hi(.LCPI11_0) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI11_0)(a0) +; RV64IFD-NEXT: flt.d a2, ft1, ft0 +; RV64IFD-NEXT: addi a0, zero, -1 +; RV64IFD-NEXT: bnez a2, .LBB11_5 +; RV64IFD-NEXT: # %bb.4: # %start +; RV64IFD-NEXT: mv a0, a1 +; RV64IFD-NEXT: .LBB11_5: # %start +; RV64IFD-NEXT: ret +start: + %0 = tail call i64 @llvm.fptoui.sat.i64.f64(double %a) + ret i64 %0 +} +declare i64 @llvm.fptoui.sat.i64.f64(double) + define i64 @fmv_x_d(double %a, double %b) nounwind { ; RV32IFD-LABEL: fmv_x_d: ; RV32IFD: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index bf9f576..c362093 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -22,6 +22,74 @@ define i32 @fcvt_w_s(float %a) nounwind { ret i32 %1 } +define i32 @fcvt_w_s_sat(float %a) nounwind { +; RV32IF-LABEL: fcvt_w_s_sat: +; RV32IF: # %bb.0: 
# %start +; RV32IF-NEXT: lui a1, %hi(.LCPI1_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI1_0)(a1) +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fle.s a0, ft1, ft0 +; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: bnez a0, .LBB1_2 +; RV32IF-NEXT: # %bb.1: # %start +; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: j .LBB1_3 +; RV32IF-NEXT: .LBB1_2: +; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IF-NEXT: .LBB1_3: # %start +; RV32IF-NEXT: lui a2, %hi(.LCPI1_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI1_1)(a2) +; RV32IF-NEXT: flt.s a2, ft1, ft0 +; RV32IF-NEXT: bnez a2, .LBB1_6 +; RV32IF-NEXT: # %bb.4: # %start +; RV32IF-NEXT: feq.s a1, ft0, ft0 +; RV32IF-NEXT: beqz a1, .LBB1_7 +; RV32IF-NEXT: .LBB1_5: # %start +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB1_6: +; RV32IF-NEXT: addi a0, a1, -1 +; RV32IF-NEXT: feq.s a1, ft0, ft0 +; RV32IF-NEXT: bnez a1, .LBB1_5 +; RV32IF-NEXT: .LBB1_7: # %start +; RV32IF-NEXT: mv a0, zero +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_w_s_sat: +; RV64IF: # %bb.0: # %start +; RV64IF-NEXT: lui a1, %hi(.LCPI1_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI1_0)(a1) +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fle.s a0, ft1, ft0 +; RV64IF-NEXT: lui a1, 524288 +; RV64IF-NEXT: bnez a0, .LBB1_2 +; RV64IF-NEXT: # %bb.1: # %start +; RV64IF-NEXT: lui a0, 524288 +; RV64IF-NEXT: j .LBB1_3 +; RV64IF-NEXT: .LBB1_2: +; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: .LBB1_3: # %start +; RV64IF-NEXT: lui a2, %hi(.LCPI1_1) +; RV64IF-NEXT: flw ft1, %lo(.LCPI1_1)(a2) +; RV64IF-NEXT: flt.s a2, ft1, ft0 +; RV64IF-NEXT: bnez a2, .LBB1_6 +; RV64IF-NEXT: # %bb.4: # %start +; RV64IF-NEXT: feq.s a1, ft0, ft0 +; RV64IF-NEXT: beqz a1, .LBB1_7 +; RV64IF-NEXT: .LBB1_5: # %start +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB1_6: +; RV64IF-NEXT: addiw a0, a1, -1 +; RV64IF-NEXT: feq.s a1, ft0, ft0 +; RV64IF-NEXT: bnez a1, .LBB1_5 +; RV64IF-NEXT: .LBB1_7: # %start +; RV64IF-NEXT: mv a0, zero +; RV64IF-NEXT: ret +start: + %0 = tail call i32 @llvm.fptosi.sat.i32.f32(float %a) + ret i32 %0 +} +declare i32 
@llvm.fptosi.sat.i32.f32(float) + ; For RV64F, fcvt.lu.s is semantically equivalent to fcvt.wu.s in this case ; because fptoui will produce poison if the result doesn't fit into an i32. define i32 @fcvt_wu_s(float %a) nounwind { @@ -40,6 +108,56 @@ define i32 @fcvt_wu_s(float %a) nounwind { ret i32 %1 } +define i32 @fcvt_wu_s_sat(float %a) nounwind { +; RV32IF-LABEL: fcvt_wu_s_sat: +; RV32IF: # %bb.0: # %start +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fmv.w.x ft1, zero +; RV32IF-NEXT: fle.s a0, ft1, ft0 +; RV32IF-NEXT: bnez a0, .LBB3_2 +; RV32IF-NEXT: # %bb.1: # %start +; RV32IF-NEXT: mv a1, zero +; RV32IF-NEXT: j .LBB3_3 +; RV32IF-NEXT: .LBB3_2: +; RV32IF-NEXT: fcvt.wu.s a1, ft0, rtz +; RV32IF-NEXT: .LBB3_3: # %start +; RV32IF-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI3_0)(a0) +; RV32IF-NEXT: flt.s a2, ft1, ft0 +; RV32IF-NEXT: addi a0, zero, -1 +; RV32IF-NEXT: bnez a2, .LBB3_5 +; RV32IF-NEXT: # %bb.4: # %start +; RV32IF-NEXT: mv a0, a1 +; RV32IF-NEXT: .LBB3_5: # %start +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_wu_s_sat: +; RV64IF: # %bb.0: # %start +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fmv.w.x ft1, zero +; RV64IF-NEXT: fle.s a0, ft1, ft0 +; RV64IF-NEXT: bnez a0, .LBB3_2 +; RV64IF-NEXT: # %bb.1: # %start +; RV64IF-NEXT: mv a0, zero +; RV64IF-NEXT: j .LBB3_3 +; RV64IF-NEXT: .LBB3_2: +; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: .LBB3_3: # %start +; RV64IF-NEXT: lui a1, %hi(.LCPI3_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI3_0)(a1) +; RV64IF-NEXT: flt.s a1, ft1, ft0 +; RV64IF-NEXT: beqz a1, .LBB3_5 +; RV64IF-NEXT: # %bb.4: +; RV64IF-NEXT: addi a0, zero, -1 +; RV64IF-NEXT: srli a0, a0, 32 +; RV64IF-NEXT: .LBB3_5: # %start +; RV64IF-NEXT: ret +start: + %0 = tail call i32 @llvm.fptoui.sat.i32.f32(float %a) + ret i32 %0 +} +declare i32 @llvm.fptoui.sat.i32.f32(float) + define i32 @fmv_x_w(float %a, float %b) nounwind { ; RV32IF-LABEL: fmv_x_w: ; RV32IF: # %bb.0: @@ -136,6 +254,99 @@ define i64 @fcvt_l_s(float %a) nounwind { ret 
i64 %1 } +define i64 @fcvt_l_s_sat(float %a) nounwind { +; RV32IF-LABEL: fcvt_l_s_sat: +; RV32IF: # %bb.0: # %start +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: lui a1, %hi(.LCPI9_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI9_0)(a1) +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: fsw ft1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fle.s s0, ft0, ft1 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bnez s0, .LBB9_2 +; RV32IF-NEXT: # %bb.1: # %start +; RV32IF-NEXT: mv a2, zero +; RV32IF-NEXT: .LBB9_2: # %start +; RV32IF-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI9_1)(a0) +; RV32IF-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flt.s a3, ft0, ft1 +; RV32IF-NEXT: fmv.s ft0, ft1 +; RV32IF-NEXT: addi a0, zero, -1 +; RV32IF-NEXT: beqz a3, .LBB9_9 +; RV32IF-NEXT: # %bb.3: # %start +; RV32IF-NEXT: feq.s a2, ft0, ft0 +; RV32IF-NEXT: beqz a2, .LBB9_10 +; RV32IF-NEXT: .LBB9_4: # %start +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: beqz s0, .LBB9_11 +; RV32IF-NEXT: .LBB9_5: # %start +; RV32IF-NEXT: bnez a3, .LBB9_12 +; RV32IF-NEXT: .LBB9_6: # %start +; RV32IF-NEXT: bnez a2, .LBB9_8 +; RV32IF-NEXT: .LBB9_7: # %start +; RV32IF-NEXT: mv a1, zero +; RV32IF-NEXT: .LBB9_8: # %start +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB9_9: # %start +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: feq.s a2, ft0, ft0 +; RV32IF-NEXT: bnez a2, .LBB9_4 +; RV32IF-NEXT: .LBB9_10: # %start +; RV32IF-NEXT: mv a0, zero +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: bnez s0, .LBB9_5 +; RV32IF-NEXT: .LBB9_11: # %start +; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: beqz a3, .LBB9_6 +; RV32IF-NEXT: .LBB9_12: +; RV32IF-NEXT: addi a1, a4, -1 +; RV32IF-NEXT: beqz a2, .LBB9_7 +; RV32IF-NEXT: j .LBB9_8 +; +; RV64IF-LABEL: 
fcvt_l_s_sat: +; RV64IF: # %bb.0: # %start +; RV64IF-NEXT: lui a1, %hi(.LCPI9_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI9_0)(a1) +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fle.s a0, ft1, ft0 +; RV64IF-NEXT: addi a1, zero, -1 +; RV64IF-NEXT: bnez a0, .LBB9_2 +; RV64IF-NEXT: # %bb.1: # %start +; RV64IF-NEXT: slli a0, a1, 63 +; RV64IF-NEXT: j .LBB9_3 +; RV64IF-NEXT: .LBB9_2: +; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: .LBB9_3: # %start +; RV64IF-NEXT: lui a2, %hi(.LCPI9_1) +; RV64IF-NEXT: flw ft1, %lo(.LCPI9_1)(a2) +; RV64IF-NEXT: flt.s a2, ft1, ft0 +; RV64IF-NEXT: bnez a2, .LBB9_6 +; RV64IF-NEXT: # %bb.4: # %start +; RV64IF-NEXT: feq.s a1, ft0, ft0 +; RV64IF-NEXT: beqz a1, .LBB9_7 +; RV64IF-NEXT: .LBB9_5: # %start +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB9_6: +; RV64IF-NEXT: srli a0, a1, 1 +; RV64IF-NEXT: feq.s a1, ft0, ft0 +; RV64IF-NEXT: bnez a1, .LBB9_5 +; RV64IF-NEXT: .LBB9_7: # %start +; RV64IF-NEXT: mv a0, zero +; RV64IF-NEXT: ret +start: + %0 = tail call i64 @llvm.fptosi.sat.i64.f32(float %a) + ret i64 %0 +} +declare i64 @llvm.fptosi.sat.i64.f32(float) + define i64 @fcvt_lu_s(float %a) nounwind { ; RV32IF-LABEL: fcvt_lu_s: ; RV32IF: # %bb.0: @@ -155,6 +366,76 @@ define i64 @fcvt_lu_s(float %a) nounwind { ret i64 %1 } +define i64 @fcvt_lu_s_sat(float %a) nounwind { +; RV32IF-LABEL: fcvt_lu_s_sat: +; RV32IF: # %bb.0: # %start +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: fmv.w.x ft0, zero +; RV32IF-NEXT: fsw ft1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fle.s s0, ft0, ft1 +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bnez s0, .LBB11_2 +; RV32IF-NEXT: # %bb.1: # %start +; RV32IF-NEXT: mv a3, zero +; RV32IF-NEXT: .LBB11_2: # %start +; RV32IF-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IF-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: 
flt.s a4, ft0, ft1 +; RV32IF-NEXT: addi a2, zero, -1 +; RV32IF-NEXT: addi a0, zero, -1 +; RV32IF-NEXT: beqz a4, .LBB11_7 +; RV32IF-NEXT: # %bb.3: # %start +; RV32IF-NEXT: beqz s0, .LBB11_8 +; RV32IF-NEXT: .LBB11_4: # %start +; RV32IF-NEXT: bnez a4, .LBB11_6 +; RV32IF-NEXT: .LBB11_5: # %start +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: .LBB11_6: # %start +; RV32IF-NEXT: mv a1, a2 +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB11_7: # %start +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: bnez s0, .LBB11_4 +; RV32IF-NEXT: .LBB11_8: # %start +; RV32IF-NEXT: mv a1, zero +; RV32IF-NEXT: beqz a4, .LBB11_5 +; RV32IF-NEXT: j .LBB11_6 +; +; RV64IF-LABEL: fcvt_lu_s_sat: +; RV64IF: # %bb.0: # %start +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fmv.w.x ft1, zero +; RV64IF-NEXT: fle.s a0, ft1, ft0 +; RV64IF-NEXT: bnez a0, .LBB11_2 +; RV64IF-NEXT: # %bb.1: # %start +; RV64IF-NEXT: mv a1, zero +; RV64IF-NEXT: j .LBB11_3 +; RV64IF-NEXT: .LBB11_2: +; RV64IF-NEXT: fcvt.lu.s a1, ft0, rtz +; RV64IF-NEXT: .LBB11_3: # %start +; RV64IF-NEXT: lui a0, %hi(.LCPI11_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI11_0)(a0) +; RV64IF-NEXT: flt.s a2, ft1, ft0 +; RV64IF-NEXT: addi a0, zero, -1 +; RV64IF-NEXT: bnez a2, .LBB11_5 +; RV64IF-NEXT: # %bb.4: # %start +; RV64IF-NEXT: mv a0, a1 +; RV64IF-NEXT: .LBB11_5: # %start +; RV64IF-NEXT: ret +start: + %0 = tail call i64 @llvm.fptoui.sat.i64.f32(float %a) + ret i64 %0 +} +declare i64 @llvm.fptoui.sat.i64.f32(float) + define float @fcvt_s_l(i64 %a) nounwind { ; RV32IF-LABEL: fcvt_s_l: ; RV32IF: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index 30b5809..87b8117 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -32,6 +32,84 @@ define i16 @fcvt_si_h(half %a) nounwind { ret i16 %1 } +define i16 @fcvt_si_h_sat(half %a) nounwind { +; 
RV32IZFH-LABEL: fcvt_si_h_sat: +; RV32IZFH: # %bb.0: # %start +; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFH-NEXT: feq.s a0, ft0, ft0 +; RV32IZFH-NEXT: bnez a0, .LBB1_2 +; RV32IZFH-NEXT: # %bb.1: # %start +; RV32IZFH-NEXT: mv a0, zero +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB1_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0) +; RV32IZFH-NEXT: fmax.s ft0, ft0, ft1 +; RV32IZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_si_h_sat: +; RV32IDZFH: # %bb.0: # %start +; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IDZFH-NEXT: feq.s a0, ft0, ft0 +; RV32IDZFH-NEXT: bnez a0, .LBB1_2 +; RV32IDZFH-NEXT: # %bb.1: # %start +; RV32IDZFH-NEXT: mv a0, zero +; RV32IDZFH-NEXT: ret +; RV32IDZFH-NEXT: .LBB1_2: +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0) +; RV32IDZFH-NEXT: fmax.s ft0, ft0, ft1 +; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_si_h_sat: +; RV64IZFH: # %bb.0: # %start +; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFH-NEXT: feq.s a0, ft0, ft0 +; RV64IZFH-NEXT: bnez a0, .LBB1_2 +; RV64IZFH-NEXT: # %bb.1: # %start +; RV64IZFH-NEXT: mv a0, zero +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB1_2: +; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_1) +; RV64IZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0) +; RV64IZFH-NEXT: fmax.s ft0, ft0, ft1 +; RV64IZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_si_h_sat: +; RV64IDZFH: # %bb.0: # %start +; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFH-NEXT: feq.s a0, ft0, ft0 +; RV64IDZFH-NEXT: bnez a0, .LBB1_2 +; 
RV64IDZFH-NEXT: # %bb.1: # %start +; RV64IDZFH-NEXT: mv a0, zero +; RV64IDZFH-NEXT: ret +; RV64IDZFH-NEXT: .LBB1_2: +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI1_1) +; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0) +; RV64IDZFH-NEXT: fmax.s ft0, ft0, ft1 +; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IDZFH-NEXT: ret +start: + %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a) + ret i16 %0 +} +declare i16 @llvm.fptosi.sat.i16.f16(half) + define i16 @fcvt_ui_h(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_ui_h: ; RV32IZFH: # %bb.0: @@ -56,6 +134,56 @@ define i16 @fcvt_ui_h(half %a) nounwind { ret i16 %1 } +define i16 @fcvt_ui_h_sat(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_ui_h_sat: +; RV32IZFH: # %bb.0: # %start +; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV32IZFH-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFH-NEXT: fmv.w.x ft2, zero +; RV32IZFH-NEXT: fmax.s ft1, ft1, ft2 +; RV32IZFH-NEXT: fmin.s ft0, ft1, ft0 +; RV32IZFH-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_ui_h_sat: +; RV32IDZFH: # %bb.0: # %start +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV32IDZFH-NEXT: fcvt.s.h ft1, fa0 +; RV32IDZFH-NEXT: fmv.w.x ft2, zero +; RV32IDZFH-NEXT: fmax.s ft1, ft1, ft2 +; RV32IDZFH-NEXT: fmin.s ft0, ft1, ft0 +; RV32IDZFH-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_ui_h_sat: +; RV64IZFH: # %bb.0: # %start +; RV64IZFH-NEXT: lui a0, %hi(.LCPI3_0) +; RV64IZFH-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV64IZFH-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFH-NEXT: fmv.w.x ft2, zero +; RV64IZFH-NEXT: fmax.s ft1, ft1, ft2 +; RV64IZFH-NEXT: fmin.s ft0, ft1, ft0 +; RV64IZFH-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_ui_h_sat: +; RV64IDZFH: # %bb.0: # %start +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI3_0) +; 
RV64IDZFH-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV64IDZFH-NEXT: fcvt.s.h ft1, fa0 +; RV64IDZFH-NEXT: fmv.w.x ft2, zero +; RV64IDZFH-NEXT: fmax.s ft1, ft1, ft2 +; RV64IDZFH-NEXT: fmin.s ft0, ft1, ft0 +; RV64IDZFH-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IDZFH-NEXT: ret +start: + %0 = tail call i16 @llvm.fptoui.sat.i16.f16(half %a) + ret i16 %0 +} +declare i16 @llvm.fptoui.sat.i16.f16(half) + define i32 @fcvt_w_h(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_w_h: ; RV32IZFH: # %bb.0: @@ -80,6 +208,136 @@ define i32 @fcvt_w_h(half %a) nounwind { ret i32 %1 } +define i32 @fcvt_w_h_sat(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_w_h_sat: +; RV32IZFH: # %bb.0: # %start +; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_0) +; RV32IZFH-NEXT: flw ft1, %lo(.LCPI5_0)(a0) +; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFH-NEXT: fle.s a0, ft1, ft0 +; RV32IZFH-NEXT: lui a1, 524288 +; RV32IZFH-NEXT: bnez a0, .LBB5_2 +; RV32IZFH-NEXT: # %bb.1: # %start +; RV32IZFH-NEXT: lui a0, 524288 +; RV32IZFH-NEXT: j .LBB5_3 +; RV32IZFH-NEXT: .LBB5_2: +; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFH-NEXT: .LBB5_3: # %start +; RV32IZFH-NEXT: lui a2, %hi(.LCPI5_1) +; RV32IZFH-NEXT: flw ft1, %lo(.LCPI5_1)(a2) +; RV32IZFH-NEXT: flt.s a2, ft1, ft0 +; RV32IZFH-NEXT: bnez a2, .LBB5_6 +; RV32IZFH-NEXT: # %bb.4: # %start +; RV32IZFH-NEXT: feq.s a1, ft0, ft0 +; RV32IZFH-NEXT: beqz a1, .LBB5_7 +; RV32IZFH-NEXT: .LBB5_5: # %start +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB5_6: +; RV32IZFH-NEXT: addi a0, a1, -1 +; RV32IZFH-NEXT: feq.s a1, ft0, ft0 +; RV32IZFH-NEXT: bnez a1, .LBB5_5 +; RV32IZFH-NEXT: .LBB5_7: # %start +; RV32IZFH-NEXT: mv a0, zero +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_w_h_sat: +; RV32IDZFH: # %bb.0: # %start +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI5_0) +; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI5_0)(a0) +; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IDZFH-NEXT: fle.s a0, ft1, ft0 +; RV32IDZFH-NEXT: lui a1, 524288 +; RV32IDZFH-NEXT: bnez a0, .LBB5_2 +; RV32IDZFH-NEXT: # %bb.1: # %start +; RV32IDZFH-NEXT: lui a0, 
524288 +; RV32IDZFH-NEXT: j .LBB5_3 +; RV32IDZFH-NEXT: .LBB5_2: +; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IDZFH-NEXT: .LBB5_3: # %start +; RV32IDZFH-NEXT: lui a2, %hi(.LCPI5_1) +; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI5_1)(a2) +; RV32IDZFH-NEXT: flt.s a2, ft1, ft0 +; RV32IDZFH-NEXT: bnez a2, .LBB5_6 +; RV32IDZFH-NEXT: # %bb.4: # %start +; RV32IDZFH-NEXT: feq.s a1, ft0, ft0 +; RV32IDZFH-NEXT: beqz a1, .LBB5_7 +; RV32IDZFH-NEXT: .LBB5_5: # %start +; RV32IDZFH-NEXT: ret +; RV32IDZFH-NEXT: .LBB5_6: +; RV32IDZFH-NEXT: addi a0, a1, -1 +; RV32IDZFH-NEXT: feq.s a1, ft0, ft0 +; RV32IDZFH-NEXT: bnez a1, .LBB5_5 +; RV32IDZFH-NEXT: .LBB5_7: # %start +; RV32IDZFH-NEXT: mv a0, zero +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_w_h_sat: +; RV64IZFH: # %bb.0: # %start +; RV64IZFH-NEXT: lui a0, %hi(.LCPI5_0) +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI5_0)(a0) +; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IZFH-NEXT: lui a1, 524288 +; RV64IZFH-NEXT: bnez a0, .LBB5_2 +; RV64IZFH-NEXT: # %bb.1: # %start +; RV64IZFH-NEXT: lui a0, 524288 +; RV64IZFH-NEXT: j .LBB5_3 +; RV64IZFH-NEXT: .LBB5_2: +; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFH-NEXT: .LBB5_3: # %start +; RV64IZFH-NEXT: lui a2, %hi(.LCPI5_1) +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI5_1)(a2) +; RV64IZFH-NEXT: flt.s a2, ft1, ft0 +; RV64IZFH-NEXT: bnez a2, .LBB5_6 +; RV64IZFH-NEXT: # %bb.4: # %start +; RV64IZFH-NEXT: feq.s a1, ft0, ft0 +; RV64IZFH-NEXT: beqz a1, .LBB5_7 +; RV64IZFH-NEXT: .LBB5_5: # %start +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB5_6: +; RV64IZFH-NEXT: addiw a0, a1, -1 +; RV64IZFH-NEXT: feq.s a1, ft0, ft0 +; RV64IZFH-NEXT: bnez a1, .LBB5_5 +; RV64IZFH-NEXT: .LBB5_7: # %start +; RV64IZFH-NEXT: mv a0, zero +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_w_h_sat: +; RV64IDZFH: # %bb.0: # %start +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI5_0) +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI5_0)(a0) +; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IDZFH-NEXT: lui a1, 
524288 +; RV64IDZFH-NEXT: bnez a0, .LBB5_2 +; RV64IDZFH-NEXT: # %bb.1: # %start +; RV64IDZFH-NEXT: lui a0, 524288 +; RV64IDZFH-NEXT: j .LBB5_3 +; RV64IDZFH-NEXT: .LBB5_2: +; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IDZFH-NEXT: .LBB5_3: # %start +; RV64IDZFH-NEXT: lui a2, %hi(.LCPI5_1) +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI5_1)(a2) +; RV64IDZFH-NEXT: flt.s a2, ft1, ft0 +; RV64IDZFH-NEXT: bnez a2, .LBB5_6 +; RV64IDZFH-NEXT: # %bb.4: # %start +; RV64IDZFH-NEXT: feq.s a1, ft0, ft0 +; RV64IDZFH-NEXT: beqz a1, .LBB5_7 +; RV64IDZFH-NEXT: .LBB5_5: # %start +; RV64IDZFH-NEXT: ret +; RV64IDZFH-NEXT: .LBB5_6: +; RV64IDZFH-NEXT: addiw a0, a1, -1 +; RV64IDZFH-NEXT: feq.s a1, ft0, ft0 +; RV64IDZFH-NEXT: bnez a1, .LBB5_5 +; RV64IDZFH-NEXT: .LBB5_7: # %start +; RV64IDZFH-NEXT: mv a0, zero +; RV64IDZFH-NEXT: ret +start: + %0 = tail call i32 @llvm.fptosi.sat.i32.f16(half %a) + ret i32 %0 +} +declare i32 @llvm.fptosi.sat.i32.f16(half) + define i32 @fcvt_wu_h(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_wu_h: ; RV32IZFH: # %bb.0: @@ -104,6 +362,100 @@ define i32 @fcvt_wu_h(half %a) nounwind { ret i32 %1 } +define i32 @fcvt_wu_h_sat(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_wu_h_sat: +; RV32IZFH: # %bb.0: # %start +; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFH-NEXT: fmv.w.x ft1, zero +; RV32IZFH-NEXT: fle.s a0, ft1, ft0 +; RV32IZFH-NEXT: bnez a0, .LBB7_2 +; RV32IZFH-NEXT: # %bb.1: # %start +; RV32IZFH-NEXT: mv a1, zero +; RV32IZFH-NEXT: j .LBB7_3 +; RV32IZFH-NEXT: .LBB7_2: +; RV32IZFH-NEXT: fcvt.wu.s a1, ft0, rtz +; RV32IZFH-NEXT: .LBB7_3: # %start +; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IZFH-NEXT: flw ft1, %lo(.LCPI7_0)(a0) +; RV32IZFH-NEXT: flt.s a2, ft1, ft0 +; RV32IZFH-NEXT: addi a0, zero, -1 +; RV32IZFH-NEXT: bnez a2, .LBB7_5 +; RV32IZFH-NEXT: # %bb.4: # %start +; RV32IZFH-NEXT: mv a0, a1 +; RV32IZFH-NEXT: .LBB7_5: # %start +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_wu_h_sat: +; RV32IDZFH: # %bb.0: # %start +; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IDZFH-NEXT: 
fmv.w.x ft1, zero +; RV32IDZFH-NEXT: fle.s a0, ft1, ft0 +; RV32IDZFH-NEXT: bnez a0, .LBB7_2 +; RV32IDZFH-NEXT: # %bb.1: # %start +; RV32IDZFH-NEXT: mv a1, zero +; RV32IDZFH-NEXT: j .LBB7_3 +; RV32IDZFH-NEXT: .LBB7_2: +; RV32IDZFH-NEXT: fcvt.wu.s a1, ft0, rtz +; RV32IDZFH-NEXT: .LBB7_3: # %start +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI7_0)(a0) +; RV32IDZFH-NEXT: flt.s a2, ft1, ft0 +; RV32IDZFH-NEXT: addi a0, zero, -1 +; RV32IDZFH-NEXT: bnez a2, .LBB7_5 +; RV32IDZFH-NEXT: # %bb.4: # %start +; RV32IDZFH-NEXT: mv a0, a1 +; RV32IDZFH-NEXT: .LBB7_5: # %start +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_wu_h_sat: +; RV64IZFH: # %bb.0: # %start +; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFH-NEXT: fmv.w.x ft1, zero +; RV64IZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IZFH-NEXT: bnez a0, .LBB7_2 +; RV64IZFH-NEXT: # %bb.1: # %start +; RV64IZFH-NEXT: mv a0, zero +; RV64IZFH-NEXT: j .LBB7_3 +; RV64IZFH-NEXT: .LBB7_2: +; RV64IZFH-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFH-NEXT: .LBB7_3: # %start +; RV64IZFH-NEXT: lui a1, %hi(.LCPI7_0) +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI7_0)(a1) +; RV64IZFH-NEXT: flt.s a1, ft1, ft0 +; RV64IZFH-NEXT: beqz a1, .LBB7_5 +; RV64IZFH-NEXT: # %bb.4: +; RV64IZFH-NEXT: addi a0, zero, -1 +; RV64IZFH-NEXT: srli a0, a0, 32 +; RV64IZFH-NEXT: .LBB7_5: # %start +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_wu_h_sat: +; RV64IDZFH: # %bb.0: # %start +; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFH-NEXT: fmv.w.x ft1, zero +; RV64IDZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IDZFH-NEXT: bnez a0, .LBB7_2 +; RV64IDZFH-NEXT: # %bb.1: # %start +; RV64IDZFH-NEXT: mv a0, zero +; RV64IDZFH-NEXT: j .LBB7_3 +; RV64IDZFH-NEXT: .LBB7_2: +; RV64IDZFH-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IDZFH-NEXT: .LBB7_3: # %start +; RV64IDZFH-NEXT: lui a1, %hi(.LCPI7_0) +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI7_0)(a1) +; RV64IDZFH-NEXT: flt.s a1, ft1, ft0 +; RV64IDZFH-NEXT: beqz a1, .LBB7_5 +; RV64IDZFH-NEXT: # %bb.4: +; RV64IDZFH-NEXT: addi a0, zero, -1 +; 
RV64IDZFH-NEXT: srli a0, a0, 32 +; RV64IDZFH-NEXT: .LBB7_5: # %start +; RV64IDZFH-NEXT: ret +start: + %0 = tail call i32 @llvm.fptoui.sat.i32.f16(half %a) + ret i32 %0 +} +declare i32 @llvm.fptoui.sat.i32.f16(half) + define i64 @fcvt_l_h(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_l_h: ; RV32IZFH: # %bb.0: @@ -136,6 +488,186 @@ define i64 @fcvt_l_h(half %a) nounwind { ret i64 %1 } +define i64 @fcvt_l_h_sat(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_l_h_sat: +; RV32IZFH: # %bb.0: # %start +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_0)(a0) +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixsfdi@plt +; RV32IZFH-NEXT: mv a2, a0 +; RV32IZFH-NEXT: bnez s0, .LBB9_2 +; RV32IZFH-NEXT: # %bb.1: # %start +; RV32IZFH-NEXT: mv a2, zero +; RV32IZFH-NEXT: .LBB9_2: # %start +; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_1)(a0) +; RV32IZFH-NEXT: flt.s a3, ft0, fs0 +; RV32IZFH-NEXT: addi a0, zero, -1 +; RV32IZFH-NEXT: beqz a3, .LBB9_9 +; RV32IZFH-NEXT: # %bb.3: # %start +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: beqz a2, .LBB9_10 +; RV32IZFH-NEXT: .LBB9_4: # %start +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: beqz s0, .LBB9_11 +; RV32IZFH-NEXT: .LBB9_5: # %start +; RV32IZFH-NEXT: bnez a3, .LBB9_12 +; RV32IZFH-NEXT: .LBB9_6: # %start +; RV32IZFH-NEXT: bnez a2, .LBB9_8 +; RV32IZFH-NEXT: .LBB9_7: # %start +; RV32IZFH-NEXT: mv a1, zero +; RV32IZFH-NEXT: .LBB9_8: # %start +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB9_9: # %start +; 
RV32IZFH-NEXT: mv a0, a2 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: bnez a2, .LBB9_4 +; RV32IZFH-NEXT: .LBB9_10: # %start +; RV32IZFH-NEXT: mv a0, zero +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: bnez s0, .LBB9_5 +; RV32IZFH-NEXT: .LBB9_11: # %start +; RV32IZFH-NEXT: lui a1, 524288 +; RV32IZFH-NEXT: beqz a3, .LBB9_6 +; RV32IZFH-NEXT: .LBB9_12: +; RV32IZFH-NEXT: addi a1, a4, -1 +; RV32IZFH-NEXT: beqz a2, .LBB9_7 +; RV32IZFH-NEXT: j .LBB9_8 +; +; RV32IDZFH-LABEL: fcvt_l_h_sat: +; RV32IDZFH: # %bb.0: # %start +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI9_0) +; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI9_0)(a0) +; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IDZFH-NEXT: fmv.s fa0, fs0 +; RV32IDZFH-NEXT: call __fixsfdi@plt +; RV32IDZFH-NEXT: mv a2, a0 +; RV32IDZFH-NEXT: bnez s0, .LBB9_2 +; RV32IDZFH-NEXT: # %bb.1: # %start +; RV32IDZFH-NEXT: mv a2, zero +; RV32IDZFH-NEXT: .LBB9_2: # %start +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI9_1)(a0) +; RV32IDZFH-NEXT: flt.s a3, ft0, fs0 +; RV32IDZFH-NEXT: addi a0, zero, -1 +; RV32IDZFH-NEXT: beqz a3, .LBB9_9 +; RV32IDZFH-NEXT: # %bb.3: # %start +; RV32IDZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IDZFH-NEXT: beqz a2, .LBB9_10 +; RV32IDZFH-NEXT: .LBB9_4: # %start +; RV32IDZFH-NEXT: lui a4, 524288 +; RV32IDZFH-NEXT: beqz s0, .LBB9_11 +; RV32IDZFH-NEXT: .LBB9_5: # %start +; RV32IDZFH-NEXT: bnez a3, .LBB9_12 +; RV32IDZFH-NEXT: .LBB9_6: # %start +; RV32IDZFH-NEXT: bnez a2, .LBB9_8 +; RV32IDZFH-NEXT: .LBB9_7: # %start +; RV32IDZFH-NEXT: mv a1, zero +; RV32IDZFH-NEXT: .LBB9_8: # %start +; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi 
sp, sp, 16 +; RV32IDZFH-NEXT: ret +; RV32IDZFH-NEXT: .LBB9_9: # %start +; RV32IDZFH-NEXT: mv a0, a2 +; RV32IDZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IDZFH-NEXT: bnez a2, .LBB9_4 +; RV32IDZFH-NEXT: .LBB9_10: # %start +; RV32IDZFH-NEXT: mv a0, zero +; RV32IDZFH-NEXT: lui a4, 524288 +; RV32IDZFH-NEXT: bnez s0, .LBB9_5 +; RV32IDZFH-NEXT: .LBB9_11: # %start +; RV32IDZFH-NEXT: lui a1, 524288 +; RV32IDZFH-NEXT: beqz a3, .LBB9_6 +; RV32IDZFH-NEXT: .LBB9_12: +; RV32IDZFH-NEXT: addi a1, a4, -1 +; RV32IDZFH-NEXT: beqz a2, .LBB9_7 +; RV32IDZFH-NEXT: j .LBB9_8 +; +; RV64IZFH-LABEL: fcvt_l_h_sat: +; RV64IZFH: # %bb.0: # %start +; RV64IZFH-NEXT: lui a0, %hi(.LCPI9_0) +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI9_0)(a0) +; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IZFH-NEXT: addi a1, zero, -1 +; RV64IZFH-NEXT: bnez a0, .LBB9_2 +; RV64IZFH-NEXT: # %bb.1: # %start +; RV64IZFH-NEXT: slli a0, a1, 63 +; RV64IZFH-NEXT: j .LBB9_3 +; RV64IZFH-NEXT: .LBB9_2: +; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFH-NEXT: .LBB9_3: # %start +; RV64IZFH-NEXT: lui a2, %hi(.LCPI9_1) +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI9_1)(a2) +; RV64IZFH-NEXT: flt.s a2, ft1, ft0 +; RV64IZFH-NEXT: bnez a2, .LBB9_6 +; RV64IZFH-NEXT: # %bb.4: # %start +; RV64IZFH-NEXT: feq.s a1, ft0, ft0 +; RV64IZFH-NEXT: beqz a1, .LBB9_7 +; RV64IZFH-NEXT: .LBB9_5: # %start +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB9_6: +; RV64IZFH-NEXT: srli a0, a1, 1 +; RV64IZFH-NEXT: feq.s a1, ft0, ft0 +; RV64IZFH-NEXT: bnez a1, .LBB9_5 +; RV64IZFH-NEXT: .LBB9_7: # %start +; RV64IZFH-NEXT: mv a0, zero +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_l_h_sat: +; RV64IDZFH: # %bb.0: # %start +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI9_0) +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI9_0)(a0) +; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IDZFH-NEXT: addi a1, zero, -1 +; RV64IDZFH-NEXT: bnez a0, .LBB9_2 +; RV64IDZFH-NEXT: # %bb.1: # %start +; RV64IDZFH-NEXT: slli a0, a1, 63 +; RV64IDZFH-NEXT: j .LBB9_3 +; 
RV64IDZFH-NEXT: .LBB9_2: +; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IDZFH-NEXT: .LBB9_3: # %start +; RV64IDZFH-NEXT: lui a2, %hi(.LCPI9_1) +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI9_1)(a2) +; RV64IDZFH-NEXT: flt.s a2, ft1, ft0 +; RV64IDZFH-NEXT: bnez a2, .LBB9_6 +; RV64IDZFH-NEXT: # %bb.4: # %start +; RV64IDZFH-NEXT: feq.s a1, ft0, ft0 +; RV64IDZFH-NEXT: beqz a1, .LBB9_7 +; RV64IDZFH-NEXT: .LBB9_5: # %start +; RV64IDZFH-NEXT: ret +; RV64IDZFH-NEXT: .LBB9_6: +; RV64IDZFH-NEXT: srli a0, a1, 1 +; RV64IDZFH-NEXT: feq.s a1, ft0, ft0 +; RV64IDZFH-NEXT: bnez a1, .LBB9_5 +; RV64IDZFH-NEXT: .LBB9_7: # %start +; RV64IDZFH-NEXT: mv a0, zero +; RV64IDZFH-NEXT: ret +start: + %0 = tail call i64 @llvm.fptosi.sat.i64.f16(half %a) + ret i64 %0 +} +declare i64 @llvm.fptosi.sat.i64.f16(half) + define i64 @fcvt_lu_h(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_lu_h: ; RV32IZFH: # %bb.0: @@ -168,6 +700,142 @@ define i64 @fcvt_lu_h(half %a) nounwind { ret i64 %1 } +define i64 @fcvt_lu_h_sat(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_lu_h_sat: +; RV32IZFH: # %bb.0: # %start +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IZFH-NEXT: fmv.w.x ft0, zero +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixunssfdi@plt +; RV32IZFH-NEXT: mv a3, a0 +; RV32IZFH-NEXT: bnez s0, .LBB11_2 +; RV32IZFH-NEXT: # %bb.1: # %start +; RV32IZFH-NEXT: mv a3, zero +; RV32IZFH-NEXT: .LBB11_2: # %start +; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IZFH-NEXT: flt.s a4, ft0, fs0 +; RV32IZFH-NEXT: addi a2, zero, -1 +; RV32IZFH-NEXT: addi a0, zero, -1 +; RV32IZFH-NEXT: beqz a4, .LBB11_7 +; RV32IZFH-NEXT: # %bb.3: # %start +; RV32IZFH-NEXT: beqz s0, .LBB11_8 +; RV32IZFH-NEXT: .LBB11_4: # %start +; RV32IZFH-NEXT: bnez a4, .LBB11_6 +; 
RV32IZFH-NEXT: .LBB11_5: # %start +; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: .LBB11_6: # %start +; RV32IZFH-NEXT: mv a1, a2 +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB11_7: # %start +; RV32IZFH-NEXT: mv a0, a3 +; RV32IZFH-NEXT: bnez s0, .LBB11_4 +; RV32IZFH-NEXT: .LBB11_8: # %start +; RV32IZFH-NEXT: mv a1, zero +; RV32IZFH-NEXT: beqz a4, .LBB11_5 +; RV32IZFH-NEXT: j .LBB11_6 +; +; RV32IDZFH-LABEL: fcvt_lu_h_sat: +; RV32IDZFH: # %bb.0: # %start +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFH-NEXT: fmv.w.x ft0, zero +; RV32IDZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IDZFH-NEXT: fmv.s fa0, fs0 +; RV32IDZFH-NEXT: call __fixunssfdi@plt +; RV32IDZFH-NEXT: mv a3, a0 +; RV32IDZFH-NEXT: bnez s0, .LBB11_2 +; RV32IDZFH-NEXT: # %bb.1: # %start +; RV32IDZFH-NEXT: mv a3, zero +; RV32IDZFH-NEXT: .LBB11_2: # %start +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IDZFH-NEXT: flt.s a4, ft0, fs0 +; RV32IDZFH-NEXT: addi a2, zero, -1 +; RV32IDZFH-NEXT: addi a0, zero, -1 +; RV32IDZFH-NEXT: beqz a4, .LBB11_7 +; RV32IDZFH-NEXT: # %bb.3: # %start +; RV32IDZFH-NEXT: beqz s0, .LBB11_8 +; RV32IDZFH-NEXT: .LBB11_4: # %start +; RV32IDZFH-NEXT: bnez a4, .LBB11_6 +; RV32IDZFH-NEXT: .LBB11_5: # %start +; RV32IDZFH-NEXT: mv a2, a1 +; RV32IDZFH-NEXT: .LBB11_6: # %start +; RV32IDZFH-NEXT: mv a1, a2 +; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; RV32IDZFH-NEXT: .LBB11_7: # %start +; 
RV32IDZFH-NEXT: mv a0, a3 +; RV32IDZFH-NEXT: bnez s0, .LBB11_4 +; RV32IDZFH-NEXT: .LBB11_8: # %start +; RV32IDZFH-NEXT: mv a1, zero +; RV32IDZFH-NEXT: beqz a4, .LBB11_5 +; RV32IDZFH-NEXT: j .LBB11_6 +; +; RV64IZFH-LABEL: fcvt_lu_h_sat: +; RV64IZFH: # %bb.0: # %start +; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFH-NEXT: fmv.w.x ft1, zero +; RV64IZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IZFH-NEXT: bnez a0, .LBB11_2 +; RV64IZFH-NEXT: # %bb.1: # %start +; RV64IZFH-NEXT: mv a1, zero +; RV64IZFH-NEXT: j .LBB11_3 +; RV64IZFH-NEXT: .LBB11_2: +; RV64IZFH-NEXT: fcvt.lu.s a1, ft0, rtz +; RV64IZFH-NEXT: .LBB11_3: # %start +; RV64IZFH-NEXT: lui a0, %hi(.LCPI11_0) +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI11_0)(a0) +; RV64IZFH-NEXT: flt.s a2, ft1, ft0 +; RV64IZFH-NEXT: addi a0, zero, -1 +; RV64IZFH-NEXT: bnez a2, .LBB11_5 +; RV64IZFH-NEXT: # %bb.4: # %start +; RV64IZFH-NEXT: mv a0, a1 +; RV64IZFH-NEXT: .LBB11_5: # %start +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_lu_h_sat: +; RV64IDZFH: # %bb.0: # %start +; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFH-NEXT: fmv.w.x ft1, zero +; RV64IDZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IDZFH-NEXT: bnez a0, .LBB11_2 +; RV64IDZFH-NEXT: # %bb.1: # %start +; RV64IDZFH-NEXT: mv a1, zero +; RV64IDZFH-NEXT: j .LBB11_3 +; RV64IDZFH-NEXT: .LBB11_2: +; RV64IDZFH-NEXT: fcvt.lu.s a1, ft0, rtz +; RV64IDZFH-NEXT: .LBB11_3: # %start +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI11_0) +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI11_0)(a0) +; RV64IDZFH-NEXT: flt.s a2, ft1, ft0 +; RV64IDZFH-NEXT: addi a0, zero, -1 +; RV64IDZFH-NEXT: bnez a2, .LBB11_5 +; RV64IDZFH-NEXT: # %bb.4: # %start +; RV64IDZFH-NEXT: mv a0, a1 +; RV64IDZFH-NEXT: .LBB11_5: # %start +; RV64IDZFH-NEXT: ret +start: + %0 = tail call i64 @llvm.fptoui.sat.i64.f16(half %a) + ret i64 %0 +} +declare i64 @llvm.fptoui.sat.i64.f16(half) + define half @fcvt_h_si(i16 %a) nounwind { ; RV32IZFH-LABEL: fcvt_h_si: ; RV32IZFH: # %bb.0: