// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
-// the vector type that it is contained in.
+// the scalable vector type that contains VecVT when VecVT is fixed-length.
+// Otherwise, if VecVT is scalable, ContainerVT should be the same as VecVT.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT XLenVT = Subtarget.getXLenVT();
- // Only handle XLen or i32 types. Other types narrower than XLen will
- // eventually be legalized to XLenVT.
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != XLenVT)
- return SDValue();
-
SDValue Src = N->getOperand(0);
- // Ensure the FP type is also legal.
+ // Ensure the FP type is legal.
if (!TLI.isTypeLegal(Src.getValueType()))
return SDValue();
if (FRM == RISCVFPRndMode::Invalid)
return SDValue();
+ SDLoc DL(N);
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector() && TLI.isTypeLegal(VT)) {
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT SrcContainerVT = SrcVT;
+ MVT ContainerVT = VT.getSimpleVT();
+ SDValue XVal = Src.getOperand(0);
+
+ // TODO: Support combining with widening and narrowing instructions.
+ // For now, only support conversions where the source and result element
+ // types have the same bit width.
+ if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
+ return SDValue();
+
+ // Make fixed-length vectors scalable first
+ if (SrcVT.isFixedLengthVector()) {
+ SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+ XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
+ ContainerVT =
+ getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
+ }
+
+ auto [Mask, VL] =
+ getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
+
+ SDValue FpToInt;
+ if (FRM == RISCVFPRndMode::RTZ) {
+ // Use the dedicated trunc static rounding mode if we're truncating so we
+ // don't need to generate calls to fsrmi/fsrm
+ unsigned Opc =
+ IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
+ FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
+ } else {
+ unsigned Opc =
+ IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
+ FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
+ DAG.getTargetConstant(FRM, DL, XLenVT), VL);
+ }
+
+ // If converted from fixed-length to scalable, convert back
+ if (VT.isFixedLengthVector())
+ FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
+
+ return FpToInt;
+ }
+
+ // Only handle XLen or i32 types. Other types narrower than XLen will
+ // eventually be legalized to XLenVT.
+ if (VT != MVT::i32 && VT != XLenVT)
+ return SDValue();
unsigned Opc;
if (VT == XLenVT)
else
Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
- SDLoc DL(N);
SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
DAG.getTargetConstant(FRM, DL, XLenVT));
return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M1_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M2_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M4_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_M8_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M8_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF2_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF4_MASK);
case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
NODE_NAME_CASE(VFCVT_RM_X_F_VL)
+ NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
NODE_NAME_CASE(VFCVT_X_F_VL)
+ NODE_NAME_CASE(VFCVT_XU_F_VL)
NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
NODE_NAME_CASE(SINT_TO_FP_VL)
NODE_NAME_CASE(UINT_TO_FP_VL)
VFCVT_RTZ_X_F_VL,
VFCVT_RTZ_XU_F_VL,
VFCVT_X_F_VL,
+ VFCVT_XU_F_VL,
VFROUND_NOEXCEPT_VL,
VFCVT_RM_X_F_VL, // Has a rounding mode operand.
+ VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
SINT_TO_FP_VL,
UINT_TO_FP_VL,
VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
}
defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V;
defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V;
defm PseudoVFROUND_NOEXCEPT : VPseudoVFROUND_NOEXCEPT_V;
let Uses = [FRM] in {
]>;
def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
+def riscv_vfcvt_rm_xu_f_vl : SDNode<"RISCVISD::VFCVT_RM_XU_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>;
def riscv_vfround_noexcept_vl: SDNode<"RISCVISD::VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL>;
def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
// 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
+ defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFCVT_RM_XU_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
+ defm : VPatConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFCVT_XU_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
define <vscale x 1 x i64> @trunc_nxv1f64_to_si64(<vscale x 1 x double> %x) {
; RV32-LABEL: trunc_nxv1f64_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI6_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI6_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f64_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI6_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI6_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
define <vscale x 1 x i64> @trunc_nxv1f64_to_ui64(<vscale x 1 x double> %x) {
; RV32-LABEL: trunc_nxv1f64_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI7_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI7_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f64_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI7_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI7_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
define <vscale x 4 x i64> @trunc_nxv4f64_to_si64(<vscale x 4 x double> %x) {
; RV32-LABEL: trunc_nxv4f64_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI14_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f64_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI14_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
define <vscale x 4 x i64> @trunc_nxv4f64_to_ui64(<vscale x 4 x double> %x) {
; RV32-LABEL: trunc_nxv4f64_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI15_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI15_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f64_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI15_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI15_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
define <vscale x 1 x i64> @ceil_nxv1f64_to_si64(<vscale x 1 x double> %x) {
; RV32-LABEL: ceil_nxv1f64_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI22_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI22_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f64_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI22_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI22_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
%b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i64>
define <vscale x 1 x i64> @ceil_nxv1f64_to_ui64(<vscale x 1 x double> %x) {
; RV32-LABEL: ceil_nxv1f64_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI23_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI23_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f64_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI23_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI23_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
%b = fptoui <vscale x 1 x double> %a to <vscale x 1 x i64>
define <vscale x 4 x i64> @ceil_nxv4f64_to_si64(<vscale x 4 x double> %x) {
; RV32-LABEL: ceil_nxv4f64_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI30_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI30_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f64_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI30_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI30_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
%b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i64>
define <vscale x 4 x i64> @ceil_nxv4f64_to_ui64(<vscale x 4 x double> %x) {
; RV32-LABEL: ceil_nxv4f64_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI31_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI31_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f64_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI31_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI31_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
%b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i64>
define <vscale x 1 x i32> @trunc_nxv1f32_to_si32(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI4_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI4_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f32_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI4_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI4_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
define <vscale x 1 x i32> @trunc_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI5_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI5_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f32_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI5_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI5_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
define <vscale x 4 x i32> @trunc_nxv4f32_to_si32(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI12_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI12_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI12_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI12_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
define <vscale x 4 x i32> @trunc_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI13_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI13_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI13_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI13_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
define <vscale x 1 x i32> @ceil_nxv1f32_to_si32(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI20_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI20_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f32_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI20_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI20_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
%b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i32>
define <vscale x 1 x i32> @ceil_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI21_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI21_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f32_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI21_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI21_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
%b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i32>
define <vscale x 4 x i32> @ceil_nxv4f32_to_si32(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI28_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI28_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI28_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI28_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
%b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
define <vscale x 4 x i32> @ceil_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI29_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI29_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI29_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI29_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
%b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
define <vscale x 1 x i16> @trunc_nxv1f16_to_si16(<vscale x 1 x half> %x) {
; RV32-LABEL: trunc_nxv1f16_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI2_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f16_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI2_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x)
define <vscale x 1 x i16> @trunc_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
; RV32-LABEL: trunc_nxv1f16_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI3_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI3_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f16_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI3_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI3_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x)
define <vscale x 4 x i16> @trunc_nxv4f16_to_si16(<vscale x 4 x half> %x) {
; RV32-LABEL: trunc_nxv4f16_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI10_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI10_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f16_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI10_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI10_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
define <vscale x 4 x i16> @trunc_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
; RV32-LABEL: trunc_nxv4f16_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI11_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI11_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f16_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI11_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI11_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
define <vscale x 1 x i16> @ceil_nxv1f16_to_si16(<vscale x 1 x half> %x) {
; RV32-LABEL: ceil_nxv1f16_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI18_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI18_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f16_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI18_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI18_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
%b = fptosi <vscale x 1 x half> %a to <vscale x 1 x i16>
define <vscale x 1 x i16> @ceil_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
; RV32-LABEL: ceil_nxv1f16_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI19_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI19_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f16_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI19_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI19_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
%b = fptoui <vscale x 1 x half> %a to <vscale x 1 x i16>
define <vscale x 4 x i16> @ceil_nxv4f16_to_si16(<vscale x 4 x half> %x) {
; RV32-LABEL: ceil_nxv4f16_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI26_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI26_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f16_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI26_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI26_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
%b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i16>
define <vscale x 4 x i16> @ceil_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
; RV32-LABEL: ceil_nxv4f16_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI27_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI27_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f16_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI27_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI27_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
%b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i16>