// If abs(Src) was larger than MaxVal or nan, keep it.
MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Abs, MaxValSplat,
- DAG.getCondCode(ISD::SETOLT), TrueMask, VL);
+ SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
+ {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
+ DAG.getUNDEF(SetccVT), TrueMask, VL});
// Truncate to integer and convert back to FP.
MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), SplatVal, VL);
- SDValue NeedAdjust =
- DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Truncated, Src,
- DAG.getCondCode(ISD::SETOLT), Mask, VL);
+ SDValue NeedAdjust = DAG.getNode(
+ RISCVISD::SETCC_VL, DL, SetccVT,
+ {Truncated, Src, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
Truncated = DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Truncated,
Splat, Truncated, NeedAdjust, VL);
} else if (Op.getOpcode() == ISD::FFLOOR) {
DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), SplatVal, VL);
- SDValue NeedAdjust =
- DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Src, Truncated,
- DAG.getCondCode(ISD::SETOLT), Mask, VL);
+ SDValue NeedAdjust = DAG.getNode(
+ RISCVISD::SETCC_VL, DL, SetccVT,
+ {Src, Truncated, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
Truncated = DAG.getNode(RISCVISD::FSUB_VL, DL, ContainerVT, Truncated,
Splat, Truncated, NeedAdjust, VL);
}
// If abs(Src) was larger than MaxVal or nan, keep it.
MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Abs, MaxValSplat,
- DAG.getCondCode(ISD::SETOLT), TrueMask, VL);
+ SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
+ {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
+ DAG.getUNDEF(SetccVT), TrueMask, VL});
bool Ignored;
APFloat Point5Pred = APFloat(0.5f);
case ISD::VP_SETCC:
if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
return lowerVPSetCCMaskOp(Op, DAG);
- return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL);
+ return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true);
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
return lowerVPStridedLoad(Op, DAG);
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
DAG.getUNDEF(ContainerVT), Mask, VL);
- Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
- DAG.getCondCode(ISD::SETNE), Mask, VL);
+ Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
+ {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
+ DAG.getUNDEF(MaskContainerVT), Mask, VL});
if (MaskVT.isFixedLengthVector())
Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
return Trunc;
SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
SDValue SelectCond =
- DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
- DAG.getCondCode(ISD::SETEQ), Mask, VL);
+ DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
+ {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
+ DAG.getUNDEF(MaskVT), Mask, VL});
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
Vec, VL);
}
MVT MaskVT = getMaskTypeFor(ContainerVT);
SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
- SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
- Op.getOperand(2), Mask, VL);
+ SDValue Cmp =
+ DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
+ {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}
SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
DAG.getUNDEF(InterimIVT), SplatZero);
- Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, Result, SplatZero,
- DAG.getCondCode(ISD::SETNE), Mask, VL);
+ Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
+ {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
+ DAG.getUNDEF(DstVT), Mask, VL});
} else {
MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
DstVT.getVectorElementCount());
def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
- SDTypeProfile<1, 5, [SDTCVecEltisVT<0, i1>,
+ SDTypeProfile<1, 6, [SDTCVecEltisVT<0, i1>,
SDTCisVec<1>,
SDTCisSameNumEltsAs<0, 1>,
SDTCisSameAs<1, 2>,
SDTCisVT<3, OtherVT>,
SDTCisSameAs<0, 4>,
- SDTCisVT<5, XLenVT>]>>;
+ SDTCisSameAs<0, 5>,
+ SDTCisVT<6, XLenVT>]>>;
def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL",
SDTypeProfile<1, 5, [SDTCisVec<0>,
CondCode cc> {
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
vti.RegClass:$rs2, cc,
+ VR:$merge,
(vti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
- (vti.Mask (IMPLICIT_DEF)),
+ VR:$merge,
vti.RegClass:$rs1,
vti.RegClass:$rs2,
(vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
: VPatIntegerSetCCVL_VV<vti, instruction_name, cc> {
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs2),
vti.RegClass:$rs1, invcc,
+ VR:$merge,
(vti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
- (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ VR:$merge, vti.RegClass:$rs1,
vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
defvar instruction_masked = !cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK");
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
(SplatPat (XLenVT GPR:$rs2)), cc,
+ VR:$merge,
(vti.Mask V0),
VLOpFrag)),
- (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (instruction_masked VR:$merge, vti.RegClass:$rs1,
GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat (XLenVT GPR:$rs2)),
(vti.Vector vti.RegClass:$rs1), invcc,
+ VR:$merge,
(vti.Mask V0),
VLOpFrag)),
- (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (instruction_masked VR:$merge, vti.RegClass:$rs1,
GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
(SplatPat_simm5 simm5:$rs2), cc,
+ VR:$merge,
(vti.Mask V0),
VLOpFrag)),
- (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (instruction_masked VR:$merge, vti.RegClass:$rs1,
XLenVT:$rs2, (vti.Mask V0), GPR:$vl,
vti.Log2SEW)>;
// FIXME: Can do some canonicalization to remove these patterns.
def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat_simm5 simm5:$rs2),
(vti.Vector vti.RegClass:$rs1), invcc,
+ VR:$merge,
(vti.Mask V0),
VLOpFrag)),
- (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (instruction_masked VR:$merge, vti.RegClass:$rs1,
simm5:$rs2, (vti.Mask V0), GPR:$vl,
vti.Log2SEW)>;
}
defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
(splatpat_kind simm5:$rs2), cc,
+ VR:$merge,
(vti.Mask V0),
VLOpFrag)),
- (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (instruction_masked VR:$merge, vti.RegClass:$rs1,
(DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl,
vti.Log2SEW)>;
// FIXME: Can do some canonicalization to remove these patterns.
def : Pat<(vti.Mask (riscv_setcc_vl (splatpat_kind simm5:$rs2),
(vti.Vector vti.RegClass:$rs1), invcc,
+ VR:$merge,
(vti.Mask V0),
VLOpFrag)),
- (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (instruction_masked VR:$merge, vti.RegClass:$rs1,
(DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl,
vti.Log2SEW)>;
}
def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
fvti.RegClass:$rs2,
cc,
+ VR:$merge,
(fvti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX#"_MASK")
- (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ VR:$merge, fvti.RegClass:$rs1,
fvti.RegClass:$rs2, (fvti.Mask V0),
GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
(SplatFPOp fvti.ScalarRegClass:$rs2),
cc,
+ VR:$merge,
(fvti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
- (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ VR:$merge, fvti.RegClass:$rs1,
fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Mask (riscv_setcc_vl (SplatFPOp fvti.ScalarRegClass:$rs2),
(fvti.Vector fvti.RegClass:$rs1),
cc,
+ VR:$merge,
(fvti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
- (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ VR:$merge, fvti.RegClass:$rs1,
fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
GPR:$vl, fvti.Log2SEW)>;
}
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
ret <vscale x 1 x half> %a
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI1_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
ret <vscale x 2 x half> %a
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI2_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv.v.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
ret <vscale x 4 x half> %a
; CHECK-NEXT: vmflt.vf v10, v12, ft0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI3_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vmv1r.v v11, v10
; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmflt.vf v12, v16, ft0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v13, v12
; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmflt.vf v16, v24, ft0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vmv1r.v v17, v16
; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI6_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
ret <vscale x 1 x float> %a
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI7_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv.v.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %x)
ret <vscale x 2 x float> %a
; CHECK-NEXT: vmflt.vf v10, v12, ft0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI8_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vmv1r.v v11, v10
; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmflt.vf v12, v16, ft0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v13, v12
; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmflt.vf v16, v24, ft0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vmv1r.v v17, v16
; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI11_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv.v.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
ret <vscale x 1 x double> %a
; CHECK-NEXT: vmflt.vf v10, v12, ft0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI12_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vmv1r.v v11, v10
; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmflt.vf v12, v16, ft0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI13_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v13, v12
; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmflt.vf v16, v24, ft0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vmv1r.v v17, v16
; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x)
ret <vscale x 1 x half> %a
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI1_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %x)
ret <vscale x 2 x half> %a
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI2_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv.v.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %x)
ret <vscale x 4 x half> %a
; CHECK-NEXT: vmflt.vf v10, v12, ft0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI3_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vmv1r.v v11, v10
; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmflt.vf v12, v16, ft0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v13, v12
; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmflt.vf v16, v24, ft0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vmv1r.v v17, v16
; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI6_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x float> @llvm.floor.nxv1f32(<vscale x 1 x float> %x)
ret <vscale x 1 x float> %a
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI7_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv.v.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> %x)
ret <vscale x 2 x float> %a
; CHECK-NEXT: vmflt.vf v10, v12, ft0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI8_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vmv1r.v v11, v10
; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmflt.vf v12, v16, ft0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v13, v12
; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmflt.vf v16, v24, ft0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vmv1r.v v17, v16
; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmflt.vf v9, v9, ft0
; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI11_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT: vmv.v.v v10, v9
+; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> %x)
ret <vscale x 1 x double> %a
; CHECK-NEXT: vmflt.vf v10, v12, ft0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI12_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vmv1r.v v11, v10
; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmflt.vf v12, v16, ft0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI13_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v13, v12
; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmflt.vf v16, v24, ft0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0)
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vmv1r.v v17, v16
; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-LABEL: ceil_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI94_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI94_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v8, v8, ft0
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
; CHECK-NEXT: lui a1, %hi(.LCPI94_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI94_1)(a1)
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v10, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <8 x half>, <8 x half>* %x
%b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
; CHECK-LABEL: ceil_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI95_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI95_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v8, v8, ft0
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
; CHECK-NEXT: lui a1, %hi(.LCPI95_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI95_1)(a1)
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v10, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT: vse32.v v9, (a0)
+; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vse32.v v10, (a0)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
; CHECK-LABEL: ceil_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vle64.v v9, (a0)
+; CHECK-NEXT: vle64.v v10, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI96_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI96_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v8, v8, ft0
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
; CHECK-NEXT: lui a1, %hi(.LCPI96_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI96_1)(a1)
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v10, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT: vse64.v v9, (a0)
+; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vse64.v v10, (a0)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
; CHECK-LABEL: floor_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI97_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI97_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v8, v8, ft0
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
; CHECK-NEXT: lui a1, %hi(.LCPI97_1)
; CHECK-NEXT: flh ft0, %lo(.LCPI97_1)(a1)
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v9, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <8 x half>, <8 x half>* %x
%b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
; CHECK-LABEL: floor_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI98_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI98_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v8, v8, ft0
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
; CHECK-NEXT: lui a1, %hi(.LCPI98_1)
; CHECK-NEXT: flw ft0, %lo(.LCPI98_1)(a1)
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v9, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT: vse32.v v9, (a0)
+; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vse32.v v10, (a0)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
; CHECK-LABEL: floor_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vle64.v v9, (a0)
+; CHECK-NEXT: vle64.v v10, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI99_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI99_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v8, v8, ft0
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
; CHECK-NEXT: lui a1, %hi(.LCPI99_1)
; CHECK-NEXT: fld ft0, %lo(.LCPI99_1)(a1)
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmflt.vv v0, v9, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT: vse64.v v9, (a0)
+; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vse64.v v10, (a0)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)