From 35c877a6f0acc84b707b4de70827688d9bafd6e6 Mon Sep 17 00:00:00 2001 From: Yeting Kuo Date: Tue, 18 Apr 2023 16:47:33 +0800 Subject: [PATCH] [RISCV] Customed lower vector nearbyint and rint in RISC-V. The patch lowers vector rint/nearbyint like vp.rint/nearbyint. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D148619 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 15 +- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 34 +++ llvm/test/Analysis/CostModel/RISCV/fround.ll | 68 ++--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 138 ++++++++++ llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll | 305 +++++++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll | 275 +++++++++++++++++++ 6 files changed, 794 insertions(+), 41 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7641c0f..d714399 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -766,9 +766,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); - setOperationAction( - {ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN}, - VT, Custom); + setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, + ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT}, + VT, Custom); setOperationAction(FloatingPointVecReduceOps, VT, Custom); @@ -783,8 +783,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FLOG10, VT, Expand); - setOperationAction(ISD::FRINT, VT, Expand); - setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Legal); @@ -1031,7 +1029,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, - ISD::FROUNDEVEN}, + ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT}, VT, Custom); setCondCodeAction(VFPCCToExpand, VT, Expand); @@ -2504,9 +2502,11 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src, Mask, VL); break; + case ISD::FRINT: case ISD::VP_FRINT: Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL); break; + case ISD::FNEARBYINT: case ISD::VP_FNEARBYINT: Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src, Mask, VL); @@ -2514,7 +2514,7 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, } // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. - if (Op.getOpcode() != ISD::VP_FNEARBYINT) + if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL) Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated, Mask, VL); @@ -4520,6 +4520,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FTRUNC: case ISD::FCEIL: case ISD::FFLOOR: + case ISD::FNEARBYINT: case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 042b09d..e4a7f73 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -626,6 +626,40 @@ static const CostTblEntry VectorIntrinsicCostTable[]{ {Intrinsic::roundeven, MVT::nxv2f64, 9}, {Intrinsic::roundeven, MVT::nxv4f64, 9}, {Intrinsic::roundeven, MVT::nxv8f64, 9}, + {Intrinsic::rint, MVT::v2f32, 7}, + {Intrinsic::rint, MVT::v4f32, 7}, + {Intrinsic::rint, MVT::v8f32, 7}, + {Intrinsic::rint, MVT::v16f32, 7}, + {Intrinsic::rint, MVT::nxv1f32, 7}, + {Intrinsic::rint, MVT::nxv2f32, 7}, + {Intrinsic::rint, MVT::nxv4f32, 7}, + {Intrinsic::rint, MVT::nxv8f32, 7}, + {Intrinsic::rint, MVT::nxv16f32, 7}, + {Intrinsic::rint, MVT::v2f64, 7}, + {Intrinsic::rint, MVT::v4f64, 7}, + {Intrinsic::rint, MVT::v8f64, 7}, + {Intrinsic::rint, MVT::v16f64, 7}, + {Intrinsic::rint, MVT::nxv1f64, 7}, + {Intrinsic::rint, MVT::nxv2f64, 7}, + {Intrinsic::rint, MVT::nxv4f64, 7}, + {Intrinsic::rint, MVT::nxv8f64, 7}, + {Intrinsic::nearbyint, MVT::v2f32, 9}, + {Intrinsic::nearbyint, MVT::v4f32, 9}, + {Intrinsic::nearbyint, MVT::v8f32, 9}, + {Intrinsic::nearbyint, MVT::v16f32, 9}, + {Intrinsic::nearbyint, MVT::nxv1f32, 9}, + {Intrinsic::nearbyint, MVT::nxv2f32, 9}, + {Intrinsic::nearbyint, MVT::nxv4f32, 9}, + {Intrinsic::nearbyint, MVT::nxv8f32, 9}, + {Intrinsic::nearbyint, MVT::nxv16f32, 9}, + {Intrinsic::nearbyint, MVT::v2f64, 9}, + {Intrinsic::nearbyint, MVT::v4f64, 9}, + {Intrinsic::nearbyint, MVT::v8f64, 9}, + {Intrinsic::nearbyint, MVT::v16f64, 9}, + {Intrinsic::nearbyint, MVT::nxv1f64, 9}, + {Intrinsic::nearbyint, MVT::nxv2f64, 9}, + {Intrinsic::nearbyint, MVT::nxv4f64, 9}, + {Intrinsic::nearbyint, MVT::nxv8f64, 9}, {Intrinsic::bswap, MVT::v2i16, 3}, {Intrinsic::bswap, MVT::v4i16, 3}, {Intrinsic::bswap, MVT::v8i16, 3}, diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll index 37f02eb..b8e6482 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fround.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll @@ -139,24 +139,24 @@ define void @trunc() { define void @rint() { ; CHECK-LABEL: 'rint' ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.rint.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %2 = call <2 x float> @llvm.rint.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %3 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %4 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %5 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.rint.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.rint.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.rint.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.rint.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.rint.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.rint.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.rint.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.rint.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.rint.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.rint.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call @llvm.rint.nxv16f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.rint.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %12 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %13 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %14 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %15 = call <16 x double> @llvm.rint.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call @llvm.rint.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call @llvm.rint.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call @llvm.rint.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %19 = call @llvm.rint.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x double> @llvm.rint.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.rint.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.rint.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.rint.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call @llvm.rint.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; call float @llvm.rint.f32(float undef) @@ -184,24 +184,24 @@ define void @rint() { define void @nearbyint() { ; CHECK-LABEL: 'nearbyint' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.nearbyint.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %2 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %3 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %4 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %5 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.nearbyint.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.nearbyint.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.nearbyint.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.nearbyint.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.nearbyint.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call @llvm.nearbyint.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call @llvm.nearbyint.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call @llvm.nearbyint.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.nearbyint.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.nearbyint.nxv16f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.nearbyint.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %13 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %14 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %15 = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call @llvm.nearbyint.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call @llvm.nearbyint.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call @llvm.nearbyint.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %19 = call @llvm.nearbyint.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.nearbyint.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.nearbyint.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.nearbyint.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.nearbyint.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; call float @llvm.nearbyint.f32(float undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index aaf5820..3c5eaec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -2249,6 +2249,144 @@ define void @round_v2f64(ptr %x) { } declare <2 x double> @llvm.round.v2f64(<2 x double>) +define void @rint_v8f16(ptr %x) { +; CHECK-LABEL: rint_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI103_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI103_0)(a1) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, ptr %x + %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a) + store <8 x half> %b, ptr %x + ret void +} +declare <8 x half> @llvm.rint.v8f16(<8 x half>) + +define void @rint_v4f32(ptr %x) { +; CHECK-LABEL: rint_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: lui a1, 307200 +; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, ptr %x + %b = call <4 x float> @llvm.rint.v4f32(<4 x float> %a) + store <4 x float> %b, ptr %x + ret void +} +declare <4 x float> @llvm.rint.v4f32(<4 x float>) + +define void @rint_v2f64(ptr %x) { +; CHECK-LABEL: rint_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI105_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI105_0)(a1) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, ptr %x + %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a) + store <2 x double> %b, ptr %x + ret void +} +declare <2 x double> @llvm.rint.v2f64(<2 x double>) + +define void @nearbyint_v8f16(ptr %x) { +; CHECK-LABEL: nearbyint_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI106_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI106_0)(a1) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, ptr %x + %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a) + store <8 x half> %b, ptr %x + ret void +} +declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) + +define void @nearbyint_v4f32(ptr %x) { +; CHECK-LABEL: nearbyint_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: lui a1, 307200 +; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a1 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, ptr %x + %b = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) + store <4 x float> %b, ptr %x + ret void +} +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) + +define void @nearbyint_v2f64(ptr %x) { +; CHECK-LABEL: nearbyint_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI108_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI108_0)(a1) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a1 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, ptr %x + %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) + store <2 x double> %b, ptr %x + ret void +} +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) + define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fmuladd_v8f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll new file mode 100644 index 0000000..ce3eb84 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll @@ -0,0 +1,305 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define @nearbyint_nxv1f16( %x) { +; CHECK-LABEL: nearbyint_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv1f16( %x) + ret %a +} +declare @llvm.nearbyint.nxv1f16() + +define @nearbyint_nxv2f16( %x) { +; CHECK-LABEL: nearbyint_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv2f16( %x) + ret %a +} +declare @llvm.nearbyint.nxv2f16() + +define @nearbyint_nxv4f16( %x) { +; CHECK-LABEL: nearbyint_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv4f16( %x) + ret %a +} +declare @llvm.nearbyint.nxv4f16() + +define @nearbyint_nxv8f16( %x) { +; CHECK-LABEL: nearbyint_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv8f16( %x) + ret %a +} +declare @llvm.nearbyint.nxv8f16() + +define @nearbyint_nxv16f16( %x) { +; CHECK-LABEL: nearbyint_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv16f16( %x) + ret %a +} +declare @llvm.nearbyint.nxv16f16() + +define @nearbyint_nxv32f16( %x) { +; CHECK-LABEL: nearbyint_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv32f16( %x) + ret %a +} +declare @llvm.nearbyint.nxv32f16() + +define @nearbyint_nxv1f32( %x) { +; CHECK-LABEL: nearbyint_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv1f32( %x) + ret %a +} +declare @llvm.nearbyint.nxv1f32() + +define @nearbyint_nxv2f32( %x) { +; CHECK-LABEL: nearbyint_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv2f32( %x) + ret %a +} +declare @llvm.nearbyint.nxv2f32() + +define @nearbyint_nxv4f32( %x) { +; CHECK-LABEL: nearbyint_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv4f32( %x) + ret %a +} +declare @llvm.nearbyint.nxv4f32() + +define @nearbyint_nxv8f32( %x) { +; CHECK-LABEL: nearbyint_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv8f32( %x) + ret %a +} +declare @llvm.nearbyint.nxv8f32() + +define @nearbyint_nxv16f32( %x) { +; CHECK-LABEL: nearbyint_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv16f32( %x) + ret %a +} +declare @llvm.nearbyint.nxv16f32() + +define @nearbyint_nxv1f64( %x) { +; CHECK-LABEL: nearbyint_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv1f64( %x) + ret %a +} +declare @llvm.nearbyint.nxv1f64() + +define @nearbyint_nxv2f64( %x) { +; CHECK-LABEL: nearbyint_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv2f64( %x) + ret %a +} +declare @llvm.nearbyint.nxv2f64() + +define @nearbyint_nxv4f64( %x) { +; CHECK-LABEL: nearbyint_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv4f64( %x) + ret %a +} +declare @llvm.nearbyint.nxv4f64() + +define @nearbyint_nxv8f64( %x) { +; CHECK-LABEL: nearbyint_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.nearbyint.nxv8f64( %x) + ret %a +} +declare @llvm.nearbyint.nxv8f64() diff --git a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll new file mode 100644 index 0000000..f791968 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll @@ -0,0 +1,275 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define @rint_nxv1f16( %x) { +; CHECK-LABEL: rint_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv1f16( %x) + ret %a +} +declare @llvm.rint.nxv1f16() + +define @rint_nxv2f16( %x) { +; CHECK-LABEL: rint_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv2f16( %x) + ret %a +} +declare @llvm.rint.nxv2f16() + +define @rint_nxv4f16( %x) { +; CHECK-LABEL: rint_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv4f16( %x) + ret %a +} +declare @llvm.rint.nxv4f16() + +define @rint_nxv8f16( %x) { +; CHECK-LABEL: rint_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv8f16( %x) + ret %a +} +declare @llvm.rint.nxv8f16() + +define @rint_nxv16f16( %x) { +; CHECK-LABEL: rint_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv16f16( %x) + ret %a +} +declare @llvm.rint.nxv16f16() + +define @rint_nxv32f16( %x) { +; CHECK-LABEL: rint_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv32f16( %x) + ret %a +} +declare @llvm.rint.nxv32f16() + +define @rint_nxv1f32( %x) { +; CHECK-LABEL: rint_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv1f32( %x) + ret %a +} +declare @llvm.rint.nxv1f32() + +define @rint_nxv2f32( %x) { +; CHECK-LABEL: rint_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv2f32( %x) + ret %a +} +declare @llvm.rint.nxv2f32() + +define @rint_nxv4f32( %x) { +; CHECK-LABEL: rint_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv4f32( %x) + ret %a +} +declare @llvm.rint.nxv4f32() + +define @rint_nxv8f32( %x) { +; CHECK-LABEL: rint_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv8f32( %x) + ret %a +} +declare @llvm.rint.nxv8f32() + +define @rint_nxv16f32( %x) { +; CHECK-LABEL: rint_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv16f32( %x) + ret %a +} +declare @llvm.rint.nxv16f32() + +define @rint_nxv1f64( %x) { +; CHECK-LABEL: rint_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv1f64( %x) + ret %a +} +declare @llvm.rint.nxv1f64() + +define @rint_nxv2f64( %x) { +; CHECK-LABEL: rint_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv2f64( %x) + ret %a +} +declare @llvm.rint.nxv2f64() + +define @rint_nxv4f64( %x) { +; CHECK-LABEL: rint_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv4f64( %x) + ret %a +} +declare @llvm.rint.nxv4f64() + +define @rint_nxv8f64( %x) { +; CHECK-LABEL: rint_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv8f64( %x) + ret %a +} +declare @llvm.rint.nxv8f64() -- 2.7.4