From: Simon Tatham Date: Mon, 17 Feb 2020 17:05:13 +0000 (+0000) Subject: [ARM,MVE] Add intrinsics for FP rounding operations. X-Git-Tag: llvmorg-12-init~14394 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c8b3196e54308b0113d2a0888d13ccc92e3b7ccc;p=platform%2Fupstream%2Fllvm.git [ARM,MVE] Add intrinsics for FP rounding operations. Summary: This adds the unpredicated forms of six different MVE intrinsics which all round a vector of floating-point numbers to integer values, leaving them still in FP format, differing only in rounding mode and exception settings. Five of them map to existing target-independent intrinsics in LLVM IR, such as @llvm.trunc and @llvm.rint. The sixth, mapping to the `vrintn` instruction, is done by inventing a target-specific intrinsic. (`vrintn` behaves the same as `vrintx` in terms of the output value: the side effects on the FPSCR flags are the only difference between the two. But ACLE specifies separate user-callable intrinsics for the two, so the side effects matter enough to make sure we generate the right one of the two instructions in each case.) Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D74333 --- diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 5b20f23..a2bf7af 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -417,6 +417,21 @@ defm : float_int_conversions; defm : float_int_conversions; defm : float_int_conversions; +let params = T.Float in { + def vrndq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"trunc", [Vector]> $a)>; + def vrndmq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"floor", [Vector]> $a)>; + def vrndpq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"ceil", [Vector]> $a)>; + def vrndaq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"round", [Vector]> $a)>; + def vrndxq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"rint", [Vector]> $a)>; + def vrndnq: Intrinsic<Vector, (args Vector:$a), + (IRInt<"vrintn", [Vector]> $a)>; +} + multiclass compare_with_pred { // Make the predicated and unpredicated versions of a single comparison.
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c new file mode 100644 index 0000000..a324c36 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c @@ -0,0 +1,173 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s + +#include <arm_mve.h> + +// CHECK-LABEL: @test_vrndaq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.round.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndaq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndaq(a); +#else /* POLYMORPHIC */ + return vrndaq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndaq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndaq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndaq(a); +#else /* POLYMORPHIC */ + return vrndaq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndmq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndmq(a); +#else /* POLYMORPHIC */ + return vrndmq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[A:%.*]]) 
+// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndmq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndmq(a); +#else /* POLYMORPHIC */ + return vrndmq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.ceil.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndpq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndpq(a); +#else /* POLYMORPHIC */ + return vrndpq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndpq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndpq(a); +#else /* POLYMORPHIC */ + return vrndpq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.trunc.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndq(a); +#else /* POLYMORPHIC */ + return vrndq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndq(a); +#else /* POLYMORPHIC */ + return vrndq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.rint.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndxq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndxq(a); +#else /* POLYMORPHIC */ + return vrndxq_f16(a); +#endif /* POLYMORPHIC */ +} + +// 
CHECK-LABEL: @test_vrndxq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndxq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndxq(a); +#else /* POLYMORPHIC */ + return vrndxq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndnq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndnq(a); +#else /* POLYMORPHIC */ + return vrndnq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndnq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndnq(a); +#else /* POLYMORPHIC */ + return vrndnq_f32(a); +#endif /* POLYMORPHIC */ +} diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 9c9339c..80ab3a7 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -1158,4 +1158,8 @@ defm int_arm_mve_vcvt_fix: MVEMXPredicated< [llvm_anyvector_ty /* output */], [llvm_i32_ty], [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */], LLVMMatchType<0>, llvm_anyvector_ty>; + +def int_arm_mve_vrintn: Intrinsic< + [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; + } // end TargetPrefix diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 5a2bb9c..b0ec2049 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -3179,6 +3179,10 @@ let Predicates = [HasMVEFloat] in { (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>; def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))), (v8f16 
(MVE_VRINTf16P (v8f16 MQPR:$val1)))>; + def : Pat<(v4f32 (int_arm_mve_vrintn (v4f32 MQPR:$val1))), + (v4f32 (MVE_VRINTf32N (v4f32 MQPR:$val1)))>; + def : Pat<(v8f16 (int_arm_mve_vrintn (v8f16 MQPR:$val1))), + (v8f16 (MVE_VRINTf16N (v8f16 MQPR:$val1)))>; } class MVEFloatArithNeon @test_vrndnq_f16(<8 x half> %a) { +; CHECK-LABEL: test_vrndnq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintn.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> %a) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndnq_f32(<4 x float> %a) { +; CHECK-LABEL: test_vrndnq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintn.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> %a) + ret <4 x float> %0 +} + +declare <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half>) +declare <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float>)