From: Jiong Wang Date: Mon, 25 Jul 2016 16:15:34 +0000 (+0000) Subject: [AArch64][10/10] ARMv8.2-A FP16 lane scalar intrinsics X-Git-Tag: upstream/12.2.0~45667 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bb6131dbd15c8aca4e87f8197377b62331ecbe4a;p=platform%2Fupstream%2Fgcc.git [AArch64][10/10] ARMv8.2-A FP16 lane scalar intrinsics gcc/ * config/aarch64/arm_neon.h (vfmah_lane_f16, vfmah_laneq_f16, vfmsh_lane_f16, vfmsh_laneq_f16, vmulh_lane_f16, vmulh_laneq_f16, vmulxh_lane_f16, vmulxh_laneq_f16): New. From-SVN: r238725 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c1da62a..303e4f5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,11 @@ 2016-07-25 Jiong Wang + * config/aarch64/arm_neon.h (vfmah_lane_f16, vfmah_laneq_f16, + vfmsh_lane_f16, vfmsh_laneq_f16, vmulh_lane_f16, vmulh_laneq_f16, + vmulxh_lane_f16, vmulxh_laneq_f16): New. + +2016-07-25 Jiong Wang + * config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64.md (fma, fnma): Support HF. * config/aarch64/arm_fp16.h (vfmah_f16, vfmsh_f16): New. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index fd55558..ab3a00c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26777,6 +26777,20 @@ vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) /* ARMv8.2-A FP16 lane vector intrinsics. */ +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vfmah_lane_f16 (float16_t __a, float16_t __b, + float16x4_t __c, const int __lane) +{ + return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vfmah_laneq_f16 (float16_t __a, float16_t __b, + float16x8_t __c, const int __lane) +{ + return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); +} + __extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) vfma_lane_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c, const int __lane) @@ -26817,6 +26831,20 @@ vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c)); } +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vfmsh_lane_f16 (float16_t __a, float16_t __b, + float16x4_t __c, const int __lane) +{ + return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vfmsh_laneq_f16 (float16_t __a, float16_t __b, + float16x8_t __c, const int __lane) +{ + return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); +} + __extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) vfms_lane_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c, const int __lane) @@ -26857,6 +26885,12 @@ vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c)); } +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmulh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_any (__b, __lane); +} + __extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) { @@ -26869,6 +26903,12 @@ vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane))); } +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmulh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_any (__b, __lane); +} + __extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane) { @@ -26893,6 +26933,12 @@ vmulq_n_f16 (float16x8_t __a, float16_t __b) return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0); } +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmulxh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane) +{ + return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane)); +} + __extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) { @@ -26905,6 +26951,12 @@ vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane)); } +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmulxh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane) +{ + return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane)); +} + __extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane) {