From 68ad28c34a53456625df599035f7b6530b46ba0f Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Mon, 25 Jul 2016 16:10:52 +0000 Subject: [PATCH] [AArch64][8/10] ARMv8.2-A FP16 two operands scalar intrinsics gcc/ * config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64.md (hf3): New. (hf3): Likewise. (add3): Likewise. (sub3): Likewise. (mul3): Likewise. (div3): Likewise. (*div3): Likewise. (3): Extend to HF. * config/aarch64/aarch64-simd.md (aarch64_rsqrts): Likewise. (fabd3): Likewise. (3): Likewise. (3): Likewise. (aarch64_fmulx): Likewise. (aarch64_fac): Likewise. (aarch64_frecps): Likewise. (hfhi3): New. (hihf3): Likewise. * config/aarch64/iterators.md (VHSDF_SDF): Delete. (VSDQ_HSDI): Support HI. (fcvt_target, FCVT_TARGET): Likewise. * config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16, vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16, vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32, vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64, vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16, vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16, vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16, vrsqrtsh_f16): New. From-SVN: r238723 --- gcc/ChangeLog | 38 ++++- gcc/config/aarch64/aarch64-simd-builtins.def | 31 +++-- gcc/config/aarch64/aarch64-simd.md | 40 +++--- gcc/config/aarch64/aarch64.md | 92 ++++++++---- gcc/config/aarch64/arm_fp16.h | 200 +++++++++++++++++++++++++++ gcc/config/aarch64/iterators.md | 11 +- 6 files changed, 347 insertions(+), 65 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a26721b..3bef6f0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,38 @@ 2016-07-25 Jiong Wang + * config/aarch64/aarch64-simd-builtins.def: Register new builtins. + * config/aarch64/aarch64.md (hf3): + New. + (hf3): Likewise. + (add3): Likewise. + (sub3): Likewise. + (mul3): Likewise. + (div3): Likewise. + (*div3): Likewise. + (3): Extend to HF. + * config/aarch64/aarch64-simd.md (aarch64_rsqrts): Likewise. + (fabd3): Likewise. + (3): Likewise. + (3): Likewise. + (aarch64_fmulx): Likewise. + (aarch64_fac): Likewise. + (aarch64_frecps): Likewise. + (hfhi3): New. + (hihf3): Likewise. + * config/aarch64/iterators.md (VHSDF_SDF): Delete. + (VSDQ_HSDI): Support HI. + (fcvt_target, FCVT_TARGET): Likewise. + * config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16, + vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16, + vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32, + vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64, + vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16, + vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16, + vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16, + vrsqrtsh_f16): New. + +2016-07-25 Jiong Wang + * config.gcc (aarch64*-*-*): Install arm_fp16.h. * config/aarch64/aarch64-builtins.c (hi_UP): New. * config/aarch64/aarch64-simd-builtins.def: Register new builtins. @@ -11,6 +44,7 @@ (l2): Likewise. (fix_trunc2): Likewise. (sqrt2): Likewise. + (*sqrt2): Likewise. (abs2): Likewise. (hf2): New pattern for HF mode. (hihf2): Likewise. @@ -58,7 +92,7 @@ (f, fp): Support HF modes. 
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16, vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16, - vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16, + vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16, vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16, vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16, vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New. @@ -159,7 +193,7 @@ and V8HFmode. * config/aarch64/arm_neon.h (__INTERLEAVE_LIST): Support float16x4_t, float16x8_t. - (__aarch64_vdup_lane_f16, __aarch64_vdup_laneq_f16, + (__aarch64_vdup_lane_f16, __aarch64_vdup_laneq_f16, __aarch64_vdupq_lane_f16, __aarch64_vdupq_laneq_f16, vbsl_f16, vbslq_f16, vdup_n_f16, vdupq_n_f16, vdup_lane_f16, vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vduph_lane_f16, vduph_laneq_f16, diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 6f50d84..31abc07 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -41,7 +41,7 @@ BUILTIN_VDC (COMBINE, combine, 0) BUILTIN_VB (BINOP, pmul, 0) - BUILTIN_VHSDF_SDF (BINOP, fmulx, 0) + BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0) BUILTIN_VHSDF_DF (UNOP, sqrt, 2) BUILTIN_VD_BHSI (BINOP, addp, 0) VAR1 (UNOP, addp, 0, di) @@ -393,13 +393,12 @@ /* Implemented by aarch64_frecp. */ BUILTIN_GPF_F16 (UNOP, frecpe, 0) - BUILTIN_GPF (BINOP, frecps, 0) BUILTIN_GPF_F16 (UNOP, frecpx, 0) BUILTIN_VDQ_SI (UNOP, urecpe, 0) BUILTIN_VHSDF (UNOP, frecpe, 0) - BUILTIN_VHSDF (BINOP, frecps, 0) + BUILTIN_VHSDF_HSDF (BINOP, frecps, 0) /* Implemented by a mixture of abs2 patterns. Note the DImode builtin is only ever used for the int64x1_t intrinsic, there is no scalar version. */ @@ -496,17 +495,23 @@ /* Implemented by <*><*>3. */ BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3) BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3) - BUILTIN_VHSDF_SDF (SHIFTIMM, fcvtzs, 3) - BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3) + BUILTIN_VHSDF_HSDF (SHIFTIMM, fcvtzs, 3) + BUILTIN_VHSDF_HSDF (SHIFTIMM_USS, fcvtzu, 3) + VAR1 (SHIFTIMM, scvtfsi, 3, hf) + VAR1 (SHIFTIMM, scvtfdi, 3, hf) + VAR1 (FCVTIMM_SUS, ucvtfsi, 3, hf) + VAR1 (FCVTIMM_SUS, ucvtfdi, 3, hf) + BUILTIN_GPI (SHIFTIMM, fcvtzshf, 3) + BUILTIN_GPI (SHIFTIMM_USS, fcvtzuhf, 3) /* Implemented by aarch64_rsqrte. */ BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0) /* Implemented by aarch64_rsqrts. */ - BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0) + BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0) /* Implemented by fabd3. */ - BUILTIN_VHSDF_SDF (BINOP, fabd, 3) + BUILTIN_VHSDF_HSDF (BINOP, fabd, 3) /* Implemented by aarch64_faddp. */ BUILTIN_VHSDF (BINOP, faddp, 0) @@ -522,10 +527,10 @@ BUILTIN_VHSDF_HSDF (UNOP, neg, 2) /* Implemented by aarch64_fac. */ - BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0) - BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0) - BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0) - BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0) /* Implemented by sqrt2. */ VAR1 (UNOP, sqrt, 2, hf) @@ -543,3 +548,7 @@ BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2) BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2) BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2) + + /* Implemented by 3. 
*/ + VAR1 (BINOP, fmax, 3, hf) + VAR1 (BINOP, fmin, 3, hf) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 2ed9d9f..0bf3ac8 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -391,9 +391,9 @@ [(set_attr "type" "neon_fp_rsqrte_")]) (define_insn "aarch64_rsqrts" - [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w") - (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w") - (match_operand:VHSDF_SDF 2 "register_operand" "w")] + [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") + (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w") + (match_operand:VHSDF_HSDF 2 "register_operand" "w")] UNSPEC_RSQRTS))] "TARGET_SIMD" "frsqrts\\t%0, %1, %2" @@ -475,11 +475,11 @@ ) (define_insn "fabd3" - [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w") - (abs:VHSDF_SDF - (minus:VHSDF_SDF - (match_operand:VHSDF_SDF 1 "register_operand" "w") - (match_operand:VHSDF_SDF 2 "register_operand" "w"))))] + [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") + (abs:VHSDF_HSDF + (minus:VHSDF_HSDF + (match_operand:VHSDF_HSDF 1 "register_operand" "w") + (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))] "TARGET_SIMD" "fabd\t%0, %1, %2" [(set_attr "type" "neon_fp_abd_")] @@ -3078,10 +3078,10 @@ ;; fmulx. (define_insn "aarch64_fmulx" - [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w") - (unspec:VHSDF_SDF - [(match_operand:VHSDF_SDF 1 "register_operand" "w") - (match_operand:VHSDF_SDF 2 "register_operand" "w")] + [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") + (unspec:VHSDF_HSDF + [(match_operand:VHSDF_HSDF 1 "register_operand" "w") + (match_operand:VHSDF_HSDF 2 "register_operand" "w")] UNSPEC_FMULX))] "TARGET_SIMD" "fmulx\t%0, %1, %2" @@ -4341,10 +4341,10 @@ [(set (match_operand: 0 "register_operand" "=w") (neg: (FAC_COMPARISONS: - (abs:VHSDF_SDF - (match_operand:VHSDF_SDF 1 "register_operand" "w")) - (abs:VHSDF_SDF - (match_operand:VHSDF_SDF 2 "register_operand" "w")) + (abs:VHSDF_HSDF + (match_operand:VHSDF_HSDF 1 "register_operand" "w")) + (abs:VHSDF_HSDF + (match_operand:VHSDF_HSDF 2 "register_operand" "w")) )))] "TARGET_SIMD" "fac\t%0, %, %" @@ -5460,10 +5460,10 @@ ) (define_insn "aarch64_frecps" - [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w") - (unspec:VHSDF_SDF - [(match_operand:VHSDF_SDF 1 "register_operand" "w") - (match_operand:VHSDF_SDF 2 "register_operand" "w")] + [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") + (unspec:VHSDF_HSDF + [(match_operand:VHSDF_HSDF 1 "register_operand" "w") + (match_operand:VHSDF_HSDF 2 "register_operand" "w")] UNSPEC_FRECPS))] "TARGET_SIMD" "frecps\\t%0, %1, %2" diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 56ad581..6d0a9dc 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4660,38 +4660,78 @@ (set_attr "simd" "*, yes")] ) +(define_insn "hf3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand:HF 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + FCVT_F2FIXED))] + "TARGET_FP_F16INST" + "\t%0, %h1, #%2" + [(set_attr "type" "f_cvtf2i")] +) + +(define_insn "hf3" + [(set (match_operand:HF 0 "register_operand" "=w") + (unspec:HF [(match_operand:GPI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] + FCVT_FIXED2F))] + "TARGET_FP_F16INST" + "\t%h0, %1, #%2" + [(set_attr "type" "f_cvti2f")] +) + +(define_insn "hf3" + [(set (match_operand:HI 0 
"register_operand" "=w") + (unspec:HI [(match_operand:HF 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + FCVT_F2FIXED))] + "TARGET_SIMD" + "\t%h0, %h1, #%2" + [(set_attr "type" "neon_fp_to_int_s")] +) + +(define_insn "hi3" + [(set (match_operand:HF 0 "register_operand" "=w") + (unspec:HF [(match_operand:HI 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + FCVT_FIXED2F))] + "TARGET_SIMD" + "\t%h0, %h1, #%2" + [(set_attr "type" "neon_int_to_fp_s")] +) + ;; ------------------------------------------------------------------- ;; Floating-point arithmetic ;; ------------------------------------------------------------------- (define_insn "add3" - [(set (match_operand:GPF 0 "register_operand" "=w") - (plus:GPF - (match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w")))] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (plus:GPF_F16 + (match_operand:GPF_F16 1 "register_operand" "w") + (match_operand:GPF_F16 2 "register_operand" "w")))] "TARGET_FLOAT" "fadd\\t%0, %1, %2" - [(set_attr "type" "fadd")] + [(set_attr "type" "fadd")] ) (define_insn "sub3" - [(set (match_operand:GPF 0 "register_operand" "=w") - (minus:GPF - (match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w")))] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (minus:GPF_F16 + (match_operand:GPF_F16 1 "register_operand" "w") + (match_operand:GPF_F16 2 "register_operand" "w")))] "TARGET_FLOAT" "fsub\\t%0, %1, %2" - [(set_attr "type" "fadd")] + [(set_attr "type" "fadd")] ) (define_insn "mul3" - [(set (match_operand:GPF 0 "register_operand" "=w") - (mult:GPF - (match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w")))] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (mult:GPF_F16 + (match_operand:GPF_F16 1 "register_operand" "w") + (match_operand:GPF_F16 2 "register_operand" "w")))] "TARGET_FLOAT" "fmul\\t%0, %1, %2" - [(set_attr "type" "fmul")] + [(set_attr "type" "fmul")] ) (define_insn "*fnmul3" @@ -4715,9 +4755,9 @@ ) (define_expand "div3" - [(set (match_operand:GPF 0 "register_operand") - (div:GPF (match_operand:GPF 1 "general_operand") - (match_operand:GPF 2 "register_operand")))] + [(set (match_operand:GPF_F16 0 "register_operand") + (div:GPF_F16 (match_operand:GPF_F16 1 "general_operand") + (match_operand:GPF_F16 2 "register_operand")))] "TARGET_SIMD" { if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) @@ -4727,12 +4767,12 @@ }) (define_insn "*div3" - [(set (match_operand:GPF 0 "register_operand" "=w") - (div:GPF (match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w")))] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (div:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w") + (match_operand:GPF_F16 2 "register_operand" "w")))] "TARGET_FLOAT" "fdiv\\t%0, %1, %2" - [(set_attr "type" "fdiv")] + [(set_attr "type" "fdiv")] ) (define_insn "neg2" @@ -4792,13 +4832,13 @@ ;; Scalar forms for the IEEE-754 fmax()/fmin() functions (define_insn "3" - [(set (match_operand:GPF 0 "register_operand" "=w") - (unspec:GPF [(match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w")] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w") + (match_operand:GPF_F16 2 "register_operand" "w")] FMAXMIN))] "TARGET_FLOAT" "\\t%0, %1, %2" - [(set_attr "type" "f_minmax")] + [(set_attr "type" 
"f_minmax")] ) ;; For copysign (x, y), we want to generate: diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h index 818aa61..21edc65 100644 --- a/gcc/config/aarch64/arm_fp16.h +++ b/gcc/config/aarch64/arm_fp16.h @@ -360,6 +360,206 @@ vsqrth_f16 (float16_t __a) return __builtin_aarch64_sqrthf (__a); } +/* ARMv8.2-A FP16 two operands scalar intrinsics. */ + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vaddh_f16 (float16_t __a, float16_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vabdh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_fabdhf (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcageh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_facgehf_uss (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcagth_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_facgthf_uss (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcaleh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_faclehf_uss (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcalth_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_faclthf_uss (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vceqh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_cmeqhf_uss (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcgeh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_cmgehf_uss (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcgth_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_cmgthf_uss (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcleh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_cmlehf_uss (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vclth_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_cmlthf_uss (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_s16 (int16_t __a, const int __b) +{ + return __builtin_aarch64_scvtfhi (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_s32 (int32_t __a, const int __b) +{ + return __builtin_aarch64_scvtfsihf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_s64 (int64_t __a, const int __b) +{ + return __builtin_aarch64_scvtfdihf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_u16 (uint16_t __a, const int __b) +{ + return __builtin_aarch64_ucvtfhi_sus (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_u32 (uint32_t __a, const int __b) +{ + return __builtin_aarch64_ucvtfsihf_sus (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_u64 (uint64_t __a, const int __b) +{ + return __builtin_aarch64_ucvtfdihf_sus (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vcvth_n_s16_f16 (float16_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzshf (__a, 
__b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvth_n_s32_f16 (float16_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzshfsi (__a, __b); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvth_n_s64_f16 (float16_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzshfdi (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcvth_n_u16_f16 (float16_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzuhf_uss (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvth_n_u32_f16 (float16_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvth_n_u64_f16 (float16_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vdivh_f16 (float16_t __a, float16_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_fmaxhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxnmh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_fmaxhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_fminhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminnmh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_fminhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmulh_f16 (float16_t __a, float16_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmulxh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_fmulxhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrecpsh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_frecpshf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrsqrtsh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_aarch64_rsqrtshf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vsubh_f16 (float16_t __a, float16_t __b) +{ + return __a - __b; +} + #pragma GCC pop_options #endif diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 20d0f1b..91e2e64 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -105,9 +105,6 @@ (define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST") (V8HF "TARGET_SIMD_F16INST") V2SF V4SF V2DF DF]) -(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST") - (V8HF "TARGET_SIMD_F16INST") - V2SF V4SF V2DF SF DF]) (define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST") (V8HF "TARGET_SIMD_F16INST") V2SF V4SF V2DF @@ -190,7 +187,9 @@ ;; Scalar and Vector modes for S and D, Vector modes for H. (define_mode_iterator VSDQ_HSDI [(V4HI "TARGET_SIMD_F16INST") (V8HI "TARGET_SIMD_F16INST") - V2SI V4SI V2DI SI DI]) + V2SI V4SI V2DI + (HI "TARGET_SIMD_F16INST") + SI DI]) ;; Vector modes for Q and H types. 
(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI]) @@ -705,12 +704,12 @@ (V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf") (SF "si") (DF "di") (SI "sf") (DI "df") (V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf") - (V8HI "v8hf")]) + (V8HI "v8hf") (HF "hi") (HI "hf")]) (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF") (SF "SI") (DF "DI") (SI "SF") (DI "DF") (V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF") - (V8HI "V8HF")]) + (V8HI "V8HF") (HF "HI") (HI "HF")]) ;; for the inequal width integer to fp conversions -- 2.7.4
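
Usage note (not part of the patch): a minimal sketch of how the new scalar FP16 two-operand intrinsics declared in arm_fp16.h above can be exercised once this series is applied. The intrinsic names and the arm_fp16.h header come from the patch itself; the -march=armv8.2-a+fp16 option, the sample values, and the printf reporting are assumptions added for illustration only.

/* Minimal sketch, assuming a GCC with this patch series applied and a
   target/flag combination that enables the ARMv8.2-A FP16 extension,
   e.g.  gcc -march=armv8.2-a+fp16 test.c  */
#include <arm_fp16.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  float16_t a = 1.5f, b = 2.25f;

  float16_t sum = vaddh_f16 (a, b);     /* scalar FADD on H registers */
  float16_t dif = vabdh_f16 (a, b);     /* scalar FABD */
  float16_t quo = vdivh_f16 (a, b);     /* scalar FDIV */
  float16_t mx  = vmaxnmh_f16 (a, b);   /* scalar FMAXNM */
  uint16_t  ge  = vcgeh_f16 (a, b);     /* all-ones mask if a >= b, else 0 */

  /* Fixed-point conversion: signed 32-bit result with a compile-time
     constant number of fraction bits (4 here).  */
  int32_t fx = vcvth_n_s32_f16 (sum, 4);

  printf ("%f %f %f %f %u %d\n", (double) sum, (double) dif, (double) quo,
	  (double) mx, (unsigned) ge, fx);
  return 0;
}

The __fp16-based float16_t arguments promote to float/double in ordinary C arithmetic, so the results can be inspected directly; the _n_ conversion intrinsics require their fraction-bits argument to be an integer constant, matching the SHIFTIMM/FCVTIMM_SUS builtin signatures registered above.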