From fe89a30c89f79a4ddbb0c22c4ceaf6a1b2e34197 Mon Sep 17 00:00:00 2001
From: Yvan Roux
Date: Fri, 14 Oct 2016 14:51:34 +0200
Subject: [PATCH] gcc/

	Backport from trunk r240398.
	2016-09-23  Matthew Wahab

	* config/arm/arm-arches.def ("armv8.1-a"): Add FL_CRC32.
	("armv8.2-a"): New.
	("armv8.2-a+fp16"): New.
	* config/arm/arm-protos.h (FL2_ARCH8_2): New.
	(FL2_FP16INST): New.
	(FL2_FOR_ARCH8_2A): New.
	* config/arm/arm-tables.opt: Regenerate.
	* config/arm/arm.c (arm_arch8_2): New.
	(arm_fp16_inst): New.
	(arm_option_override): Set arm_arch8_2 and arm_fp16_inst.  Check for incompatible fp16-format settings.
	* config/arm/arm.h (TARGET_VFP_FP16INST): New.
	(TARGET_NEON_FP16INST): New.
	(arm_arch8_2): Declare.
	(arm_fp16_inst): Declare.
	* config/arm/bpabi.h (BE8_LINK_SPEC): Add entries for march=armv8.2-a and march=armv8.2-a+fp16.
	* config/arm/t-aprofile (Arch Matches): Add entries for armv8.2-a and armv8.2-a+fp16.
	* doc/invoke.texi (ARM Options): Add "-march=armv8.1-a", "-march=armv8.2-a" and "-march=armv8.2-a+fp16".

gcc/
	Backport from trunk r240400.
	2016-09-23  Matthew Wahab

	* doc/sourcebuild.texi (ARM-specific attributes): Add entries for arm_fp16_alternative_ok and arm_fp16_none_ok.

gcc/testsuite/
	Backport from trunk r240400.
	2016-09-23  Matthew Wahab

	* g++.dg/ext/arm-fp16/arm-fp16-ops-3.C: Use arm_fp16_alternative_ok.
	* g++.dg/ext/arm-fp16/arm-fp16-ops-4.C: Likewise.
	* gcc.dg/torture/arm-fp16-int-convert-alt.c: Likewise.
	* gcc.dg/torture/arm-fp16-ops-3.c: Likewise.
	* gcc.dg/torture/arm-fp16-ops-4.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-1.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-10.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-11.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-12.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-2.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-3.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-4.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-5.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-6.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-7.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-8.c: Likewise.
	* gcc.target/arm/fp16-compile-alt-9.c: Likewise.
	* gcc.target/arm/fp16-compile-none-1.c: Use arm_fp16_none_ok.
	* gcc.target/arm/fp16-compile-none-2.c: Likewise.
	* gcc.target/arm/fp16-rounding-alt-1.c: Use arm_fp16_alternative_ok.
	* lib/target-supports.exp
	(check_effective_target_arm_fp16_alternative_ok_nocache): New.
	(check_effective_target_arm_fp16_alternative_ok): New.
	(check_effective_target_arm_fp16_none_ok_nocache): New.
	(check_effective_target_arm_fp16_none_ok): New.

gcc/
	Backport from trunk r240401.
	2016-09-23  Matthew Wahab

	* doc/sourcebuild.texi (ARM-specific attributes): Add anchor for arm_v8_1a_neon_ok.  Add entries for arm_v8_2a_fp16_scalar_ok, arm_v8_2a_fp16_scalar_hw, arm_v8_2a_fp16_neon_ok and arm_v8_2a_fp16_neon_hw.
	(Add options): Add entries for arm_v8_1a_neon, arm_v8_2a_fp16_scalar, arm_v8_2a_fp16_neon.

gcc/testsuite/
	Backport from trunk r240401.
	2016-09-23  Matthew Wahab

	* lib/target-supports.exp
	(add_options_for_arm_v8_2a_fp16_scalar): New.
	(add_options_for_arm_v8_2a_fp16_neon): New.
	(check_effective_target_arm_arch_v8_2a_ok): Auto-generate.
	(add_options_for_arm_arch_v8_2a): Auto-generate.
	(check_effective_target_arm_arch_v8_2a_multilib): Auto-generate.
	(check_effective_target_arm_v8_2a_fp16_scalar_ok_nocache): New.
	(check_effective_target_arm_v8_2a_fp16_scalar_ok): New.
	(check_effective_target_arm_v8_2a_fp16_neon_ok_nocache): New.
	(check_effective_target_arm_v8_2a_fp16_neon_ok): New.
	(check_effective_target_arm_v8_2a_fp16_scalar_hw): New.
	(check_effective_target_arm_v8_2a_fp16_neon_hw): New.
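For reference, a testcase opts in to the new support through the usual DejaGnu hooks added above; a minimal sketch (the function body is illustrative only, and vaddq_f16 is one of the intrinsics added later in this series):

/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
/* { dg-add-options arm_v8_2a_fp16_neon } */

#include <arm_neon.h>

/* With the FP16 options applied, half-precision vector arithmetic
   is available.  */
float16x8_t
add_f16 (float16x8_t a, float16x8_t b)
{
  return vaddq_f16 (a, b);
}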
gcc/
	Backport from trunk r240402.
	2016-09-23  Matthew Wahab

	* config/arm/arm-c.c (arm_cpu_builtins): Define
	"__ARM_FEATURE_FP16_SCALAR_ARITHMETIC" and
	"__ARM_FEATURE_FP16_VECTOR_ARITHMETIC".

gcc/testsuite/
	Backport from trunk r240402.
	2016-09-23  Matthew Wahab

	* gcc.target/arm/attr-fp16-arith-1.c: New.
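Shared code can then guard its FP16 sections with the new feature-test macros; roughly (test_scalar_fp16 and test_vector_fp16 are hypothetical helpers, not part of the patch):

extern void test_scalar_fp16 (void);
extern void test_vector_fp16 (void);

static void
run_fp16_tests (void)
{
#ifdef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
  test_scalar_fp16 ();	/* VFP scalar FP16 instructions available.  */
#endif
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  test_vector_fp16 ();	/* Advanced SIMD FP16 instructions available.  */
#endif
}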
gcc/
	Backport from trunk r240403.
	2016-09-23  Jiong Wang
		    Matthew Wahab

	* config/arm/arm.c (output_move_vfp): Weaken assert to allow HImode.
	(arm_hard_regno_mode_ok): Allow HImode values in VFP registers.
	* config/arm/arm.md (*movhi_bytes): Disable when VFP registers are available.  Also fix some white-space.
	* config/arm/vfp.md (*arm_movhi_vfp): New.
	(*thumb2_movhi_vfp): New.

gcc/testsuite/
	Backport from trunk r240403.
	2016-09-23  Matthew Wahab

	* gcc.target/arm/short-vfp-1.c: New.

gcc/
	Backport from trunk r240404.
	2016-09-23  Matthew Wahab

	* config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and V4HF modes.
	(arm_evpc_neon_vtrn): Likewise.
	(arm_evpc_neon_vrev): Likewise.
	(arm_evpc_neon_vext): Likewise.
	* config/arm/arm_neon.h (vbsl_f16): New.
	(vbslq_f16): New.
	(vdup_n_f16): New.
	(vdupq_n_f16): New.
	(vdup_lane_f16): New.
	(vdupq_lane_f16): New.
	(vext_f16): New.
	(vextq_f16): New.
	(vmov_n_f16): New.
	(vmovq_n_f16): New.
	(vrev64_f16): New.
	(vrev64q_f16): New.
	(vtrn_f16): New.
	(vtrnq_f16): New.
	(vuzp_f16): New.
	(vuzpq_f16): New.
	(vzip_f16): New.
	(vzipq_f16): New.
	* config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
	(vdup_lane): New (v8hf, v4hf variants).
	(vext): New (v8hf, v4hf variants).
	(vbsl): New (v8hf, v4hf variants).
	* config/arm/iterators.md (VDQWH): New.
	(VH): New.
	(V_double_vector_mode): Add V8HF and V4HF.  Fix white-space.
	(Scalar_mul_8_16): Fix white-space.
	(Is_d_reg): Add V4HF and V8HF.
	* config/arm/neon.md (neon_vdup_lane_internal): New.
	(neon_vdup_lane): New.
	(neon_vtrn_internal): Replace VDQW with VDQWH.
	(*neon_vtrn_insn): Likewise.
	(neon_vzip_internal): Likewise.  Also fix white-space.
	(*neon_vzip_insn): Likewise.
	(neon_vuzp_internal): Likewise.
	(*neon_vuzp_insn): Likewise.
	* config/arm/vec-common.md (vec_perm_const): New.

gcc/testsuite/
	Backport from trunk r240404.
	2016-09-23  Matthew Wahab

	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (FP16_SUPPORTED): New.
	(expected-hfloat-16x4): Make conditional on __fp16 support.
	(expected-hfloat-16x8): Likewise.
	(vdup_n_f16): Disable for non-AArch64 targets.
	* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests, conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support for testing __fp16.
	* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests, conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.

gcc/
	Backport from trunk r240407.
	2016-09-23  Matthew Wahab
		    Jiong Wang

	* config/arm/arm.c (coproc_secondary_reload_class): Make HFmode available when FP16 instructions are available.
	(output_move_vfp): Add support for 16-bit data moves.
	(arm_validize_comparison): Fix some white-space.  Support HFmode by conversion to SFmode.
	* config/arm/arm.md (truncdfhf2): Fix a comment.
	(extendhfdf2): Likewise.
	(cstorehf4): New.
	(movsicc): Fix some white-space.
	(movhfcc): New.
	(movsfcc): Fix some white-space.
	(*cmovhf): New.
	* config/arm/vfp.md (*arm_movhi_vfp): Disable when VFP FP16 instructions are available.
	(*thumb2_movhi_vfp): Likewise.
	(*arm_movhi_fp16): New.
	(*thumb2_movhi_fp16): New.
	(*movhf_vfp_fp16): New.
	(*movhf_vfp_neon): Disable when VFP FP16 instructions are available.
	(*movhf_vfp): Likewise.
	(extendhfsf2): Enable when VFP FP16 instructions are available.
	(truncsfhf2): Enable when VFP FP16 instructions are available.

gcc/testsuite/
	Backport from trunk r240407.
	2016-09-23  Matthew Wahab

	* gcc.target/arm/armv8_2-fp16-move-1.c: New.
	* gcc.target/arm/fp16-aapcs-1.c: Update expected output.

gcc/
	Backport from trunk r240411.
	2016-09-23  Matthew Wahab

	* config/arm/iterators.md (Code iterators): Fix some white-space in the comments.
	(GLTE): New.
	(ABSNEG): New.
	(FCVT): Moved from vfp.md.
	(VCVT_HF_US_N): New.
	(VCVT_SI_US_N): New.
	(VCVT_HF_US): New.
	(VCVTH_US): New.
	(FP16_RND): New.
	(absneg_str): New.
	(FCVTI32typename): Moved from vfp.md.
	(sup): Add UNSPEC_VCVTA_S, UNSPEC_VCVTA_U, UNSPEC_VCVTM_S, UNSPEC_VCVTM_U, UNSPEC_VCVTN_S, UNSPEC_VCVTN_U, UNSPEC_VCVTP_S, UNSPEC_VCVTP_U, UNSPEC_VCVT_HF_S_N, UNSPEC_VCVT_HF_U_N, UNSPEC_VCVT_SI_S_N, UNSPEC_VCVT_SI_U_N, UNSPEC_VCVTH_S_N, UNSPEC_VCVTH_U_N, UNSPEC_VCVTH_S and UNSPEC_VCVTH_U.
	(vcvth_op): New.
	(fp16_rnd_str): New.
	(fp16_rnd_insn): New.
	* config/arm/unspecs.md (UNSPEC_VCVT_HF_S_N): New.
	(UNSPEC_VCVT_HF_U_N): New.
	(UNSPEC_VCVT_SI_S_N): New.
	(UNSPEC_VCVT_SI_U_N): New.
	(UNSPEC_VCVTH_S): New.
	(UNSPEC_VCVTH_U): New.
	(UNSPEC_VCVTA_S): New.
	(UNSPEC_VCVTA_U): New.
	(UNSPEC_VCVTM_S): New.
	(UNSPEC_VCVTM_U): New.
	(UNSPEC_VCVTN_S): New.
	(UNSPEC_VCVTN_U): New.
	(UNSPEC_VCVTP_S): New.
	(UNSPEC_VCVTP_U): New.
	(UNSPEC_VRND): New.
	(UNSPEC_VRNDA): New.
	(UNSPEC_VRNDI): New.
	(UNSPEC_VRNDM): New.
	(UNSPEC_VRNDN): New.
	(UNSPEC_VRNDP): New.
	(UNSPEC_VRNDX): New.
	* config/arm/vfp.md (hf2): New.
	(neon_vabshf): New.
	(neon_vhf): New.
	(neon_vrndihf): New.
	(addhf3): New.
	(subhf3): New.
	(divhf3): New.
	(mulhf3): New.
	(*mulsf3neghf_vfp): New.
	(*negmulhf3_vfp): New.
	(*mulsf3addhf_vfp): New.
	(*mulhf3subhf_vfp): New.
	(*mulhf3neghfaddhf_vfp): New.
	(*mulhf3neghfsubhf_vfp): New.
	(fmahf4): New.
	(neon_vfmahf): New.
	(fmsubhf4_fp16): New.
	(neon_vfmshf): New.
	(*fnmsubhf4): New.
	(*fnmaddhf4): New.
	(neon_vsqrthf): New.
	(neon_vrsqrtshf): New.
	(FCVT): Move to iterators.md.
	(FCVTI32typename): Likewise.
	(neon_vcvthhf): New.
	(neon_vcvthsi): New.
	(neon_vcvth_nhf_unspec): New.
	(neon_vcvth_nhf): New.
	(neon_vcvth_nsi_unspec): New.
	(neon_vcvth_nsi): New.
	(neon_vcvthsi): New.
	(neon_hf): New.

gcc/testsuite/
	Backport from trunk r240411.
	2016-09-23  Matthew Wahab

	* gcc.target/arm/armv8_2-fp16-arith-1.c: New.
	* gcc.target/arm/armv8_2-fp16-conv-1.c: New.
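The effect of the new vfp.md patterns is visible on plain __fp16 code: built with the new FP16 options plus -ffast-math, operations like the ones below should assemble to the new half-precision instructions instead of round-tripping through single precision (an illustrative kernel in the spirit of armv8_2-fp16-arith-1.c, not taken from the patch):

__fp16 add (__fp16 a, __fp16 b) { return a + b; }	/* vadd.f16  */
__fp16 mul (__fp16 a, __fp16 b) { return a * b; }	/* vmul.f16  */
__fp16 sub (__fp16 a, __fp16 b) { return a - b; }	/* vsub.f16  */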
gcc/
	Backport from trunk r240415.
	2016-09-23  Matthew Wahab

	* config/arm/iterators.md (VCVTHI): New.
	(NEON_VCMP): Add UNSPEC_VCLT and UNSPEC_VCLE.  Fix a long line.
	(NEON_VAGLTE): New.
	(VFM_LANE_AS): New.
	(VH_CVTTO): New.
	(V_reg): Add HF, V4HF and V8HF.  Fix white-space.
	(V_HALF): Add V4HF.  Fix white-space.
	(V_if_elem): Add HF, V4HF and V8HF.  Fix white-space.
	(V_s_elem): Likewise.
	(V_sz_elem): Fix white-space.
	(V_elem_ch): Likewise.
	(VH_elem_ch): New.
	(scalar_mul_constraint): Add V8HF and V4HF.
	(Is_float_mode): Fix white-space.
	(Is_d_reg): Add V4HF and V8HF.  Fix white-space.
	(q): Add HF.  Fix white-space.
	(float_sup): New.
	(float_SUP): New.
	(cmp_op_unsp): Add UNSPEC_VCALE and UNSPEC_VCALT.
	(neon_vfm_lane_as): New.
	* config/arm/neon.md (add3_fp16): New.
	(sub3_fp16): New.
	(mul3add_neon): New.
	(fma4_intrinsic): New.
	(fmsub4_intrinsic): Fix white-space.
	(fmsub4_intrinsic): New.
	(2): New.
	(neon_v): New.
	(neon_v): New.
	(neon_vrsqrte): New.
	(neon_vpaddv4hf): New.
	(neon_vadd): New.
	(neon_vsub): New.
	(neon_vmulf): New.
	(neon_vfma): New.
	(neon_vfms): New.
	(neon_vc): New.
	(neon_vc_fp16insn): New.
	(neon_vc_fp16insn_unspec): New.
	(neon_vca): New.
	(neon_vca_fp16insn): New.
	(neon_vca_fp16insn_unspec): New.
	(neon_vcz): New.
	(neon_vabd): New.
	(neon_vf): New.
	(neon_vpfv4hf): New.
	(neon_): New.
	(neon_vrecps): New.
	(neon_vrsqrts): New.
	(neon_vrecpe): New (VH variant).
	(neon_vdup_lane_internal): New.
	(neon_vdup_lane): New.
	(neon_vcvt): New (VCVTHI variant).
	(neon_vcvt): New (VH variant).
	(neon_vcvt_n): New (VH variant).
	(neon_vcvt_n): New (VCVTHI variant).
	(neon_vcvt): New.
	(neon_vmul_lane): New.
	(neon_vmul_n): New.
	* config/arm/unspecs.md (UNSPEC_VCALE): New.
	(UNSPEC_VCALT): New.
	(UNSPEC_VFMA_LANE): New.
	(UNSPEC_VFMS_LANE): New.

gcc/testsuite/
	Backport from trunk r240415.
	2016-09-23  Matthew Wahab

	* gcc.target/arm/armv8_2-fp16-arith-1.c: Use arm_v8_2a_fp16_neon options.  Add tests for float16x4_t and float16x8_t.

gcc/
	Backport from trunk r240416.
	2016-09-23  Matthew Wahab

	* config/arm/arm-builtins.c (arm_init_neon_builtin): New.
	(arm_init_builtins): Move body of a loop to the standalone function arm_init_neon_builtin.
	(arm_expand_neon_builtin_1): New.  Update comment.  Function body moved from arm_expand_neon_builtin with some white-space fixes.
	(arm_expand_neon_builtin): Move code into the standalone function arm_expand_neon_builtin_1.

gcc/
	Backport from trunk r240421.
	2016-09-23  Matthew Wahab

	* config/arm/arm-builtins.c (hf_UP): New.
	(si_UP): New.
	(vfp_builtin_data): New.  Update comment.
	(enum arm_builtins): Include "arm_vfp_builtins.def".
	(ARM_BUILTIN_VFP_PATTERN_START): New.
	(arm_init_vfp_builtins): New.
	(arm_init_builtins): Add arm_init_vfp_builtins.
	(arm_expand_vfp_builtin): New.
	(arm_expand_builtin): Update for arm_expand_vfp_builtin.  Fix long line.
	* config/arm/arm_vfp_builtins.def: New file.
	* config/arm/t-arm (arm.o): Add arm_vfp_builtins.def.
	(arm-builtins.o): Likewise.

gcc/
	Backport from trunk r240422.
	2016-09-23  Matthew Wahab

	* config/arm/arm_neon_builtins.def (vadd): New (v8hf, v4hf variants).
	(vmulf): New (v8hf, v4hf variants).
	(vfma): New (v8hf, v4hf variants).
	(vfms): New (v8hf, v4hf variants).
	(vsub): New (v8hf, v4hf variants).
	(vcage): New (v8hf, v4hf variants).
	(vcagt): New (v8hf, v4hf variants).
	(vcale): New (v8hf, v4hf variants).
	(vcalt): New (v8hf, v4hf variants).
	(vceq): New (v8hf, v4hf variants).
	(vcgt): New (v8hf, v4hf variants).
	(vcge): New (v8hf, v4hf variants).
	(vcle): New (v8hf, v4hf variants).
	(vclt): New (v8hf, v4hf variants).
	(vceqz): New (v8hf, v4hf variants).
	(vcgez): New (v8hf, v4hf variants).
	(vcgtz): New (v8hf, v4hf variants).
	(vcltz): New (v8hf, v4hf variants).
	(vclez): New (v8hf, v4hf variants).
	(vabd): New (v8hf, v4hf variants).
	(vmaxf): New (v8hf, v4hf variants).
	(vmaxnm): New (v8hf, v4hf variants).
	(vminf): New (v8hf, v4hf variants).
	(vminnm): New (v8hf, v4hf variants).
	(vpmaxf): New (v4hf variant).
	(vpminf): New (v4hf variant).
	(vpadd): New (v4hf variant).
	(vrecps): New (v8hf, v4hf variants).
	(vrsqrts): New (v8hf, v4hf variants).
	(vabs): New (v8hf, v4hf variants).
	(vneg): New (v8hf, v4hf variants).
	(vrecpe): New (v8hf, v4hf variants).
	(vrnd): New (v8hf, v4hf variants).
	(vrnda): New (v8hf, v4hf variants).
	(vrndm): New (v8hf, v4hf variants).
	(vrndn): New (v8hf, v4hf variants).
	(vrndp): New (v8hf, v4hf variants).
	(vrndx): New (v8hf, v4hf variants).
	(vrsqrte): New (v8hf, v4hf variants).
	(vmul_lane): Add v4hf and v8hf variants.
	(vmul_n): Add v4hf and v8hf variants.
	(vext): New (v8hf, v4hf variants).
	(vcvts): New (v8hi, v4hi variants).
	(vcvts): New (v8hf, v4hf variants).
	(vcvtu): New (v8hi, v4hi variants).
	(vcvtu): New (v8hf, v4hf variants).
	(vcvts_n): New (v8hf, v4hf variants).
	(vcvtu_n): New (v8hi, v4hi variants).
	(vcvts_n): New (v8hi, v4hi variants).
	(vcvtu_n): New (v8hf, v4hf variants).
	(vbsl): New (v8hf, v4hf variants).
	(vcvtas): New (v8hf, v4hf variants).
	(vcvtau): New (v8hf, v4hf variants).
	(vcvtms): New (v8hf, v4hf variants).
	(vcvtmu): New (v8hf, v4hf variants).
	(vcvtns): New (v8hf, v4hf variants).
	(vcvtnu): New (v8hf, v4hf variants).
	(vcvtps): New (v8hf, v4hf variants).
	(vcvtpu): New (v8hf, v4hf variants).

gcc/
	Backport from trunk r240423.
	2016-09-23  Matthew Wahab

	* config.gcc (extra_headers): Add arm_fp16.h.
	* config/arm/arm_fp16.h: New.
	* config/arm/arm_neon.h: Include "arm_fp16.h".

gcc/
	Backport from trunk r240424.
	2016-09-23  Matthew Wahab

	* config/arm/arm_neon.h (vabd_f16): New.
	(vabdq_f16): New.
	(vabs_f16): New.
	(vabsq_f16): New.
	(vadd_f16): New.
	(vaddq_f16): New.
	(vcage_f16): New.
	(vcageq_f16): New.
	(vcagt_f16): New.
	(vcagtq_f16): New.
	(vcale_f16): New.
	(vcaleq_f16): New.
	(vcalt_f16): New.
	(vcaltq_f16): New.
	(vceq_f16): New.
	(vceqq_f16): New.
	(vceqz_f16): New.
	(vceqzq_f16): New.
	(vcge_f16): New.
	(vcgeq_f16): New.
	(vcgez_f16): New.
	(vcgezq_f16): New.
	(vcgt_f16): New.
	(vcgtq_f16): New.
	(vcgtz_f16): New.
	(vcgtzq_f16): New.
	(vcle_f16): New.
	(vcleq_f16): New.
	(vclez_f16): New.
	(vclezq_f16): New.
	(vclt_f16): New.
	(vcltq_f16): New.
	(vcltz_f16): New.
	(vcltzq_f16): New.
	(vcvt_f16_s16): New.
	(vcvt_f16_u16): New.
	(vcvt_s16_f16): New.
	(vcvt_u16_f16): New.
	(vcvtq_f16_s16): New.
	(vcvtq_f16_u16): New.
	(vcvtq_s16_f16): New.
	(vcvtq_u16_f16): New.
	(vcvta_s16_f16): New.
	(vcvta_u16_f16): New.
	(vcvtaq_s16_f16): New.
	(vcvtaq_u16_f16): New.
	(vcvtm_s16_f16): New.
	(vcvtm_u16_f16): New.
	(vcvtmq_s16_f16): New.
	(vcvtmq_u16_f16): New.
	(vcvtn_s16_f16): New.
	(vcvtn_u16_f16): New.
	(vcvtnq_s16_f16): New.
	(vcvtnq_u16_f16): New.
	(vcvtp_s16_f16): New.
	(vcvtp_u16_f16): New.
	(vcvtpq_s16_f16): New.
	(vcvtpq_u16_f16): New.
	(vcvt_n_f16_s16): New.
	(vcvt_n_f16_u16): New.
	(vcvtq_n_f16_s16): New.
	(vcvtq_n_f16_u16): New.
	(vcvt_n_s16_f16): New.
	(vcvt_n_u16_f16): New.
	(vcvtq_n_s16_f16): New.
	(vcvtq_n_u16_f16): New.
	(vfma_f16): New.
	(vfmaq_f16): New.
	(vfms_f16): New.
	(vfmsq_f16): New.
	(vmax_f16): New.
	(vmaxq_f16): New.
	(vmaxnm_f16): New.
	(vmaxnmq_f16): New.
	(vmin_f16): New.
	(vminq_f16): New.
	(vminnm_f16): New.
	(vminnmq_f16): New.
	(vmul_f16): New.
	(vmul_lane_f16): New.
	(vmul_n_f16): New.
	(vmulq_f16): New.
	(vmulq_lane_f16): New.
	(vmulq_n_f16): New.
	(vneg_f16): New.
	(vnegq_f16): New.
	(vpadd_f16): New.
	(vpmax_f16): New.
	(vpmin_f16): New.
	(vrecpe_f16): New.
	(vrecpeq_f16): New.
	(vrnd_f16): New.
	(vrndq_f16): New.
	(vrnda_f16): New.
	(vrndaq_f16): New.
	(vrndm_f16): New.
	(vrndmq_f16): New.
	(vrndn_f16): New.
	(vrndnq_f16): New.
	(vrndp_f16): New.
	(vrndpq_f16): New.
	(vrndx_f16): New.
	(vrndxq_f16): New.
	(vrsqrte_f16): New.
	(vrsqrteq_f16): New.
	(vrecps_f16): New.
	(vrecpsq_f16): New.
	(vrsqrts_f16): New.
	(vrsqrtsq_f16): New.
	(vsub_f16): New.
	(vsubq_f16): New.
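Taken together, r240423/r240424 make the ACLE FP16 intrinsics reachable from a single include; a small sketch of the user-level result, assuming the armv8.2-a+fp16 options (vabdq_f16 is from the r240424 list above; vfmah_f16 is one of the arm_fp16.h scalar intrinsics exercised by the new tests below):

#include <arm_neon.h>	/* Also provides the arm_fp16.h scalar intrinsics.  */

/* Scalar fused multiply-add: acc + x * y in half precision.  */
float16_t
scalar_fma (float16_t acc, float16_t x, float16_t y)
{
  return vfmah_f16 (acc, x, y);
}

/* Vector absolute difference on eight half-precision lanes.  */
float16x8_t
vector_abd (float16x8_t x, float16x8_t y)
{
  return vabdq_f16 (x, y);
}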
gcc/testsuite/
	Backport from trunk r240425.
	2016-09-23  Matthew Wahab

	* gcc.target/arm/armv8_2-fp16-neon-1.c: New.
	* gcc.target/arm/armv8_2-fp16-scalar-1.c: New.
	* gcc.target/arm/armv8_2-fp16-scalar-2.c: New.
	* gcc.target/arm/attr-fp16-arith-1.c: Add a test of intrinsics support.

gcc/testsuite/
	Backport from trunk r240426.
	2016-09-23  Jiong Wang
		    Matthew Wahab

	* gcc.target/aarch64/advsimd-intrinsics/binary_scalar_op.inc: New.
	* gcc.target/aarch64/advsimd-intrinsics/unary_scalar_op.inc: New.
	* gcc.target/aarch64/advsimd-intrinsics/ternary_scalar_op.inc: New.
	* gcc.target/aarch64/advsimd-intrinsics/vabsh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vaddh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtah_s32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtah_u32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s32_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u32_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s32_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u32_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_s32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_u32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtph_s32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtph_u32_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vdivh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vfmah_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vfmsh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmaxnmh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vminnmh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmulh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vnegh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrndah_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrndh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrndih_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrndmh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrndnh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrndph_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrndxh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vsqrth_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vsubh_f16_1.c: New.

gcc/testsuite/
	Backport from trunk r240427.
	2016-09-23  Matthew Wahab

	* gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp: Enable -march=armv8.2-a+fp16 when supported by the hardware.
	* gcc.target/aarch64/advsimd-intrinsics/binary_op_float.inc: New.
	* gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc: Add F16 tests, enabled if macro HAS_FLOAT16_VARIANT is defined.  Add semi-colons to macro invocations.
	* gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
	* gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/cmp_zero_op.inc: New.
	* gcc.target/aarch64/advsimd-intrinsics/vabd.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
	* gcc.target/aarch64/advsimd-intrinsics/vabs.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vadd.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vcage.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vcagt.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vcale.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vcalt.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vceq.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vceqz_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcge.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
	* gcc.target/aarch64/advsimd-intrinsics/vcgez_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcgt.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
	* gcc.target/aarch64/advsimd-intrinsics/vcgtz_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcle.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
	* gcc.target/aarch64/advsimd-intrinsics/vclez_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vclt.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
	* gcc.target/aarch64/advsimd-intrinsics/vcltz_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvt.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.  Also fix some white-space.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtX.inc: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvta_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtm_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtp_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vfma.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.  Also fix some long lines and white-space.
	* gcc.target/aarch64/advsimd-intrinsics/vfms.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.  Also fix some long lines and white-space.
	* gcc.target/aarch64/advsimd-intrinsics/vmax.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
	* gcc.target/aarch64/advsimd-intrinsics/vmaxnm_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmin.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
	* gcc.target/aarch64/advsimd-intrinsics/vminnm_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmul.c: Add F16 tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
	* gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vmul_n.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vneg.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vpadd.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vpmax.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vpmin.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrecpe.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrecps.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrnd.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrndX.inc: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrnda.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrndm.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrndn.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrndp.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrndx.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrsqrte.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrsqrts.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vsub.c: Likewise.

gcc/
	Backport from trunk r240541.
	2016-09-27  Matthew Wahab

	* config/arm/arm.md (*arm_movsi_insn): Add "arch" attribute.
	* config/arm/vfp.md (*arm_movhi_vfp): Likewise.
	(*thumb2_movhi_vfp): Likewise.
	(*arm_movhi_fp16): Remove predication operand from VMOV.F16 template.  Expand predicable attribute to mark VMOV.F16 as not predicable.  Add "arch" attribute.
	(*thumb2_movhi_fp16): Likewise.
	(*arm_movsi_vfp): Break a long line.  Add "arch" attribute.
	(*thumb2_movsi_vfp): Add "arch" attribute.

missing/
	Backport from trunk r240542.

gcc/testsuite/
	Backport from trunk r240551.
	2016-09-27  Jiong Wang

	* lib/target-supports.exp
	(check_effective_target_arm_v8_2a_fp16_scalar_hw): Delete redundant word in function comment.

gcc/
	Backport from trunk r240622.
	2016-09-29  Matthew Wahab

	* config/arm/arm.md (*arm_movsi_insn): Replace "t2" arch attribute with "v6t2".  Move "arch" attribute above "pool_range".
	* config/arm/vfp.md (*arm_movhi_vfp): Replace "t2" arch attribute with "v6t2".
	(*thumb2_movhi_vfp): Likewise.
	(*arm_movhi_fp16): Likewise.
	(*thumb2_movhi_fp16): Likewise.
	(*arm_movsi_vfp): Remove "arch" attribute.
	(*thumb2_movsi_vfp): Likewise.

gcc/testsuite/
	Backport from trunk r240921.
	2016-10-10  Matthew Wahab
		    Jiong Wang

	* lib/target-supports.exp (add_options_for_arm_v8_2a_fp16_scalar): Mention AArch64 support.
	(add_options_for_arm_v8_2a_fp16_neon): Likewise.
	(check_effective_target_arm_v8_2a_fp16_scalar_ok_nocache): Support AArch64 targets.
	(check_effective_target_arm_v8_2a_fp16_neon_ok_nocache): Support AArch64 targets.
	(check_effective_target_arm_v8_2a_fp16_scalar_hw): Support AArch64 targets.
	(check_effective_target_arm_v8_2a_fp16_neon_hw): Likewise.

2016-10-10  Eric Botcazou

gcc/testsuite/
	Backport from trunk r240922.
	2016-10-10  Jiong Wang

	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (FP16_SUPPORTED): Enable AArch64.
	* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Add support for vdup*_laneq.
	* gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vzip_half.c: New.

gcc/testsuite/
	Backport from trunk r240923.
	2016-10-10  Jiong Wang

	* gcc.target/aarch64/advsimd-intrinsics/vdiv_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vfmas_lane_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vfmas_n_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmaxnmv_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmaxv_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vminnmv_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vminv_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmul_lane_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmulx_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmulx_lane_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmulx_n_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vpminmaxnm_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrndi_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vsqrt_f16_1.c: New.

gcc/testsuite/
	Backport from trunk r240924.
	2016-10-10  Jiong Wang

	* gcc.target/aarch64/advsimd-intrinsics/unary_scalar_op.inc: Support FMT64.
	* gcc.target/aarch64/advsimd-intrinsics/vabdh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcageh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcagth_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcaleh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcalth_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vceqh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vceqzh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcgeh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcgezh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcgth_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcgtzh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcleh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vclezh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vclth_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcltzh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtah_s16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtah_s64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtah_u16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtah_u64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s64_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u64_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s64_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u64_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_s16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_s64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_u16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvth_u64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtph_s16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtph_s64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtph_u16_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vcvtph_u64_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vfmash_lane_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmaxh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vminh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmulh_lane_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmulxh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vmulxh_lane_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrecpeh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrecpsh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrecpxh_f16_1.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vrsqrteh_f16_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vrsqrtsh_f16_1.c: New. Change-Id: I4f2c1a2c934a8c101457de5a1ce7134baebe2fb0 --- gcc/config.gcc | 2 +- gcc/config/arm/arm-arches.def | 10 +- gcc/config/arm/arm-builtins.c | 361 ++++---- gcc/config/arm/arm-c.c | 5 + gcc/config/arm/arm-protos.h | 4 + gcc/config/arm/arm-tables.opt | 16 +- gcc/config/arm/arm.c | 46 +- gcc/config/arm/arm.h | 14 + gcc/config/arm/arm.md | 88 +- gcc/config/arm/arm_fp16.h | 255 ++++++ gcc/config/arm/arm_neon.h | 850 +++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 63 +- gcc/config/arm/arm_vfp_builtins.def | 51 ++ gcc/config/arm/bpabi.h | 4 + gcc/config/arm/iterators.md | 204 +++-- gcc/config/arm/neon.md | 574 ++++++++++++- gcc/config/arm/t-aprofile | 2 + gcc/config/arm/t-arm | 4 +- gcc/config/arm/unspecs.md | 26 +- gcc/config/arm/vec-common.md | 14 + gcc/config/arm/vfp.md | 665 ++++++++++++++- gcc/doc/invoke.texi | 13 + gcc/doc/sourcebuild.texi | 47 ++ gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-3.C | 1 + gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-4.C | 1 + .../gcc.dg/torture/arm-fp16-int-convert-alt.c | 1 + gcc/testsuite/gcc.dg/torture/arm-fp16-ops-3.c | 1 + gcc/testsuite/gcc.dg/torture/arm-fp16-ops-4.c | 1 + .../advsimd-intrinsics/advsimd-intrinsics.exp | 5 +- .../aarch64/advsimd-intrinsics/arm-neon-ref.h | 17 +- .../aarch64/advsimd-intrinsics/binary_op_float.inc | 170 ++++ .../aarch64/advsimd-intrinsics/binary_op_no64.inc | 57 +- .../advsimd-intrinsics/binary_scalar_op.inc | 160 ++++ .../aarch64/advsimd-intrinsics/cmp_fp_op.inc | 41 + .../aarch64/advsimd-intrinsics/cmp_op.inc | 80 ++ .../aarch64/advsimd-intrinsics/cmp_zero_op.inc | 111 +++ .../advsimd-intrinsics/ternary_scalar_op.inc | 206 +++++ .../aarch64/advsimd-intrinsics/unary_scalar_op.inc | 200 +++++ .../gcc.target/aarch64/advsimd-intrinsics/vabd.c | 57 +- .../aarch64/advsimd-intrinsics/vabdh_f16_1.c | 44 + .../gcc.target/aarch64/advsimd-intrinsics/vabs.c | 28 + .../aarch64/advsimd-intrinsics/vabsh_f16_1.c | 40 + .../gcc.target/aarch64/advsimd-intrinsics/vadd.c | 31 + .../aarch64/advsimd-intrinsics/vaddh_f16_1.c | 40 + .../gcc.target/aarch64/advsimd-intrinsics/vbsl.c | 28 + .../gcc.target/aarch64/advsimd-intrinsics/vcage.c | 10 + .../aarch64/advsimd-intrinsics/vcageh_f16_1.c | 22 + .../gcc.target/aarch64/advsimd-intrinsics/vcagt.c | 10 + .../aarch64/advsimd-intrinsics/vcagth_f16_1.c | 21 + .../gcc.target/aarch64/advsimd-intrinsics/vcale.c | 10 + .../aarch64/advsimd-intrinsics/vcaleh_f16_1.c | 22 + .../gcc.target/aarch64/advsimd-intrinsics/vcalt.c | 10 + .../aarch64/advsimd-intrinsics/vcalth_f16_1.c | 22 + .../gcc.target/aarch64/advsimd-intrinsics/vceq.c | 18 + .../aarch64/advsimd-intrinsics/vceqh_f16_1.c | 21 + .../aarch64/advsimd-intrinsics/vceqz_1.c | 27 + .../aarch64/advsimd-intrinsics/vceqzh_f16_1.c | 21 + .../gcc.target/aarch64/advsimd-intrinsics/vcge.c | 22 + .../aarch64/advsimd-intrinsics/vcgeh_f16_1.c | 22 + .../aarch64/advsimd-intrinsics/vcgez_1.c | 30 + .../aarch64/advsimd-intrinsics/vcgezh_f16_1.c | 22 + .../gcc.target/aarch64/advsimd-intrinsics/vcgt.c | 21 + .../aarch64/advsimd-intrinsics/vcgth_f16_1.c | 22 + .../aarch64/advsimd-intrinsics/vcgtz_1.c | 28 + .../aarch64/advsimd-intrinsics/vcgtzh_f16_1.c | 22 + .../gcc.target/aarch64/advsimd-intrinsics/vcle.c | 22 + .../aarch64/advsimd-intrinsics/vcleh_f16_1.c | 22 + .../aarch64/advsimd-intrinsics/vclez_1.c | 29 + .../aarch64/advsimd-intrinsics/vclezh_f16_1.c | 21 + .../gcc.target/aarch64/advsimd-intrinsics/vclt.c | 21 + .../aarch64/advsimd-intrinsics/vclth_f16_1.c | 22 + 
.../aarch64/advsimd-intrinsics/vcltz_1.c | 27 + .../aarch64/advsimd-intrinsics/vcltzh_f16_1.c | 21 + .../gcc.target/aarch64/advsimd-intrinsics/vcvt.c | 189 ++++- .../aarch64/advsimd-intrinsics/vcvtX.inc | 113 +++ .../aarch64/advsimd-intrinsics/vcvta_1.c | 33 + .../aarch64/advsimd-intrinsics/vcvtah_s16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtah_s32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvtah_s64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtah_u16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtah_u32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvtah_u64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvth_f16_s16_1.c | 25 + .../aarch64/advsimd-intrinsics/vcvth_f16_s32_1.c | 52 ++ .../aarch64/advsimd-intrinsics/vcvth_f16_s64_1.c | 25 + .../aarch64/advsimd-intrinsics/vcvth_f16_u16_1.c | 25 + .../aarch64/advsimd-intrinsics/vcvth_f16_u32_1.c | 52 ++ .../aarch64/advsimd-intrinsics/vcvth_f16_u64_1.c | 25 + .../aarch64/advsimd-intrinsics/vcvth_n_f16_s16_1.c | 46 ++ .../aarch64/advsimd-intrinsics/vcvth_n_f16_s32_1.c | 99 +++ .../aarch64/advsimd-intrinsics/vcvth_n_f16_s64_1.c | 46 ++ .../aarch64/advsimd-intrinsics/vcvth_n_f16_u16_1.c | 46 ++ .../aarch64/advsimd-intrinsics/vcvth_n_f16_u32_1.c | 99 +++ .../aarch64/advsimd-intrinsics/vcvth_n_f16_u64_1.c | 46 ++ .../aarch64/advsimd-intrinsics/vcvth_n_s16_f16_1.c | 29 + .../aarch64/advsimd-intrinsics/vcvth_n_s32_f16_1.c | 100 +++ .../aarch64/advsimd-intrinsics/vcvth_n_s64_f16_1.c | 29 + .../aarch64/advsimd-intrinsics/vcvth_n_u16_f16_1.c | 29 + .../aarch64/advsimd-intrinsics/vcvth_n_u32_f16_1.c | 100 +++ .../aarch64/advsimd-intrinsics/vcvth_n_u64_f16_1.c | 29 + .../aarch64/advsimd-intrinsics/vcvth_s16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvth_s32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvth_s64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvth_u16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvth_u32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvth_u64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtm_1.c | 33 + .../aarch64/advsimd-intrinsics/vcvtmh_s16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtmh_s32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvtmh_s64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtmh_u16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtmh_u32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvtmh_u64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtnh_s16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtnh_s32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvtnh_s64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtnh_u16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtnh_u32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvtnh_u64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtp_1.c | 33 + .../aarch64/advsimd-intrinsics/vcvtph_s16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtph_s32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvtph_s64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtph_u16_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vcvtph_u32_f16_1.c | 53 ++ .../aarch64/advsimd-intrinsics/vcvtph_u64_f16_1.c | 23 + .../aarch64/advsimd-intrinsics/vdiv_f16_1.c | 86 ++ .../aarch64/advsimd-intrinsics/vdivh_f16_1.c | 42 + .../aarch64/advsimd-intrinsics/vdup-vmov.c | 75 ++ .../aarch64/advsimd-intrinsics/vdup_lane.c | 142 +++- .../aarch64/advsimd-intrinsics/vduph_lane.c | 137 ++++ .../gcc.target/aarch64/advsimd-intrinsics/vext.c | 30 + .../gcc.target/aarch64/advsimd-intrinsics/vfma.c | 46 +- .../aarch64/advsimd-intrinsics/vfmah_f16_1.c | 40 + 
.../aarch64/advsimd-intrinsics/vfmas_lane_f16_1.c | 908 +++++++++++++++++++++ .../aarch64/advsimd-intrinsics/vfmas_n_f16_1.c | 469 +++++++++++ .../aarch64/advsimd-intrinsics/vfmash_lane_f16_1.c | 143 ++++ .../gcc.target/aarch64/advsimd-intrinsics/vfms.c | 45 +- .../aarch64/advsimd-intrinsics/vfmsh_f16_1.c | 40 + .../gcc.target/aarch64/advsimd-intrinsics/vmax.c | 33 + .../aarch64/advsimd-intrinsics/vmaxh_f16_1.c | 34 + .../aarch64/advsimd-intrinsics/vmaxnm_1.c | 47 ++ .../aarch64/advsimd-intrinsics/vmaxnmh_f16_1.c | 42 + .../aarch64/advsimd-intrinsics/vmaxnmv_f16_1.c | 131 +++ .../aarch64/advsimd-intrinsics/vmaxv_f16_1.c | 131 +++ .../gcc.target/aarch64/advsimd-intrinsics/vmin.c | 37 + .../aarch64/advsimd-intrinsics/vminh_f16_1.c | 34 + .../aarch64/advsimd-intrinsics/vminnm_1.c | 51 ++ .../aarch64/advsimd-intrinsics/vminnmh_f16_1.c | 42 + .../aarch64/advsimd-intrinsics/vminnmv_f16_1.c | 131 +++ .../aarch64/advsimd-intrinsics/vminv_f16_1.c | 131 +++ .../gcc.target/aarch64/advsimd-intrinsics/vmul.c | 35 + .../aarch64/advsimd-intrinsics/vmul_lane.c | 37 + .../aarch64/advsimd-intrinsics/vmul_lane_f16_1.c | 454 +++++++++++ .../gcc.target/aarch64/advsimd-intrinsics/vmul_n.c | 32 + .../aarch64/advsimd-intrinsics/vmulh_f16_1.c | 42 + .../aarch64/advsimd-intrinsics/vmulh_lane_f16_1.c | 90 ++ .../aarch64/advsimd-intrinsics/vmulx_f16_1.c | 84 ++ .../aarch64/advsimd-intrinsics/vmulx_lane_f16_1.c | 452 ++++++++++ .../aarch64/advsimd-intrinsics/vmulx_n_f16_1.c | 177 ++++ .../aarch64/advsimd-intrinsics/vmulxh_f16_1.c | 50 ++ .../aarch64/advsimd-intrinsics/vmulxh_lane_f16_1.c | 91 +++ .../gcc.target/aarch64/advsimd-intrinsics/vneg.c | 29 + .../aarch64/advsimd-intrinsics/vnegh_f16_1.c | 39 + .../aarch64/advsimd-intrinsics/vpXXX.inc | 15 + .../gcc.target/aarch64/advsimd-intrinsics/vpadd.c | 3 + .../gcc.target/aarch64/advsimd-intrinsics/vpmax.c | 3 + .../gcc.target/aarch64/advsimd-intrinsics/vpmin.c | 3 + .../aarch64/advsimd-intrinsics/vpminmaxnm_f16_1.c | 114 +++ .../gcc.target/aarch64/advsimd-intrinsics/vrecpe.c | 125 +++ .../aarch64/advsimd-intrinsics/vrecpeh_f16_1.c | 42 + .../gcc.target/aarch64/advsimd-intrinsics/vrecps.c | 98 +++ .../aarch64/advsimd-intrinsics/vrecpsh_f16_1.c | 50 ++ .../aarch64/advsimd-intrinsics/vrecpxh_f16_1.c | 32 + .../gcc.target/aarch64/advsimd-intrinsics/vrev.c | 20 + .../gcc.target/aarch64/advsimd-intrinsics/vrnd.c | 8 + .../aarch64/advsimd-intrinsics/vrndX.inc | 20 + .../gcc.target/aarch64/advsimd-intrinsics/vrnda.c | 9 + .../aarch64/advsimd-intrinsics/vrndah_f16_1.c | 40 + .../aarch64/advsimd-intrinsics/vrndh_f16_1.c | 40 + .../aarch64/advsimd-intrinsics/vrndi_f16_1.c | 71 ++ .../aarch64/advsimd-intrinsics/vrndih_f16_1.c | 40 + .../gcc.target/aarch64/advsimd-intrinsics/vrndm.c | 9 + .../aarch64/advsimd-intrinsics/vrndmh_f16_1.c | 40 + .../gcc.target/aarch64/advsimd-intrinsics/vrndn.c | 9 + .../aarch64/advsimd-intrinsics/vrndnh_f16_1.c | 40 + .../gcc.target/aarch64/advsimd-intrinsics/vrndp.c | 8 + .../aarch64/advsimd-intrinsics/vrndph_f16_1.c | 40 + .../gcc.target/aarch64/advsimd-intrinsics/vrndx.c | 8 + .../aarch64/advsimd-intrinsics/vrndxh_f16_1.c | 40 + .../aarch64/advsimd-intrinsics/vrsqrte.c | 91 +++ .../aarch64/advsimd-intrinsics/vrsqrteh_f16_1.c | 30 + .../aarch64/advsimd-intrinsics/vrsqrts.c | 97 +++ .../aarch64/advsimd-intrinsics/vrsqrtsh_f16_1.c | 50 ++ .../aarch64/advsimd-intrinsics/vshuffle.inc | 42 +- .../aarch64/advsimd-intrinsics/vsqrt_f16_1.c | 72 ++ .../aarch64/advsimd-intrinsics/vsqrth_f16_1.c | 40 + .../gcc.target/aarch64/advsimd-intrinsics/vsub.c | 31 + 
.../aarch64/advsimd-intrinsics/vsubh_f16_1.c | 42 + .../gcc.target/aarch64/advsimd-intrinsics/vtrn.c | 20 + .../aarch64/advsimd-intrinsics/vtrn_half.c | 263 ++++++ .../gcc.target/aarch64/advsimd-intrinsics/vuzp.c | 20 + .../aarch64/advsimd-intrinsics/vuzp_half.c | 259 ++++++ .../gcc.target/aarch64/advsimd-intrinsics/vzip.c | 20 + .../aarch64/advsimd-intrinsics/vzip_half.c | 263 ++++++ .../gcc.target/arm/armv8_2-fp16-arith-1.c | 105 +++ gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c | 101 +++ gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c | 165 ++++ gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-1.c | 490 +++++++++++ .../gcc.target/arm/armv8_2-fp16-scalar-1.c | 203 +++++ .../gcc.target/arm/armv8_2-fp16-scalar-2.c | 71 ++ gcc/testsuite/gcc.target/arm/attr-fp16-arith-1.c | 58 ++ gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c | 6 +- gcc/testsuite/gcc.target/arm/fp16-compile-alt-1.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-10.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-11.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-12.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-2.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-4.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-5.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-6.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-7.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-8.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-alt-9.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-none-1.c | 1 + gcc/testsuite/gcc.target/arm/fp16-compile-none-2.c | 1 + gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c | 1 + gcc/testsuite/gcc.target/arm/short-vfp-1.c | 45 + gcc/testsuite/lib/target-supports.exp | 227 ++++++ 230 files changed, 15631 insertions(+), 300 deletions(-) create mode 100644 gcc/config/arm/arm_fp16.h create mode 100644 gcc/config/arm/arm_vfp_builtins.def create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_float.inc create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_scalar_op.inc create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_zero_op.inc create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ternary_scalar_op.inc create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_scalar_op.inc create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabsh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcageh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagth_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcaleh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalth_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqz_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqzh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgeh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgez_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgezh_f16_1.c create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgth_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtz_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtzh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcleh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclez_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclezh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclth_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltz_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltzh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtX.inc create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvta_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u32_f16_1.c create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtm_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtp_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u16_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u32_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u64_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdiv_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdivh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmah_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_lane_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_n_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmash_lane_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmsh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnm_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmv_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxv_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnm_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmv_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminv_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane_f16_1.c create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_lane_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_lane_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_n_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_lane_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vnegh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpminmaxnm_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpeh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpsh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpxh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndah_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndi_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndih_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndmh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndnh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndph_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndxh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrteh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrtsh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrt_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrth_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubh_f16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c create mode 100644 gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c create mode 100644 gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c create mode 100644 gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c create mode 100644 gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-1.c create mode 100644 gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-1.c create mode 100644 gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-2.c create mode 100644 gcc/testsuite/gcc.target/arm/attr-fp16-arith-1.c create mode 100644 gcc/testsuite/gcc.target/arm/short-vfp-1.c diff --git a/gcc/config.gcc b/gcc/config.gcc index 00d010e..7252555 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -327,7 +327,7 @@ arc*-*-*) arm*-*-*) cpu_type=arm extra_objs="arm-builtins.o aarch-common.o" - extra_headers="mmintrin.h arm_neon.h arm_acle.h" + extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h" target_type_format_char='%' c_target_objs="arm-c.o" cxx_target_objs="arm-c.o" diff --git a/gcc/config/arm/arm-arches.def b/gcc/config/arm/arm-arches.def index be46521..4b196a7 100644 --- a/gcc/config/arm/arm-arches.def +++ b/gcc/config/arm/arm-arches.def @@ -58,10 +58,17 @@ 
ARM_ARCH("armv7e-m", cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_F ARM_ARCH("armv8-a", cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH8A)) ARM_ARCH("armv8-a+crc",cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A)) ARM_ARCH("armv8.1-a", cortexa53, 8A, - ARM_FSET_MAKE (FL_CO_PROC | FL_FOR_ARCH8A, FL2_FOR_ARCH8_1A)) + ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A, + FL2_FOR_ARCH8_1A)) ARM_ARCH("armv8.1-a+crc",cortexa53, 8A, ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A, FL2_FOR_ARCH8_1A)) +ARM_ARCH ("armv8.2-a", cortexa53, 8A, + ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A, + FL2_FOR_ARCH8_2A)) +ARM_ARCH ("armv8.2-a+fp16", cortexa53, 8A, + ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A, + FL2_FOR_ARCH8_2A | FL2_FP16INST)) ARM_ARCH("armv8-m.base", cortexm0, 8M_BASE, ARM_FSET_MAKE_CPU1 ( FL_FOR_ARCH8M_BASE)) ARM_ARCH("armv8-m.main", cortexm7, 8M_MAIN, @@ -70,4 +77,3 @@ ARM_ARCH("armv8-m.main+dsp", cortexm7, 8M_MAIN, ARM_FSET_MAKE_CPU1(FL_CO_PROC | FL_ARCH7EM | FL_FOR_ARCH8M_MAIN)) ARM_ARCH("iwmmxt", iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT)) ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)) - diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 68b2839..70bcc07 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -190,6 +190,8 @@ arm_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define ti_UP TImode #define ei_UP EImode #define oi_UP OImode +#define hf_UP HFmode +#define si_UP SImode #define UP(X) X##_UP @@ -239,12 +241,22 @@ typedef struct { VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \ VAR1 (T, N, L) -/* The NEON builtin data can be found in arm_neon_builtins.def. - The mode entries in the following table correspond to the "key" type of the - instruction variant, i.e. equivalent to that which would be specified after - the assembler mnemonic, which usually refers to the last vector operand. - The modes listed per instruction should be the same as those defined for - that instruction's pattern in neon.md. */ +/* The NEON builtin data can be found in arm_neon_builtins.def and + arm_vfp_builtins.def. The entries in arm_neon_builtins.def require + TARGET_NEON to be true. The entries in arm_vfp_builtins.def require + TARGET_VFP to be true. The feature tests are checked when the builtins are + expanded. + + The mode entries in the following table correspond to + the "key" type of the instruction variant, i.e. equivalent to that which + would be specified after the assembler mnemonic, which usually refers to the + last vector operand. The modes listed per instruction should be the same as + those defined for that instruction's pattern in neon.md. 
*/ + +static neon_builtin_datum vfp_builtin_data[] = +{ +#include "arm_vfp_builtins.def" +}; static neon_builtin_datum neon_builtin_data[] = { @@ -534,6 +546,10 @@ enum arm_builtins #undef CRYPTO2 #undef CRYPTO3 + ARM_BUILTIN_VFP_BASE, + +#include "arm_vfp_builtins.def" + ARM_BUILTIN_NEON_BASE, ARM_BUILTIN_NEON_LANE_CHECK = ARM_BUILTIN_NEON_BASE, @@ -542,8 +558,11 @@ enum arm_builtins ARM_BUILTIN_MAX }; +#define ARM_BUILTIN_VFP_PATTERN_START \ + (ARM_BUILTIN_VFP_BASE + 1) + #define ARM_BUILTIN_NEON_PATTERN_START \ - (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) + (ARM_BUILTIN_NEON_BASE + 1) #undef CF #undef VAR1 @@ -895,6 +914,110 @@ arm_init_simd_builtin_scalar_types (void) "__builtin_neon_uti"); } +/* Set up a NEON builtin. */ + +static void +arm_init_neon_builtin (unsigned int fcode, + neon_builtin_datum *d) +{ + bool print_type_signature_p = false; + char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; + char namebuf[60]; + tree ftype = NULL; + tree fndecl = NULL; + + d->fcode = fcode; + + /* We must track two variables here. op_num is + the operand number as in the RTL pattern. This is + required to access the mode (e.g. V4SF mode) of the + argument, from which the base type can be derived. + arg_num is an index in to the qualifiers data, which + gives qualifiers to the type (e.g. const unsigned). + The reason these two variables may differ by one is the + void return type. While all return types take the 0th entry + in the qualifiers array, there is no operand for them in the + RTL pattern. */ + int op_num = insn_data[d->code].n_operands - 1; + int arg_num = d->qualifiers[0] & qualifier_void + ? op_num + 1 + : op_num; + tree return_type = void_type_node, args = void_list_node; + tree eltype; + + /* Build a function type directly from the insn_data for this + builtin. The build_function_type () function takes care of + removing duplicates for us. */ + for (; op_num >= 0; arg_num--, op_num--) + { + machine_mode op_mode = insn_data[d->code].operand[op_num].mode; + enum arm_type_qualifiers qualifiers = d->qualifiers[arg_num]; + + if (qualifiers & qualifier_unsigned) + { + type_signature[arg_num] = 'u'; + print_type_signature_p = true; + } + else if (qualifiers & qualifier_poly) + { + type_signature[arg_num] = 'p'; + print_type_signature_p = true; + } + else + type_signature[arg_num] = 's'; + + /* Skip an internal operand for vget_{low, high}. */ + if (qualifiers & qualifier_internal) + continue; + + /* Some builtins have different user-facing types + for certain arguments, encoded in d->mode. */ + if (qualifiers & qualifier_map_mode) + op_mode = d->mode; + + /* For pointers, we want a pointer to the basic type + of the vector. */ + if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) + op_mode = GET_MODE_INNER (op_mode); + + eltype = arm_simd_builtin_type + (op_mode, + (qualifiers & qualifier_unsigned) != 0, + (qualifiers & qualifier_poly) != 0); + gcc_assert (eltype != NULL); + + /* Add qualifiers. */ + if (qualifiers & qualifier_const) + eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); + + if (qualifiers & qualifier_pointer) + eltype = build_pointer_type (eltype); + + /* If we have reached arg_num == 0, we are at a non-void + return type. Otherwise, we are still processing + arguments. 
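+ For example, a BINOP builtin such as vaddv4hf has three operands in its + RTL pattern (the result and two sources), so op_num and arg_num both start + at 2; a builtin with a void return type instead starts with arg_num == + op_num + 1, because the void return still occupies entry 0 of the + qualifiers array.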
*/ + if (arg_num == 0) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } + + ftype = build_function_type (return_type, args); + + gcc_assert (ftype != NULL); + + if (print_type_signature_p) + snprintf (namebuf, sizeof (namebuf), "__builtin_neon_%s_%s", + d->name, type_signature); + else + snprintf (namebuf, sizeof (namebuf), "__builtin_neon_%s", + d->name); + + fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, + NULL, NULL_TREE); + arm_builtin_decls[fcode] = fndecl; +} + /* Set up all the NEON builtins, even builtins for instructions that are not in the current target ISA to allow the user to compile particular modules with different target specific options that differ from the command line @@ -924,103 +1047,22 @@ arm_init_neon_builtins (void) for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++, fcode++) { - bool print_type_signature_p = false; - char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; neon_builtin_datum *d = &neon_builtin_data[i]; - char namebuf[60]; - tree ftype = NULL; - tree fndecl = NULL; - - d->fcode = fcode; - - /* We must track two variables here. op_num is - the operand number as in the RTL pattern. This is - required to access the mode (e.g. V4SF mode) of the - argument, from which the base type can be derived. - arg_num is an index in to the qualifiers data, which - gives qualifiers to the type (e.g. const unsigned). - The reason these two variables may differ by one is the - void return type. While all return types take the 0th entry - in the qualifiers array, there is no operand for them in the - RTL pattern. */ - int op_num = insn_data[d->code].n_operands - 1; - int arg_num = d->qualifiers[0] & qualifier_void - ? op_num + 1 - : op_num; - tree return_type = void_type_node, args = void_list_node; - tree eltype; - - /* Build a function type directly from the insn_data for this - builtin. The build_function_type () function takes care of - removing duplicates for us. */ - for (; op_num >= 0; arg_num--, op_num--) - { - machine_mode op_mode = insn_data[d->code].operand[op_num].mode; - enum arm_type_qualifiers qualifiers = d->qualifiers[arg_num]; - - if (qualifiers & qualifier_unsigned) - { - type_signature[arg_num] = 'u'; - print_type_signature_p = true; - } - else if (qualifiers & qualifier_poly) - { - type_signature[arg_num] = 'p'; - print_type_signature_p = true; - } - else - type_signature[arg_num] = 's'; - - /* Skip an internal operand for vget_{low, high}. */ - if (qualifiers & qualifier_internal) - continue; - - /* Some builtins have different user-facing types - for certain arguments, encoded in d->mode. */ - if (qualifiers & qualifier_map_mode) - op_mode = d->mode; - - /* For pointers, we want a pointer to the basic type - of the vector. */ - if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) - op_mode = GET_MODE_INNER (op_mode); - - eltype = arm_simd_builtin_type - (op_mode, - (qualifiers & qualifier_unsigned) != 0, - (qualifiers & qualifier_poly) != 0); - gcc_assert (eltype != NULL); - - /* Add qualifiers. */ - if (qualifiers & qualifier_const) - eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); - - if (qualifiers & qualifier_pointer) - eltype = build_pointer_type (eltype); - - /* If we have reached arg_num == 0, we are at a non-void - return type. Otherwise, we are still processing - arguments. 
*/ - if (arg_num == 0) - return_type = eltype; - else - args = tree_cons (NULL_TREE, eltype, args); - } - - ftype = build_function_type (return_type, args); + arm_init_neon_builtin (fcode, d); + } +} - gcc_assert (ftype != NULL); +/* Set up all the scalar floating point builtins. */ - if (print_type_signature_p) - snprintf (namebuf, sizeof (namebuf), "__builtin_neon_%s_%s", - d->name, type_signature); - else - snprintf (namebuf, sizeof (namebuf), "__builtin_neon_%s", - d->name); +static void +arm_init_vfp_builtins (void) +{ + unsigned int i, fcode = ARM_BUILTIN_VFP_PATTERN_START; - fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, - NULL, NULL_TREE); - arm_builtin_decls[fcode] = fndecl; + for (i = 0; i < ARRAY_SIZE (vfp_builtin_data); i++, fcode++) + { + neon_builtin_datum *d = &vfp_builtin_data[i]; + arm_init_neon_builtin (fcode, d); } } @@ -1768,7 +1810,7 @@ arm_init_builtins (void) if (TARGET_HARD_FLOAT) { arm_init_neon_builtins (); - + arm_init_vfp_builtins (); arm_init_crypto_builtins (); } @@ -2211,40 +2253,16 @@ constant_arg: return target; } -/* Expand a Neon builtin, i.e. those registered only if TARGET_NEON holds. - Most of these are "special" because they don't have symbolic - constants defined per-instruction or per instruction-variant. Instead, the - required info is looked up in the table neon_builtin_data. */ +/* Expand a neon builtin. This is also used for vfp builtins, which behave in + the same way. These builtins are "special" because they don't have symbolic + constants defined per-instruction or per instruction-variant. Instead, the + required info is looked up in the NEON_BUILTIN_DATA record that is passed + into the function. */ + static rtx -arm_expand_neon_builtin (int fcode, tree exp, rtx target) +arm_expand_neon_builtin_1 (int fcode, tree exp, rtx target, + neon_builtin_datum *d) { - /* Check in the context of the function making the call whether the - builtin is supported. */ - if (! TARGET_NEON) - { - fatal_error (input_location, - "You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use these intrinsics."); - return const0_rtx; - } - - if (fcode == ARM_BUILTIN_NEON_LANE_CHECK) - { - /* Builtin is only to check bounds of the lane passed to some intrinsics - that are implemented with gcc vector extensions in arm_neon.h. */ - - tree nlanes = CALL_EXPR_ARG (exp, 0); - gcc_assert (TREE_CODE (nlanes) == INTEGER_CST); - rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 1)); - if (CONST_INT_P (lane_idx)) - neon_lane_bounds (lane_idx, 0, TREE_INT_CST_LOW (nlanes), exp); - else - error ("%Klane index must be a constant immediate", exp); - /* Don't generate any RTL. */ - return const0_rtx; - } - - neon_builtin_datum *d = - &neon_builtin_data[fcode - ARM_BUILTIN_NEON_PATTERN_START]; enum insn_code icode = d->code; builtin_arg args[SIMD_MAX_BUILTIN_ARGS + 1]; int num_args = insn_data[d->code].n_operands; @@ -2260,8 +2278,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) /* We have four arrays of data, each indexed in a different fashion. qualifiers - element 0 always describes the function return type. operands - element 0 is either the operand for return value (if - the function has a non-void return type) or the operand for the - first argument. + the function has a non-void return type) or the operand for the + first argument. expr_args - element 0 always holds the first argument. args - element 0 is always used for the return type. 
*/ int qualifiers_k = k; @@ -2283,7 +2301,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) bool op_const_int_p = (CONST_INT_P (arg) && (*insn_data[icode].operand[operands_k].predicate) - (arg, insn_data[icode].operand[operands_k].mode)); + (arg, insn_data[icode].operand[operands_k].mode)); args[k] = op_const_int_p ? NEON_ARG_CONSTANT : NEON_ARG_COPY_TO_REG; } else if (d->qualifiers[qualifiers_k] & qualifier_pointer) @@ -2296,8 +2314,68 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) /* The interface to arm_expand_neon_args expects a 0 if the function is void, and a 1 if it is not. */ return arm_expand_neon_args - (target, d->mode, fcode, icode, !is_void, exp, - &args[1]); + (target, d->mode, fcode, icode, !is_void, exp, + &args[1]); +} + +/* Expand a Neon builtin, i.e. those registered only if TARGET_NEON holds. + Most of these are "special" because they don't have symbolic + constants defined per-instruction or per instruction-variant. Instead, the + required info is looked up in the table neon_builtin_data. */ + +static rtx +arm_expand_neon_builtin (int fcode, tree exp, rtx target) +{ + if (fcode >= ARM_BUILTIN_NEON_BASE && ! TARGET_NEON) + { + fatal_error (input_location, + "You must enable NEON instructions" + " (e.g. -mfloat-abi=softfp -mfpu=neon)" + " to use these intrinsics."); + return const0_rtx; + } + + if (fcode == ARM_BUILTIN_NEON_LANE_CHECK) + { + /* Builtin is only to check bounds of the lane passed to some intrinsics + that are implemented with gcc vector extensions in arm_neon.h. */ + + tree nlanes = CALL_EXPR_ARG (exp, 0); + gcc_assert (TREE_CODE (nlanes) == INTEGER_CST); + rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 1)); + if (CONST_INT_P (lane_idx)) + neon_lane_bounds (lane_idx, 0, TREE_INT_CST_LOW (nlanes), exp); + else + error ("%Klane index must be a constant immediate", exp); + /* Don't generate any RTL. */ + return const0_rtx; + } + + neon_builtin_datum *d + = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_PATTERN_START]; + + return arm_expand_neon_builtin_1 (fcode, exp, target, d); +} + +/* Expand a VFP builtin, if TARGET_VFP is true. These builtins are treated like + neon builtins except that the data is looked up in table + VFP_BUILTIN_DATA. */ + +static rtx +arm_expand_vfp_builtin (int fcode, tree exp, rtx target) +{ + if (fcode >= ARM_BUILTIN_VFP_BASE && ! TARGET_VFP) + { + fatal_error (input_location, + "You must enable VFP instructions" + " to use these intrinsics."); + return const0_rtx; + } + + neon_builtin_datum *d + = &vfp_builtin_data[fcode - ARM_BUILTIN_VFP_PATTERN_START]; + + return arm_expand_neon_builtin_1 (fcode, exp, target, d); } /* Expand an expression EXP that calls a built-in function, @@ -2337,13 +2415,18 @@ arm_expand_builtin (tree exp, if (fcode >= ARM_BUILTIN_NEON_BASE) return arm_expand_neon_builtin (fcode, exp, target); + if (fcode >= ARM_BUILTIN_VFP_BASE) + return arm_expand_vfp_builtin (fcode, exp, target); + /* Check in the context of the function making the call whether the builtin is supported. */ if (fcode >= ARM_BUILTIN_CRYPTO_BASE && (!TARGET_CRYPTO || !TARGET_HARD_FLOAT)) { fatal_error (input_location, - "You must enable crypto intrinsics (e.g. include -mfloat-abi=softfp -mfpu=crypto-neon...) to use these intrinsics."); + "You must enable crypto instructions" + " (e.g. 
include -mfloat-abi=softfp -mfpu=crypto-neon...)" + " to use these intrinsics."); return const0_rtx; } diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c index b98470f..7283700 100644 --- a/gcc/config/arm/arm-c.c +++ b/gcc/config/arm/arm-c.c @@ -142,6 +142,11 @@ arm_cpu_builtins (struct cpp_reader* pfile) def_or_undef_macro (pfile, "__ARM_FP16_ARGS", arm_fp16_format != ARM_FP16_FORMAT_NONE); + def_or_undef_macro (pfile, "__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", + TARGET_VFP_FP16INST); + def_or_undef_macro (pfile, "__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", + TARGET_NEON_FP16INST); + def_or_undef_macro (pfile, "__ARM_FEATURE_FMA", TARGET_FMA); def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON); def_or_undef_macro (pfile, "__ARM_NEON", TARGET_NEON); diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 6678650..eefe0ba 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -393,6 +393,9 @@ extern bool arm_is_constant_pool_ref (rtx); #define FL_ARCH6KZ (1 << 31) /* ARMv6KZ architecture. */ #define FL2_ARCH8_1 (1 << 0) /* Architecture 8.1. */ +#define FL2_ARCH8_2 (1 << 1) /* Architecture 8.2. */ +#define FL2_FP16INST (1 << 2) /* FP16 Instructions for ARMv8.2 and + later. */ /* Flags that only effect tuning, not available instructions. */ #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ @@ -424,6 +427,7 @@ extern bool arm_is_constant_pool_ref (rtx); #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8) #define FL2_FOR_ARCH8_1A FL2_ARCH8_1 +#define FL2_FOR_ARCH8_2A (FL2_FOR_ARCH8_1A | FL2_ARCH8_2) #define FL_FOR_ARCH8M_BASE (FL_FOR_ARCH6M | FL_ARCH8 | FL_THUMB_DIV) #define FL_FOR_ARCH8M_MAIN (FL_FOR_ARCH7M | FL_ARCH8) diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index b92cb17..ee9e3bb 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -437,19 +437,25 @@ EnumValue Enum(arm_arch) String(armv8.1-a+crc) Value(28) EnumValue -Enum(arm_arch) String(armv8-m.base) Value(29) +Enum(arm_arch) String(armv8.2-a) Value(29) EnumValue -Enum(arm_arch) String(armv8-m.main) Value(30) +Enum(arm_arch) String(armv8.2-a+fp16) Value(30) EnumValue -Enum(arm_arch) String(armv8-m.main+dsp) Value(31) +Enum(arm_arch) String(armv8-m.base) Value(31) EnumValue -Enum(arm_arch) String(iwmmxt) Value(32) +Enum(arm_arch) String(armv8-m.main) Value(32) EnumValue -Enum(arm_arch) String(iwmmxt2) Value(33) +Enum(arm_arch) String(armv8-m.main+dsp) Value(33) + +EnumValue +Enum(arm_arch) String(iwmmxt) Value(34) + +EnumValue +Enum(arm_arch) String(iwmmxt2) Value(35) Enum Name(arm_fpu) Type(int) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 1bdcdf7..30e9592 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -814,6 +814,13 @@ int arm_arch8 = 0; /* Nonzero if this chip supports the ARMv8.1 extensions. */ int arm_arch8_1 = 0; +/* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */ +int arm_arch8_2 = 0; + +/* Nonzero if this chip supports the FP16 instructions extension of ARM + Architecture 8.2. */ +int arm_fp16_inst = 0; + /* Nonzero if this chip can benefit from load scheduling. 
*/ int arm_ld_sched = 0; @@ -3217,6 +3224,7 @@ arm_option_override (void) arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM); arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8); arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1); + arm_arch8_2 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_2); arm_arch_thumb1 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB); arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2); arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE); @@ -3233,6 +3241,13 @@ arm_option_override (void) arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32); arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL); + arm_fp16_inst = ARM_FSET_HAS_CPU2 (insn_flags, FL2_FP16INST); + if (arm_fp16_inst) + { + if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + error ("selected fp16 options are incompatible."); + arm_fp16_format = ARM_FP16_FORMAT_IEEE; + } /* V5 code we generate is completely interworking capable, so we turn off TARGET_INTERWORK here to avoid many tests later on. */ @@ -13221,7 +13236,7 @@ coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb) { if (mode == HFmode) { - if (!TARGET_NEON_FP16) + if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST) return GENERAL_REGS; if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true)) return NO_REGS; @@ -18672,6 +18687,8 @@ output_move_vfp (rtx *operands) rtx reg, mem, addr, ops[2]; int load = REG_P (operands[0]); int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + int sp = (!TARGET_VFP_FP16INST + || GET_MODE_SIZE (GET_MODE (operands[0])) == 4); int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT; const char *templ; char buff[50]; @@ -18687,6 +18704,7 @@ output_move_vfp (rtx *operands) gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT && TARGET_VFP) || mode == SFmode || mode == DFmode + || mode == HImode || mode == SImode || mode == DImode || (TARGET_NEON && VALID_NEON_DREG_MODE (mode))); @@ -18717,7 +18735,7 @@ output_move_vfp (rtx *operands) sprintf (buff, templ, load ? "ld" : "st", - dp ? "64" : "32", + dp ? "64" : sp ? "32" : "16", dp ? "P" : "", integer_p ? "\t%@ int" : ""); output_asm_insn (buff, ops); @@ -23480,6 +23498,10 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode) if (mode == HFmode) return VFP_REGNO_OK_FOR_SINGLE (regno); + /* VFP registers can hold HImode values. 
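+ Allowing this lets 16-bit values be allocated directly to single-precision + VFP registers, so the 16-bit loads, stores and moves enabled elsewhere in + this patch need not bounce through the core registers.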
*/ + if (mode == HImode) + return VFP_REGNO_OK_FOR_SINGLE (regno); + if (TARGET_NEON) return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) || (VALID_NEON_QREG_MODE (mode) @@ -28529,6 +28551,8 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vuzpv8qi_internal; break; case V8HImode: gen = gen_neon_vuzpv8hi_internal; break; case V4HImode: gen = gen_neon_vuzpv4hi_internal; break; + case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break; + case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break; case V4SImode: gen = gen_neon_vuzpv4si_internal; break; case V2SImode: gen = gen_neon_vuzpv2si_internal; break; case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break; @@ -28602,6 +28626,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vzipv8qi_internal; break; case V8HImode: gen = gen_neon_vzipv8hi_internal; break; case V4HImode: gen = gen_neon_vzipv4hi_internal; break; + case V8HFmode: gen = gen_neon_vzipv8hf_internal; break; + case V4HFmode: gen = gen_neon_vzipv4hf_internal; break; case V4SImode: gen = gen_neon_vzipv4si_internal; break; case V2SImode: gen = gen_neon_vzipv2si_internal; break; case V2SFmode: gen = gen_neon_vzipv2sf_internal; break; @@ -28654,6 +28680,8 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vrev32v8qi; break; case V8HImode: gen = gen_neon_vrev64v8hi; break; case V4HImode: gen = gen_neon_vrev64v4hi; break; + case V8HFmode: gen = gen_neon_vrev64v8hf; break; + case V4HFmode: gen = gen_neon_vrev64v4hf; break; default: return false; } @@ -28737,6 +28765,8 @@ arm_evpc_neon_vtrn (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vtrnv8qi_internal; break; case V8HImode: gen = gen_neon_vtrnv8hi_internal; break; case V4HImode: gen = gen_neon_vtrnv4hi_internal; break; + case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break; + case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break; case V4SImode: gen = gen_neon_vtrnv4si_internal; break; case V2SImode: gen = gen_neon_vtrnv2si_internal; break; case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break; @@ -28812,6 +28842,8 @@ arm_evpc_neon_vext (struct expand_vec_perm_d *d) case V8HImode: gen = gen_neon_vextv8hi; break; case V2SImode: gen = gen_neon_vextv2si; break; case V4SImode: gen = gen_neon_vextv4si; break; + case V4HFmode: gen = gen_neon_vextv4hf; break; + case V8HFmode: gen = gen_neon_vextv8hf; break; case V2SFmode: gen = gen_neon_vextv2sf; break; case V4SFmode: gen = gen_neon_vextv4sf; break; case V2DImode: gen = gen_neon_vextv2di; break; @@ -29337,7 +29369,7 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) { enum rtx_code code = GET_CODE (*comparison); int code_int; - machine_mode mode = (GET_MODE (*op1) == VOIDmode) + machine_mode mode = (GET_MODE (*op1) == VOIDmode) ? GET_MODE (*op2) : GET_MODE (*op1); gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode); @@ -29365,6 +29397,14 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) *op2 = force_reg (mode, *op2); return true; + case HFmode: + if (!TARGET_VFP_FP16INST) + break; + /* FP16 comparisons are done in SF mode. */ + mode = SFmode; + *op1 = convert_to_mode (mode, *op1, 1); + *op2 = convert_to_mode (mode, *op2, 1); + /* Fall through. 
*/ case SFmode: case DFmode: if (!arm_float_compare_operand (*op1, mode)) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index c7149d1..49825d3 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -217,6 +217,13 @@ extern void (*arm_lang_output_object_attributes_hook)(void); /* FPU supports ARMv8.1 Adv.SIMD extensions. */ #define TARGET_NEON_RDMA (TARGET_NEON && arm_arch8_1) +/* FPU supports the floating point FP16 instructions for ARMv8.2 and later. */ +#define TARGET_VFP_FP16INST \ + (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 && arm_fp16_inst) + +/* FPU supports the AdvSIMD FP16 instructions for ARMv8.2 and later. */ +#define TARGET_NEON_FP16INST (TARGET_VFP_FP16INST && TARGET_NEON_RDMA) + /* Q-bit is present. */ #define TARGET_ARM_QBIT \ (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7)) @@ -456,6 +463,13 @@ extern int arm_arch8; /* Nonzero if this chip supports the ARM Architecture 8.1 extensions. */ extern int arm_arch8_1; +/* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */ +extern int arm_arch8_2; + +/* Nonzero if this chip supports the FP16 instructions extension of ARM + Architecture 8.2. */ +extern int arm_fp16_inst; + /* Nonzero if this chip can benefit from load scheduling. */ extern int arm_ld_sched; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 318db75..e85bcd9 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -5189,7 +5189,7 @@ "" ) -/* DFmode -> HFmode conversions have to go through SFmode. */ +;; DFmode to HFmode conversions have to go through SFmode. (define_expand "truncdfhf2" [(set (match_operand:HF 0 "general_operand" "") (float_truncate:HF @@ -5696,7 +5696,7 @@ "" ) -/* HFmode -> DFmode conversions have to go through SFmode. */ +;; HFmode -> DFmode conversions have to go through SFmode. (define_expand "extendhfdf2" [(set (match_operand:DF 0 "general_operand" "") (float_extend:DF (match_operand:HF 1 "general_operand" "")))] @@ -6064,6 +6064,7 @@ str%?\\t%1, %0" [(set_attr "type" "mov_reg,mov_imm,mvn_imm,mov_imm,load1,store1") (set_attr "predicable" "yes") + (set_attr "arch" "*,*,*,v6t2,*,*") (set_attr "pool_range" "*,*,*,*,4096,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*")] ) @@ -6700,7 +6701,7 @@ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m,r") (match_operand:HI 1 "general_operand" "rIk,K,n,r,mi"))] "TARGET_ARM - && arm_arch4 + && arm_arch4 && !(TARGET_HARD_FLOAT && TARGET_VFP) && (register_operand (operands[0], HImode) || register_operand (operands[1], HImode))" "@ @@ -6726,7 +6727,7 @@ (define_insn "*movhi_bytes" [(set (match_operand:HI 0 "s_register_operand" "=r,r,r") (match_operand:HI 1 "arm_rhs_operand" "I,rk,K"))] - "TARGET_ARM" + "TARGET_ARM && !(TARGET_HARD_FLOAT && TARGET_VFP)" "@ mov%?\\t%0, %1\\t%@ movhi mov%?\\t%0, %1\\t%@ movhi @@ -6734,7 +6735,7 @@ [(set_attr "predicable" "yes") (set_attr "type" "mov_imm,mov_reg,mvn_imm")] ) - + ;; We use a DImode scratch because we may occasionally need an additional ;; temporary if the address isn't offsettable -- push_reload doesn't seem ;; to take any notice of the "o" constraints on reload_memory_operand operand. 
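The hunks that follow add an HFmode compare-and-store expander (cstorehf4), an HFmode conditional-move expander (movhfcc) and the matching *cmovhf insn, letting the compiler materialize half-precision comparisons and if-convert half-precision selects instead of branching. As a minimal sketch of source code they enable -- the function names are illustrative, and this assumes a toolchain built with this patch and options along the lines of -march=armv8.2-a+fp16 -mfloat-abi=hard:

  /* Comparisons on __fp16 are carried out in SFmode (see
     arm_validize_comparison above).  fp16_lt can go through the new
     cstorehf4 expander; fp16_min can be if-converted through movhfcc
     and the vsel<cond>.f16 alternative of *cmovhf.  */
  int
  fp16_lt (__fp16 a, __fp16 b)
  {
    return a < b;
  }

  __fp16
  fp16_min (__fp16 a, __fp16 b)
  {
    return (a < b) ? a : b;
  }

The exact code generated depends, of course, on the target and optimization options.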
@@ -7704,6 +7705,24 @@ DONE; }") +(define_expand "cstorehf4" + [(set (match_operand:SI 0 "s_register_operand") + (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:HF 2 "s_register_operand") + (match_operand:HF 3 "arm_float_compare_operand")]))] + "TARGET_VFP_FP16INST" + { + if (!arm_validize_comparison (&operands[1], + &operands[2], + &operands[3])) + FAIL; + + emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } +) + (define_expand "cstoresf4" [(set (match_operand:SI 0 "s_register_operand" "") (match_operator:SI 1 "expandable_comparison_operator" @@ -7756,9 +7775,31 @@ rtx ccreg; if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), - &XEXP (operands[1], 1))) + &XEXP (operands[1], 1))) FAIL; - + + code = GET_CODE (operands[1]); + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1), NULL_RTX); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_expand "movhfcc" + [(set (match_operand:HF 0 "s_register_operand") + (if_then_else:HF (match_operand 1 "arm_cond_move_operator") + (match_operand:HF 2 "s_register_operand") + (match_operand:HF 3 "s_register_operand")))] + "TARGET_VFP_FP16INST" + " + { + enum rtx_code code = GET_CODE (operands[1]); + rtx ccreg; + + if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), + &XEXP (operands[1], 1))) + FAIL; + code = GET_CODE (operands[1]); ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), XEXP (operands[1], 1), NULL_RTX); @@ -7777,7 +7818,7 @@ enum rtx_code code = GET_CODE (operands[1]); rtx ccreg; - if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), + if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), &XEXP (operands[1], 1))) FAIL; @@ -7842,6 +7883,37 @@ (set_attr "type" "fcsel")] ) +(define_insn "*cmovhf" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (if_then_else:HF (match_operator 1 "arm_vsel_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:HF 3 "s_register_operand" "t") + (match_operand:HF 4 "s_register_operand" "t")))] + "TARGET_VFP_FP16INST" + "* + { + enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]); + switch (code) + { + case ARM_GE: + case ARM_GT: + case ARM_EQ: + case ARM_VS: + return \"vsel%d1.f16\\t%0, %3, %4\"; + case ARM_LT: + case ARM_LE: + case ARM_NE: + case ARM_VC: + return \"vsel%D1.f16\\t%0, %4, %3\"; + default: + gcc_unreachable (); + } + return \"\"; + }" + [(set_attr "conds" "use") + (set_attr "type" "fcsel")] +) + (define_insn_and_split "*movsicc_insn" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") (if_then_else:SI diff --git a/gcc/config/arm/arm_fp16.h b/gcc/config/arm/arm_fp16.h new file mode 100644 index 0000000..c72d8c4 --- /dev/null +++ b/gcc/config/arm/arm_fp16.h @@ -0,0 +1,255 @@ +/* ARM FP16 intrinsics include file. + + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _GCC_ARM_FP16_H +#define _GCC_ARM_FP16_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* Intrinsics for FP16 instructions. */ +#pragma GCC push_options +#pragma GCC target ("fpu=fp-armv8") + +#if defined (__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) + +typedef __fp16 float16_t; + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vabsh_f16 (float16_t __a) +{ + return __builtin_neon_vabshf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vaddh_f16 (float16_t __a, float16_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtah_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtahssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtah_u32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtahusi (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_s32 (int32_t __a) +{ + return __builtin_neon_vcvthshf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_u32 (uint32_t __a) +{ + return __builtin_neon_vcvthuhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_s32 (int32_t __a, const int __b) +{ + return __builtin_neon_vcvths_nhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_u32 (uint32_t __a, const int __b) +{ + return __builtin_neon_vcvthu_nhf ((int32_t)__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvth_n_s32_f16 (float16_t __a, const int __b) +{ + return __builtin_neon_vcvths_nsi (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvth_n_u32_f16 (float16_t __a, const int __b) +{ + return (uint32_t)__builtin_neon_vcvthu_nsi (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvth_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvthssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvth_u32_f16 (float16_t __a) +{ + return __builtin_neon_vcvthusi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtmh_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtmhssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtmh_u32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtmhusi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtnh_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtnhssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtnh_u32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtnhusi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtph_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtphssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtph_u32_f16 
(float16_t __a) +{ + return __builtin_neon_vcvtphusi (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vdivh_f16 (float16_t __a, float16_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vfmah_f16 (float16_t __a, float16_t __b, float16_t __c) +{ + return __builtin_neon_vfmahf (__a, __b, __c); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c) +{ + return __builtin_neon_vfmshf (__a, __b, __c); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxnmh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_neon_vmaxnmhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminnmh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_neon_vminnmhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmulh_f16 (float16_t __a, float16_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vnegh_f16 (float16_t __a) +{ + return - __a; +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndah_f16 (float16_t __a) +{ + return __builtin_neon_vrndahf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndh_f16 (float16_t __a) +{ + return __builtin_neon_vrndhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndih_f16 (float16_t __a) +{ + return __builtin_neon_vrndihf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndmh_f16 (float16_t __a) +{ + return __builtin_neon_vrndmhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndnh_f16 (float16_t __a) +{ + return __builtin_neon_vrndnhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndph_f16 (float16_t __a) +{ + return __builtin_neon_vrndphf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndxh_f16 (float16_t __a) +{ + return __builtin_neon_vrndxhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vsqrth_f16 (float16_t __a) +{ + return __builtin_neon_vsqrthf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vsubh_f16 (float16_t __a, float16_t __b) +{ + return __a - __b; +} + +#endif /* __ARM_FEATURE_FP16_SCALAR_ARITHMETIC */ +#pragma GCC pop_options + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 32ee06c..54bbc7d 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -38,6 +38,7 @@ extern "C" { #endif +#include #include typedef __simd64_int8_t int8x8_t; @@ -14842,6 +14843,855 @@ vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) #pragma GCC pop_options + /* Intrinsics for FP16 instructions. 
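+ These are guarded by __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so they are + visible only when the target has the ARMv8.2-A FP16 extension; each + intrinsic wraps a __builtin_neon_* builtin keyed on the V4HF or V8HF mode.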
*/ +#pragma GCC push_options +#pragma GCC target ("fpu=neon-fp-armv8") +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vabd_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vabdv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vabdq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vabdv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vabs_f16 (float16x4_t __a) +{ + return __builtin_neon_vabsv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vabsq_f16 (float16x8_t __a) +{ + return __builtin_neon_vabsv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vadd_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vaddv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vaddq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vaddv8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcage_f16 (float16x4_t __a, float16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcagev4hf (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcageq_f16 (float16x8_t __a, float16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcagev8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcagt_f16 (float16x4_t __a, float16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcagtv4hf (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcagtq_f16 (float16x8_t __a, float16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcagtv8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcale_f16 (float16x4_t __a, float16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcalev4hf (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcaleq_f16 (float16x8_t __a, float16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcalev8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcalt_f16 (float16x4_t __a, float16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcaltv4hf (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcaltq_f16 (float16x8_t __a, float16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcaltv8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_f16 (float16x4_t __a, float16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vceqv4hf (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_f16 (float16x8_t __a, float16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vceqv8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceqz_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vceqzv4hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqzq_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vceqzv8hf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_f16 (float16x4_t __a, float16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgev4hf 
(__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_f16 (float16x8_t __a, float16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgev8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgez_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vcgezv4hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgezq_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vcgezv8hf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_f16 (float16x4_t __a, float16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtv4hf (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_f16 (float16x8_t __a, float16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtv8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgtz_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vcgtzv4hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtzq_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vcgtzv8hf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_f16 (float16x4_t __a, float16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vclev4hf (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_f16 (float16x8_t __a, float16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vclev8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclez_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vclezv4hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclezq_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vclezv8hf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_f16 (float16x4_t __a, float16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcltv4hf (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_f16 (float16x8_t __a, float16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcltv8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcltz_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vcltzv4hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltzq_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vcltzv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_f16_s16 (int16x4_t __a) +{ + return (float16x4_t)__builtin_neon_vcvtsv4hi (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_f16_u16 (uint16x4_t __a) +{ + return (float16x4_t)__builtin_neon_vcvtuv4hi ((int16x4_t)__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcvt_s16_f16 (float16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vcvtsv4hf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcvt_u16_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vcvtuv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vcvtq_f16_s16 (int16x8_t __a) +{ + return (float16x8_t)__builtin_neon_vcvtsv8hi (__a); +} + 
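+/* As elsewhere in this file, the unsigned variants below cast their + arguments to the signed vector type: the underlying builtins are declared + in terms of the signed element types.  */ +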
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vcvtq_f16_u16 (uint16x8_t __a) +{ + return (float16x8_t)__builtin_neon_vcvtuv8hi ((int16x8_t)__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcvtq_s16_f16 (float16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vcvtsv8hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcvtq_u16_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vcvtuv8hf (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcvta_s16_f16 (float16x4_t __a) +{ + return __builtin_neon_vcvtasv4hf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcvta_u16_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vcvtauv4hf (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcvtaq_s16_f16 (float16x8_t __a) +{ + return __builtin_neon_vcvtasv8hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcvtaq_u16_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vcvtauv8hf (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcvtm_s16_f16 (float16x4_t __a) +{ + return __builtin_neon_vcvtmsv4hf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcvtm_u16_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vcvtmuv4hf (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcvtmq_s16_f16 (float16x8_t __a) +{ + return __builtin_neon_vcvtmsv8hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcvtmq_u16_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vcvtmuv8hf (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcvtn_s16_f16 (float16x4_t __a) +{ + return __builtin_neon_vcvtnsv4hf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcvtn_u16_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vcvtnuv4hf (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcvtnq_s16_f16 (float16x8_t __a) +{ + return __builtin_neon_vcvtnsv8hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcvtnq_u16_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vcvtnuv8hf (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcvtp_s16_f16 (float16x4_t __a) +{ + return __builtin_neon_vcvtpsv4hf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcvtp_u16_f16 (float16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vcvtpuv4hf (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcvtpq_s16_f16 (float16x8_t __a) +{ + return __builtin_neon_vcvtpsv8hf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcvtpq_u16_f16 (float16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vcvtpuv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_n_f16_s16 (int16x4_t __a, const int __b) +{ + return __builtin_neon_vcvts_nv4hi (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_n_f16_u16 (uint16x4_t __a, const int __b) +{ + return __builtin_neon_vcvtu_nv4hi 
((int16x4_t)__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vcvtq_n_f16_s16 (int16x8_t __a, const int __b) +{ + return __builtin_neon_vcvts_nv8hi (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vcvtq_n_f16_u16 (uint16x8_t __a, const int __b) +{ + return __builtin_neon_vcvtu_nv8hi ((int16x8_t)__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcvt_n_s16_f16 (float16x4_t __a, const int __b) +{ + return __builtin_neon_vcvts_nv4hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcvt_n_u16_f16 (float16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vcvtu_nv4hf (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcvtq_n_s16_f16 (float16x8_t __a, const int __b) +{ + return __builtin_neon_vcvts_nv8hf (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcvtq_n_u16_f16 (float16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vcvtu_nv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfma_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) +{ + return __builtin_neon_vfmav4hf (__a, __b, __c); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) +{ + return __builtin_neon_vfmav8hf (__a, __b, __c); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfms_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) +{ + return __builtin_neon_vfmsv4hf (__a, __b, __c); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) +{ + return __builtin_neon_vfmsv8hf (__a, __b, __c); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmax_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vmaxfv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmaxq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vmaxfv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmaxnm_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vmaxnmv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmaxnmq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vmaxnmv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmin_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vminfv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vminq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vminfv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vminnm_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vminnmv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vminnmq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vminnmv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmul_f16 (float16x4_t __a, float16x4_t __b) +{ + return 
__builtin_neon_vmulfv4hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __c) +{ + return __builtin_neon_vmul_lanev4hf (__a, __b, __c); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmul_n_f16 (float16x4_t __a, float16_t __b) +{ + return __builtin_neon_vmul_nv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vmulfv8hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __c) +{ + return __builtin_neon_vmul_lanev8hf (__a, __b, __c); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulq_n_f16 (float16x8_t __a, float16_t __b) +{ + return __builtin_neon_vmul_nv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vneg_f16 (float16x4_t __a) +{ + return __builtin_neon_vnegv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vnegq_f16 (float16x8_t __a) +{ + return __builtin_neon_vnegv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vpadd_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vpaddv4hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vpmax_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vpmaxfv4hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vpmin_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vpminfv4hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrecpe_f16 (float16x4_t __a) +{ + return __builtin_neon_vrecpev4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrecpeq_f16 (float16x8_t __a) +{ + return __builtin_neon_vrecpev8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrnd_f16 (float16x4_t __a) +{ + return __builtin_neon_vrndv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrndq_f16 (float16x8_t __a) +{ + return __builtin_neon_vrndv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrnda_f16 (float16x4_t __a) +{ + return __builtin_neon_vrndav4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrndaq_f16 (float16x8_t __a) +{ + return __builtin_neon_vrndav8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrndm_f16 (float16x4_t __a) +{ + return __builtin_neon_vrndmv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrndmq_f16 (float16x8_t __a) +{ + return __builtin_neon_vrndmv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrndn_f16 (float16x4_t __a) +{ + return __builtin_neon_vrndnv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrndnq_f16 (float16x8_t __a) +{ + return __builtin_neon_vrndnv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrndp_f16 (float16x4_t __a) +{ + return 
__builtin_neon_vrndpv4hf (__a); } + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrndpq_f16 (float16x8_t __a) +{ + return __builtin_neon_vrndpv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrndx_f16 (float16x4_t __a) +{ + return __builtin_neon_vrndxv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrndxq_f16 (float16x8_t __a) +{ + return __builtin_neon_vrndxv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrsqrte_f16 (float16x4_t __a) +{ + return __builtin_neon_vrsqrtev4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrsqrteq_f16 (float16x8_t __a) +{ + return __builtin_neon_vrsqrtev8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrecps_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vrecpsv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrecpsq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vrecpsv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrsqrts_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vrsqrtsv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrsqrtsq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vrsqrtsv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vsub_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vsubv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vsubq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vsubv8hf (__a, __b); +} + +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC. */ +#pragma GCC pop_options + + /* Half-precision data processing intrinsics.
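+ These do not require the ARMv8.2-A arithmetic extension: they are + available whenever the target has an __fp16 format (IEEE or alternative), + since they reduce to duplicates, permutes and bitwise selects.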
*/ +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c) +{ + return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c) +{ + return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vdup_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vdupq_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vdup_lane_f16 (float16x4_t __a, const int __b) +{ + return __builtin_neon_vdup_lanev4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_f16 (float16x4_t __a, const int __b) +{ + return __builtin_neon_vdup_lanev8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vext_f16 (float16x4_t __a, float16x4_t __b, const int __c) +{ + return __builtin_neon_vextv4hf (__a, __b, __c); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c) +{ + return __builtin_neon_vextv8hf (__a, __b, __c); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmov_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmovq_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrev64_f16 (float16x4_t __a) +{ + return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 }); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrev64q_f16 (float16x8_t __a) +{ + return + (float16x8_t)__builtin_shuffle (__a, + (uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vtrn_f16 (float16x4_t __a, float16x4_t __b) +{ + float16x4x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 }); +#endif + return __rv; +} + +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_f16 (float16x8_t __a, float16x8_t __b) +{ + float16x8x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif + return __rv; +} + +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vuzp_f16 (float16x4_t __a, float16x4_t __b) +{ + float16x4x2_t __rv; +#ifdef 
__ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 }); +#endif + return __rv; +} + +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_f16 (float16x8_t __a, float16x8_t __b) +{ + float16x8x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 5, 7, 1, 3, 13, 15, 9, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 6, 0, 2, 12, 14, 8, 10 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif + return __rv; +} + +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vzip_f16 (float16x4_t __a, float16x4_t __b) +{ + float16x4x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 }); +#endif + return __rv; +} + +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vzipq_f16 (float16x8_t __a, float16x8_t __b) +{ + float16x8x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 10, 2, 11, 3, 8, 0, 9, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 14, 6, 15, 7, 12, 4, 13, 5 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif + return __rv; +} + +#endif + #ifdef __cplusplus } #endif diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index d9fac78..b29aa91 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -19,6 +19,7 @@ . 
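Note on the arm_neon.h block that ends here: vbsl/vdup/vext/vrev64/vtrn/vuzp/vzip for f16 are pure data-movement operations (implemented with __builtin_shuffle or pre-existing NEON builtins), so they are gated only on the __fp16 format macros, not on the new ARMv8.2-A FP16 arithmetic extension. A minimal usage sketch follows; the function name and build flags (any NEON toolchain with __fp16 enabled, e.g. -mfpu=neon -mfp16-format=ieee) are illustrative, not part of the patch:

#include <arm_neon.h>

/* Illustrative only: interleave two half-precision vectors, undo it,
   then rotate.  Lane comments assume little-endian numbering.  */
float16x4_t
demo_f16_permute (float16x4_t a, float16x4_t b)
{
  float16x4x2_t z = vzip_f16 (a, b);               /* {a0,b0,a1,b1}, {a2,b2,a3,b3}.  */
  float16x4x2_t u = vuzp_f16 (z.val[0], z.val[1]); /* recovers a and b.  */
  return vext_f16 (u.val[0], u.val[1], 1);         /* {a1,a2,a3,b0}.  */
}
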
*/ VAR2 (BINOP, vadd, v2sf, v4sf) +VAR2 (BINOP, vadd, v8hf, v4hf) VAR3 (BINOP, vaddls, v8qi, v4hi, v2si) VAR3 (BINOP, vaddlu, v8qi, v4hi, v2si) VAR3 (BINOP, vaddws, v8qi, v4hi, v2si) @@ -32,12 +33,15 @@ VAR8 (BINOP, vqaddu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) VAR3 (BINOP, vraddhn, v8hi, v4si, v2di) VAR2 (BINOP, vmulf, v2sf, v4sf) +VAR2 (BINOP, vmulf, v8hf, v4hf) VAR2 (BINOP, vmulp, v8qi, v16qi) VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR3 (TERNOP, vmlals, v8qi, v4hi, v2si) VAR3 (TERNOP, vmlalu, v8qi, v4hi, v2si) VAR2 (TERNOP, vfma, v2sf, v4sf) +VAR2 (TERNOP, vfma, v4hf, v8hf) VAR2 (TERNOP, vfms, v2sf, v4sf) +VAR2 (TERNOP, vfms, v4hf, v8hf) VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR3 (TERNOP, vmlsls, v8qi, v4hi, v2si) VAR3 (TERNOP, vmlslu, v8qi, v4hi, v2si) @@ -94,6 +98,7 @@ VAR8 (TERNOP_IMM, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (TERNOP_IMM, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (TERNOP_IMM, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR2 (BINOP, vsub, v2sf, v4sf) +VAR2 (BINOP, vsub, v8hf, v4hf) VAR3 (BINOP, vsubls, v8qi, v4hi, v2si) VAR3 (BINOP, vsublu, v8qi, v4hi, v2si) VAR3 (BINOP, vsubws, v8qi, v4hi, v2si) @@ -111,12 +116,27 @@ VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vcage, v2sf, v4sf) VAR2 (BINOP, vcagt, v2sf, v4sf) +VAR2 (BINOP, vcage, v4hf, v8hf) +VAR2 (BINOP, vcagt, v4hf, v8hf) +VAR2 (BINOP, vcale, v4hf, v8hf) +VAR2 (BINOP, vcalt, v4hf, v8hf) +VAR2 (BINOP, vceq, v4hf, v8hf) +VAR2 (BINOP, vcge, v4hf, v8hf) +VAR2 (BINOP, vcgt, v4hf, v8hf) +VAR2 (BINOP, vcle, v4hf, v8hf) +VAR2 (BINOP, vclt, v4hf, v8hf) +VAR2 (UNOP, vceqz, v4hf, v8hf) +VAR2 (UNOP, vcgez, v4hf, v8hf) +VAR2 (UNOP, vcgtz, v4hf, v8hf) +VAR2 (UNOP, vclez, v4hf, v8hf) +VAR2 (UNOP, vcltz, v4hf, v8hf) VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vabds, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vabdu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vabdf, v2sf, v4sf) VAR3 (BINOP, vabdls, v8qi, v4hi, v2si) VAR3 (BINOP, vabdlu, v8qi, v4hi, v2si) +VAR2 (BINOP, vabd, v8hf, v4hf) VAR6 (TERNOP, vabas, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (TERNOP, vabau, v8qi, v4hi, v2si, v16qi, v8hi, v4si) @@ -126,27 +146,38 @@ VAR3 (TERNOP, vabalu, v8qi, v4hi, v2si) VAR6 (BINOP, vmaxs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vmaxu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vmaxf, v2sf, v4sf) +VAR2 (BINOP, vmaxf, v8hf, v4hf) +VAR2 (BINOP, vmaxnm, v4hf, v8hf) VAR6 (BINOP, vmins, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vminu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vminf, v2sf, v4sf) +VAR2 (BINOP, vminf, v4hf, v8hf) +VAR2 (BINOP, vminnm, v8hf, v4hf) VAR3 (BINOP, vpmaxs, v8qi, v4hi, v2si) VAR3 (BINOP, vpmaxu, v8qi, v4hi, v2si) VAR1 (BINOP, vpmaxf, v2sf) +VAR1 (BINOP, vpmaxf, v4hf) VAR3 (BINOP, vpmins, v8qi, v4hi, v2si) VAR3 (BINOP, vpminu, v8qi, v4hi, v2si) VAR1 (BINOP, vpminf, v2sf) +VAR1 (BINOP, vpminf, v4hf) VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) +VAR1 (BINOP, vpadd, v4hf) VAR6 (UNOP, vpaddls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (UNOP, vpaddlu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vpadals, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vpadalu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vrecps, v2sf, v4sf) VAR2 (BINOP, vrsqrts, v2sf, v4sf) +VAR2 (BINOP, vrecps, v4hf, v8hf) +VAR2 
(BINOP, vrsqrts, v4hf, v8hf) VAR8 (TERNOP_IMM, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (TERNOP_IMM, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) +VAR2 (UNOP, vabs, v8hf, v4hf) +VAR2 (UNOP, vneg, v8hf, v4hf) VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) @@ -155,8 +186,16 @@ VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di) VAR2 (UNOP, vcnt, v8qi, v16qi) VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) +VAR2 (UNOP, vrecpe, v8hf, v4hf) VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) +VAR2 (UNOP, vrsqrte, v4hf, v8hf) VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) +VAR2 (UNOP, vrnd, v8hf, v4hf) +VAR2 (UNOP, vrnda, v8hf, v4hf) +VAR2 (UNOP, vrndm, v8hf, v4hf) +VAR2 (UNOP, vrndn, v8hf, v4hf) +VAR2 (UNOP, vrndp, v8hf, v4hf) +VAR2 (UNOP, vrndx, v8hf, v4hf) /* FIXME: vget_lane supports more variants than this! */ VAR10 (GETLANE, vget_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) @@ -166,8 +205,10 @@ VAR10 (SETLANE, vset_lane, VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di) VAR10 (UNOP, vdup_n, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (UNOP, vdup_n, v8hf, v4hf) VAR10 (GETLANE, vdup_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (GETLANE, vdup_lane, v8hf, v4hf) VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di) VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di) VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di) @@ -177,7 +218,7 @@ VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di) VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) VAR3 (UNOP, vmovls, v8qi, v4hi, v2si) VAR3 (UNOP, vmovlu, v8qi, v4hi, v2si) -VAR6 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +VAR8 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf, v4hf, v8hf) VAR6 (MAC_LANE, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) VAR2 (MAC_LANE, vmlals_lane, v4hi, v2si) VAR2 (MAC_LANE, vmlalu_lane, v4hi, v2si) @@ -186,7 +227,7 @@ VAR6 (MAC_LANE, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) VAR2 (MAC_LANE, vmlsls_lane, v4hi, v2si) VAR2 (MAC_LANE, vmlslu_lane, v4hi, v2si) VAR2 (MAC_LANE, vqdmlsl_lane, v4hi, v2si) -VAR6 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +VAR8 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf, v4hf, v8hf) VAR6 (MAC_N, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) VAR2 (MAC_N, vmlals_n, v4hi, v2si) VAR2 (MAC_N, vmlalu_n, v4hi, v2si) @@ -197,17 +238,27 @@ VAR2 (MAC_N, vmlslu_n, v4hi, v2si) VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si) VAR10 (SETLANE, vext, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (SETLANE, vext, v8hf, v4hf) VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) VAR2 (UNOP, vrev16, v8qi, v16qi) VAR4 (UNOP, vcvts, v2si, v2sf, v4si, v4sf) +VAR2 (UNOP, vcvts, v4hi, v8hi) +VAR2 (UNOP, vcvts, v4hf, v8hf) +VAR2 (UNOP, vcvtu, v4hi, v8hi) +VAR2 (UNOP, vcvtu, v4hf, v8hf) VAR4 (UNOP, vcvtu, v2si, v2sf, v4si, v4sf) VAR4 (BINOP, vcvts_n, v2si, v2sf, v4si, v4sf) VAR4 (BINOP, vcvtu_n, v2si, v2sf, v4si, v4sf) +VAR2 (BINOP, vcvts_n, v4hf, v8hf) +VAR2 (BINOP, vcvtu_n, v4hi, v8hi) +VAR2 (BINOP, vcvts_n, v4hi, v8hi) +VAR2 (BINOP, vcvtu_n, v4hf, v8hf) VAR1 (UNOP, vcvtv4sf, v4hf) VAR1 (UNOP, vcvtv4hf, v4sf) VAR10 (TERNOP, vbsl, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) 
+VAR2 (TERNOP, vbsl, v8hf, v4hf)
VAR2 (UNOP, copysignf, v2sf, v4sf)
VAR2 (UNOP, vrintn, v2sf, v4sf)
VAR2 (UNOP, vrinta, v2sf, v4sf)
@@ -219,6 +270,14 @@ VAR1 (UNOP, vcvtav2sf, v2si)
 VAR1 (UNOP, vcvtav4sf, v4si)
 VAR1 (UNOP, vcvtauv2sf, v2si)
 VAR1 (UNOP, vcvtauv4sf, v4si)
+VAR2 (UNOP, vcvtas, v4hf, v8hf)
+VAR2 (UNOP, vcvtau, v4hf, v8hf)
+VAR2 (UNOP, vcvtms, v4hf, v8hf)
+VAR2 (UNOP, vcvtmu, v4hf, v8hf)
+VAR2 (UNOP, vcvtns, v4hf, v8hf)
+VAR2 (UNOP, vcvtnu, v4hf, v8hf)
+VAR2 (UNOP, vcvtps, v4hf, v8hf)
+VAR2 (UNOP, vcvtpu, v4hf, v8hf)
 VAR1 (UNOP, vcvtpv2sf, v2si)
 VAR1 (UNOP, vcvtpv4sf, v4si)
 VAR1 (UNOP, vcvtpuv2sf, v2si)
diff --git a/gcc/config/arm/arm_vfp_builtins.def b/gcc/config/arm/arm_vfp_builtins.def
new file mode 100644
index 0000000..5abfcdd
--- /dev/null
+++ b/gcc/config/arm/arm_vfp_builtins.def
@@ -0,0 +1,51 @@
+/* VFP instruction builtin definitions.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file lists the builtins that may be available when VFP is enabled but
+   NEON is not.  The entries otherwise have the same requirements and generate
+   the same structures as those in arm_neon_builtins.def.  */
+
+/* FP16 Arithmetic instructions.  */
+VAR1 (UNOP, vabs, hf)
+VAR2 (UNOP, vcvths, hf, si)
+VAR2 (UNOP, vcvthu, hf, si)
+VAR1 (UNOP, vcvtahs, si)
+VAR1 (UNOP, vcvtahu, si)
+VAR1 (UNOP, vcvtmhs, si)
+VAR1 (UNOP, vcvtmhu, si)
+VAR1 (UNOP, vcvtnhs, si)
+VAR1 (UNOP, vcvtnhu, si)
+VAR1 (UNOP, vcvtphs, si)
+VAR1 (UNOP, vcvtphu, si)
+VAR1 (UNOP, vrnd, hf)
+VAR1 (UNOP, vrnda, hf)
+VAR1 (UNOP, vrndi, hf)
+VAR1 (UNOP, vrndm, hf)
+VAR1 (UNOP, vrndn, hf)
+VAR1 (UNOP, vrndp, hf)
+VAR1 (UNOP, vrndx, hf)
+VAR1 (UNOP, vsqrt, hf)
+
+VAR2 (BINOP, vcvths_n, hf, si)
+VAR2 (BINOP, vcvthu_n, hf, si)
+VAR1 (BINOP, vmaxnm, hf)
+VAR1 (BINOP, vminnm, hf)
+
+VAR1 (TERNOP, vfma, hf)
+VAR1 (TERNOP, vfms, hf)
diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
index ff21660..0da98fb 100644
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -93,6 +93,8 @@
 |march=armv8-a+crc \
 |march=armv8.1-a \
 |march=armv8.1-a+crc \
+ |march=armv8.2-a \
+ |march=armv8.2-a+fp16 \
 |march=armv8-m.base \
 |march=armv8-m.main \
 |march=armv8-m.main+dsp \
@@ -130,6 +132,8 @@
 |march=armv8-a+crc \
 |march=armv8.1-a \
 |march=armv8.1-a+crc \
+ |march=armv8.2-a \
+ |march=armv8.2-a+fp16 \
 |march=armv8-m.base \
 |march=armv8-m.main \
 |march=armv8-m.main+dsp \
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index aba1023..be39e4a 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -119,6 +119,10 @@
 ;; All supported vector modes (except those with 64-bit integer elements).
 (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
+;; All supported vector modes including 16-bit float modes.
+(define_mode_iterator VDQWH [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF + V8HF V4HF]) + ;; Supported integer vector modes (not 64 bit elements). (define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI]) @@ -141,6 +145,9 @@ ;; Vector modes form int->float conversions. (define_mode_iterator VCVTI [V2SI V4SI]) +;; Vector modes for int->half conversions. +(define_mode_iterator VCVTHI [V4HI V8HI]) + ;; Vector modes for doubleword multiply-accumulate, etc. insns. (define_mode_iterator VMD [V4HI V2SI V2SF]) @@ -174,6 +181,9 @@ ;; Modes with 8-bit, 16-bit and 32-bit elements. (define_mode_iterator VU [V16QI V8HI V4SI]) +;; Vector modes for 16-bit floating-point support. +(define_mode_iterator VH [V8HF V4HF]) + ;; Iterators used for fixed-point support. (define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA]) @@ -192,14 +202,17 @@ ;; Code iterators ;;---------------------------------------------------------------------------- -;; A list of condition codes used in compare instructions where -;; the carry flag from the addition is used instead of doing the +;; A list of condition codes used in compare instructions where +;; the carry flag from the addition is used instead of doing the ;; compare a second time. (define_code_iterator LTUGEU [ltu geu]) ;; The signed gt, ge comparisons (define_code_iterator GTGE [gt ge]) +;; The signed gt, ge, lt, le comparisons +(define_code_iterator GLTE [gt ge lt le]) + ;; The unsigned gt, ge comparisons (define_code_iterator GTUGEU [gtu geu]) @@ -228,6 +241,12 @@ ;; Binary operators whose second operand can be shifted. (define_code_iterator SHIFTABLE_OPS [plus minus ior xor and]) +;; Operations on the sign of a number. +(define_code_iterator ABSNEG [abs neg]) + +;; Conversions. +(define_code_iterator FCVT [unsigned_float float]) + ;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows ;; a stack pointer opoerand. The minus operation is a candidate for an rsub ;; and hence only plus is supported. @@ -251,10 +270,14 @@ (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) -(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE UNSPEC_VCLT UNSPEC_VCLE]) +(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE + UNSPEC_VCLT UNSPEC_VCLE]) (define_int_iterator NEON_VACMP [UNSPEC_VCAGE UNSPEC_VCAGT]) +(define_int_iterator NEON_VAGLTE [UNSPEC_VCAGE UNSPEC_VCAGT + UNSPEC_VCALE UNSPEC_VCALT]) + (define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM @@ -323,6 +346,22 @@ (define_int_iterator VCVT_US_N [UNSPEC_VCVT_S_N UNSPEC_VCVT_U_N]) +(define_int_iterator VCVT_HF_US_N [UNSPEC_VCVT_HF_S_N UNSPEC_VCVT_HF_U_N]) + +(define_int_iterator VCVT_SI_US_N [UNSPEC_VCVT_SI_S_N UNSPEC_VCVT_SI_U_N]) + +(define_int_iterator VCVT_HF_US [UNSPEC_VCVTA_S UNSPEC_VCVTA_U + UNSPEC_VCVTM_S UNSPEC_VCVTM_U + UNSPEC_VCVTN_S UNSPEC_VCVTN_U + UNSPEC_VCVTP_S UNSPEC_VCVTP_U]) + +(define_int_iterator VCVTH_US [UNSPEC_VCVTH_S UNSPEC_VCVTH_U]) + +;; Operators for FP16 instructions. 
+(define_int_iterator FP16_RND [UNSPEC_VRND UNSPEC_VRNDA + UNSPEC_VRNDM UNSPEC_VRNDN + UNSPEC_VRNDP UNSPEC_VRNDX]) + (define_int_iterator VQMOVN [UNSPEC_VQMOVN_S UNSPEC_VQMOVN_U]) (define_int_iterator VMOVL [UNSPEC_VMOVL_S UNSPEC_VMOVL_U]) @@ -366,6 +405,8 @@ (define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH]) +(define_int_iterator VFM_LANE_AS [UNSPEC_VFMA_LANE UNSPEC_VFMS_LANE]) + ;;---------------------------------------------------------------------------- ;; Mode attributes ;;---------------------------------------------------------------------------- @@ -384,6 +425,10 @@ (define_mode_attr V_cvtto [(V2SI "v2sf") (V2SF "v2si") (V4SI "v4sf") (V4SF "v4si")]) +;; (Opposite) mode to convert to/from for vector-half mode conversions. +(define_mode_attr VH_CVTTO [(V4HI "V4HF") (V4HF "V4HI") + (V8HI "V8HF") (V8HF "V8HI")]) + ;; Define element mode for each vector mode. (define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") (V4HI "HI") (V8HI "HI") @@ -427,12 +472,13 @@ ;; Register width from element mode (define_mode_attr V_reg [(V8QI "P") (V16QI "q") - (V4HI "P") (V8HI "q") - (V4HF "P") (V8HF "q") - (V2SI "P") (V4SI "q") - (V2SF "P") (V4SF "q") - (DI "P") (V2DI "q") - (SF "") (DF "P")]) + (V4HI "P") (V8HI "q") + (V4HF "P") (V8HF "q") + (V2SI "P") (V4SI "q") + (V2SF "P") (V4SF "q") + (DI "P") (V2DI "q") + (SF "") (DF "P") + (HF "")]) ;; Wider modes with the same number of elements. (define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")]) @@ -448,7 +494,7 @@ (define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI") (V8HF "V4HF") (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF") - (V2DI "DI")]) + (V2DI "DI") (V4HF "HF")]) ;; Same, but lower-case. (define_mode_attr V_half [(V16QI "v8qi") (V8HI "v4hi") @@ -475,9 +521,10 @@ ;; Used for neon_vdup_lane, where the second operand is double-sized ;; even when the first one is quad. (define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI") - (V4SI "V2SI") (V4SF "V2SF") - (V8QI "V8QI") (V4HI "V4HI") - (V2SI "V2SI") (V2SF "V2SF")]) + (V4SI "V2SI") (V4SF "V2SF") + (V8QI "V8QI") (V4HI "V4HI") + (V2SI "V2SI") (V2SF "V2SF") + (V8HF "V4HF") (V4HF "V4HF")]) ;; Mode of result of comparison operations (and bit-select operand 1). (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") @@ -496,18 +543,22 @@ ;; Get element type from double-width mode, for operations where we ;; don't care about signedness. (define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8") - (V4HI "i16") (V8HI "i16") - (V2SI "i32") (V4SI "i32") - (DI "i64") (V2DI "i64") - (V2SF "f32") (V4SF "f32") - (SF "f32") (DF "f64")]) + (V4HI "i16") (V8HI "i16") + (V2SI "i32") (V4SI "i32") + (DI "i64") (V2DI "i64") + (V2SF "f32") (V4SF "f32") + (SF "f32") (DF "f64") + (HF "f16") (V4HF "f16") + (V8HF "f16")]) ;; Same, but for operations which work on signed values. (define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8") - (V4HI "s16") (V8HI "s16") - (V2SI "s32") (V4SI "s32") - (DI "s64") (V2DI "s64") - (V2SF "f32") (V4SF "f32")]) + (V4HI "s16") (V8HI "s16") + (V2SI "s32") (V4SI "s32") + (DI "s64") (V2DI "s64") + (V2SF "f32") (V4SF "f32") + (HF "f16") (V4HF "f16") + (V8HF "f16")]) ;; Same, but for operations which work on unsigned values. 
(define_mode_attr V_u_elem [(V8QI "u8") (V16QI "u8")
@@ -524,17 +575,22 @@ (V2SF "32") (V4SF "32")])
 
 (define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8")
- (V4HI "16") (V8HI "16")
- (V2SI "32") (V4SI "32")
- (DI "64") (V2DI "64")
+ (V4HI "16") (V8HI "16")
+ (V2SI "32") (V4SI "32")
+ (DI "64") (V2DI "64")
 (V4HF "16") (V8HF "16")
- (V2SF "32") (V4SF "32")])
+ (V2SF "32") (V4SF "32")])
 
 (define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b")
- (V4HI "h") (V8HI "h")
- (V2SI "s") (V4SI "s")
- (DI "d") (V2DI "d")
- (V2SF "s") (V4SF "s")])
+ (V4HI "h") (V8HI "h")
+ (V2SI "s") (V4SI "s")
+ (DI "d") (V2DI "d")
+ (V2SF "s") (V4SF "s")])
+
+(define_mode_attr VH_elem_ch [(V4HI "s") (V8HI "s")
+ (V4HF "s") (V8HF "s")
+ (HF "s")])
 
 ;; Element sizes for duplicating ARM registers to all elements of a vector.
 (define_mode_attr VD_dup [(V8QI "8") (V4HI "16") (V2SI "32") (V2SF "32")])
@@ -570,29 +626,30 @@
 ;; This mode attribute is used to obtain the correct register constraints.
 
 (define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t")
- (V8HI "x") (V4SI "t") (V4SF "t")])
+ (V8HI "x") (V4SI "t") (V4SF "t")
+ (V8HF "x") (V4HF "x")])
 
 ;; Predicates used for setting type for neon instructions
 
 (define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false")
- (V4HI "false") (V8HI "false")
- (V2SI "false") (V4SI "false")
- (V4HF "true") (V8HF "true")
- (V2SF "true") (V4SF "true")
- (DI "false") (V2DI "false")])
+ (V4HI "false") (V8HI "false")
+ (V2SI "false") (V4SI "false")
+ (V4HF "true") (V8HF "true")
+ (V2SF "true") (V4SF "true")
+ (DI "false") (V2DI "false")])
 
 (define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
- (V4HI "true") (V8HI "true")
- (V2SI "false") (V4SI "false")
- (V2SF "false") (V4SF "false")
- (DI "false") (V2DI "false")])
-
+ (V4HI "true") (V8HI "true")
+ (V2SI "false") (V4SI "false")
+ (V2SF "false") (V4SF "false")
+ (DI "false") (V2DI "false")])
 (define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
- (V4HI "true") (V8HI "false")
- (V2SI "true") (V4SI "false")
- (V2SF "true") (V4SF "false")
- (DI "true") (V2DI "false")])
+ (V4HI "true") (V8HI "false")
+ (V2SI "true") (V4SI "false")
+ (V2SF "true") (V4SF "false")
+ (DI "true") (V2DI "false")
+ (V4HF "true") (V8HF "false")])
 
 (define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
 (V4HF "4") (V8HF "8")
@@ -637,12 +694,14 @@
 ;; Mode attribute used to build the "type" attribute.
 
 (define_mode_attr q [(V8QI "") (V16QI "_q")
- (V4HI "") (V8HI "_q")
- (V2SI "") (V4SI "_q")
+ (V4HI "") (V8HI "_q")
+ (V2SI "") (V4SI "_q")
+ (V4HF "") (V8HF "_q")
+ (V2SF "") (V4SF "_q")
 (V4HF "") (V8HF "_q")
- (V2SF "") (V4SF "_q")
- (DI "") (V2DI "_q")
- (DF "") (V2DF "_q")
+ (DI "") (V2DI "_q")
+ (DF "") (V2DF "_q")
+ (HF "")])
 
 (define_mode_attr pf [(V8QI "p") (V16QI "p")
 (V2SF "f") (V4SF "f")])
@@ -679,6 +738,16 @@
 (define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")])
 (define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")])
+
+;; String representations of operations on the sign of a number.
+(define_code_attr absneg_str [(abs "abs") (neg "neg")])
+
+;; Conversions.
+(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")]) + +(define_code_attr float_sup [(unsigned_float "u") (float "s")]) + +(define_code_attr float_SUP [(unsigned_float "U") (float "S")]) + ;;---------------------------------------------------------------------------- ;; Int attributes ;;---------------------------------------------------------------------------- @@ -710,7 +779,13 @@ (UNSPEC_VPMAX "s") (UNSPEC_VPMAX_U "u") (UNSPEC_VPMIN "s") (UNSPEC_VPMIN_U "u") (UNSPEC_VCVT_S "s") (UNSPEC_VCVT_U "u") + (UNSPEC_VCVTA_S "s") (UNSPEC_VCVTA_U "u") + (UNSPEC_VCVTM_S "s") (UNSPEC_VCVTM_U "u") + (UNSPEC_VCVTN_S "s") (UNSPEC_VCVTN_U "u") + (UNSPEC_VCVTP_S "s") (UNSPEC_VCVTP_U "u") (UNSPEC_VCVT_S_N "s") (UNSPEC_VCVT_U_N "u") + (UNSPEC_VCVT_HF_S_N "s") (UNSPEC_VCVT_HF_U_N "u") + (UNSPEC_VCVT_SI_S_N "s") (UNSPEC_VCVT_SI_U_N "u") (UNSPEC_VQMOVN_S "s") (UNSPEC_VQMOVN_U "u") (UNSPEC_VMOVL_S "s") (UNSPEC_VMOVL_U "u") (UNSPEC_VSHL_S "s") (UNSPEC_VSHL_U "u") @@ -725,13 +800,30 @@ (UNSPEC_VSHLL_S_N "s") (UNSPEC_VSHLL_U_N "u") (UNSPEC_VSRA_S_N "s") (UNSPEC_VSRA_U_N "u") (UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u") - + (UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u") ]) +(define_int_attr vcvth_op + [(UNSPEC_VCVTA_S "a") (UNSPEC_VCVTA_U "a") + (UNSPEC_VCVTM_S "m") (UNSPEC_VCVTM_U "m") + (UNSPEC_VCVTN_S "n") (UNSPEC_VCVTN_U "n") + (UNSPEC_VCVTP_S "p") (UNSPEC_VCVTP_U "p")]) + +(define_int_attr fp16_rnd_str + [(UNSPEC_VRND "rnd") (UNSPEC_VRNDA "rnda") + (UNSPEC_VRNDM "rndm") (UNSPEC_VRNDN "rndn") + (UNSPEC_VRNDP "rndp") (UNSPEC_VRNDX "rndx")]) + +(define_int_attr fp16_rnd_insn + [(UNSPEC_VRND "vrintz") (UNSPEC_VRNDA "vrinta") + (UNSPEC_VRNDM "vrintm") (UNSPEC_VRNDN "vrintn") + (UNSPEC_VRNDP "vrintp") (UNSPEC_VRNDX "vrintx")]) + (define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt") - (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le") - (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge") - (UNSPEC_VCAGT "gt")]) + (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le") + (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge") + (UNSPEC_VCAGT "gt") (UNSPEC_VCALE "le") + (UNSPEC_VCALT "lt")]) (define_int_attr r [ (UNSPEC_VRHADD_S "r") (UNSPEC_VRHADD_U "r") @@ -847,3 +939,7 @@ ;; Attributes for VQRDMLAH/VQRDMLSH (define_int_attr neon_rdma_as [(UNSPEC_VQRDMLAH "a") (UNSPEC_VQRDMLSH "s")]) + +;; Attributes for VFMA_LANE/ VFMS_LANE +(define_int_attr neon_vfm_lane_as + [(UNSPEC_VFMA_LANE "a") (UNSPEC_VFMS_LANE "s")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e2fdfbb..0532333 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -505,6 +505,20 @@ (const_string "neon_add")))] ) +(define_insn "add3_fp16" + [(set + (match_operand:VH 0 "s_register_operand" "=w") + (plus:VH + (match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")))] + "TARGET_NEON_FP16INST" + "vadd.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_addsub_s") + (const_string "neon_add")))] +) + (define_insn "adddi3_neon" [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r") (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r") @@ -543,6 +557,17 @@ (const_string "neon_sub")))] ) +(define_insn "sub3_fp16" + [(set + (match_operand:VH 0 "s_register_operand" "=w") + (minus:VH + (match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")))] + "TARGET_NEON_FP16INST" + "vsub.\t%0, %1, %2" + [(set_attr "type" "neon_sub")] +) + (define_insn "subdi3_neon" [(set (match_operand:DI 0 "s_register_operand" 
"=w,?&r,?&r,?&r,?w") (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w") @@ -591,6 +616,16 @@ (const_string "neon_mla_")))] ) +(define_insn "mul3add_neon" + [(set (match_operand:VH 0 "s_register_operand" "=w") + (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w") + (match_operand:VH 3 "s_register_operand" "w")) + (match_operand:VH 1 "s_register_operand" "0")))] + "TARGET_NEON_FP16INST && (! || flag_unsafe_math_optimizations)" + "vmla.f16\t%0, %2, %3" + [(set_attr "type" "neon_fp_mla_s")] +) + (define_insn "mul3negadd_neon" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0") @@ -629,6 +664,19 @@ [(set_attr "type" "neon_fp_mla_s")] ) +;; There is limited support for unsafe-math optimizations using the NEON FP16 +;; arithmetic instructions, so only the intrinsic is currently supported. +(define_insn "fma4_intrinsic" + [(set (match_operand:VH 0 "register_operand" "=w") + (fma:VH + (match_operand:VH 1 "register_operand" "w") + (match_operand:VH 2 "register_operand" "w") + (match_operand:VH 3 "register_operand" "0")))] + "TARGET_NEON_FP16INST" + "vfma.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + (define_insn "*fmsub4" [(set (match_operand:VCVTF 0 "register_operand" "=w") (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) @@ -640,13 +688,25 @@ ) (define_insn "fmsub4_intrinsic" - [(set (match_operand:VCVTF 0 "register_operand" "=w") - (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) - (match_operand:VCVTF 2 "register_operand" "w") - (match_operand:VCVTF 3 "register_operand" "0")))] - "TARGET_NEON && TARGET_FMA" - "vfms%?.\\t%0, %1, %2" - [(set_attr "type" "neon_fp_mla_s")] + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF + (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA" + "vfms%?.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + +(define_insn "fmsub4_intrinsic" + [(set (match_operand:VH 0 "register_operand" "=w") + (fma:VH + (neg:VH (match_operand:VH 1 "register_operand" "w")) + (match_operand:VH 2 "register_operand" "w") + (match_operand:VH 3 "register_operand" "0")))] + "TARGET_NEON_FP16INST" + "vfms.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] ) (define_insn "neon_vrint" @@ -860,6 +920,44 @@ "" ) +(define_insn "2" + [(set (match_operand:VH 0 "s_register_operand" "=w") + (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))] + "TARGET_NEON_FP16INST" + "v.\t%0, %1" + [(set_attr "type" "neon_abs")] +) + +(define_expand "neon_v" + [(set + (match_operand:VH 0 "s_register_operand") + (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))] + "TARGET_NEON_FP16INST" +{ + emit_insn (gen_2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_v" + [(set (match_operand:VH 0 "s_register_operand" "=w") + (unspec:VH + [(match_operand:VH 1 "s_register_operand" "w")] + FP16_RND))] + "TARGET_NEON_FP16INST" + ".\t%0, %1" + [(set_attr "type" "neon_fp_round_s")] +) + +(define_insn "neon_vrsqrte" + [(set (match_operand:VH 0 "s_register_operand" "=w") + (unspec:VH + [(match_operand:VH 1 "s_register_operand" "w")] + UNSPEC_VRSQRTE))] + "TARGET_NEON_FP16INST" + "vrsqrte.f16\t%0, %1" + [(set_attr "type" "neon_fp_rsqrte_s")] +) + (define_insn "*umin3_neon" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") @@ -1601,6 +1699,17 @@ 
(const_string "neon_reduc_add")))] ) +(define_insn "neon_vpaddv4hf" + [(set + (match_operand:V4HF 0 "s_register_operand" "=w") + (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w") + (match_operand:V4HF 2 "s_register_operand" "w")] + UNSPEC_VPADD))] + "TARGET_NEON_FP16INST" + "vpadd.f16\t%P0, %P1, %P2" + [(set_attr "type" "neon_reduc_add")] +) + (define_insn "neon_vpsmin" [(set (match_operand:VD 0 "s_register_operand" "=w") (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") @@ -1949,6 +2058,26 @@ DONE; }) +(define_expand "neon_vadd" + [(match_operand:VH 0 "s_register_operand") + (match_operand:VH 1 "s_register_operand") + (match_operand:VH 2 "s_register_operand")] + "TARGET_NEON_FP16INST" +{ + emit_insn (gen_add3_fp16 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vsub" + [(match_operand:VH 0 "s_register_operand") + (match_operand:VH 1 "s_register_operand") + (match_operand:VH 2 "s_register_operand")] + "TARGET_NEON_FP16INST" +{ + emit_insn (gen_sub3_fp16 (operands[0], operands[1], operands[2])); + DONE; +}) + ; Note that NEON operations don't support the full IEEE 754 standard: in ; particular, denormal values are flushed to zero. This means that GCC cannot ; use those instructions for autovectorization, etc. unless @@ -2040,6 +2169,17 @@ (const_string "neon_mul_")))] ) +(define_insn "neon_vmulf" + [(set + (match_operand:VH 0 "s_register_operand" "=w") + (mult:VH + (match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")))] + "TARGET_NEON_FP16INST" + "vmul.f16\t%0, %1, %2" + [(set_attr "type" "neon_mul_")] +) + (define_expand "neon_vmla" [(match_operand:VDQW 0 "s_register_operand" "=w") (match_operand:VDQW 1 "s_register_operand" "0") @@ -2068,6 +2208,18 @@ DONE; }) +(define_expand "neon_vfma" + [(match_operand:VH 0 "s_register_operand") + (match_operand:VH 1 "s_register_operand") + (match_operand:VH 2 "s_register_operand") + (match_operand:VH 3 "s_register_operand")] + "TARGET_NEON_FP16INST" +{ + emit_insn (gen_fma4_intrinsic (operands[0], operands[2], operands[3], + operands[1])); + DONE; +}) + (define_expand "neon_vfms" [(match_operand:VCVTF 0 "s_register_operand") (match_operand:VCVTF 1 "s_register_operand") @@ -2080,6 +2232,18 @@ DONE; }) +(define_expand "neon_vfms" + [(match_operand:VH 0 "s_register_operand") + (match_operand:VH 1 "s_register_operand") + (match_operand:VH 2 "s_register_operand") + (match_operand:VH 3 "s_register_operand")] + "TARGET_NEON_FP16INST" +{ + emit_insn (gen_fmsub4_intrinsic (operands[0], operands[2], operands[3], + operands[1])); + DONE; +}) + ; Used for intrinsics when flag_unsafe_math_optimizations is false. (define_insn "neon_vmla_unspec" @@ -2380,6 +2544,72 @@ [(set_attr "type" "neon_fp_compare_s")] ) +(define_expand "neon_vc" + [(match_operand: 0 "s_register_operand") + (neg: + (COMPARISONS:VH + (match_operand:VH 1 "s_register_operand") + (match_operand:VH 2 "reg_or_zero_operand")))] + "TARGET_NEON_FP16INST" +{ + /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations + are enabled. 
*/ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + && !flag_unsafe_math_optimizations) + emit_insn + (gen_neon_vc_fp16insn_unspec + (operands[0], operands[1], operands[2])); + else + emit_insn + (gen_neon_vc_fp16insn + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "neon_vc_fp16insn" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (neg: + (COMPARISONS: + (match_operand:VH 1 "s_register_operand" "w,w") + (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))] + "TARGET_NEON_FP16INST + && !(GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + && !flag_unsafe_math_optimizations)" +{ + char pattern[100]; + sprintf (pattern, "vc.%s%%#\t%%0," + " %%1, %s", + GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + ? "f" : "", + which_alternative == 0 + ? "%2" : "#0"); + output_asm_insn (pattern, operands); + return ""; +} + [(set (attr "type") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare")))]) + +(define_insn "neon_vc_fp16insn_unspec" + [(set + (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VH 1 "s_register_operand" "w,w") + (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")] + NEON_VCMP))] + "TARGET_NEON_FP16INST" +{ + char pattern[100]; + sprintf (pattern, "vc.f%%#\t%%0," + " %%1, %s", + which_alternative == 0 + ? "%2" : "#0"); + output_asm_insn (pattern, operands); + return ""; +} + [(set_attr "type" "neon_fp_compare_s")]) + (define_insn "neon_vcu" [(set (match_operand: 0 "s_register_operand" "=w") (neg: @@ -2431,6 +2661,60 @@ [(set_attr "type" "neon_fp_compare_s")] ) +(define_expand "neon_vca" + [(set + (match_operand: 0 "s_register_operand") + (neg: + (GLTE: + (abs:VH (match_operand:VH 1 "s_register_operand")) + (abs:VH (match_operand:VH 2 "s_register_operand")))))] + "TARGET_NEON_FP16INST" +{ + if (flag_unsafe_math_optimizations) + emit_insn (gen_neon_vca_fp16insn + (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vca_fp16insn_unspec + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "neon_vca_fp16insn" + [(set + (match_operand: 0 "s_register_operand" "=w") + (neg: + (GLTE: + (abs:VH (match_operand:VH 1 "s_register_operand" "w")) + (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))] + "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" + "vac.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] +) + +(define_insn "neon_vca_fp16insn_unspec" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")] + NEON_VAGLTE))] + "TARGET_NEON" + "vac.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] +) + +(define_expand "neon_vcz" + [(set + (match_operand: 0 "s_register_operand") + (COMPARISONS: + (match_operand:VH 1 "s_register_operand") + (const_int 0)))] + "TARGET_NEON_FP16INST" + { + emit_insn (gen_neon_vc (operands[0], operands[1], + CONST0_RTX (mode))); + DONE; +}) + (define_insn "neon_vtst" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") @@ -2451,6 +2735,16 @@ [(set_attr "type" "neon_abd")] ) +(define_insn "neon_vabd" + [(set (match_operand:VH 0 "s_register_operand" "=w") + (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")] + UNSPEC_VABD_F))] + "TARGET_NEON_FP16INST" + "vabd.\t%0, %1, %2" + [(set_attr "type" "neon_abd")] +) + (define_insn "neon_vabdf" [(set (match_operand:VCVTF 0 
"s_register_operand" "=w") (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") @@ -2513,6 +2807,40 @@ [(set_attr "type" "neon_fp_minmax_s")] ) +(define_insn "neon_vf" + [(set (match_operand:VH 0 "s_register_operand" "=w") + (unspec:VH + [(match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")] + VMAXMINF))] + "TARGET_NEON_FP16INST" + "v.\t%0, %1, %2" + [(set_attr "type" "neon_fp_minmax_s")] +) + +(define_insn "neon_vpfv4hf" + [(set (match_operand:V4HF 0 "s_register_operand" "=w") + (unspec:V4HF + [(match_operand:V4HF 1 "s_register_operand" "w") + (match_operand:V4HF 2 "s_register_operand" "w")] + VPMAXMINF))] + "TARGET_NEON_FP16INST" + "vp.f16\t%P0, %P1, %P2" + [(set_attr "type" "neon_reduc_minmax")] +) + +(define_insn "neon_" + [(set + (match_operand:VH 0 "s_register_operand" "=w") + (unspec:VH + [(match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")] + VMAXMINFNM))] + "TARGET_NEON_FP16INST" + ".\t%0, %1, %2" + [(set_attr "type" "neon_fp_minmax_s")] +) + ;; Vector forms for the IEEE-754 fmax()/fmin() functions (define_insn "3" [(set (match_operand:VCVTF 0 "s_register_operand" "=w") @@ -2584,6 +2912,17 @@ [(set_attr "type" "neon_fp_recps_s")] ) +(define_insn "neon_vrecps" + [(set + (match_operand:VH 0 "s_register_operand" "=w") + (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")] + UNSPEC_VRECPS))] + "TARGET_NEON_FP16INST" + "vrecps.\t%0, %1, %2" + [(set_attr "type" "neon_fp_recps_s")] +) + (define_insn "neon_vrsqrts" [(set (match_operand:VCVTF 0 "s_register_operand" "=w") (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") @@ -2594,6 +2933,17 @@ [(set_attr "type" "neon_fp_rsqrts_s")] ) +(define_insn "neon_vrsqrts" + [(set + (match_operand:VH 0 "s_register_operand" "=w") + (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")] + UNSPEC_VRSQRTS))] + "TARGET_NEON_FP16INST" + "vrsqrts.\t%0, %1, %2" + [(set_attr "type" "neon_fp_rsqrts_s")] +) + (define_expand "neon_vabs" [(match_operand:VDQW 0 "s_register_operand" "") (match_operand:VDQW 1 "s_register_operand" "")] @@ -2709,6 +3059,15 @@ }) (define_insn "neon_vrecpe" + [(set (match_operand:VH 0 "s_register_operand" "=w") + (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")] + UNSPEC_VRECPE))] + "TARGET_NEON_FP16INST" + "vrecpe.f16\t%0, %1" + [(set_attr "type" "neon_fp_recpe_s")] +) + +(define_insn "neon_vrecpe" [(set (match_operand:V32 0 "s_register_operand" "=w") (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] UNSPEC_VRECPE))] @@ -3045,6 +3404,28 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_dup")] ) +(define_insn "neon_vdup_lane_internal" + [(set (match_operand:VH 0 "s_register_operand" "=w") + (vec_duplicate:VH + (vec_select: + (match_operand: 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON && TARGET_FP16" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + if () + return "vdup.\t%P0, %P1[%c2]"; + else + return "vdup.\t%q0, %P1[%c2]"; +} + [(set_attr "type" "neon_dup")] +) + (define_expand "neon_vdup_lane" [(match_operand:VDQW 0 "s_register_operand" "=w") (match_operand: 1 "s_register_operand" "w") @@ -3064,6 +3445,25 @@ if (BYTES_BIG_ENDIAN) DONE; }) +(define_expand "neon_vdup_lane" + [(match_operand:VH 0 "s_register_operand") + (match_operand: 1 
"s_register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_NEON && TARGET_FP16" +{ + if (BYTES_BIG_ENDIAN) + { + unsigned int elt = INTVAL (operands[2]); + unsigned int reg_nelts + = 64 / GET_MODE_UNIT_BITSIZE (mode); + elt ^= reg_nelts - 1; + operands[2] = GEN_INT (elt); + } + emit_insn (gen_neon_vdup_lane_internal (operands[0], operands[1], + operands[2])); + DONE; +}) + ; Scalar index is ignored, since only zero is valid here. (define_expand "neon_vdup_lanedi" [(match_operand:DI 0 "s_register_operand" "=w") @@ -3210,6 +3610,28 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_fp_cvt_narrow_s_q")] ) +(define_insn "neon_vcvt" + [(set + (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VCVTHI 1 "s_register_operand" "w")] + VCVT_US))] + "TARGET_NEON_FP16INST" + "vcvt.f16.%#16\t%0, %1" + [(set_attr "type" "neon_int_to_fp_")] +) + +(define_insn "neon_vcvt" + [(set + (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VH 1 "s_register_operand" "w")] + VCVT_US))] + "TARGET_NEON_FP16INST" + "vcvt.%#16.f16\t%0, %1" + [(set_attr "type" "neon_fp_to_int_")] +) + (define_insn "neon_vcvt_n" [(set (match_operand: 0 "s_register_operand" "=w") (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") @@ -3224,6 +3646,20 @@ if (BYTES_BIG_ENDIAN) ) (define_insn "neon_vcvt_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VH 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VCVT_US_N))] + "TARGET_NEON_FP16INST" +{ + neon_const_bounds (operands[2], 0, 17); + return "vcvt.%#16.f16\t%0, %1, %2"; +} + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_insn "neon_vcvt_n" [(set (match_operand: 0 "s_register_operand" "=w") (unspec: [(match_operand:VCVTI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i")] @@ -3236,6 +3672,31 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_int_to_fp_")] ) +(define_insn "neon_vcvt_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VCVTHI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VCVT_US_N))] + "TARGET_NEON_FP16INST" +{ + neon_const_bounds (operands[2], 0, 17); + return "vcvt.f16.%#16\t%0, %1, %2"; +} + [(set_attr "type" "neon_int_to_fp_")] +) + +(define_insn "neon_vcvt" + [(set + (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VH 1 "s_register_operand" "w")] + VCVT_HF_US))] + "TARGET_NEON_FP16INST" + "vcvt.%#16.f16\t%0, %1" + [(set_attr "type" "neon_fp_to_int_")] +) + (define_insn "neon_vmovn" [(set (match_operand: 0 "s_register_operand" "=w") (unspec: [(match_operand:VN 1 "s_register_operand" "w")] @@ -3306,6 +3767,18 @@ if (BYTES_BIG_ENDIAN) (const_string "neon_mul__scalar")))] ) +(define_insn "neon_vmul_lane" + [(set (match_operand:VH 0 "s_register_operand" "=w") + (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") + (match_operand:V4HF 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMUL_LANE))] + "TARGET_NEON_FP16INST" + "vmul.f16\t%0, %1, %P2[%c3]" + [(set_attr "type" "neon_fp_mul_s_scalar")] +) + (define_insn "neon_vmull_lane" [(set (match_operand: 0 "s_register_operand" "=w") (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") @@ -3560,6 +4033,19 @@ if (BYTES_BIG_ENDIAN) DONE; }) +(define_expand "neon_vmul_n" + [(match_operand:VH 0 "s_register_operand") + (match_operand:VH 1 "s_register_operand") + (match_operand: 2 "s_register_operand")] + "TARGET_NEON_FP16INST" +{ + rtx tmp = 
gen_reg_rtx (V4HFmode); + emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmul_lane (operands[0], operands[1], tmp, + const0_rtx)); + DONE; +}) + (define_expand "neon_vmulls_n" [(match_operand: 0 "s_register_operand" "") (match_operand:VMDI 1 "s_register_operand" "") @@ -4281,25 +4767,25 @@ if (BYTES_BIG_ENDIAN) (define_expand "neon_vtrn_internal" [(parallel - [(set (match_operand:VDQW 0 "s_register_operand" "") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") - (match_operand:VDQW 2 "s_register_operand" "")] + [(set (match_operand:VDQWH 0 "s_register_operand") + (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")] UNSPEC_VTRN1)) - (set (match_operand:VDQW 3 "s_register_operand" "") - (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] + (set (match_operand:VDQWH 3 "s_register_operand") + (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] "TARGET_NEON" "" ) ;; Note: Different operand numbering to handle tied registers correctly. (define_insn "*neon_vtrn_insn" - [(set (match_operand:VDQW 0 "s_register_operand" "=&w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 3 "s_register_operand" "2")] - UNSPEC_VTRN1)) - (set (match_operand:VDQW 2 "s_register_operand" "=&w") - (unspec:VDQW [(match_dup 1) (match_dup 3)] - UNSPEC_VTRN2))] + [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") + (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") + (match_operand:VDQWH 3 "s_register_operand" "2")] + UNSPEC_VTRN1)) + (set (match_operand:VDQWH 2 "s_register_operand" "=&w") + (unspec:VDQWH [(match_dup 1) (match_dup 3)] + UNSPEC_VTRN2))] "TARGET_NEON" "vtrn.\t%0, %2" [(set_attr "type" "neon_permute")] @@ -4307,25 +4793,25 @@ if (BYTES_BIG_ENDIAN) (define_expand "neon_vzip_internal" [(parallel - [(set (match_operand:VDQW 0 "s_register_operand" "") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") - (match_operand:VDQW 2 "s_register_operand" "")] - UNSPEC_VZIP1)) - (set (match_operand:VDQW 3 "s_register_operand" "") - (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] + [(set (match_operand:VDQWH 0 "s_register_operand") + (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")] + UNSPEC_VZIP1)) + (set (match_operand:VDQWH 3 "s_register_operand") + (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] "TARGET_NEON" "" ) ;; Note: Different operand numbering to handle tied registers correctly. 
(define_insn "*neon_vzip_insn" - [(set (match_operand:VDQW 0 "s_register_operand" "=&w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 3 "s_register_operand" "2")] - UNSPEC_VZIP1)) - (set (match_operand:VDQW 2 "s_register_operand" "=&w") - (unspec:VDQW [(match_dup 1) (match_dup 3)] - UNSPEC_VZIP2))] + [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") + (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") + (match_operand:VDQWH 3 "s_register_operand" "2")] + UNSPEC_VZIP1)) + (set (match_operand:VDQWH 2 "s_register_operand" "=&w") + (unspec:VDQWH [(match_dup 1) (match_dup 3)] + UNSPEC_VZIP2))] "TARGET_NEON" "vzip.\t%0, %2" [(set_attr "type" "neon_zip")] @@ -4333,25 +4819,25 @@ if (BYTES_BIG_ENDIAN) (define_expand "neon_vuzp_internal" [(parallel - [(set (match_operand:VDQW 0 "s_register_operand" "") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") - (match_operand:VDQW 2 "s_register_operand" "")] + [(set (match_operand:VDQWH 0 "s_register_operand") + (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")] UNSPEC_VUZP1)) - (set (match_operand:VDQW 3 "s_register_operand" "") - (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] + (set (match_operand:VDQWH 3 "s_register_operand" "") + (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] "TARGET_NEON" "" ) ;; Note: Different operand numbering to handle tied registers correctly. (define_insn "*neon_vuzp_insn" - [(set (match_operand:VDQW 0 "s_register_operand" "=&w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 3 "s_register_operand" "2")] - UNSPEC_VUZP1)) - (set (match_operand:VDQW 2 "s_register_operand" "=&w") - (unspec:VDQW [(match_dup 1) (match_dup 3)] - UNSPEC_VUZP2))] + [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") + (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") + (match_operand:VDQWH 3 "s_register_operand" "2")] + UNSPEC_VUZP1)) + (set (match_operand:VDQWH 2 "s_register_operand" "=&w") + (unspec:VDQWH [(match_dup 1) (match_dup 3)] + UNSPEC_VUZP2))] "TARGET_NEON" "vuzp.\t%0, %2" [(set_attr "type" "neon_zip")] diff --git a/gcc/config/arm/t-aprofile b/gcc/config/arm/t-aprofile index 90305e1..f852ecd 100644 --- a/gcc/config/arm/t-aprofile +++ b/gcc/config/arm/t-aprofile @@ -99,6 +99,8 @@ MULTILIB_MATCHES += march?armv8-a=mcpu?xgene1 MULTILIB_MATCHES += march?armv8-a=march?armv8-a+crc MULTILIB_MATCHES += march?armv8-a=march?armv8.1-a MULTILIB_MATCHES += march?armv8-a=march?armv8.1-a+crc +MULTILIB_MATCHES += march?armv8-a=march?armv8.2-a +MULTILIB_MATCHES += march?armv8-a=march?armv8.2-a+fp16 # FPU matches MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 749a58d..803baa2 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -95,7 +95,8 @@ arm.o: $(srcdir)/config/arm/arm.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(srcdir)/config/arm/arm-cores.def \ $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \ $(srcdir)/config/arm/arm-protos.h \ - $(srcdir)/config/arm/arm_neon_builtins.def + $(srcdir)/config/arm/arm_neon_builtins.def \ + $(srcdir)/config/arm/arm_vfp_builtins.def arm-builtins.o: $(srcdir)/config/arm/arm-builtins.c $(CONFIG_H) \ $(SYSTEM_H) coretypes.h $(TM_H) \ @@ -103,6 +104,7 @@ arm-builtins.o: $(srcdir)/config/arm/arm-builtins.c $(CONFIG_H) \ $(DIAGNOSTIC_CORE_H) $(OPTABS_H) \ $(srcdir)/config/arm/arm-protos.h \ 
$(srcdir)/config/arm/arm_neon_builtins.def \ + $(srcdir)/config/arm/arm_vfp_builtins.def \ $(srcdir)/config/arm/arm-simd-builtin-types.def $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/arm/arm-builtins.c diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 5744c62..bee8795 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -191,6 +191,8 @@ UNSPEC_VBSL UNSPEC_VCAGE UNSPEC_VCAGT + UNSPEC_VCALE + UNSPEC_VCALT UNSPEC_VCEQ UNSPEC_VCGE UNSPEC_VCGEU @@ -203,6 +205,20 @@ UNSPEC_VCVT_U UNSPEC_VCVT_S_N UNSPEC_VCVT_U_N + UNSPEC_VCVT_HF_S_N + UNSPEC_VCVT_HF_U_N + UNSPEC_VCVT_SI_S_N + UNSPEC_VCVT_SI_U_N + UNSPEC_VCVTH_S + UNSPEC_VCVTH_U + UNSPEC_VCVTA_S + UNSPEC_VCVTA_U + UNSPEC_VCVTM_S + UNSPEC_VCVTM_U + UNSPEC_VCVTN_S + UNSPEC_VCVTN_U + UNSPEC_VCVTP_S + UNSPEC_VCVTP_U UNSPEC_VEXT UNSPEC_VHADD_S UNSPEC_VHADD_U @@ -244,6 +260,8 @@ UNSPEC_VMLSL_S_LANE UNSPEC_VMLSL_U_LANE UNSPEC_VMLSL_LANE + UNSPEC_VFMA_LANE + UNSPEC_VFMS_LANE UNSPEC_VMOVL_S UNSPEC_VMOVL_U UNSPEC_VMOVN @@ -365,5 +383,11 @@ UNSPEC_NVRINTN UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH + UNSPEC_VRND + UNSPEC_VRNDA + UNSPEC_VRNDI + UNSPEC_VRNDM + UNSPEC_VRNDN + UNSPEC_VRNDP + UNSPEC_VRNDX ]) - diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index ce98f71..645b01e 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -124,6 +124,20 @@ FAIL; }) +(define_expand "vec_perm_const" + [(match_operand:VH 0 "s_register_operand") + (match_operand:VH 1 "s_register_operand") + (match_operand:VH 2 "s_register_operand") + (match_operand: 3)] + "TARGET_NEON" +{ + if (arm_expand_vec_perm_const (operands[0], operands[1], + operands[2], operands[3])) + DONE; + else + FAIL; +}) + (define_expand "vec_perm" [(match_operand:VE 0 "s_register_operand" "") (match_operand:VE 1 "s_register_operand" "") diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 9750ba1..f39e590 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -18,6 +18,199 @@ ;; along with GCC; see the file COPYING3. If not see ;; . */ +;; Patterns for HI moves which provide more data transfer instructions when VFP +;; support is enabled. 
+(define_insn "*arm_movhi_vfp" + [(set + (match_operand:HI 0 "nonimmediate_operand" + "=rk, r, r, m, r, *t, r, *t") + (match_operand:HI 1 "general_operand" + "rIk, K, n, r, mi, r, *t, *t"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && !TARGET_VFP_FP16INST + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" +{ + switch (which_alternative) + { + case 0: + return "mov%?\t%0, %1\t%@ movhi"; + case 1: + return "mvn%?\t%0, #%B1\t%@ movhi"; + case 2: + return "movw%?\t%0, %L1\t%@ movhi"; + case 3: + return "strh%?\t%1, %0\t%@ movhi"; + case 4: + return "ldrh%?\t%0, %1\t%@ movhi"; + case 5: + case 6: + return "vmov%?\t%0, %1\t%@ int"; + case 7: + return "vmov%?.f32\t%0, %1\t%@ int"; + default: + gcc_unreachable (); + } +} + [(set_attr "predicable" "yes") + (set_attr_alternative "type" + [(if_then_else + (match_operand 1 "const_int_operand" "") + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") + (const_string "mov_imm") + (const_string "store1") + (const_string "load1") + (const_string "f_mcr") + (const_string "f_mrc") + (const_string "fmov")]) + (set_attr "arch" "*, *, v6t2, *, *, *, *, *") + (set_attr "pool_range" "*, *, *, *, 256, *, *, *") + (set_attr "neg_pool_range" "*, *, *, *, 244, *, *, *") + (set_attr "length" "4")] +) + +(define_insn "*thumb2_movhi_vfp" + [(set + (match_operand:HI 0 "nonimmediate_operand" + "=rk, r, l, r, m, r, *t, r, *t") + (match_operand:HI 1 "general_operand" + "rk, I, Py, n, r, m, r, *t, *t"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP + && !TARGET_VFP_FP16INST + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" +{ + switch (which_alternative) + { + case 0: + case 1: + case 2: + return "mov%?\t%0, %1\t%@ movhi"; + case 3: + return "movw%?\t%0, %L1\t%@ movhi"; + case 4: + return "strh%?\t%1, %0\t%@ movhi"; + case 5: + return "ldrh%?\t%0, %1\t%@ movhi"; + case 6: + case 7: + return "vmov%?\t%0, %1\t%@ int"; + case 8: + return "vmov%?.f32\t%0, %1\t%@ int"; + default: + gcc_unreachable (); + } +} + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" + "yes, no, yes, no, no, no, no, no, no") + (set_attr "type" + "mov_reg, mov_imm, mov_imm, mov_imm, store1, load1,\ + f_mcr, f_mrc, fmov") + (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *") + (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *") + (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *") + (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")] +) + +;; Patterns for HI moves which provide more data transfer instructions when FP16 +;; instructions are available. 
+(define_insn "*arm_movhi_fp16" + [(set + (match_operand:HI 0 "nonimmediate_operand" + "=r, r, r, m, r, *t, r, *t") + (match_operand:HI 1 "general_operand" + "rIk, K, n, r, mi, r, *t, *t"))] + "TARGET_ARM && TARGET_VFP_FP16INST + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" +{ + switch (which_alternative) + { + case 0: + return "mov%?\t%0, %1\t%@ movhi"; + case 1: + return "mvn%?\t%0, #%B1\t%@ movhi"; + case 2: + return "movw%?\t%0, %L1\t%@ movhi"; + case 3: + return "strh%?\t%1, %0\t%@ movhi"; + case 4: + return "ldrh%?\t%0, %1\t%@ movhi"; + case 5: + case 6: + return "vmov.f16\t%0, %1\t%@ int"; + case 7: + return "vmov%?.f32\t%0, %1\t%@ int"; + default: + gcc_unreachable (); + } +} + [(set_attr "predicable" "yes, yes, yes, yes, yes, no, no, yes") + (set_attr_alternative "type" + [(if_then_else + (match_operand 1 "const_int_operand" "") + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") + (const_string "mov_imm") + (const_string "store1") + (const_string "load1") + (const_string "f_mcr") + (const_string "f_mrc") + (const_string "fmov")]) + (set_attr "arch" "*, *, v6t2, *, *, *, *, *") + (set_attr "pool_range" "*, *, *, *, 256, *, *, *") + (set_attr "neg_pool_range" "*, *, *, *, 244, *, *, *") + (set_attr "length" "4")] +) + +(define_insn "*thumb2_movhi_fp16" + [(set + (match_operand:HI 0 "nonimmediate_operand" + "=rk, r, l, r, m, r, *t, r, *t") + (match_operand:HI 1 "general_operand" + "rk, I, Py, n, r, m, r, *t, *t"))] + "TARGET_THUMB2 && TARGET_VFP_FP16INST + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" +{ + switch (which_alternative) + { + case 0: + case 1: + case 2: + return "mov%?\t%0, %1\t%@ movhi"; + case 3: + return "movw%?\t%0, %L1\t%@ movhi"; + case 4: + return "strh%?\t%1, %0\t%@ movhi"; + case 5: + return "ldrh%?\t%0, %1\t%@ movhi"; + case 6: + case 7: + return "vmov.f16\t%0, %1\t%@ int"; + case 8: + return "vmov%?.f32\t%0, %1\t%@ int"; + default: + gcc_unreachable (); + } +} + [(set_attr "predicable" + "yes, yes, yes, yes, yes, yes, no, no, yes") + (set_attr "predicable_short_it" + "yes, no, yes, no, no, no, no, no, no") + (set_attr "type" + "mov_reg, mov_imm, mov_imm, mov_imm, store1, load1,\ + f_mcr, f_mrc, fmov") + (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *") + (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *") + (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *") + (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")] +) + ;; SImode moves ;; ??? For now do not allow loading constants into vfp regs. This causes ;; problems because small constants get converted into adds. @@ -53,7 +246,8 @@ } " [(set_attr "predicable" "yes") - (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") + (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1, + f_mcr,f_mrc,fmov,f_loads,f_stores") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] ) @@ -211,10 +405,87 @@ ) ;; HFmode moves + +(define_insn "*movhf_vfp_fp16" + [(set (match_operand:HF 0 "nonimmediate_operand" + "= r,m,t,r,t,r,t,t,Um,r") + (match_operand:HF 1 "general_operand" + " m,r,t,r,r,t,Dv,Um,t,F"))] + "TARGET_32BIT + && TARGET_VFP_FP16INST + && (s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + { + switch (which_alternative) + { + case 0: /* ARM register from memory. */ + return \"ldrh%?\\t%0, %1\\t%@ __fp16\"; + case 1: /* Memory from ARM register. 
*/ + return \"strh%?\\t%1, %0\\t%@ __fp16\"; + case 2: /* S register from S register. */ + return \"vmov\\t%0, %1\t%@ __fp16\"; + case 3: /* ARM register from ARM register. */ + return \"mov%?\\t%0, %1\\t%@ __fp16\"; + case 4: /* S register from ARM register. */ + case 5: /* ARM register from S register. */ + case 6: /* S register from immediate. */ + return \"vmov.f16\\t%0, %1\t%@ __fp16\"; + case 7: /* S register from memory. */ + return \"vld1.16\\t{%z0}, %A1\"; + case 8: /* Memory from S register. */ + return \"vst1.16\\t{%z1}, %A0\"; + case 9: /* ARM register from constant. */ + { + long bits; + rtx ops[4]; + + bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]), + HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + } + [(set_attr "predicable" "yes, yes, no, yes, no, no, no, no, no, no") + (set_attr "predicable_short_it" "no, no, no, yes,\ + no, no, no, no,\ + no, no") + (set_attr_alternative "type" + [(const_string "load1") (const_string "store1") + (const_string "fmov") (const_string "mov_reg") + (const_string "f_mcr") (const_string "f_mrc") + (const_string "fconsts") (const_string "neon_load1_1reg") + (const_string "neon_store1_1reg") + (if_then_else (match_test "arm_arch_thumb2") + (const_string "mov_imm") + (const_string "multiple"))]) + (set_attr_alternative "length" + [(const_int 4) (const_int 4) + (const_int 4) (const_int 4) + (const_int 4) (const_int 4) + (const_int 4) (const_int 4) + (const_int 4) + (if_then_else (match_test "arm_arch_thumb2") + (const_int 4) + (const_int 8))])] +) + (define_insn "*movhf_vfp_neon" [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r") (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16 + "TARGET_32BIT + && TARGET_HARD_FLOAT && TARGET_NEON_FP16 + && !TARGET_VFP_FP16INST && ( s_register_operand (operands[0], HFmode) || s_register_operand (operands[1], HFmode))" "* @@ -268,8 +539,10 @@ (define_insn "*movhf_vfp" [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r") (match_operand:HF 1 "general_operand" " m,r,t,r,r,t,F"))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP + "TARGET_32BIT + && TARGET_HARD_FLOAT && TARGET_VFP && !TARGET_NEON_FP16 + && !TARGET_VFP_FP16INST && ( s_register_operand (operands[0], HFmode) || s_register_operand (operands[1], HFmode))" "* @@ -670,9 +943,63 @@ (set_attr "type" "ffarithd")] ) +;; ABS and NEG for FP16. +(define_insn "hf2" + [(set (match_operand:HF 0 "s_register_operand" "=w") + (ABSNEG:HF (match_operand:HF 1 "s_register_operand" "w")))] + "TARGET_VFP_FP16INST" + "v.f16\t%0, %1" + [(set_attr "conds" "unconditional") + (set_attr "type" "ffariths")] +) + +(define_expand "neon_vabshf" + [(set + (match_operand:HF 0 "s_register_operand") + (abs:HF (match_operand:HF 1 "s_register_operand")))] + "TARGET_VFP_FP16INST" +{ + emit_insn (gen_abshf2 (operands[0], operands[1])); + DONE; +}) + +;; VRND for FP16. 
+(define_insn "neon_vhf" + [(set (match_operand:HF 0 "s_register_operand" "=w") + (unspec:HF + [(match_operand:HF 1 "s_register_operand" "w")] + FP16_RND))] + "TARGET_VFP_FP16INST" + ".f16\t%0, %1" + [(set_attr "conds" "unconditional") + (set_attr "type" "neon_fp_round_s")] +) + +(define_insn "neon_vrndihf" + [(set (match_operand:HF 0 "s_register_operand" "=w") + (unspec:HF + [(match_operand:HF 1 "s_register_operand" "w")] + UNSPEC_VRNDI))] + "TARGET_VFP_FP16INST" + "vrintr.f16\t%0, %1" + [(set_attr "conds" "unconditional") + (set_attr "type" "neon_fp_round_s")] +) ;; Arithmetic insns +(define_insn "addhf3" + [(set + (match_operand:HF 0 "s_register_operand" "=w") + (plus:HF + (match_operand:HF 1 "s_register_operand" "w") + (match_operand:HF 2 "s_register_operand" "w")))] + "TARGET_VFP_FP16INST" + "vadd.f16\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "fadds")] +) + (define_insn "*addsf3_vfp" [(set (match_operand:SF 0 "s_register_operand" "=t") (plus:SF (match_operand:SF 1 "s_register_operand" "t") @@ -695,6 +1022,17 @@ (set_attr "type" "faddd")] ) +(define_insn "subhf3" + [(set + (match_operand:HF 0 "s_register_operand" "=w") + (minus:HF + (match_operand:HF 1 "s_register_operand" "w") + (match_operand:HF 2 "s_register_operand" "w")))] + "TARGET_VFP_FP16INST" + "vsub.f16\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "fadds")] +) (define_insn "*subsf3_vfp" [(set (match_operand:SF 0 "s_register_operand" "=t") @@ -721,6 +1059,19 @@ ;; Division insns +;; FP16 Division. +(define_insn "divhf3" + [(set + (match_operand:HF 0 "s_register_operand" "=w") + (div:HF + (match_operand:HF 1 "s_register_operand" "w") + (match_operand:HF 2 "s_register_operand" "w")))] + "TARGET_VFP_FP16INST" + "vdiv.f16\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "fdivs")] +) + ; VFP9 Erratum 760019: It's potentially unsafe to overwrite the input ; operands, so mark the output as early clobber for VFPv2 on ARMv5 or ; earlier. 
@@ -751,6 +1102,17 @@ ;; Multiplication insns +(define_insn "mulhf3" + [(set + (match_operand:HF 0 "s_register_operand" "=w") + (mult:HF (match_operand:HF 1 "s_register_operand" "w") + (match_operand:HF 2 "s_register_operand" "w")))] + "TARGET_VFP_FP16INST" + "vmul.f16\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "fmuls")] +) + (define_insn "*mulsf3_vfp" [(set (match_operand:SF 0 "s_register_operand" "=t") (mult:SF (match_operand:SF 1 "s_register_operand" "t") @@ -773,6 +1135,26 @@ (set_attr "type" "fmuld")] ) +(define_insn "*mulsf3neghf_vfp" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (mult:HF (neg:HF (match_operand:HF 1 "s_register_operand" "t")) + (match_operand:HF 2 "s_register_operand" "t")))] + "TARGET_VFP_FP16INST && !flag_rounding_math" + "vnmul.f16\\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "fmuls")] +) + +(define_insn "*negmulhf3_vfp" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (neg:HF (mult:HF (match_operand:HF 1 "s_register_operand" "t") + (match_operand:HF 2 "s_register_operand" "t"))))] + "TARGET_VFP_FP16INST" + "vnmul.f16\\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "fmuls")] +) + (define_insn "*mulsf3negsf_vfp" [(set (match_operand:SF 0 "s_register_operand" "=t") (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t")) @@ -822,6 +1204,18 @@ ;; Multiply-accumulate insns ;; 0 = 1 * 2 + 0 +(define_insn "*mulsf3addhf_vfp" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (plus:HF + (mult:HF (match_operand:HF 2 "s_register_operand" "t") + (match_operand:HF 3 "s_register_operand" "t")) + (match_operand:HF 1 "s_register_operand" "0")))] + "TARGET_VFP_FP16INST" + "vmla.f16\\t%0, %2, %3" + [(set_attr "conds" "unconditional") + (set_attr "type" "fmacs")] +) + (define_insn "*mulsf3addsf_vfp" [(set (match_operand:SF 0 "s_register_operand" "=t") (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") @@ -847,6 +1241,17 @@ ) ;; 0 = 1 * 2 - 0 +(define_insn "*mulhf3subhf_vfp" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (minus:HF (mult:HF (match_operand:HF 2 "s_register_operand" "t") + (match_operand:HF 3 "s_register_operand" "t")) + (match_operand:HF 1 "s_register_operand" "0")))] + "TARGET_VFP_FP16INST" + "vnmls.f16\\t%0, %2, %3" + [(set_attr "conds" "unconditional") + (set_attr "type" "fmacs")] +) + (define_insn "*mulsf3subsf_vfp" [(set (match_operand:SF 0 "s_register_operand" "=t") (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") @@ -872,6 +1277,17 @@ ) ;; 0 = -(1 * 2) + 0 +(define_insn "*mulhf3neghfaddhf_vfp" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (minus:HF (match_operand:HF 1 "s_register_operand" "0") + (mult:HF (match_operand:HF 2 "s_register_operand" "t") + (match_operand:HF 3 "s_register_operand" "t"))))] + "TARGET_VFP_FP16INST" + "vmls.f16\\t%0, %2, %3" + [(set_attr "conds" "unconditional") + (set_attr "type" "fmacs")] +) + (define_insn "*mulsf3negsfaddsf_vfp" [(set (match_operand:SF 0 "s_register_operand" "=t") (minus:SF (match_operand:SF 1 "s_register_operand" "0") @@ -898,6 +1314,18 @@ ;; 0 = -(1 * 2) - 0 +(define_insn "*mulhf3neghfsubhf_vfp" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (minus:HF (mult:HF + (neg:HF (match_operand:HF 2 "s_register_operand" "t")) + (match_operand:HF 3 "s_register_operand" "t")) + (match_operand:HF 1 "s_register_operand" "0")))] + "TARGET_VFP_FP16INST" + "vnmla.f16\\t%0, %2, %3" + [(set_attr "conds" "unconditional") + (set_attr "type" "fmacs")] +) + (define_insn 
"*mulsf3negsfsubsf_vfp" [(set (match_operand:SF 0 "s_register_operand" "=t") (minus:SF (mult:SF @@ -926,6 +1354,30 @@ ;; Fused-multiply-accumulate +(define_insn "fmahf4" + [(set (match_operand:HF 0 "register_operand" "=w") + (fma:HF + (match_operand:HF 1 "register_operand" "w") + (match_operand:HF 2 "register_operand" "w") + (match_operand:HF 3 "register_operand" "0")))] + "TARGET_VFP_FP16INST" + "vfma.f16\\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "ffmas")] +) + +(define_expand "neon_vfmahf" + [(match_operand:HF 0 "s_register_operand") + (match_operand:HF 1 "s_register_operand") + (match_operand:HF 2 "s_register_operand") + (match_operand:HF 3 "s_register_operand")] + "TARGET_VFP_FP16INST" +{ + emit_insn (gen_fmahf4 (operands[0], operands[2], operands[3], + operands[1])); + DONE; +}) + (define_insn "fma4" [(set (match_operand:SDF 0 "register_operand" "=") (fma:SDF (match_operand:SDF 1 "register_operand" "") @@ -938,6 +1390,30 @@ (set_attr "type" "ffma")] ) +(define_insn "fmsubhf4_fp16" + [(set (match_operand:HF 0 "register_operand" "=w") + (fma:HF + (neg:HF (match_operand:HF 1 "register_operand" "w")) + (match_operand:HF 2 "register_operand" "w") + (match_operand:HF 3 "register_operand" "0")))] + "TARGET_VFP_FP16INST" + "vfms.f16\\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "ffmas")] +) + +(define_expand "neon_vfmshf" + [(match_operand:HF 0 "s_register_operand") + (match_operand:HF 1 "s_register_operand") + (match_operand:HF 2 "s_register_operand") + (match_operand:HF 3 "s_register_operand")] + "TARGET_VFP_FP16INST" +{ + emit_insn (gen_fmsubhf4_fp16 (operands[0], operands[2], operands[3], + operands[1])); + DONE; +}) + (define_insn "*fmsub4" [(set (match_operand:SDF 0 "register_operand" "=") (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" @@ -951,6 +1427,17 @@ (set_attr "type" "ffma")] ) +(define_insn "*fnmsubhf4" + [(set (match_operand:HF 0 "register_operand" "=w") + (fma:HF (match_operand:HF 1 "register_operand" "w") + (match_operand:HF 2 "register_operand" "w") + (neg:HF (match_operand:HF 3 "register_operand" "0"))))] + "TARGET_VFP_FP16INST" + "vfnms.f16\\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "ffmas")] +) + (define_insn "*fnmsub4" [(set (match_operand:SDF 0 "register_operand" "=") (fma:SDF (match_operand:SDF 1 "register_operand" "") @@ -963,6 +1450,17 @@ (set_attr "type" "ffma")] ) +(define_insn "*fnmaddhf4" + [(set (match_operand:HF 0 "register_operand" "=w") + (fma:HF (neg:HF (match_operand:HF 1 "register_operand" "w")) + (match_operand:HF 2 "register_operand" "w") + (neg:HF (match_operand:HF 3 "register_operand" "0"))))] + "TARGET_VFP_FP16INST" + "vfnma.f16\\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "ffmas")] +) + (define_insn "*fnmadd4" [(set (match_operand:SDF 0 "register_operand" "=") (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" @@ -1002,7 +1500,7 @@ (define_insn "extendhfsf2" [(set (match_operand:SF 0 "s_register_operand" "=t") (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FP16 || TARGET_VFP_FP16INST)" "vcvtb%?.f32.f16\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") @@ -1012,7 +1510,7 @@ (define_insn "truncsfhf2" [(set (match_operand:HF 0 "s_register_operand" "=t") (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + 
"TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FP16 || TARGET_VFP_FP16INST)" "vcvtb%?.f16.f32\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") @@ -1105,6 +1603,27 @@ ;; Sqrt insns. +(define_insn "neon_vsqrthf" + [(set (match_operand:HF 0 "s_register_operand" "=w") + (sqrt:HF (match_operand:HF 1 "s_register_operand" "w")))] + "TARGET_VFP_FP16INST" + "vsqrt.f16\t%0, %1" + [(set_attr "conds" "unconditional") + (set_attr "type" "fsqrts")] +) + +(define_insn "neon_vrsqrtshf" + [(set + (match_operand:HF 0 "s_register_operand" "=w") + (unspec:HF [(match_operand:HF 1 "s_register_operand" "w") + (match_operand:HF 2 "s_register_operand" "w")] + UNSPEC_VRSQRTS))] + "TARGET_VFP_FP16INST" + "vrsqrts.f16\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "fsqrts")] +) + ; VFP9 Erratum 760019: It's potentially unsafe to overwrite the input ; operands, so mark the output as early clobber for VFPv2 on ARMv5 or ; earlier. @@ -1261,9 +1780,6 @@ ) ;; Fixed point to floating point conversions. -(define_code_iterator FCVT [unsigned_float float]) -(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")]) - (define_insn "*combine_vcvt_f32_" [(set (match_operand:SF 0 "s_register_operand" "=t") (mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0")) @@ -1308,6 +1824,125 @@ (set_attr "type" "f_cvtf2i")] ) +;; FP16 conversions. +(define_insn "neon_vcvthhf" + [(set (match_operand:HF 0 "s_register_operand" "=w") + (unspec:HF + [(match_operand:SI 1 "s_register_operand" "w")] + VCVTH_US))] + "TARGET_VFP_FP16INST" + "vcvt.f16.%#32\t%0, %1" + [(set_attr "conds" "unconditional") + (set_attr "type" "f_cvti2f")] +) + +(define_insn "neon_vcvthsi" + [(set (match_operand:SI 0 "s_register_operand" "=w") + (unspec:SI + [(match_operand:HF 1 "s_register_operand" "w")] + VCVTH_US))] + "TARGET_VFP_FP16INST" + "vcvt.%#32.f16\t%0, %1" + [(set_attr "conds" "unconditional") + (set_attr "type" "f_cvtf2i")] +) + +;; The neon_vcvth_nhf patterns are used to generate the instruction for the +;; vcvth_n_f16_32 arm_fp16 intrinsics. They are complicated by the +;; hardware requirement that the source and destination registers are the same +;; despite having different machine modes. The approach is to use a temporary +;; register for the conversion and move that to the correct destination. + +;; Generate an unspec pattern for the intrinsic. +(define_insn "neon_vcvth_nhf_unspec" + [(set + (match_operand:SI 0 "s_register_operand" "=w") + (unspec:SI + [(match_operand:SI 1 "s_register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] + VCVT_HF_US_N)) + (set + (match_operand:HF 3 "s_register_operand" "=w") + (float_truncate:HF (float:SF (match_dup 0))))] + "TARGET_VFP_FP16INST" +{ + neon_const_bounds (operands[2], 1, 33); + return "vcvt.f16.32\t%0, %0, %2\;vmov.f32\t%3, %0"; +} + [(set_attr "conds" "unconditional") + (set_attr "type" "f_cvti2f")] +) + +;; Generate the instruction patterns needed for vcvth_n_f16_s32 neon intrinsics. 
+(define_expand "neon_vcvth_nhf" + [(match_operand:HF 0 "s_register_operand") + (unspec:HF [(match_operand:SI 1 "s_register_operand") + (match_operand:SI 2 "immediate_operand")] + VCVT_HF_US_N)] +"TARGET_VFP_FP16INST" +{ + rtx op1 = gen_reg_rtx (SImode); + + neon_const_bounds (operands[2], 1, 33); + + emit_move_insn (op1, operands[1]); + emit_insn (gen_neon_vcvth_nhf_unspec (op1, op1, operands[2], + operands[0])); + DONE; +}) + +;; The neon_vcvth_nsi patterns are used to generate the instruction for the +;; vcvth_n_32_f16 arm_fp16 intrinsics. They have the same restrictions and +;; are implemented in the same way as the neon_vcvth_nhf patterns. + +;; Generate an unspec pattern, constraining the registers. +(define_insn "neon_vcvth_nsi_unspec" + [(set (match_operand:SI 0 "s_register_operand" "=w") + (unspec:SI + [(fix:SI + (fix:SF + (float_extend:SF + (match_operand:HF 1 "s_register_operand" "w")))) + (match_operand:SI 2 "immediate_operand" "i")] + VCVT_SI_US_N))] + "TARGET_VFP_FP16INST" +{ + neon_const_bounds (operands[2], 1, 33); + return "vmov.f32\t%0, %1\;vcvt.%#32.f16\t%0, %0, %2"; +} + [(set_attr "conds" "unconditional") + (set_attr "type" "f_cvtf2i")] +) + +;; Generate the instruction patterns needed for vcvth_n_f16_s32 neon intrinsics. +(define_expand "neon_vcvth_nsi" + [(match_operand:SI 0 "s_register_operand") + (unspec:SI + [(match_operand:HF 1 "s_register_operand") + (match_operand:SI 2 "immediate_operand")] + VCVT_SI_US_N)] + "TARGET_VFP_FP16INST" +{ + rtx op1 = gen_reg_rtx (SImode); + + neon_const_bounds (operands[2], 1, 33); + emit_insn (gen_neon_vcvth_nsi_unspec (op1, operands[1], operands[2])); + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_insn "neon_vcvthsi" + [(set + (match_operand:SI 0 "s_register_operand" "=w") + (unspec:SI + [(match_operand:HF 1 "s_register_operand" "w")] + VCVT_HF_US))] + "TARGET_VFP_FP16INST" + "vcvt.%#32.f16\t%0, %1" + [(set_attr "conds" "unconditional") + (set_attr "type" "f_cvtf2i")] +) + ;; Store multiple insn used in function prologue. (define_insn "*push_multi_vfp" [(match_parallel 2 "multi_register_push" @@ -1377,6 +2012,20 @@ ) ;; Scalar forms for the IEEE-754 fmax()/fmin() functions + +(define_insn "neon_hf" + [(set + (match_operand:HF 0 "s_register_operand" "=w") + (unspec:HF + [(match_operand:HF 1 "s_register_operand" "w") + (match_operand:HF 2 "s_register_operand" "w")] + VMAXMINFNM))] + "TARGET_VFP_FP16INST" + ".f16\t%0, %1, %2" + [(set_attr "conds" "unconditional") + (set_attr "type" "f_minmaxs")] +) + (define_insn "3" [(set (match_operand:SDF 0 "s_register_operand" "=") (unspec:SDF [(match_operand:SDF 1 "s_register_operand" "") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 34cb2c9..330a934 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -14017,6 +14017,19 @@ extensions. @option{-march=armv8-a+crc} enables code generation for the ARMv8-A architecture together with the optional CRC32 extensions. +@option{-march=armv8.1-a} enables compiler support for the ARMv8.1-A +architecture. This also enables the features provided by +@option{-march=armv8-a+crc}. + +@option{-march=armv8.2-a} enables compiler support for the ARMv8.2-A +architecture. This also enables the features provided by +@option{-march=armv8.1-a}. + +@option{-march=armv8.2-a+fp16} enables compiler support for the +ARMv8.2-A architecture with the optional FP16 instructions extension. +This also enables the features provided by @option{-march=armv8.1-a} +and implies @option{-mfp16-format=ieee}. 
+ @option{-march=native} causes the compiler to auto-detect the architecture of the build computer. At present, this feature is only supported on GNU/Linux, and not all architectures are recognized. If the auto-detect diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index e1c07bf..0df71c9 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1575,6 +1575,13 @@ options, including @code{-mfp16-format=ieee} if necessary to obtain the Test system supports executing Neon half-precision float instructions. (Implies previous.) +@item arm_fp16_alternative_ok +ARM target supports the ARM FP16 alternative format. Some multilibs +may be incompatible with the options needed. + +@item arm_fp16_none_ok +ARM target supports specifying none as the ARM FP16 format. + @item arm_thumb1_ok ARM target generates Thumb-1 code for @code{-mthumb}. @@ -1599,6 +1606,7 @@ ARM target supports @code{-mfpu=neon-fp-armv8 -mfloat-abi=softfp}. Some multilibs may be incompatible with these options. @item arm_v8_1a_neon_ok +@anchor{arm_v8_1a_neon_ok} ARM target supports options to generate ARMv8.1 Adv.SIMD instructions. Some multilibs may be incompatible with these options. @@ -1610,6 +1618,28 @@ arm_v8_1a_neon_ok. @item arm_acq_rel ARM target supports acquire-release instructions. +@item arm_v8_2a_fp16_scalar_ok +@anchor{arm_v8_2a_fp16_scalar_ok} +ARM target supports options to generate instructions for ARMv8.2 and +scalar instructions from the FP16 extension. Some multilibs may be +incompatible with these options. + +@item arm_v8_2a_fp16_scalar_hw +ARM target supports executing instructions for ARMv8.2 and scalar +instructions from the FP16 extension. Some multilibs may be +incompatible with these options. Implies arm_v8_2a_fp16_neon_ok. + +@item arm_v8_2a_fp16_neon_ok +@anchor{arm_v8_2a_fp16_neon_ok} +ARM target supports options to generate instructions from ARMv8.2 with +the FP16 extension. Some multilibs may be incompatible with these +options. Implies arm_v8_2a_fp16_scalar_ok. + +@item arm_v8_2a_fp16_neon_hw +ARM target supports executing instructions from ARMv8.2 with the FP16 +extension. Some multilibs may be incompatible with these options. +Implies arm_v8_2a_fp16_neon_ok and arm_v8_2a_fp16_scalar_hw. + @item arm_prefer_ldrd_strd ARM target prefers @code{LDRD} and @code{STRD} instructions over @code{LDM} and @code{STM} instructions. @@ -2118,6 +2148,23 @@ the @ref{arm_neon_fp16_ok,,arm_neon_fp16_ok effective target keyword}. arm vfp3 floating point support; see the @ref{arm_vfp3_ok,,arm_vfp3_ok effective target keyword}. +@item arm_v8_1a_neon +Add options for ARMv8.1 with Adv.SIMD support, if this is supported +by the target; see the @ref{arm_v8_1a_neon_ok,,arm_v8_1a_neon_ok} +effective target keyword. + +@item arm_v8_2a_fp16_scalar +Add options for ARMv8.2 with scalar FP16 support, if this is +supported by the target; see the +@ref{arm_v8_2a_fp16_scalar_ok,,arm_v8_2a_fp16_scalar_ok} effective +target keyword. + +@item arm_v8_2a_fp16_neon +Add options for ARMv8.2 with Adv.SIMD FP16 support, if this is +supported by the target; see the +@ref{arm_v8_2a_fp16_neon_ok,,arm_v8_2a_fp16_neon_ok} effective target +keyword. + @item bind_pic_locally Add the target-specific flags needed to enable functions to bind locally when using pic/PIC passes in the testsuite. 
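
Putting the new effective-target keywords and option sets together, a testcase that needs ARMv8.2-A Adv.SIMD FP16 support would start with directives like the following (a hypothetical test header, using only the directives and keywords documented above):

    /* { dg-do compile } */
    /* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
    /* { dg-add-options arm_v8_2a_fp16_neon } */

An execution test would use { dg-do run } and additionally require arm_v8_2a_fp16_neon_hw.
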
diff --git a/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-3.C b/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-3.C
index 8f9ab64..29080c7 100644
--- a/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-3.C
+++ b/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-3.C
@@ -1,5 +1,6 @@
 /* Test various operators on __fp16 and mixed __fp16/float operands.  */
 /* { dg-do run { target arm*-*-* } } */
+/* { dg-require-effective-target arm_fp16_alternative_ok } */
 /* { dg-options "-mfp16-format=alternative" } */

 #include "arm-fp16-ops.h"
diff --git a/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-4.C b/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-4.C
index 4877f39..4be8883 100644
--- a/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-4.C
+++ b/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-4.C
@@ -1,5 +1,6 @@
 /* Test various operators on __fp16 and mixed __fp16/float operands.  */
 /* { dg-do run { target arm*-*-* } } */
+/* { dg-require-effective-target arm_fp16_alternative_ok } */
 /* { dg-options "-mfp16-format=alternative -ffast-math" } */

 #include "arm-fp16-ops.h"
diff --git a/gcc/testsuite/gcc.dg/torture/arm-fp16-int-convert-alt.c b/gcc/testsuite/gcc.dg/torture/arm-fp16-int-convert-alt.c
index bcd7aef..7eb73e6 100644
--- a/gcc/testsuite/gcc.dg/torture/arm-fp16-int-convert-alt.c
+++ b/gcc/testsuite/gcc.dg/torture/arm-fp16-int-convert-alt.c
@@ -1,5 +1,6 @@
 /* Test floating-point conversions.  Standard types and __fp16.  */
 /* { dg-do run { target arm*-*-* } } */
+/* { dg-require-effective-target arm_fp16_alternative_ok } */
 /* { dg-options "-mfp16-format=alternative" } */

 #include "fp-int-convert.h"
diff --git a/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-3.c b/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-3.c
index 8f9ab64..7716baf 100644
--- a/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-3.c
+++ b/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-3.c
@@ -1,5 +1,6 @@
 /* Test various operators on __fp16 and mixed __fp16/float operands.  */
 /* { dg-do run { target arm*-*-* } } */
+/* { dg-require-effective-target arm_fp16_alternative_ok } */
 /* { dg-options "-mfp16-format=alternative" } */

 #include "arm-fp16-ops.h"
diff --git a/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-4.c b/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-4.c
index 4877f39..1940f43 100644
--- a/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-4.c
+++ b/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-4.c
@@ -1,5 +1,6 @@
 /* Test various operators on __fp16 and mixed __fp16/float operands.  */
 /* { dg-do run { target arm*-*-* } } */
+/* { dg-require-effective-target arm_fp16_alternative_ok } */
 /* { dg-options "-mfp16-format=alternative -ffast-math" } */

 #include "arm-fp16-ops.h"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
index ff39973..e93b8d5 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
@@ -53,7 +53,10 @@ torture-init
 set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS

 # Make sure Neon flags are provided, if necessary.  Use fp16 if we can.
-if {[check_effective_target_arm_neon_fp16_ok]} then {
+# Use fp16 arithmetic operations if the hardware supports it.
+if {[check_effective_target_arm_v8_2a_fp16_neon_hw]} then {
+    set additional_flags [add_options_for_arm_v8_2a_fp16_neon ""]
+} elseif {[check_effective_target_arm_neon_fp16_ok]} then {
     set additional_flags [add_options_for_arm_neon_fp16 ""]
 } else {
     set additional_flags [add_options_for_arm_neon ""]
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 3363a72..4621415 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -16,6 +16,14 @@ extern void *memset(void *, int, size_t);
 extern void *memcpy(void *, const void *, size_t);
 extern size_t strlen(const char *);

+/* Helper macro to select FP16 tests.  */
+#if (defined (__ARM_FP16_FORMAT_IEEE) \
+     || defined (__ARM_FP16_FORMAT_ALTERNATIVE))
+#define FP16_SUPPORTED (1)
+#else
+#undef FP16_SUPPORTED
+#endif
+
 /* Various string construction helpers.  */

 /*
@@ -511,15 +519,6 @@ static void clean_results (void)
 /* Helpers to initialize vectors.  */
 #define VDUP(VAR, Q, T1, T2, W, N, V)			\
   VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
-#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
-/* Work around that there is no vdup_n_f16 intrinsic.  */
-#define vdup_n_f16(VAL)		\
-  __extension__			\
-    ({				\
-      float16_t f = VAL;	\
-      vld1_dup_f16(&f);		\
-    })
-#endif

 #define VSET_LANE(VAR, Q, T1, T2, W, N, L, V)	\
   VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,	\
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_float.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_float.inc
new file mode 100644
index 0000000..cc1bfb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_float.inc
@@ -0,0 +1,170 @@
+/* Floating-point only version of binary_op_no64.inc template.  Currently only
+   float16_t is used.  */
+
+#include <math.h>
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  int i;
+
+  /* Basic test: z = INSN (x, y), then store the result.  */
+#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N)	\
+  TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)	\
+
+#ifdef HAS_FLOAT16_VARIANT
+  DECL_VARIABLE(vector, float, 16, 4);
+  DECL_VARIABLE(vector2, float, 16, 4);
+  DECL_VARIABLE(vector_res, float, 16, 4);
+
+  DECL_VARIABLE(vector, float, 16, 8);
+  DECL_VARIABLE(vector2, float, 16, 8);
+  DECL_VARIABLE(vector_res, float, 16, 8);
+#endif
+
+#ifdef HAS_FLOAT_VARIANT
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 2);
+
+  DECL_VARIABLE(vector, float, 32, 4);
+  DECL_VARIABLE(vector2, float, 32, 4);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+#endif
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+#ifdef HAS_FLOAT16_VARIANT
+  VLOAD(vector, buffer, , float, f, 16, 4);
+  VLOAD(vector, buffer, q, float, f, 16, 8);
+#endif
+#ifdef HAS_FLOAT_VARIANT
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+#endif
+
+  /* Choose init value arbitrarily, will be used as comparison value.
*/ +#ifdef HAS_FLOAT16_VARIANT + VDUP(vector2, , float, f, 16, 4, -15.5f); + VDUP(vector2, q, float, f, 16, 8, -14.5f); +#endif +#ifdef HAS_FLOAT_VARIANT + VDUP(vector2, , float, f, 32, 2, -15.5f); + VDUP(vector2, q, float, f, 32, 4, -14.5f); +#endif + +#ifdef HAS_FLOAT16_VARIANT +#define FLOAT16_VARIANT(MACRO, VAR) \ + MACRO(VAR, , float, f, 16, 4); \ + MACRO(VAR, q, float, f, 16, 8); +#else +#define FLOAT16_VARIANT(MACRO, VAR) +#endif + +#ifdef HAS_FLOAT_VARIANT +#define FLOAT_VARIANT(MACRO, VAR) \ + MACRO(VAR, , float, f, 32, 2); \ + MACRO(VAR, q, float, f, 32, 4); +#else +#define FLOAT_VARIANT(MACRO, VAR) +#endif + +#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR) \ + + /* Apply a binary operator named INSN_NAME. */ + FLOAT16_VARIANT(TEST_BINARY_OP, INSN_NAME); + FLOAT_VARIANT(TEST_BINARY_OP, INSN_NAME); + +#ifdef HAS_FLOAT16_VARIANT + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); + + /* Extra FP tests with special values (NaN, ....) */ + VDUP(vector, q, float, f, 16, 8, 1.0f); + VDUP(vector2, q, float, f, 16, 8, NAN); + TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_nan, + " FP special (NaN)"); + + VDUP(vector, q, float, f, 16, 8, -NAN); + VDUP(vector2, q, float, f, 16, 8, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_mnan, + " FP special (-NaN)"); + + VDUP(vector, q, float, f, 16, 8, 1.0f); + VDUP(vector2, q, float, f, 16, 8, HUGE_VALF); + TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_inf, + " FP special (inf)"); + + VDUP(vector, q, float, f, 16, 8, -HUGE_VALF); + VDUP(vector2, q, float, f, 16, 8, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_minf, + " FP special (-inf)"); + + VDUP(vector, q, float, f, 16, 8, 0.0f); + VDUP(vector2, q, float, f, 16, 8, -0.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_zero1, + " FP special (-0.0)"); + + VDUP(vector, q, float, f, 16, 8, -0.0f); + VDUP(vector2, q, float, f, 16, 8, 0.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_zero2, + " FP special (-0.0)"); +#endif + +#ifdef HAS_FLOAT_VARIANT + CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); + + /* Extra FP tests with special values (NaN, ....) 
*/ + VDUP(vector, q, float, f, 32, 4, 1.0f); + VDUP(vector2, q, float, f, 32, 4, NAN); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, " FP special (NaN)"); + + VDUP(vector, q, float, f, 32, 4, -NAN); + VDUP(vector2, q, float, f, 32, 4, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_mnan, " FP special (-NaN)"); + + VDUP(vector, q, float, f, 32, 4, 1.0f); + VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_inf, " FP special (inf)"); + + VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); + VDUP(vector2, q, float, f, 32, 4, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_minf, " FP special (-inf)"); + + VDUP(vector, q, float, f, 32, 4, 0.0f); + VDUP(vector2, q, float, f, 32, 4, -0.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero1, " FP special (-0.0)"); + + VDUP(vector, q, float, f, 32, 4, -0.0f); + VDUP(vector2, q, float, f, 32, 4, 0.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero2, " FP special (-0.0)"); +#endif +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc index 1eb9271..a30f420 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc @@ -28,6 +28,10 @@ void FNNAME (INSN_NAME) (void) /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#ifdef HAS_FLOAT16_VARIANT + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif #ifdef HAS_FLOAT_VARIANT VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); @@ -46,15 +50,27 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, q, uint, u, 8, 16, 0xf9); VDUP(vector2, q, uint, u, 16, 8, 0xfff2); VDUP(vector2, q, uint, u, 32, 4, 0xfffffff1); +#ifdef HAS_FLOAT16_VARIANT + VDUP(vector2, , float, f, 16, 4, -15.5f); + VDUP(vector2, q, float, f, 16, 8, -14.5f); +#endif #ifdef HAS_FLOAT_VARIANT VDUP(vector2, , float, f, 32, 2, -15.5f); VDUP(vector2, q, float, f, 32, 4, -14.5f); #endif +#ifdef HAS_FLOAT16_VARIANT +#define FLOAT16_VARIANT(MACRO, VAR) \ + MACRO(VAR, , float, f, 16, 4); \ + MACRO(VAR, q, float, f, 16, 8); +#else +#define FLOAT16_VARIANT(MACRO, VAR) +#endif + #ifdef HAS_FLOAT_VARIANT #define FLOAT_VARIANT(MACRO, VAR) \ MACRO(VAR, , float, f, 32, 2); \ - MACRO(VAR, q, float, f, 32, 4) + MACRO(VAR, q, float, f, 32, 4); #else #define FLOAT_VARIANT(MACRO, VAR) #endif @@ -72,7 +88,8 @@ void FNNAME (INSN_NAME) (void) MACRO(VAR, q, uint, u, 8, 16); \ MACRO(VAR, q, uint, u, 16, 8); \ MACRO(VAR, q, uint, u, 32, 4); \ - FLOAT_VARIANT(MACRO, VAR) + FLOAT_VARIANT(MACRO, VAR); \ + FLOAT16_VARIANT(MACRO, VAR); /* Apply a binary operator named INSN_NAME. 
*/
 TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME);
@@ -90,6 +107,42 @@
 CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
 CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");

+#ifdef HAS_FLOAT16_VARIANT
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
+
+  /* Extra FP tests with special values (NaN, ....) */
+  VDUP(vector, q, float, f, 16, 8, 1.0f);
+  VDUP(vector2, q, float, f, 16, 8, NAN);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_nan, " FP special (NaN)");
+
+  VDUP(vector, q, float, f, 16, 8, -NAN);
+  VDUP(vector2, q, float, f, 16, 8, 1.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_mnan, " FP special (-NaN)");
+
+  VDUP(vector, q, float, f, 16, 8, 1.0f);
+  VDUP(vector2, q, float, f, 16, 8, HUGE_VALF);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_inf, " FP special (inf)");
+
+  VDUP(vector, q, float, f, 16, 8, -HUGE_VALF);
+  VDUP(vector2, q, float, f, 16, 8, 1.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_minf, " FP special (-inf)");
+
+  VDUP(vector, q, float, f, 16, 8, 0.0f);
+  VDUP(vector2, q, float, f, 16, 8, -0.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_zero1, " FP special (-0.0)");
+
+  VDUP(vector, q, float, f, 16, 8, -0.0f);
+  VDUP(vector2, q, float, f, 16, 8, 0.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_zero2, " FP special (-0.0)");
+#endif
+
 #ifdef HAS_FLOAT_VARIANT
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
   CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_scalar_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_scalar_op.inc
new file mode 100644
index 0000000..55dedd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_scalar_op.inc
@@ -0,0 +1,160 @@
+/* Template file for binary scalar operator validation.
+
+   This file is meant to be included by test files for binary scalar
+   operations.  */
+
+/* Check for required settings.  */
+
+#ifndef INSN_NAME
+#error INSN_NAME (the intrinsic to test) must be defined.
+#endif
+
+#ifndef INPUT_TYPE
+#error INPUT_TYPE (basic type of an input value) must be defined.
+#endif
+
+#ifndef OUTPUT_TYPE
+#error OUTPUT_TYPE (basic type of an output value) must be defined.
+#endif
+
+#ifndef OUTPUT_TYPE_SIZE
+#error OUTPUT_TYPE_SIZE (size in bits of an output value) must be defined.
+#endif
+
+/* Optional settings:
+
+   INPUT_1: Input values for the first parameter.  Must be of type INPUT_TYPE.
+   INPUT_2: Input values for the second parameter.  Must be of type
+	    INPUT_TYPE.  */
+
+#ifndef TEST_MSG
+#define TEST_MSG "unnamed test"
+#endif
+
+/* The test framework.  */
+
+#include <stdio.h>
+
+extern void abort ();
+
+#define INFF __builtin_inf ()
+
+/* Stringify a macro.  */
+#define STR0(A) #A
+#define STR(A) STR0 (A)
+
+/* Macro concatenation.  */
+#define CAT0(A, B) A##B
+#define CAT(A, B) CAT0 (A, B)
+
+/* Format strings for error reporting.  */
+#define FMT16 "0x%04x"
+#define FMT32 "0x%08x"
+#define FMT CAT (FMT,OUTPUT_TYPE_SIZE)
+
+/* Type construction: forms TS_t, where T is the base type and S the size in
+   bits.
*/ +#define MK_TYPE0(T, S) T##S##_t +#define MK_TYPE(T, S) MK_TYPE0 (T, S) + +/* Convenience types for input and output data. */ +typedef MK_TYPE (uint, OUTPUT_TYPE_SIZE) output_hex_type; + +/* Conversion between typed values and their hexadecimal representation. */ +typedef union +{ + OUTPUT_TYPE value; + output_hex_type hex; +} output_conv_type; + +/* Default input values. */ + +float16_t input_1_float16_t[] = +{ + 0.0, -0.0, + 2.0, 3.1, + 20.0, 0.40, + -2.3, 1.33, + -7.6, 0.31, + 0.3353, 0.5, + 1.0, 13.13, + -6.3, 20.0, + (float16_t)INFF, (float16_t)-INFF, +}; + +float16_t input_2_float16_t[] = +{ + 1.0, 1.0, + -4.33, 100.0, + 30.0, -0.02, + 0.5, -7.231, + -6.3, 20.0, + -7.231, 2.3, + -7.6, 5.1, + 0.31, 0.33353, + (float16_t)-INFF, (float16_t)INFF, +}; + +#ifndef INPUT_1 +#define INPUT_1 CAT (input_1_,INPUT_TYPE) +#endif + +#ifndef INPUT_2 +#define INPUT_2 CAT (input_2_,INPUT_TYPE) +#endif + +/* Support macros and routines for the test function. */ + +#define CHECK() \ + { \ + output_conv_type actual; \ + output_conv_type expect; \ + \ + expect.hex = ((output_hex_type*)EXPECTED)[index]; \ + actual.value = INSN_NAME ((INPUT_1)[index], \ + (INPUT_2)[index]); \ + \ + if (actual.hex != expect.hex) \ + { \ + fprintf (stderr, \ + "ERROR in %s (%s line %d), buffer %s, " \ + "index %d: got " \ + FMT " != " FMT "\n", \ + TEST_MSG, __FILE__, __LINE__, \ + STR (EXPECTED), index, \ + actual.hex, expect.hex); \ + abort (); \ + } \ + fprintf (stderr, "CHECKED %s %s\n", \ + STR (EXPECTED), TEST_MSG); \ + } + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1 (NAME) + +/* The test function. */ + +void +FNNAME (INSN_NAME) (void) +{ + /* Basic test: y[i] = OP (x[i]), for each INPUT[i], then compare the result + against EXPECTED[i]. */ + + const int num_tests = sizeof (INPUT_1) / sizeof (INPUT_1[0]); + int index; + + for (index = 0; index < num_tests; index++) + CHECK (); + +#ifdef EXTRA_TESTS + EXTRA_TESTS (); +#endif +} + +int +main (void) +{ + FNNAME (INSN_NAME) (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc index 33451d7..313badb 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc @@ -15,6 +15,10 @@ each test file. */ extern ARRAY(expected2, uint, 32, 2); extern ARRAY(expected2, uint, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +extern ARRAY(expected2, uint, 16, 4); +extern ARRAY(expected2, uint, 16, 8); +#endif #define FNNAME1(NAME) exec_ ## NAME #define FNNAME(NAME) FNNAME1(NAME) @@ -37,17 +41,33 @@ void FNNAME (INSN_NAME) (void) DECL_VARIABLE(vector2, float, 32, 4); DECL_VARIABLE(vector_res, uint, 32, 2); DECL_VARIABLE(vector_res, uint, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); + DECL_VARIABLE(vector2, float, 16, 4); + DECL_VARIABLE(vector2, float, 16, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 16, 8); +#endif clean_results (); /* Initialize input "vector" from "buffer". */ VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif /* Choose init value arbitrarily, will be used for vector comparison. 
*/ VDUP(vector2, , float, f, 32, 2, -16.0f); VDUP(vector2, q, float, f, 32, 4, -14.0f); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector2, , float, f, 16, 4, -16.0f); + VDUP(vector2, q, float, f, 16, 8, -14.0f); +#endif /* Apply operator named INSN_NAME. */ TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); @@ -56,15 +76,36 @@ void FNNAME (INSN_NAME) (void) TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VCOMP(INSN_NAME, , float, f, uint, 16, 4); + CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 16, 8); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); +#endif + /* Test again, with different input values. */ VDUP(vector2, , float, f, 32, 2, -10.0f); VDUP(vector2, q, float, f, 32, 4, 10.0f); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector2, , float, f, 16, 4, -10.0f); + VDUP(vector2, q, float, f, 16, 8, 10.0f); +#endif + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected2, ""); TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected2,""); + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VCOMP(INSN_NAME, , float, f, uint, 16, 4); + CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected2, ""); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 16, 8); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected2,""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc index a09c5f5..c8c5dfe 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc @@ -11,6 +11,17 @@ extern ARRAY(expected_uint, uint, 32, 2); extern ARRAY(expected_q_uint, uint, 8, 16); extern ARRAY(expected_q_uint, uint, 16, 8); extern ARRAY(expected_q_uint, uint, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +extern ARRAY(expected_float, uint, 16, 4); +extern ARRAY(expected_q_float, uint, 16, 8); +extern ARRAY(expected_nan, uint, 16, 4); +extern ARRAY(expected_mnan, uint, 16, 4); +extern ARRAY(expected_nan2, uint, 16, 4); +extern ARRAY(expected_inf, uint, 16, 4); +extern ARRAY(expected_minf, uint, 16, 4); +extern ARRAY(expected_inf2, uint, 16, 4); +extern ARRAY(expected_mzero, uint, 16, 4); +#endif extern ARRAY(expected_float, uint, 32, 2); extern ARRAY(expected_q_float, uint, 32, 4); extern ARRAY(expected_uint2, uint, 32, 2); @@ -48,6 +59,9 @@ void FNNAME (INSN_NAME) (void) DECL_VARIABLE(vector, uint, 8, 8); DECL_VARIABLE(vector, uint, 16, 4); DECL_VARIABLE(vector, uint, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE (vector, float, 16, 4); +#endif DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector, int, 8, 16); DECL_VARIABLE(vector, int, 16, 8); @@ -55,6 +69,9 @@ void FNNAME (INSN_NAME) (void) DECL_VARIABLE(vector, uint, 8, 16); DECL_VARIABLE(vector, uint, 16, 8); DECL_VARIABLE(vector, uint, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE (vector, float, 16, 8); +#endif DECL_VARIABLE(vector, float, 32, 4); DECL_VARIABLE(vector2, int, 8, 8); @@ -63,6 +80,9 @@ void FNNAME (INSN_NAME) (void) DECL_VARIABLE(vector2, uint, 8, 8); DECL_VARIABLE(vector2, uint, 16, 4); DECL_VARIABLE(vector2, uint, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE (vector2, float, 16, 4); 
+#endif DECL_VARIABLE(vector2, float, 32, 2); DECL_VARIABLE(vector2, int, 8, 16); DECL_VARIABLE(vector2, int, 16, 8); @@ -70,6 +90,9 @@ void FNNAME (INSN_NAME) (void) DECL_VARIABLE(vector2, uint, 8, 16); DECL_VARIABLE(vector2, uint, 16, 8); DECL_VARIABLE(vector2, uint, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE (vector2, float, 16, 8); +#endif DECL_VARIABLE(vector2, float, 32, 4); DECL_VARIABLE(vector_res, uint, 8, 8); @@ -88,6 +111,9 @@ void FNNAME (INSN_NAME) (void) VLOAD(vector, buffer, , uint, u, 8, 8); VLOAD(vector, buffer, , uint, u, 16, 4); VLOAD(vector, buffer, , uint, u, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD (vector, buffer, , float, f, 16, 4); +#endif VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, int, s, 8, 16); @@ -96,6 +122,9 @@ void FNNAME (INSN_NAME) (void) VLOAD(vector, buffer, q, uint, u, 8, 16); VLOAD(vector, buffer, q, uint, u, 16, 8); VLOAD(vector, buffer, q, uint, u, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD (vector, buffer, q, float, f, 16, 8); +#endif VLOAD(vector, buffer, q, float, f, 32, 4); /* Choose init value arbitrarily, will be used for vector @@ -106,6 +135,9 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, , uint, u, 8, 8, 0xF3); VDUP(vector2, , uint, u, 16, 4, 0xFFF2); VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF1); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP (vector2, , float, f, 16, 4, -15.0f); +#endif VDUP(vector2, , float, f, 32, 2, -15.0f); VDUP(vector2, q, int, s, 8, 16, -4); @@ -114,6 +146,9 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, q, uint, u, 8, 16, 0xF4); VDUP(vector2, q, uint, u, 16, 8, 0xFFF6); VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP (vector2, q, float, f, 16, 8, -14.0f); +#endif VDUP(vector2, q, float, f, 32, 4, -14.0f); /* The comparison operators produce only unsigned results, which @@ -154,9 +189,17 @@ void FNNAME (INSN_NAME) (void) CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_q_uint, ""); /* The float variants. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_float, ""); +#endif TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_float, ""); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VCOMP (INSN_NAME, q, float, f, uint, 16, 8); + CHECK (TEST_MSG, uint, 16, 8, PRIx16, expected_q_float, ""); +#endif TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_q_float, ""); @@ -176,6 +219,43 @@ void FNNAME (INSN_NAME) (void) /* Extra FP tests with special values (NaN, ....). 
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP (vector, , float, f, 16, 4, 1.0); + VDUP (vector2, , float, f, 16, 4, NAN); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_nan, "FP special (NaN)"); + + VDUP (vector, , float, f, 16, 4, 1.0); + VDUP (vector2, , float, f, 16, 4, -NAN); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_mnan, " FP special (-NaN)"); + + VDUP (vector, , float, f, 16, 4, NAN); + VDUP (vector2, , float, f, 16, 4, 1.0); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_nan2, " FP special (NaN)"); + + VDUP (vector, , float, f, 16, 4, 1.0); + VDUP (vector2, , float, f, 16, 4, HUGE_VALF); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_inf, " FP special (inf)"); + + VDUP (vector, , float, f, 16, 4, 1.0); + VDUP (vector2, , float, f, 16, 4, -HUGE_VALF); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_minf, " FP special (-inf)"); + + VDUP (vector, , float, f, 16, 4, HUGE_VALF); + VDUP (vector2, , float, f, 16, 4, 1.0); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_inf2, " FP special (inf)"); + + VDUP (vector, , float, f, 16, 4, -0.0); + VDUP (vector2, , float, f, 16, 4, 0.0); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_mzero, " FP special (-0.0)"); +#endif + VDUP(vector, , float, f, 32, 2, 1.0); VDUP(vector2, , float, f, 32, 2, NAN); TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_zero_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_zero_op.inc new file mode 100644 index 0000000..610272f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_zero_op.inc @@ -0,0 +1,111 @@ +/* Template file for the validation of compare against zero operators. + + This file is base on cmp_op.inc. It is meant to be included by the relevant + test files, which have to define the intrinsic family to test. If a given + intrinsic supports variants which are not supported by all the other + operators, these can be tested by providing a definition for EXTRA_TESTS. */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" +#include + +/* Additional expected results declaration, they are initialized in + each test file. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +extern ARRAY(expected_float, uint, 16, 4); +extern ARRAY(expected_q_float, uint, 16, 8); +extern ARRAY(expected_uint2, uint, 16, 4); +extern ARRAY(expected_uint3, uint, 16, 4); +extern ARRAY(expected_uint4, uint, 16, 4); +extern ARRAY(expected_nan, uint, 16, 4); +extern ARRAY(expected_mnan, uint, 16, 4); +extern ARRAY(expected_inf, uint, 16, 4); +extern ARRAY(expected_minf, uint, 16, 4); +extern ARRAY(expected_zero, uint, 16, 4); +extern ARRAY(expected_mzero, uint, 16, 4); +#endif + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* Basic test: y=vcomp(x1,x2), then store the result. 
*/ +#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N)) + +#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ + TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) + + /* No need for 64 bits elements. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE (vector, float, 16, 4); + DECL_VARIABLE (vector, float, 16, 8); +#endif + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 16, 8); +#endif + + clean_results (); + + /* Choose init value arbitrarily, will be used for vector + comparison. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP (vector, , float, f, 16, 4, -15.0f); + VDUP (vector, q, float, f, 16, 8, 14.0f); +#endif + + /* Float variants. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + TEST_VCOMP (INSN_NAME, q, float, f, uint, 16, 8); +#endif + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_float, ""); + CHECK (TEST_MSG, uint, 16, 8, PRIx16, expected_q_float, ""); +#endif + + /* Extra FP tests with special values (NaN, ....). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP (vector, , float, f, 16, 4, NAN); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_nan, "FP special (NaN)"); + + VDUP (vector, , float, f, 16, 4, -NAN); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_mnan, " FP special (-NaN)"); + + VDUP (vector, , float, f, 16, 4, HUGE_VALF); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_inf, " FP special (inf)"); + + VDUP (vector, , float, f, 16, 4, -HUGE_VALF); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_minf, " FP special (-inf)"); + + VDUP (vector, , float, f, 16, 4, 0.0); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_zero, " FP special (0.0)"); + + VDUP (vector, , float, f, 16, 4, 0.0); + TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); + CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_mzero, " FP special (-0.0)"); +#endif + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ternary_scalar_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ternary_scalar_op.inc new file mode 100644 index 0000000..4765091 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ternary_scalar_op.inc @@ -0,0 +1,206 @@ +/* Template file for ternary scalar operator validation. + + This file is meant to be included by test files for binary scalar + operations. */ + +/* Check for required settings. */ + +#ifndef INSN_NAME +#error INSN_NAME (the intrinsic to test) must be defined. +#endif + +#ifndef INPUT_TYPE +#error INPUT_TYPE (basic type of an input value) must be defined. +#endif + +#ifndef OUTPUT_TYPE +#error OUTPUT_TYPE (basic type of an output value) must be defined. +#endif + +#ifndef OUTPUT_TYPE_SIZE +#error OUTPUT_TYPE_SIZE (size in bits of an output value) must be defined. +#endif + +/* Optional settings: + + INPUT_1: Input values for the first parameter. Must be of type INPUT_TYPE. 
+ INPUT_2: Input values for the second parameter. Must be of type INPUT_TYPE. + INPUT_3: Input values for the third parameter. Must be of type + INPUT_TYPE. */ + +#ifndef TEST_MSG +#define TEST_MSG "unnamed test" +#endif + +/* The test framework. */ + +#include + +extern void abort (); + +#define INFF __builtin_inf () + +/* Stringify a macro. */ +#define STR0(A) #A +#define STR(A) STR0 (A) + +/* Macro concatenation. */ +#define CAT0(A, B) A##B +#define CAT(A, B) CAT0 (A, B) + +/* Format strings for error reporting. */ +#define FMT16 "0x%04x" +#define FMT32 "0x%08x" +#define FMT CAT (FMT,OUTPUT_TYPE_SIZE) + +/* Type construction: forms TS_t, where T is the base type and S the size in + bits. */ +#define MK_TYPE0(T, S) T##S##_t +#define MK_TYPE(T, S) MK_TYPE0 (T, S) + +/* Convenience types for input and output data. */ +typedef MK_TYPE (uint, OUTPUT_TYPE_SIZE) output_hex_type; + +/* Conversion between typed values and their hexadecimal representation. */ +typedef union +{ + OUTPUT_TYPE value; + output_hex_type hex; +} output_conv_type; + +/* Default input values. */ + +float16_t input_1_float16_t[] = +{ + 0.0, + -0.0, + 2.0, + 3.1, + 20.0, + 0.40, + -2.3, + 1.33, + -7.6, + 0.31, + 0.3353, + 0.5, + 1.0, + 13.13, + -6.3, + 20.0, + (float16_t)INFF, + (float16_t)-INFF, +}; + +float16_t input_2_float16_t[] = +{ + 1.0, + 1.0, + -4.33, + 100.0, + 30.0, + -0.02, + 0.5, + -7.231, + -6.3, + 20.0, + -7.231, + 2.3, + -7.6, + 5.1, + 0.31, + 0.33353, + (float16_t)-INFF, + (float16_t)INFF, +}; + +float16_t input_3_float16_t[] = +{ + -0.0, + 0.0, + 0.31, + -0.31, + 1.31, + 2.1, + -6.3, + 1.0, + -1.5, + 5.1, + 0.3353, + 9.3, + -9.3, + -7.231, + 0.5, + -0.33, + (float16_t)INFF, + (float16_t)INFF, +}; + +#ifndef INPUT_1 +#define INPUT_1 CAT (input_1_,INPUT_TYPE) +#endif + +#ifndef INPUT_2 +#define INPUT_2 CAT (input_2_,INPUT_TYPE) +#endif + +#ifndef INPUT_3 +#define INPUT_3 CAT (input_3_,INPUT_TYPE) +#endif + +/* Support macros and routines for the test function. */ + +#define CHECK() \ + { \ + output_conv_type actual; \ + output_conv_type expect; \ + \ + expect.hex = ((output_hex_type*)EXPECTED)[index]; \ + actual.value = INSN_NAME ((INPUT_1)[index], \ + (INPUT_2)[index], \ + (INPUT_3)[index]); \ + \ + if (actual.hex != expect.hex) \ + { \ + fprintf (stderr, \ + "ERROR in %s (%s line %d), buffer %s, " \ + "index %d: got " \ + FMT " != " FMT "\n", \ + TEST_MSG, __FILE__, __LINE__, \ + STR (EXPECTED), index, \ + actual.hex, expect.hex); \ + abort (); \ + } \ + fprintf (stderr, "CHECKED %s %s\n", \ + STR (EXPECTED), TEST_MSG); \ + } + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1 (NAME) + +/* The test function. */ + +void +FNNAME (INSN_NAME) (void) +{ + /* Basic test: y[i] = OP (x[i]), for each INPUT[i], then compare the result + against EXPECTED[i]. */ + + const int num_tests = sizeof (INPUT_1) / sizeof (INPUT_1[0]); + int index; + + for (index = 0; index < num_tests; index++) + CHECK (); + +#ifdef EXTRA_TESTS + EXTRA_TESTS (); +#endif +} + +int +main (void) +{ + FNNAME (INSN_NAME) (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_scalar_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_scalar_op.inc new file mode 100644 index 0000000..66c8906 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_scalar_op.inc @@ -0,0 +1,200 @@ +/* Template file for unary scalar operator validation. + + This file is meant to be included by test files for unary scalar + operations. */ + +/* Check for required settings. 
*/ + +#ifndef INSN_NAME +#error INSN_NAME (the intrinsic to test) must be defined. +#endif + +#ifndef INPUT_TYPE +#error INPUT_TYPE (basic type of an input value) must be defined. +#endif + +#ifndef SCALAR_OPERANDS +#ifndef EXPECTED +#error EXPECTED (an array of expected output values) must be defined. +#endif +#endif + +#ifndef OUTPUT_TYPE +#error OUTPUT_TYPE (basic type of an output value) must be defined. +#endif + +#ifndef OUTPUT_TYPE_SIZE +#error OUTPUT_TYPE_SIZE (size in bits of an output value) must be defined. +#endif + +/* Optional settings. */ + +/* SCALAR_OPERANDS: Defined iff the intrinsic has a scalar operand. + + SCALAR_1, SCALAR_2, .., SCALAR_4: If SCALAR_OPERANDS is defined, SCALAR_ + is the scalar and EXPECTED_ is array of expected values. + + INPUT: Input values for the first parameter. Must be of type INPUT_TYPE. */ + +/* Additional comments for the error message. */ +#ifndef COMMENT +#define COMMENT "" +#endif + +#ifndef TEST_MSG +#define TEST_MSG "unnamed test" +#endif + +/* The test framework. */ + +#include + +extern void abort (); + +#define INFF __builtin_inf () + +/* Stringify a macro. */ +#define STR0(A) #A +#define STR(A) STR0 (A) + +/* Macro concatenation. */ +#define CAT0(A, B) A##B +#define CAT(A, B) CAT0 (A, B) + +/* Format strings for error reporting. */ +#define FMT16 "0x%04x" +#define FMT32 "0x%08x" +#define FMT64 "0x%016x" +#define FMT CAT (FMT,OUTPUT_TYPE_SIZE) + +/* Type construction: forms TS_t, where T is the base type and S the size in + bits. */ +#define MK_TYPE0(T, S) T##S##_t +#define MK_TYPE(T, S) MK_TYPE0 (T, S) + +/* Convenience types for input and output data. */ +typedef MK_TYPE (uint, OUTPUT_TYPE_SIZE) output_hex_type; + +/* Conversion between typed values and their hexadecimal representation. */ +typedef union +{ + OUTPUT_TYPE value; + output_hex_type hex; +} output_conv_type; + +/* Default input values. */ + +float16_t input_1_float16_t[] = +{ + 0.0, -0.0, + 2.0, 3.1, + 20.0, 0.40, + -2.3, 1.33, + -7.6, 0.31, + 0.3353, 0.5, + 1.0, 13.13, + -6.3, 20.0, + (float16_t)INFF, (float16_t)-INFF, +}; + +#ifndef INPUT +#define INPUT CAT(input_1_,INPUT_TYPE) +#endif + +/* Support macros and routines for the test function. */ + +#define CHECK() \ + { \ + output_conv_type actual; \ + output_conv_type expect; \ + \ + expect.hex = ((output_hex_type*)EXPECTED)[index]; \ + actual.value = INSN_NAME ((INPUT)[index]); \ + \ + if (actual.hex != expect.hex) \ + { \ + fprintf (stderr, \ + "ERROR in %s (%s line %d), buffer %s, " \ + "index %d: got " \ + FMT " != " FMT "\n", \ + TEST_MSG, __FILE__, __LINE__, \ + STR (EXPECTED), index, \ + actual.hex, expect.hex); \ + abort (); \ + } \ + fprintf (stderr, "CHECKED %s %s\n", \ + STR (EXPECTED), TEST_MSG); \ + } + +#define CHECK_N(SCALAR, EXPECTED) \ + { \ + output_conv_type actual; \ + output_conv_type expect; \ + \ + expect.hex \ + = ((output_hex_type*)EXPECTED)[index]; \ + actual.value = INSN_NAME ((INPUT)[index], (SCALAR)); \ + \ + if (actual.hex != expect.hex) \ + { \ + fprintf (stderr, \ + "ERROR in %s (%s line %d), buffer %s, " \ + "index %d: got " \ + FMT " != " FMT "\n", \ + TEST_MSG, __FILE__, __LINE__, \ + STR (EXPECTED), index, \ + actual.hex, expect.hex); \ + abort (); \ + } \ + fprintf (stderr, "CHECKED %s %s\n", \ + STR (EXPECTED), TEST_MSG); \ + } + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1 (NAME) + +/* The test function. */ + +void +FNNAME (INSN_NAME) (void) +{ + /* Basic test: y[i] = OP (x[i]), for each INPUT[i], then compare the result + against EXPECTED[i]. 
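The CHECK/CHECK_N machinery above compares results bit-for-bit through a union rather than with floating-point equality, so signed zeros and particular NaN encodings are matched exactly. A minimal standalone sketch of the same technique, outside the macro layer (the __fp16 type and the helper name are illustrative assumptions, not part of the patch):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Mirrors the template's output_conv_type for a 16-bit result.  */
typedef union
{
  __fp16 value;                 /* assumes a compiler with __fp16 support */
  uint16_t hex;
} conv16;

static void
check_bits (__fp16 actual, uint16_t expected_hex)
{
  conv16 c;
  c.value = actual;
  /* Bit-exact: distinguishes 0x0000 (+0.0) from 0x8000 (-0.0) and
     matches one specific NaN encoding such as 0x7e00.  */
  if (c.hex != expected_hex)
    {
      fprintf (stderr, "got 0x%04x != 0x%04x\n", c.hex, expected_hex);
      abort ();
    }
}

The hex comparison is what makes the expected tables in the individual tests meaningful: two results that merely compare equal as floats (for example +0.0 and -0.0) would not both pass.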
*/ + + const int num_tests = sizeof (INPUT) / sizeof (INPUT[0]); + int index; + + for (index = 0; index < num_tests; index++) + { +#if defined (SCALAR_OPERANDS) + +#ifdef SCALAR_1 + CHECK_N (SCALAR_1, EXPECTED_1); +#endif +#ifdef SCALAR_2 + CHECK_N (SCALAR_2, EXPECTED_2); +#endif +#ifdef SCALAR_3 + CHECK_N (SCALAR_3, EXPECTED_3); +#endif +#ifdef SCALAR_4 + CHECK_N (SCALAR_4, EXPECTED_4); +#endif + +#else /* !defined (SCALAR_OPERAND). */ + CHECK (); +#endif + } + +#ifdef EXTRA_TESTS + EXTRA_TESTS (); +#endif +} + +int +main (void) +{ + FNNAME (INSN_NAME) (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c index 67d2af1..3049065 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c @@ -30,10 +30,20 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffd0, 0xffffffd1, 0xffffffd2, 0xffffffd3 }; VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x42407ae1, 0x423c7ae1, 0x42387ae1, 0x42347ae1 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0x4e13, 0x4dd3, + 0x4d93, 0x4d53 }; +VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0x5204, 0x51e4, 0x51c4, 0x51a4, + 0x5184, 0x5164, 0x5144, 0x5124 }; +#endif /* Additional expected results for float32 variants with specially chosen input values. */ VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif #define TEST_MSG "VABD/VABDQ" void exec_vabd (void) @@ -65,6 +75,17 @@ void exec_vabd (void) DECL_VABD_VAR(vector2); DECL_VABD_VAR(vector_res); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector1, float, 16, 4); + DECL_VARIABLE(vector1, float, 16, 8); + + DECL_VARIABLE(vector2, float, 16, 4); + DECL_VARIABLE(vector2, float, 16, 8); + + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif + clean_results (); /* Initialize input "vector1" from "buffer". */ @@ -82,6 +103,12 @@ void exec_vabd (void) VLOAD(vector1, buffer, q, uint, u, 16, 8); VLOAD(vector1, buffer, q, uint, u, 32, 4); VLOAD(vector1, buffer, q, float, f, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD(vector1, buffer, , float, f, 16, 4); + VLOAD(vector1, buffer, , float, f, 16, 4); + VLOAD(vector1, buffer, q, float, f, 16, 8); + VLOAD(vector1, buffer, q, float, f, 16, 8); +#endif /* Choose init value arbitrarily. */ VDUP(vector2, , int, s, 8, 8, 1); @@ -98,6 +125,10 @@ void exec_vabd (void) VDUP(vector2, q, uint, u, 16, 8, 12); VDUP(vector2, q, uint, u, 32, 4, 32); VDUP(vector2, q, float, f, 32, 4, 32.12f); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector2, , float, f, 16, 4, 8.3f); + VDUP(vector2, q, float, f, 16, 8, 32.12f); +#endif /* Execute the tests. 
*/ TEST_VABD(, int, s, 8, 8); @@ -115,6 +146,11 @@ void exec_vabd (void) TEST_VABD(q, uint, u, 32, 4); TEST_VABD(q, float, f, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VABD(, float, f, 16, 4); + TEST_VABD(q, float, f, 16, 8); +#endif + CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); @@ -129,7 +165,10 @@ void exec_vabd (void) CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); - +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); +#endif /* Extra FP tests with special values (-0.0, ....) */ VDUP(vector1, q, float, f, 32, 4, -0.0f); @@ -137,11 +176,27 @@ void exec_vabd (void) TEST_VABD(q, float, f, 32, 4); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, " FP special (-0.0)"); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector1, q, float, f, 16, 8, -0.0f); + VDUP(vector2, q, float, f, 16, 8, 0.0); + TEST_VABD(q, float, f, 16, 8); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, + " FP special (-0.0)"); +#endif + /* Extra FP tests with special values (-0.0, ....) */ VDUP(vector1, q, float, f, 32, 4, 0.0f); VDUP(vector2, q, float, f, 32, 4, -0.0); TEST_VABD(q, float, f, 32, 4); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, " FP special (-0.0)"); + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector1, q, float, f, 16, 8, 0.0f); + VDUP(vector2, q, float, f, 16, 8, -0.0); + TEST_VABD(q, float, f, 16, 8); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, + " FP special (-0.0)"); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdh_f16_1.c new file mode 100644 index 0000000..3a5efa5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdh_f16_1.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +#define INFF __builtin_inf () + +/* Expected results. + Absolute difference between INPUT1 and INPUT2 in binary_scalar_op.inc. */ +uint16_t expected[] = +{ + 0x3C00, + 0x3C00, + 0x4654, + 0x560E, + 0x4900, + 0x36B8, + 0x419a, + 0x4848, + 0x3d34, + 0x4cec, + 0x4791, + 0x3f34, + 0x484d, + 0x4804, + 0x469c, + 0x4ceb, + 0x7c00, + 0x7c00 +}; + +#define TEST_MSG "VABDH_F16" +#define INSN_NAME vabdh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. */ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c index 9c80ef1..9d6d5b2 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c @@ -21,24 +21,52 @@ VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; /* Expected results for float32 variants. Needs to be separated since the generic test function does not test floating-point versions. 
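All of the expected values in these tables are IEEE 754 binary16 bit patterns: 1 sign bit, 5 exponent bits (bias 15) and 10 mantissa bits. A small decoder makes the annotations easy to verify, e.g. 0x409a is 2 * (1 + 154/1024) = 2.30078125, printed as 2.300781 in the tables. This sketch handles normal numbers only; subnormals, infinities and NaNs are omitted:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Decode a normal binary16 bit pattern into a double.  */
static double
fp16_bits_to_double (uint16_t h)
{
  int sign = (h >> 15) & 1;
  int exp = (h >> 10) & 0x1f;           /* biased exponent */
  int frac = h & 0x3ff;                 /* 10 mantissa bits */
  double v = ldexp (1.0 + frac / 1024.0, exp - 15);
  return sign ? -v : v;
}

int
main (void)
{
  printf ("%f\n", fp16_bits_to_double (0x409a));  /* 2.300781 */
  printf ("%f\n", fp16_bits_to_double (0xcc00));  /* -16.000000 */
  return 0;
}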
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_float16, hfloat, 16, 4) [] = { 0x409a, 0x409a, + 0x409a, 0x409a }; +VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0x42cd, 0x42cd, + 0x42cd, 0x42cd, + 0x42cd, 0x42cd, + 0x42cd, 0x42cd }; +#endif VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0x40133333, 0x40133333 }; VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x4059999a, 0x4059999a, 0x4059999a, 0x4059999a }; void exec_vabs_f32(void) { +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); +#endif DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector, float, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif DECL_VARIABLE(vector_res, float, 32, 2); DECL_VARIABLE(vector_res, float, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, -2.3f); + VDUP(vector, q, float, f, 16, 8, 3.4f); +#endif VDUP(vector, , float, f, 32, 2, -2.3f); VDUP(vector, q, float, f, 32, 4, 3.4f); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_UNARY_OP(INSN_NAME, , float, f, 16, 4); + TEST_UNARY_OP(INSN_NAME, q, float, f, 16, 8); +#endif TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2); TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_float16, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, ""); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, ""); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, ""); } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabsh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabsh_f16_1.c new file mode 100644 index 0000000..16a986a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabsh_f16_1.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +/* Expected results (16-bit hexadecimal representation). */ +uint16_t expected[] = +{ + 0x0000 /* 0.000000 */, + 0x0000 /* 0.000000 */, + 0x4000 /* 2.000000 */, + 0x4233 /* 3.099609 */, + 0x4d00 /* 20.000000 */, + 0x3666 /* 0.399902 */, + 0x409a /* 2.300781 */, + 0x3d52 /* 1.330078 */, + 0x479a /* 7.601562 */, + 0x34f6 /* 0.310059 */, + 0x355d /* 0.335205 */, + 0x3800 /* 0.500000 */, + 0x3c00 /* 1.000000 */, + 0x4a91 /* 13.132812 */, + 0x464d /* 6.300781 */, + 0x4d00 /* 20.000000 */, + 0x7c00 /* inf */, + 0x7c00 /* inf */ +}; + +#define TEST_MSG "VABSH_F16" +#define INSN_NAME vabsh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. 
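Each *h_f16_1.c test is only a thin parameterization of one of these templates: it fixes the intrinsic, the input/output types and the expected bit patterns, then includes the .inc file, which supplies the default inputs, the checking loop and main. The shape, for a hypothetical unary f16 intrinsic (vfooh_f16 and the elided expected table are placeholders, not part of the patch):

/* { dg-do run } */
/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
/* { dg-add-options arm_v8_2a_fp16_scalar } */

#include <arm_fp16.h>

/* One binary16 bit pattern per default input value (a real test
   supplies all 18 entries; elided here).  */
uint16_t expected[] = { /* ... */ };

#define TEST_MSG "VFOOH_F16"
#define INSN_NAME vfooh_f16

#define EXPECTED expected

#define INPUT_TYPE float16_t
#define OUTPUT_TYPE float16_t
#define OUTPUT_TYPE_SIZE 16

#include "unary_scalar_op.inc"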
*/ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c index 7be1401..1561dc1 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c @@ -43,6 +43,14 @@ VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0x40d9999a, 0x40d9999a }; VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x41100000, 0x41100000, 0x41100000, 0x41100000 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_float16, hfloat, 16, 4) [] = { 0x46cd, 0x46cd, + 0x46cd, 0x46cd }; +VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0x4880, 0x4880, + 0x4880, 0x4880, + 0x4880, 0x4880, + 0x4880, 0x4880 }; +#endif void exec_vadd_f32(void) { @@ -66,4 +74,27 @@ void exec_vadd_f32(void) CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, ""); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, ""); + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); + + DECL_VARIABLE(vector2, float, 16, 4); + DECL_VARIABLE(vector2, float, 16, 8); + + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); + + VDUP(vector, , float, f, 16, 4, 2.3f); + VDUP(vector, q, float, f, 16, 8, 3.4f); + + VDUP(vector2, , float, f, 16, 4, 4.5f); + VDUP(vector2, q, float, f, 16, 8, 5.6f); + + TEST_BINARY_OP(INSN_NAME, , float, f, 16, 4); + TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); + + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_float16, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, ""); +#endif } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddh_f16_1.c new file mode 100644 index 0000000..4b0e242 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddh_f16_1.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +/* Expected results (16-bit hexadecimal representation). */ +uint16_t expected[] = +{ + 0x3c00 /* 1.000000 */, + 0x3c00 /* 1.000000 */, + 0xc0a8 /* -2.328125 */, + 0x5672 /* 103.125000 */, + 0x5240 /* 50.000000 */, + 0x3614 /* 0.379883 */, + 0xbf34 /* -1.800781 */, + 0xc5e6 /* -5.898438 */, + 0xcaf4 /* -13.906250 */, + 0x4d14 /* 20.312500 */, + 0xc6e5 /* -6.894531 */, + 0x419a /* 2.800781 */, + 0xc69a /* -6.601562 */, + 0x4c8f /* 18.234375 */, + 0xc5fe /* -5.992188 */, + 0x4d15 /* 20.328125 */, + 0x7e00 /* nan */, + 0x7e00 /* nan */, +}; + +#define TEST_MSG "VADDH_F16" +#define INSN_NAME vaddh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. 
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c index c4fdbb4..e9b3dfd 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89, + 0xcb09, 0xca89 }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf6, 0xf6, 0xf6, 0xf6, @@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89, + 0xcb09, 0xca89, + 0xca09, 0xc989, + 0xc909, 0xc889 }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001, 0xc1600001, 0xc1500001 }; @@ -66,6 +76,10 @@ void exec_vbsl (void) clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); @@ -80,6 +94,9 @@ void exec_vbsl (void) VDUP(vector2, , uint, u, 16, 4, 0xFFF2); VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3); +#if defined (FP16_SUPPORTED) + VDUP(vector2, , float, f, 16, 4, -2.4f); /* -2.4f is 0xC0CD. 
*/ +#endif VDUP(vector2, , float, f, 32, 2, -30.3f); VDUP(vector2, , poly, p, 8, 8, 0xF3); VDUP(vector2, , poly, p, 16, 4, 0xFFF2); @@ -94,6 +111,9 @@ void exec_vbsl (void) VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3); VDUP(vector2, q, poly, p, 8, 16, 0xF3); VDUP(vector2, q, poly, p, 16, 8, 0xFFF2); +#if defined (FP16_SUPPORTED) + VDUP(vector2, q, float, f, 16, 8, -2.4f); +#endif VDUP(vector2, q, float, f, 32, 4, -30.4f); VDUP(vector_first, , uint, u, 8, 8, 0xF4); @@ -111,10 +131,18 @@ void exec_vbsl (void) TEST_VBSL(uint, , poly, p, 16, 4); TEST_VBSL(uint, q, poly, p, 8, 16); TEST_VBSL(uint, q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VBSL(uint, , float, f, 16, 4); + TEST_VBSL(uint, q, float, f, 16, 8); +#endif TEST_VBSL(uint, , float, f, 32, 2); TEST_VBSL(uint, q, float, f, 32, 4); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c index 1fadf66..ab00b96 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c @@ -11,3 +11,13 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected, uint, 16, 4) [] = { 0xffff, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected, uint, 16, 8) [] = { 0xffff, 0xffff, 0xffff, 0x0, + 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected2, uint, 16, 4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL (expected2, uint, 16, 8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0x0 }; +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcageh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcageh_f16_1.c new file mode 100644 index 0000000..0bebec7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcageh_f16_1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, + 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF}; + +#define TEST_MSG "VCAGEH_F16" +#define INSN_NAME vcageh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. 
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c index b1144a2..81c46a6 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c @@ -11,3 +11,13 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected, uint, 16, 8) [] = { 0xffff, 0xffff, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected2, uint, 16, 4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL (expected2, uint, 16, 8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0x0, 0x0 }; +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagth_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagth_f16_1.c new file mode 100644 index 0000000..68ce599 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagth_f16_1.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, + 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0}; + +#define TEST_MSG "VCAGTH_F16" +#define INSN_NAME vcagth_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. 
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c index bff9e4a..091ffaf 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c @@ -9,3 +9,13 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected, uint, 16, 4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL (expected, uint, 16, 8) [] = { 0x0, 0x0, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff }; + +VECT_VAR_DECL (expected2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected2, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xffff, 0xffff }; +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcaleh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcaleh_f16_1.c new file mode 100644 index 0000000..1b5a09b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcaleh_f16_1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, + 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, + 0x0, 0xFFFF, 0xFFFF}; + +#define TEST_MSG "VCALEH_F16" +#define INSN_NAME vcaleh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. 
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c index ed652eb..525176a 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c @@ -9,3 +9,13 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected, uint, 16, 4) [] = { 0x0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL (expected, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff }; + +VECT_VAR_DECL (expected2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected2, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xffff }; +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalth_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalth_f16_1.c new file mode 100644 index 0000000..766c783 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalth_f16_1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, + 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, + 0x0, 0x0, 0x0}; + +#define TEST_MSG "VCALTH_F16" +#define INSN_NAME vcalth_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. 
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c index 1e21d50..ede01fb 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c @@ -32,6 +32,12 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff, 0x0 }; VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, 0xffff, 0x0, + 0x0, 0x0, 0x0, 0x0, }; +#endif + VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0xffffffff }; VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; @@ -39,6 +45,18 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0x0 }; VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0xffffffff }; VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0x0 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif + VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqh_f16_1.c new file mode 100644 index 0000000..8f5c14b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqh_f16_1.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + +#define TEST_MSG "VCEQH_F16" +#define INSN_NAME vceqh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. */ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqz_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqz_1.c new file mode 100644 index 0000000..eefaa7a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqz_1.c @@ -0,0 +1,27 @@ +/* This file tests an intrinsic which currently has only an f16 variant and that + is only available when FP16 arithmetic instructions are supported. */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ + +#define INSN_NAME vceqz +#define TEST_MSG "VCEQZ/VCEQZQ" + +#include "cmp_zero_op.inc" + +/* Expected results. 
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif + +/* Extra FP tests with special values (NaN, ....). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqzh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqzh_f16_1.c new file mode 100644 index 0000000..ccfecf4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqzh_f16_1.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + +#define TEST_MSG "VCEQZH_F16" +#define INSN_NAME vceqzh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c index 22a5d67..0ec7c7b 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c @@ -28,6 +28,14 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0, 0x0, 0xffff, 0xffff }; VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, + 0xffff, 0xffff, + 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif + VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0xffffffff }; VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; @@ -35,6 +43,20 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0xffffffff }; VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif + VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_mnan,uint,32,2) [] = {
0x0, 0x0 }; VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgeh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgeh_f16_1.c new file mode 100644 index 0000000..161c7a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgeh_f16_1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0x0, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF, + 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, + 0xFFFF, 0x0}; + +#define TEST_MSG "VCGEH_F16" +#define INSN_NAME vcgeh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. */ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgez_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgez_1.c new file mode 100644 index 0000000..3ce74f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgez_1.c @@ -0,0 +1,30 @@ +/* This file tests an intrinsic which currently has only an f16 variant and that + is only available when FP16 arithmetic instructions are supported. */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ + +#define INSN_NAME vcgez +#define TEST_MSG "VCGEZ/VCGEZQ" + +#include "cmp_zero_op.inc" + +/* Expected results. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0xffff, 0xffff, + 0xffff, 0xffff, + 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif + +/* Extra FP tests with special values (NaN, ....). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgezh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgezh_f16_1.c new file mode 100644 index 0000000..2d3cd8a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgezh_f16_1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, + 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0x0, 0xFFFF, 0xFFFF, 0x0}; + +#define TEST_MSG "VCGEZH_F16" +#define INSN_NAME vcgezh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations.
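The special-value tables in these vc*z tests follow directly from IEEE 754 ordered-comparison rules: any comparison involving NaN is false (so expected_nan and expected_mnan are all zeros for every variant), and -0.0 compares equal to +0.0 (so expected_mzero is all ones for the >=, <= and == variants). A compact model of the family, one lane widened to double (illustrative):

#include <stdint.h>

/* One lane of each zero-compare; NaN makes every result 0.  */
static uint16_t m_ceqz (double f) { return f == 0.0 ? 0xffff : 0; }
static uint16_t m_cgez (double f) { return f >= 0.0 ? 0xffff : 0; }
static uint16_t m_cgtz (double f) { return f >  0.0 ? 0xffff : 0; }
static uint16_t m_clez (double f) { return f <= 0.0 ? 0xffff : 0; }
static uint16_t m_cltz (double f) { return f <  0.0 ? 0xffff : 0; }

For example m_cgez (-0.0) is all-ones because -0.0 == +0.0, matching expected_mzero above, while m_cgtz (-0.0) is 0, matching the vcgtz table.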
*/ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c index c44819a..3976d57 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c @@ -28,6 +28,14 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff }; VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0x0, 0xffff, 0xffff }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, + 0x0, 0xffff, + 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif + VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; @@ -35,6 +43,19 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0x0, 0xffffffff }; VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0x0, 0xffffffff }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#endif + VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgth_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgth_f16_1.c new file mode 100644 index 0000000..0d35385 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgth_f16_1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0x0, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF, + 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, + 0xFFFF, 0x0}; + +#define TEST_MSG "VCGTH_F16" +#define INSN_NAME vcgth_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. */ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtz_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtz_1.c new file mode 100644 index 0000000..a096dc7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtz_1.c @@ -0,0 +1,28 @@ +/* This file tests an intrinsic which currently has only an f16 variant and that + is only available when FP16 arithmetic instructions are supported. */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ + +#define INSN_NAME vcgtz +#define TEST_MSG "VCGTZ/VCGTZQ" + +#include "cmp_zero_op.inc" + +/* Expected results. 
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0xffff, 0xffff, + 0xffff, 0xffff, + 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif + +/* Extra FP tests with special values (NaN, ....). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtzh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtzh_f16_1.c new file mode 100644 index 0000000..ca23e3f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtzh_f16_1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0x0, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, + 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, + 0xFFFF, 0xFFFF, 0x0}; + +#define TEST_MSG "VCGTZH_F16" +#define INSN_NAME vcgtzh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c index a59b543..49f89d8 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c @@ -31,6 +31,14 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0xffff, 0xffff, + 0xffff, 0x0, + 0x0, 0x0, + 0x0, 0x0 }; +#endif + VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0xffffffff, 0xffffffff }; VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; @@ -39,6 +47,20 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0x0 }; VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0x0 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif + VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcleh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcleh_f16_1.c new file mode 100644 index 0000000..f51cac3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcleh_f16_1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0x0, + 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF, 0x0, 0x0, + 0xFFFF}; + +#define TEST_MSG "VCLEH_F16" +#define INSN_NAME vcleh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. */ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclez_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclez_1.c new file mode 100644 index 0000000..7e18e3d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclez_1.c @@ -0,0 +1,29 @@ +/* This file tests an intrinsic which currently has only an f16 variant and that + is only available when FP16 arithmetic instructions are supported. */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ + +#define INSN_NAME vclez +#define TEST_MSG "VCLEZ/VCLEZQ" + +#include "cmp_zero_op.inc" + +/* Expected results. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0 }; +#endif + +/* Extra FP tests with special values (NaN, ....). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclezh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclezh_f16_1.c new file mode 100644 index 0000000..57901c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclezh_f16_1.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF, + 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF}; + +#define TEST_MSG "VCLEZH_F16" +#define INSN_NAME vclezh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations.
*/ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c index 6ef2b4c..b6f8d87 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c @@ -30,6 +30,14 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0xffff, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0xffff, 0xffff, + 0x0, 0x0, + 0x0, 0x0, + 0x0, 0x0 }; +#endif + VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0xffffffff, 0x0 }; VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; @@ -38,6 +46,19 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0xffffffff, 0x0 }; VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0x0, 0x0 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#endif + VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclth_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclth_f16_1.c new file mode 100644 index 0000000..3218873 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclth_f16_1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0x0, + 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF, 0x0, 0x0, + 0xFFFF}; + +#define TEST_MSG "VCLTH_F16" +#define INSN_NAME vclth_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. */ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltz_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltz_1.c new file mode 100644 index 0000000..9b75cc7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltz_1.c @@ -0,0 +1,27 @@ +/* This file tests an intrinsic which currently has only an f16 variant and that + is only available when FP16 arithmetic instructions are supported. */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ + +#define INSN_NAME vcltz +#define TEST_MSG "VCLTZ/VCLTZQ" + +#include "cmp_zero_op.inc" + +/* Expected results. 
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0 }; +#endif + +/* Extra FP tests with special values (NaN, ....). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0xffff, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltzh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltzh_f16_1.c new file mode 100644 index 0000000..af6a5b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltzh_f16_1.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +uint16_t expected[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF, + 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF}; + +#define TEST_MSG "VCLTZH_F16" +#define INSN_NAME vcltzh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt.c index 8e80f1e..b2b861a 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt.c @@ -4,36 +4,99 @@ #include /* Expected results for vcvt. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_s, hfloat, 16, 4) [] = +{ 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected_u, hfloat, 16, 4) [] = +{ 0x7c00, 0x7c00, 0x7c00, 0x7c00, }; +VECT_VAR_DECL(expected_s, hfloat, 16, 8) [] = +{ 0xcc00, 0xcb80, 0xcb00, 0xca80, + 0xca00, 0xc980, 0xc900, 0xc880 }; +VECT_VAR_DECL(expected_u, hfloat, 16, 8) [] = +{ 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, }; +#endif VECT_VAR_DECL(expected_s,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected_u,hfloat,32,2) [] = { 0x4f800000, 0x4f800000 }; VECT_VAR_DECL(expected_s,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, - 0xc1600000, 0xc1500000 }; + 0xc1600000, 0xc1500000 }; VECT_VAR_DECL(expected_u,hfloat,32,4) [] = { 0x4f800000, 0x4f800000, - 0x4f800000, 0x4f800000 }; + 0x4f800000, 0x4f800000 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, int, 16, 4) [] = { 0xfff1, 0x5, 0xfff1, 0x5 }; +VECT_VAR_DECL(expected, uint, 16, 4) [] = { 0x0, 0x5, 0x0, 0x5 }; +VECT_VAR_DECL(expected, int, 16, 8) [] = { 0x0, 0x0, 0xf, 0xfff1, + 0x0, 0x0, 0xf, 0xfff1 }; +VECT_VAR_DECL(expected, uint, 16, 8) [] = { 0x0, 0x0, 0xf, 0x0, + 0x0, 0x0, 0xf, 0x0 }; +#endif VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x5 }; VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x5 }; VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0xf, 0xfffffff1 }; VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xf, 0x0 }; /* Expected results for vcvt_n.
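The vcvt_n tables that follow are straightforward to verify: the fixed-point-to-float forms compute x / 2^FRAC. The 4-lane signed variant uses FRAC = 2 and the testsuite's default input buffer, which (stated here as an assumption) starts at -16, -15, -14, -13; the first lanes therefore become -4.0, -3.75, -3.5, -3.25, i.e. 0xc400, 0xc380, 0xc300, 0xc280 in binary16. A sketch of that arithmetic (__fp16 support assumed):

#include <stdint.h>
#include <stdio.h>

typedef union { __fp16 value; uint16_t hex; } conv16;

int
main (void)
{
  int16_t in[] = { -16, -15, -14, -13 };
  for (int i = 0; i < 4; i++)
    {
      conv16 c;
      /* vcvt_n_f16_s16 with 2 fractional bits: divide by 2^2.  */
      c.value = (__fp16) (in[i] / 4.0);
      printf ("0x%04x\n", c.hex);   /* 0xc400 0xc380 0xc300 0xc280 */
    }
  return 0;
}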
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_vcvt_n_s, hfloat, 16, 4) [] = { 0xc400, 0xc380, + 0xc300, 0xc280 }; +VECT_VAR_DECL(expected_vcvt_n_u, hfloat, 16, 4) [] = { 0x6000, 0x6000, + 0x6000, 0x6000 }; +VECT_VAR_DECL(expected_vcvt_n_s, hfloat, 16, 8) [] = { 0xb000, 0xaf80, + 0xaf00, 0xae80, + 0xae00, 0xad80, + 0xad00, 0xac80 }; +VECT_VAR_DECL(expected_vcvt_n_u, hfloat, 16, 8) [] = { 0x4c00, 0x4c00, + 0x4c00, 0x4c00, + 0x4c00, 0x4c00, + 0x4c00, 0x4c00 }; +#endif VECT_VAR_DECL(expected_vcvt_n_s,hfloat,32,2) [] = { 0xc0800000, 0xc0700000 }; VECT_VAR_DECL(expected_vcvt_n_u,hfloat,32,2) [] = { 0x4c000000, 0x4c000000 }; VECT_VAR_DECL(expected_vcvt_n_s,hfloat,32,4) [] = { 0xb2800000, 0xb2700000, 0xb2600000, 0xb2500000 }; VECT_VAR_DECL(expected_vcvt_n_u,hfloat,32,4) [] = { 0x49800000, 0x49800000, 0x49800000, 0x49800000 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_vcvt_n, int, 16, 4) [] = { 0xffc3, 0x15, + 0xffc3, 0x15 }; +VECT_VAR_DECL(expected_vcvt_n, uint, 16, 4) [] = { 0x0, 0x2a6, 0x0, 0x2a6 }; +VECT_VAR_DECL(expected_vcvt_n, int, 16, 8) [] = { 0x0, 0x0, 0x78f, 0xf871, + 0x0, 0x0, 0x78f, 0xf871 }; +VECT_VAR_DECL(expected_vcvt_n, uint, 16, 8) [] = { 0x0, 0x0, 0xf1e0, 0x0, + 0x0, 0x0, 0xf1e0, 0x0 }; +#endif VECT_VAR_DECL(expected_vcvt_n,int,32,2) [] = { 0xff0b3333, 0x54cccd }; VECT_VAR_DECL(expected_vcvt_n,uint,32,2) [] = { 0x0, 0x15 }; VECT_VAR_DECL(expected_vcvt_n,int,32,4) [] = { 0x0, 0x0, 0x1e3d7, 0xfffe1c29 }; VECT_VAR_DECL(expected_vcvt_n,uint,32,4) [] = { 0x0, 0x0, 0x1e, 0x0 }; /* Expected results for vcvt with rounding. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_rounding, int, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa }; +VECT_VAR_DECL(expected_rounding, uint, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa }; +VECT_VAR_DECL(expected_rounding, int, 16, 8) [] = { 0x7d, 0x7d, 0x7d, 0x7d, + 0x7d, 0x7d, 0x7d, 0x7d }; +VECT_VAR_DECL(expected_rounding, uint, 16, 8) [] = { 0x7d, 0x7d, 0x7d, 0x7d, + 0x7d, 0x7d, 0x7d, 0x7d }; +#endif VECT_VAR_DECL(expected_rounding,int,32,2) [] = { 0xa, 0xa }; VECT_VAR_DECL(expected_rounding,uint,32,2) [] = { 0xa, 0xa }; VECT_VAR_DECL(expected_rounding,int,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d }; VECT_VAR_DECL(expected_rounding,uint,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d }; /* Expected results for vcvt_n with rounding. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_vcvt_n_rounding, int, 16, 4) [] = +{ 0x533, 0x533, 0x533, 0x533 }; +VECT_VAR_DECL(expected_vcvt_n_rounding, uint, 16, 4) [] = +{ 0x533, 0x533, 0x533, 0x533 }; +VECT_VAR_DECL(expected_vcvt_n_rounding, int, 16, 8) [] = +{ 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected_vcvt_n_rounding, uint, 16, 8) [] = +{ 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff }; +#endif VECT_VAR_DECL(expected_vcvt_n_rounding,int,32,2) [] = { 0xa66666, 0xa66666 }; VECT_VAR_DECL(expected_vcvt_n_rounding,uint,32,2) [] = { 0xa66666, 0xa66666 }; VECT_VAR_DECL(expected_vcvt_n_rounding,int,32,4) [] = { 0xfbccc, 0xfbccc, @@ -42,11 +105,17 @@ VECT_VAR_DECL(expected_vcvt_n_rounding,uint,32,4) [] = { 0xfbccc, 0xfbccc, 0xfbccc, 0xfbccc }; /* Expected results for vcvt_n with saturation. 
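Two of those entries are worth working through. In the rounding section below, the 4-lane input is VDUP'd to 10.4 and converted with FRAC = 7: 10.4 * 128 is about 1331.2, which rounds toward zero to 1331 = 0x533. The 8-lane input is 125.9 with FRAC = 13: 125.9 * 8192 is about 1,031,373, far outside int16_t, so the signed result saturates to 0x7fff and the unsigned one to 0xffff. A model of the signed conversion (double arithmetic stands in for the fp16 instruction):

#include <stdint.h>
#include <stdio.h>

/* Model of vcvt_n_s16_f16: scale by 2^FRAC, round toward zero,
   saturate to the int16_t range.  */
static int16_t
cvt_n_s16 (double x, int frac)
{
  double scaled = x * (double) (1 << frac);
  if (scaled >= 32767.0)
    return INT16_MAX;                  /* saturate high */
  if (scaled <= -32768.0)
    return INT16_MIN;                  /* saturate low */
  return (int16_t) scaled;             /* C cast truncates toward zero */
}

int
main (void)
{
  printf ("0x%x\n", cvt_n_s16 (10.4, 7));    /* 0x533 */
  printf ("0x%x\n", cvt_n_s16 (125.9, 13));  /* 0x7fff */
  return 0;
}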
*/ -VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,2) [] = { 0x7fffffff, - 0x7fffffff }; -VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,4) [] = { 0x7fffffff, - 0x7fffffff, - 0x7fffffff, 0x7fffffff }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_vcvt_n_saturation, int, 16, 4) [] = +{ 0x533, 0x533, 0x533, 0x533 }; +VECT_VAR_DECL(expected_vcvt_n_saturation, int, 16, 8) [] = +{ 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +#endif +VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,2) [] = +{ 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,4) [] = +{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; #define TEST_MSG "VCVT/VCVTQ" void exec_vcvt (void) @@ -89,11 +158,26 @@ void exec_vcvt (void) /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); /* Make sure some elements have a fractional part, to exercise integer conversions. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VSET_LANE(vector, , float, f, 16, 4, 0, -15.3f); + VSET_LANE(vector, , float, f, 16, 4, 1, 5.3f); + VSET_LANE(vector, , float, f, 16, 4, 2, -15.3f); + VSET_LANE(vector, , float, f, 16, 4, 3, 5.3f); + VSET_LANE(vector, q, float, f, 16, 8, 4, -15.3f); + VSET_LANE(vector, q, float, f, 16, 8, 5, 5.3f); + VSET_LANE(vector, q, float, f, 16, 8, 6, -15.3f); + VSET_LANE(vector, q, float, f, 16, 8, 7, 5.3f); +#endif + VSET_LANE(vector, , float, f, 32, 2, 0, -15.3f); VSET_LANE(vector, , float, f, 32, 2, 1, 5.3f); VSET_LANE(vector, q, float, f, 32, 4, 2, -15.3f); @@ -103,23 +187,55 @@ void exec_vcvt (void) before overwriting them. */ #define TEST_MSG2 "" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvt_f16_xx. */ + TEST_VCVT_FP(, float, f, 16, 4, int, s, expected_s); + TEST_VCVT_FP(, float, f, 16, 4, uint, u, expected_u); +#endif /* vcvt_f32_xx. */ TEST_VCVT_FP(, float, f, 32, 2, int, s, expected_s); TEST_VCVT_FP(, float, f, 32, 2, uint, u, expected_u); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvtq_f16_xx. */ + TEST_VCVT_FP(q, float, f, 16, 8, int, s, expected_s); + TEST_VCVT_FP(q, float, f, 16, 8, uint, u, expected_u); +#endif /* vcvtq_f32_xx. */ TEST_VCVT_FP(q, float, f, 32, 4, int, s, expected_s); TEST_VCVT_FP(q, float, f, 32, 4, uint, u, expected_u); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvt_xx_f16. */ + TEST_VCVT(, int, s, 16, 4, float, f, expected); + TEST_VCVT(, uint, u, 16, 4, float, f, expected); +#endif /* vcvt_xx_f32. 
*/ TEST_VCVT(, int, s, 32, 2, float, f, expected); TEST_VCVT(, uint, u, 32, 2, float, f, expected); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VSET_LANE(vector, q, float, f, 16, 8, 0, 0.0f); + VSET_LANE(vector, q, float, f, 16, 8, 1, -0.0f); + VSET_LANE(vector, q, float, f, 16, 8, 2, 15.12f); + VSET_LANE(vector, q, float, f, 16, 8, 3, -15.12f); + VSET_LANE(vector, q, float, f, 16, 8, 4, 0.0f); + VSET_LANE(vector, q, float, f, 16, 8, 5, -0.0f); + VSET_LANE(vector, q, float, f, 16, 8, 6, 15.12f); + VSET_LANE(vector, q, float, f, 16, 8, 7, -15.12f); +#endif + VSET_LANE(vector, q, float, f, 32, 4, 0, 0.0f); VSET_LANE(vector, q, float, f, 32, 4, 1, -0.0f); VSET_LANE(vector, q, float, f, 32, 4, 2, 15.12f); VSET_LANE(vector, q, float, f, 32, 4, 3, -15.12f); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvtq_xx_f16. */ + TEST_VCVT(q, int, s, 16, 8, float, f, expected); + TEST_VCVT(q, uint, u, 16, 8, float, f, expected); +#endif + /* vcvtq_xx_f32. */ TEST_VCVT(q, int, s, 32, 4, float, f, expected); TEST_VCVT(q, uint, u, 32, 4, float, f, expected); @@ -129,18 +245,38 @@ void exec_vcvt (void) #undef TEST_MSG #define TEST_MSG "VCVT_N/VCVTQ_N" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvt_n_f16_xx. */ + TEST_VCVT_N_FP(, float, f, 16, 4, int, s, 2, expected_vcvt_n_s); + TEST_VCVT_N_FP(, float, f, 16, 4, uint, u, 7, expected_vcvt_n_u); +#endif /* vcvt_n_f32_xx. */ TEST_VCVT_N_FP(, float, f, 32, 2, int, s, 2, expected_vcvt_n_s); TEST_VCVT_N_FP(, float, f, 32, 2, uint, u, 7, expected_vcvt_n_u); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvtq_n_f16_xx. */ + TEST_VCVT_N_FP(q, float, f, 16, 8, int, s, 7, expected_vcvt_n_s); + TEST_VCVT_N_FP(q, float, f, 16, 8, uint, u, 12, expected_vcvt_n_u); +#endif /* vcvtq_n_f32_xx. */ TEST_VCVT_N_FP(q, float, f, 32, 4, int, s, 30, expected_vcvt_n_s); TEST_VCVT_N_FP(q, float, f, 32, 4, uint, u, 12, expected_vcvt_n_u); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvt_n_xx_f16. */ + TEST_VCVT_N(, int, s, 16, 4, float, f, 2, expected_vcvt_n); + TEST_VCVT_N(, uint, u, 16, 4, float, f, 7, expected_vcvt_n); +#endif /* vcvt_n_xx_f32. */ TEST_VCVT_N(, int, s, 32, 2, float, f, 20, expected_vcvt_n); TEST_VCVT_N(, uint, u, 32, 2, float, f, 2, expected_vcvt_n); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvtq_n_xx_f16. */ + TEST_VCVT_N(q, int, s, 16, 8, float, f, 7, expected_vcvt_n); + TEST_VCVT_N(q, uint, u, 16, 8, float, f, 12, expected_vcvt_n); +#endif /* vcvtq_n_xx_f32. */ TEST_VCVT_N(q, int, s, 32, 4, float, f, 13, expected_vcvt_n); TEST_VCVT_N(q, uint, u, 32, 4, float, f, 1, expected_vcvt_n); @@ -150,20 +286,49 @@ void exec_vcvt (void) #define TEST_MSG "VCVT/VCVTQ" #undef TEST_MSG2 #define TEST_MSG2 "(check rounding)" + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, 10.4f); + VDUP(vector, q, float, f, 16, 8, 125.9f); +#endif VDUP(vector, , float, f, 32, 2, 10.4f); VDUP(vector, q, float, f, 32, 4, 125.9f); + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvt_xx_f16. */ + TEST_VCVT(, int, s, 16, 4, float, f, expected_rounding); + TEST_VCVT(, uint, u, 16, 4, float, f, expected_rounding); +#endif /* vcvt_xx_f32. */ TEST_VCVT(, int, s, 32, 2, float, f, expected_rounding); TEST_VCVT(, uint, u, 32, 2, float, f, expected_rounding); + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvtq_xx_f16. */ + TEST_VCVT(q, int, s, 16, 8, float, f, expected_rounding); + TEST_VCVT(q, uint, u, 16, 8, float, f, expected_rounding); +#endif /* vcvtq_xx_f32. 
*/ TEST_VCVT(q, int, s, 32, 4, float, f, expected_rounding); TEST_VCVT(q, uint, u, 32, 4, float, f, expected_rounding); #undef TEST_MSG #define TEST_MSG "VCVT_N/VCVTQ_N" + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvt_n_xx_f16. */ + TEST_VCVT_N(, int, s, 16, 4, float, f, 7, expected_vcvt_n_rounding); + TEST_VCVT_N(, uint, u, 16, 4, float, f, 7, expected_vcvt_n_rounding); +#endif /* vcvt_n_xx_f32. */ TEST_VCVT_N(, int, s, 32, 2, float, f, 20, expected_vcvt_n_rounding); TEST_VCVT_N(, uint, u, 32, 2, float, f, 20, expected_vcvt_n_rounding); + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvtq_n_xx_f16. */ + TEST_VCVT_N(q, int, s, 16, 8, float, f, 13, expected_vcvt_n_rounding); + TEST_VCVT_N(q, uint, u, 16, 8, float, f, 13, expected_vcvt_n_rounding); +#endif /* vcvtq_n_xx_f32. */ TEST_VCVT_N(q, int, s, 32, 4, float, f, 13, expected_vcvt_n_rounding); TEST_VCVT_N(q, uint, u, 32, 4, float, f, 13, expected_vcvt_n_rounding); @@ -172,8 +337,18 @@ void exec_vcvt (void) #define TEST_MSG "VCVT_N/VCVTQ_N" #undef TEST_MSG2 #define TEST_MSG2 "(check saturation)" + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvt_n_xx_f16. */ + TEST_VCVT_N(, int, s, 16, 4, float, f, 7, expected_vcvt_n_saturation); +#endif /* vcvt_n_xx_f32. */ TEST_VCVT_N(, int, s, 32, 2, float, f, 31, expected_vcvt_n_saturation); + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + /* vcvtq_n_xx_f16. */ + TEST_VCVT_N(q, int, s, 16, 8, float, f, 13, expected_vcvt_n_saturation); +#endif /* vcvtq_n_xx_f32. */ TEST_VCVT_N(q, int, s, 32, 4, float, f, 31, expected_vcvt_n_saturation); } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtX.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtX.inc new file mode 100644 index 0000000..e0a479f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtX.inc @@ -0,0 +1,113 @@ +/* Template file for VCVT operator validation. + + This file is meant to be included by the relevant test files, which + have to define the intrinsic family to test. If a given intrinsic + supports variants which are not supported by all the other vcvt + operators, these can be tested by providing a definition for + EXTRA_TESTS. + + This file is only used for VCVT? tests, which currently have only f16 to + integer variants. It is based on vcvt.c. */ + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1 (NAME) + +void FNNAME (INSN_NAME) (void) +{ + int i; + + /* Basic test: y=vcvt(x), then store the result. */ +#define TEST_VCVT1(INSN, Q, T1, T2, W, N, TS1, TS2, EXP) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + CHECK(TEST_MSG, T1, W, N, PRIx##W, EXP, TEST_MSG2); + +#define TEST_VCVT(INSN, Q, T1, T2, W, N, TS1, TS2, EXP) \ + TEST_VCVT1 (INSN, Q, T1, T2, W, N, TS1, TS2, EXP) + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer". */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + + /* Make sure some elements have a fractional part, to exercise + integer conversions. 
 */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VSET_LANE(vector, , float, f, 16, 4, 0, -15.3f);
+  VSET_LANE(vector, , float, f, 16, 4, 1, 5.3f);
+  VSET_LANE(vector, , float, f, 16, 4, 2, -15.3f);
+  VSET_LANE(vector, , float, f, 16, 4, 3, 5.3f);
+  VSET_LANE(vector, q, float, f, 16, 8, 4, -15.3f);
+  VSET_LANE(vector, q, float, f, 16, 8, 5, 5.3f);
+  VSET_LANE(vector, q, float, f, 16, 8, 6, -15.3f);
+  VSET_LANE(vector, q, float, f, 16, 8, 7, 5.3f);
+#endif
+
+  /* The same result buffers are used multiple times, so we check them
+     before overwriting them.  */
+#define TEST_MSG2 ""
+
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  /* vcvt?_xx_f16.  */
+  TEST_VCVT(INSN_NAME, , int, s, 16, 4, float, f, expected);
+  TEST_VCVT(INSN_NAME, , uint, u, 16, 4, float, f, expected);
+#endif
+
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VSET_LANE(vector, q, float, f, 16, 8, 0, 0.0f);
+  VSET_LANE(vector, q, float, f, 16, 8, 1, -0.0f);
+  VSET_LANE(vector, q, float, f, 16, 8, 2, 15.12f);
+  VSET_LANE(vector, q, float, f, 16, 8, 3, -15.12f);
+  VSET_LANE(vector, q, float, f, 16, 8, 4, 0.0f);
+  VSET_LANE(vector, q, float, f, 16, 8, 5, -0.0f);
+  VSET_LANE(vector, q, float, f, 16, 8, 6, 15.12f);
+  VSET_LANE(vector, q, float, f, 16, 8, 7, -15.12f);
+#endif
+
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  /* vcvt?q_xx_f16.  */
+  TEST_VCVT(INSN_NAME, q, int, s, 16, 8, float, f, expected);
+  TEST_VCVT(INSN_NAME, q, uint, u, 16, 8, float, f, expected);
+#endif
+
+  /* Check rounding.  */
+#undef TEST_MSG2
+#define TEST_MSG2 "(check rounding)"
+
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VDUP(vector, , float, f, 16, 4, 10.4f);
+  VDUP(vector, q, float, f, 16, 8, 125.9f);
+#endif
+
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  /* vcvt?_xx_f16.  */
+  TEST_VCVT(INSN_NAME, , int, s, 16, 4, float, f, expected_rounding);
+  TEST_VCVT(INSN_NAME, , uint, u, 16, 4, float, f, expected_rounding);
+#endif
+
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  /* vcvt?q_xx_f16.  */
+  TEST_VCVT(INSN_NAME, q, int, s, 16, 8, float, f, expected_rounding);
+  TEST_VCVT(INSN_NAME, q, uint, u, 16, 8, float, f, expected_rounding);
+#endif
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int
+main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvta_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvta_1.c
new file mode 100644
index 0000000..c467f05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvta_1.c
@@ -0,0 +1,33 @@
+/* This file tests an intrinsic which currently has only an f16 variant, and
+   which is only available when FP16 arithmetic instructions are supported.  */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+#include <math.h>
+
+/* Expected results.  */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, int, 16, 4) [] = { 0xfff1, 0x5, 0xfff1, 0x5 };
+VECT_VAR_DECL(expected, uint, 16, 4) [] = { 0x0, 0x5, 0x0, 0x5 };
+VECT_VAR_DECL(expected, int, 16, 8) [] = { 0x0, 0x0, 0xf, 0xfff1,
+                                           0x0, 0x0, 0xf, 0xfff1 };
+VECT_VAR_DECL(expected, uint, 16, 8) [] = { 0x0, 0x0, 0xf, 0x0,
+                                            0x0, 0x0, 0xf, 0x0 };
+#endif
+
+/* Expected results with rounding.  */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected_rounding, int, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa };
+VECT_VAR_DECL(expected_rounding, uint, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa };
+VECT_VAR_DECL(expected_rounding, int, 16, 8) [] = { 0x7e, 0x7e, 0x7e, 0x7e,
+                                                    0x7e, 0x7e, 0x7e, 0x7e };
+VECT_VAR_DECL(expected_rounding, uint, 16, 8) [] = { 0x7e, 0x7e, 0x7e, 0x7e,
+                                                     0x7e, 0x7e, 0x7e, 0x7e };
+#endif
+
+#define TEST_MSG "VCVTA/VCVTAQ"
+#define INSN_NAME vcvta
+
+#include "vcvtX.inc"
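For readers cross-checking the expected[] tables above: VCVTA converts to integer with rounding to nearest, ties away from zero, so 0.7 becomes 1 and a tie such as -15.5 would become -16. A minimal host-side sketch of that rule, using only the C standard library (an illustration of the semantics the tests encode, not part of the patch):

/* Round to nearest, ties away from zero -- C99 round() implements
   exactly this rule, which is the one VCVTA uses.  */
#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* 0.7 -> 1, -15.5 -> -16 (tie rounds away from zero), 24.6 -> 25.  */
  printf ("%d %d %d\n", (int) round (0.7), (int) round (-15.5),
          (int) round (24.6));
  return 0;
}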
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s16_f16_1.c
new file mode 100644
index 0000000..2084c30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s16_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 };
+int16_t expected[] = { 124, -57, 1, 25, -64, 169, -4, 77 };
+
+#define TEST_MSG "VCVTAH_S16_F16"
+#define INSN_NAME vcvtah_s16_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s32_f16_1.c
new file mode 100644
index 0000000..ebfd62a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s32_f16_1.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] =
+{
+  0.0, -0.0,
+  123.4, -567.8,
+  -34.8, 1024,
+  663.1, 169.1,
+  -4.8, 77.0,
+  -144.5, -56.8,
+
+  (float16_t) -16, (float16_t) -15,
+  (float16_t) -14, (float16_t) -13,
+};

+/* Expected results (32-bit hexadecimal representation).  */
+uint32_t expected[] =
+{
+  0x00000000,
+  0x00000000,
+  0x0000007b,
+  0xfffffdc8,
+  0xffffffdd,
+  0x00000400,
+  0x00000297,
+  0x000000a9,
+  0xfffffffb,
+  0x0000004d,
+  0xffffff6f,
+  0xffffffc7,
+  0xfffffff0,
+  0xfffffff1,
+  0xfffffff2,
+  0xfffffff3
+};
+
+#define TEST_MSG "VCVTAH_S32_F16"
+#define INSN_NAME vcvtah_s32_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int32_t
+#define OUTPUT_TYPE_SIZE 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s64_f16_1.c
new file mode 100644
index 0000000..a27871b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s64_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 };
+int64_t expected[] = { 124, -57, 1, 25, -64, 169, -4, 77 };
+
+#define TEST_MSG "VCVTAH_S64_F16"
+#define INSN_NAME vcvtah_s64_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int64_t
+#define OUTPUT_TYPE_SIZE 64
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u16_f16_1.c
new file mode 100644
index 0000000..0642ae0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u16_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 };
+uint16_t expected[] = { 124, 57, 1, 25, 64, 169, 4, 77 };
+
+#define TEST_MSG "VCVTAH_U16_F16"
+#define INSN_NAME vcvtah_u16_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u32_f16_1.c
new file mode 100644
index 0000000..5ae28fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u32_f16_1.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] =
+{
+  0.0, -0.0,
+  123.4, -567.8,
+  -34.8, 1024,
+  663.1, 169.1,
+  -4.8, 77.0,
+  -144.5, -56.8,
+
+  (float16_t) -16, (float16_t) -15,
+  (float16_t) -14, (float16_t) -13,
+};
+
+/* Expected results (32-bit hexadecimal representation).  */
+uint32_t expected[] =
+{
+  0x00000000,
+  0x00000000,
+  0x0000007b,
+  0x00000000,
+  0x00000000,
+  0x00000400,
+  0x00000297,
+  0x000000a9,
+  0x00000000,
+  0x0000004d,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000
+};
+
+#define TEST_MSG "VCVTAH_U32_F16"
+#define INSN_NAME vcvtah_u32_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint32_t
+#define OUTPUT_TYPE_SIZE 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u64_f16_1.c
new file mode 100644
index 0000000..2d197b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u64_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 };
+uint64_t expected[] = { 124, 57, 1, 25, 64, 169, 4, 77 };
+
+#define TEST_MSG "VCVTAH_U64_F16"
+#define INSN_NAME vcvtah_u64_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint64_t
+#define OUTPUT_TYPE_SIZE 64
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s16_1.c
new file mode 100644
index 0000000..540b637
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s16_1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+int16_t input[] = { 123, -567, 0, 1024, -63, 169, -4, 77 };
+uint16_t expected[] = { 0x57B0 /* 123.0.  */, 0xE06E /* -567.0.  */,
+                        0x0000 /* 0.0.  */, 0x6400 /* 1024.  */,
+                        0xD3E0 /* -63.  */, 0x5948 /* 169.  */,
+                        0xC400 /* -4.  */, 0x54D0 /* 77.  */ };
+
+#define TEST_MSG "VCVTH_F16_S16"
+#define INSN_NAME vcvth_f16_s16
+
+#define EXPECTED expected
+
+#define INPUT input
+#define INPUT_TYPE int16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s32_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s32_1.c
new file mode 100644
index 0000000..2173a0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s32_1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+uint32_t input[] =
+{
+  0, -0,
+  123, -567,
+  -34, 1024,
+  -63, 169,
+  -4, 77,
+  -144, -56,
+  -16, -15,
+  -14, -13,
+};
+
+/* Expected results (16-bit hexadecimal representation).  */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x57b0 /* 123.000000 */,
+  0xe06e /* -567.000000 */,
+  0xd040 /* -34.000000 */,
+  0x6400 /* 1024.000000 */,
+  0xd3e0 /* -63.000000 */,
+  0x5948 /* 169.000000 */,
+  0xc400 /* -4.000000 */,
+  0x54d0 /* 77.000000 */,
+  0xd880 /* -144.000000 */,
+  0xd300 /* -56.000000 */,
+  0xcc00 /* -16.000000 */,
+  0xcb80 /* -15.000000 */,
+  0xcb00 /* -14.000000 */,
+  0xca80 /* -13.000000 */
+};
+
+#define TEST_MSG "VCVTH_F16_S32"
+#define INSN_NAME vcvth_f16_s32
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE uint32_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s64_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s64_1.c
new file mode 100644
index 0000000..5f17dbe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s64_1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+int64_t input[] = { 123, -567, 0, 1024, -63, 169, -4, 77 };
+uint16_t expected[] = { 0x57B0 /* 123.0.  */, 0xE06E /* -567.0.  */,
+                        0x0000 /* 0.0.  */, 0x6400 /* 1024.  */,
+                        0xD3E0 /* -63.  */, 0x5948 /* 169.  */,
+                        0xC400 /* -4.  */, 0x54D0 /* 77.  */ };
+
+#define TEST_MSG "VCVTH_F16_S64"
+#define INSN_NAME vcvth_f16_s64
+
+#define EXPECTED expected
+
+#define INPUT input
+#define INPUT_TYPE int64_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
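The hexadecimal expected[] values in the vcvth_f16_* tests above and below are raw IEEE 754 binary16 bit patterns (1 sign bit, 5 exponent bits with bias 15, 10 fraction bits); 0x57B0, for instance, decodes to 123.0. A small host-side sketch of the decoding for normal (finite, non-subnormal) values; an illustration only, not part of the patch:

/* Decode a normal binary16 bit pattern such as the 0x57B0 used in
   the expected[] tables.  */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

static double
fp16_bits_to_double (uint16_t h)
{
  int sign = (h >> 15) & 0x1;
  int biased_exp = (h >> 10) & 0x1f;   /* 5 exponent bits, bias 15.  */
  int frac = h & 0x3ff;                /* 10 fraction bits.  */
  double mag = ldexp (1.0 + frac / 1024.0, biased_exp - 15);
  return sign ? -mag : mag;
}

int
main (void)
{
  /* 0x57B0 -> 123.0 and 0xE06E -> -567.0, as in the tables above.  */
  printf ("%g %g\n", fp16_bits_to_double (0x57B0),
          fp16_bits_to_double (0xE06E));
  return 0;
}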
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u16_1.c
new file mode 100644
index 0000000..426700c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u16_1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+uint16_t input[] = { 123, 567, 0, 1024, 63, 169, 4, 77 };
+uint16_t expected[] = { 0x57B0 /* 123.0.  */, 0x606E /* 567.0.  */,
+                        0x0000 /* 0.0.  */, 0x6400 /* 1024.0.  */,
+                        0x53E0 /* 63.0.  */, 0x5948 /* 169.0.  */,
+                        0x4400 /* 4.0.  */, 0x54D0 /* 77.0.  */ };
+
+#define TEST_MSG "VCVTH_F16_U16"
+#define INSN_NAME vcvth_f16_u16
+
+#define EXPECTED expected
+
+#define INPUT input
+#define INPUT_TYPE uint16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u32_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u32_1.c
new file mode 100644
index 0000000..1583202
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u32_1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+int32_t input[] =
+{
+  0, -0,
+  123, -567,
+  -34, 1024,
+  -63, 169,
+  -4, 77,
+  -144, -56,
+  -16, -15,
+  -14, -13,
+};
+
+/* Expected results (16-bit hexadecimal representation).  */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x57b0 /* 123.000000 */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x6400 /* 1024.000000 */,
+  0x7c00 /* inf */,
+  0x5948 /* 169.000000 */,
+  0x7c00 /* inf */,
+  0x54d0 /* 77.000000 */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */
+};
+
+#define TEST_MSG "VCVTH_F16_U32"
+#define INSN_NAME vcvth_f16_u32
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE int32_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u64_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u64_1.c
new file mode 100644
index 0000000..3413de0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u64_1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+uint64_t input[] = { 123, 567, 0, 1024, 63, 169, 4, 77 };
+uint16_t expected[] = { 0x57B0 /* 123.0.  */, 0x606E /* 567.0.  */,
+                        0x0000 /* 0.0.  */, 0x6400 /* 1024.0.  */,
+                        0x53E0 /* 63.0.  */, 0x5948 /* 169.0.  */,
+                        0x4400 /* 4.0.  */, 0x54D0 /* 77.0.  */ };
+
+#define TEST_MSG "VCVTH_F16_U64"
+#define INSN_NAME vcvth_f16_u64
+
+#define EXPECTED expected
+
+#define INPUT input
+#define INPUT_TYPE uint64_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s16_1.c
new file mode 100644
index 0000000..25265d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s16_1.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+int16_t input[] = { 1, 10, 48, 100, -1, -10, 7, -7 };
+
+/* Expected results (16-bit hexadecimal representation).  */
+uint16_t expected_1[] = { 0x3800 /* 0.5.  */,
+                          0x4500 /* 5.  */,
+                          0x4E00 /* 24.  */,
+                          0x5240 /* 50.  */,
+                          0xB800 /* -0.5.  */,
+                          0xC500 /* -5.  */,
+                          0x4300 /* 3.5.  */,
+                          0xC300 /* -3.5.  */ };
+
+uint16_t expected_2[] = { 0x3400 /* 0.25.  */,
+                          0x4100 /* 2.5.  */,
+                          0x4A00 /* 12.  */,
+                          0x4E40 /* 25.  */,
+                          0xB400 /* -0.25.  */,
+                          0xC100 /* -2.5.  */,
+                          0x3F00 /* 1.75.  */,
+                          0xBF00 /* -1.75.  */ };
+
+#define TEST_MSG "VCVTH_N_F16_S16"
+#define INSN_NAME vcvth_n_f16_s16
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+
+#define INPUT_TYPE int16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s32_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s32_1.c
new file mode 100644
index 0000000..9ce9558
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s32_1.c
@@ -0,0 +1,99 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+uint32_t input[] =
+{
+  0, -0,
+  123, -567,
+  -34, 1024,
+  -63, 169,
+  -4, 77,
+  -144, -56,
+  -16, -15,
+  -14, -13,
+};
+
+/* Expected results (16-bit hexadecimal representation).  */
+uint16_t expected_1[] =
+{
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x53b0 /* 61.500000 */,
+  0xdc6e /* -283.500000 */,
+  0xcc40 /* -17.000000 */,
+  0x6000 /* 512.000000 */,
+  0xcfe0 /* -31.500000 */,
+  0x5548 /* 84.500000 */,
+  0xc000 /* -2.000000 */,
+  0x50d0 /* 38.500000 */,
+  0xd480 /* -72.000000 */,
+  0xcf00 /* -28.000000 */,
+  0xc800 /* -8.000000 */,
+  0xc780 /* -7.500000 */,
+  0xc700 /* -7.000000 */,
+  0xc680 /* -6.500000 */
+};
+
+uint16_t expected_2[] =
+{
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x4fb0 /* 30.750000 */,
+  0xd86e /* -141.750000 */,
+  0xc840 /* -8.500000 */,
+  0x5c00 /* 256.000000 */,
+  0xcbe0 /* -15.750000 */,
+  0x5148 /* 42.250000 */,
+  0xbc00 /* -1.000000 */,
+  0x4cd0 /* 19.250000 */,
+  0xd080 /* -36.000000 */,
+  0xcb00 /* -14.000000 */,
+  0xc400 /* -4.000000 */,
+  0xc380 /* -3.750000 */,
+  0xc300 /* -3.500000 */,
+  0xc280 /* -3.250000 */
+};
+
+uint16_t expected_3[] =
+{
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x8002 /* -0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x0004 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x0001 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x8001 /* -0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x8000 /* -0.000000 */
+};
+
+#define TEST_MSG "VCVTH_N_F16_S32"
+#define INSN_NAME vcvth_n_f16_s32
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+#define EXPECTED_3 expected_3
+
+#define INPUT_TYPE int32_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+#define SCALAR_3 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s64_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s64_1.c
new file mode 100644
index 0000000..f0adb09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s64_1.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+int64_t input[] = { 1, 10, 48, 100, -1, -10, 7, -7 };
+
+/* Expected results (16-bit hexadecimal representation).  */
+uint16_t expected_1[] = { 0x3800 /* 0.5.  */,
+                          0x4500 /* 5.  */,
+                          0x4E00 /* 24.  */,
+                          0x5240 /* 50.  */,
+                          0xB800 /* -0.5.  */,
+                          0xC500 /* -5.  */,
+                          0x4300 /* 3.5.  */,
+                          0xC300 /* -3.5.  */ };
+
+uint16_t expected_2[] = { 0x3400 /* 0.25.  */,
+                          0x4100 /* 2.5.  */,
+                          0x4A00 /* 12.  */,
+                          0x4E40 /* 25.  */,
+                          0xB400 /* -0.25.  */,
+                          0xC100 /* -2.5.  */,
+                          0x3F00 /* 1.75.  */,
+                          0xBF00 /* -1.75.  */ };
+
+#define TEST_MSG "VCVTH_N_F16_S64"
+#define INSN_NAME vcvth_n_f16_s64
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+
+#define INPUT_TYPE int64_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
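The _n variants interpret the integer input as a fixed-point value with N fractional bits, i.e. the result is the input scaled by 2^-N; that is why input 1 yields 0.5 under SCALAR_1 == 1 and 0.25 under SCALAR_2 == 2 in the tables above. A host-side sketch of the scaling rule (illustration only, not part of the patch):

/* vcvth_n_f16_sXX (x, n) computes x * 2^-n, then rounds to binary16;
   the scaling step is modelled here with ldexp.  */
#include <math.h>
#include <stdio.h>

static double
cvt_n_to_fp_model (long long x, int n)
{
  return ldexp ((double) x, -n);   /* x * 2^-n.  */
}

int
main (void)
{
  /* Inputs 1, 10, 100 with n == 2 -> 0.25, 2.5, 25 (see expected_2).  */
  printf ("%g %g %g\n", cvt_n_to_fp_model (1, 2),
          cvt_n_to_fp_model (10, 2), cvt_n_to_fp_model (100, 2));
  return 0;
}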
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u16_1.c
new file mode 100644
index 0000000..74c4e60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u16_1.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+uint16_t input[] = { 1, 10, 48, 100, 1000, 0, 500, 9 };
+
+/* Expected results (16-bit hexadecimal representation).  */
+uint16_t expected_1[] = { 0x3800 /* 0.5.  */,
+                          0x4500 /* 5.  */,
+                          0x4E00 /* 24.  */,
+                          0x5240 /* 50.  */,
+                          0x5FD0 /* 500.  */,
+                          0x0000 /* 0.0.  */,
+                          0x5BD0 /* 250.  */,
+                          0x4480 /* 4.5.  */ };
+
+uint16_t expected_2[] = { 0x3400 /* 0.25.  */,
+                          0x4100 /* 2.5.  */,
+                          0x4A00 /* 12.  */,
+                          0x4E40 /* 25.  */,
+                          0x5BD0 /* 250.  */,
+                          0x0000 /* 0.0.  */,
+                          0x57D0 /* 125.  */,
+                          0x4080 /* 2.25.  */ };
+
+#define TEST_MSG "VCVTH_N_F16_U16"
+#define INSN_NAME vcvth_n_f16_u16
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+
+#define INPUT_TYPE uint16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u32_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u32_1.c
new file mode 100644
index 0000000..d308c35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u32_1.c
@@ -0,0 +1,99 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+uint32_t input[] =
+{
+  0, -0,
+  123, -567,
+  -34, 1024,
+  -63, 169,
+  -4, 77,
+  -144, -56,
+  -16, -15,
+  -14, -13,
+};
+
+/* Expected results (16-bit hexadecimal representation).  */
+uint16_t expected_1[] =
+{
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x53b0 /* 61.500000 */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x6000 /* 512.000000 */,
+  0x7c00 /* inf */,
+  0x5548 /* 84.500000 */,
+  0x7c00 /* inf */,
+  0x50d0 /* 38.500000 */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */
+};
+
+uint16_t expected_2[] =
+{
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x4fb0 /* 30.750000 */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x5c00 /* 256.000000 */,
+  0x7c00 /* inf */,
+  0x5148 /* 42.250000 */,
+  0x7c00 /* inf */,
+  0x4cd0 /* 19.250000 */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */,
+  0x7c00 /* inf */
+};
+
+uint16_t expected_3[] =
+{
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x0004 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x0001 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x0000 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */
+};
+
+#define TEST_MSG "VCVTH_N_F16_U32"
+#define INSN_NAME vcvth_n_f16_u32
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+#define EXPECTED_3 expected_3
+
+#define INPUT_TYPE uint32_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+#define SCALAR_3 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u64_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u64_1.c
new file mode 100644
index 0000000..b393767
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u64_1.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+uint64_t input[] = { 1, 10, 48, 100, 1000, 0, 500, 9 };
+
+/* Expected results (16-bit hexadecimal representation).  */
+uint16_t expected_1[] = { 0x3800 /* 0.5.  */,
+                          0x4500 /* 5.  */,
+                          0x4E00 /* 24.  */,
+                          0x5240 /* 50.  */,
+                          0x5FD0 /* 500.  */,
+                          0x0000 /* 0.0.  */,
+                          0x5BD0 /* 250.  */,
+                          0x4480 /* 4.5.  */ };
+
+uint16_t expected_2[] = { 0x3400 /* 0.25.  */,
+                          0x4100 /* 2.5.  */,
+                          0x4A00 /* 12.  */,
+                          0x4E40 /* 25.  */,
+                          0x5BD0 /* 250.  */,
+                          0x0000 /* 0.0.  */,
+                          0x57D0 /* 125.  */,
+                          0x4080 /* 2.25.  */ };
+
+#define TEST_MSG "VCVTH_N_F16_U64"
+#define INSN_NAME vcvth_n_f16_u64
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+
+#define INPUT_TYPE uint64_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s16_f16_1.c
new file mode 100644
index 0000000..247f7c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s16_f16_1.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 2.5, 100, 7.1, -9.9, -5.0, 9.1, -4.8, 77 };
+int16_t expected_1[] = { 5, 200, 14, -19, -10, 18, -9, 154 };
+int16_t expected_2[] = { 10, 400, 28, -39, -20, 36, -19, 308 };
+
+#define TEST_MSG "VCVTH_N_S16_F16"
+#define INSN_NAME vcvth_n_s16_f16
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int16_t
+#define OUTPUT_TYPE_SIZE 16
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s32_f16_1.c
new file mode 100644
index 0000000..6e2ee50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s32_f16_1.c
@@ -0,0 +1,100 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] =
+{
+  0.0, -0.0,
+  123.4, -567.8,
+  -34.8, 1024,
+  663.1, 169.1,
+  -4.8, 77.0,
+  -144.5, -56.8,
+
+  (float16_t) -16, (float16_t) -15,
+  (float16_t) -14, (float16_t) -13,
+};
+
+/* Expected results (32-bit hexadecimal representation).  */
+uint32_t expected_1[] =
+{
+  0x00000000,
+  0x00000000,
+  0x000000f6,
+  0xfffffb90,
+  0xffffffbb,
+  0x00000800,
+  0x0000052e,
+  0x00000152,
+  0xfffffff7,
+  0x0000009a,
+  0xfffffedf,
+  0xffffff8f,
+  0xffffffe0,
+  0xffffffe2,
+  0xffffffe4,
+  0xffffffe6,
+};
+
+uint32_t expected_2[] =
+{
+  0x00000000,
+  0x00000000,
+  0x000001ed,
+  0xfffff720,
+  0xffffff75,
+  0x00001000,
+  0x00000a5c,
+  0x000002a4,
+  0xffffffed,
+  0x00000134,
+  0xfffffdbe,
+  0xffffff1d,
+  0xffffffc0,
+  0xffffffc4,
+  0xffffffc8,
+  0xffffffcc,
+};
+
+uint32_t expected_3[] =
+{
+  0x00000000,
+  0x00000000,
+  0x7fffffff,
+  0x80000000,
+  0x80000000,
+  0x7fffffff,
+  0x7fffffff,
+  0x7fffffff,
+  0x80000000,
+  0x7fffffff,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+};
+
+#define TEST_MSG "VCVTH_N_S32_F16"
+#define INSN_NAME vcvth_n_s32_f16
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+#define EXPECTED_3 expected_3
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint32_t
+#define OUTPUT_TYPE_SIZE 32
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+#define SCALAR_3 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
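With SCALAR_3 == 32 the conversion scales the input by 2^32 before truncating to a 32-bit integer, so every input of magnitude >= 1 overflows and the instruction is expected to saturate; that is what the 0x7fffffff/0x80000000 entries in expected_3[] above encode. A host-side sketch of the clamping rule (illustration only, not part of the patch; the exact boundary handling is simplified):

/* Model of a saturating float -> int32 conversion with n fraction
   bits: scale by 2^n, then clamp to the int32_t range.  */
#include <stdint.h>
#include <stdio.h>

static int32_t
cvt_n_sat_model (double x, int n)
{
  double scaled = x * (double) (1ULL << n);   /* assumes n <= 63 */
  if (scaled >= 2147483647.0)
    return INT32_MAX;
  if (scaled <= -2147483648.0)
    return INT32_MIN;
  return (int32_t) scaled;
}

int
main (void)
{
  /* 123.4 * 2^32 and -567.8 * 2^32 both saturate.  */
  printf ("%d %d\n", cvt_n_sat_model (123.4, 32),
          cvt_n_sat_model (-567.8, 32));
  return 0;
}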
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s64_f16_1.c
new file mode 100644
index 0000000..27502c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s64_f16_1.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 2.5, 100, 7.1, -9.9, -5.0, 9.1, -4.8, 77 };
+int64_t expected_1[] = { 5, 200, 14, -19, -10, 18, -9, 154 };
+int64_t expected_2[] = { 10, 400, 28, -39, -20, 36, -19, 308 };
+
+#define TEST_MSG "VCVTH_N_S64_F16"
+#define INSN_NAME vcvth_n_s64_f16
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int64_t
+#define OUTPUT_TYPE_SIZE 64
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u16_f16_1.c
new file mode 100644
index 0000000..e5f57f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u16_f16_1.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 2.5, 100, 7.1, 9.9, 5.0, 9.1, 4.8, 77 };
+uint16_t expected_1[] = {5, 200, 14, 19, 10, 18, 9, 154};
+uint16_t expected_2[] = {10, 400, 28, 39, 20, 36, 19, 308};
+
+#define TEST_MSG "VCVTH_N_U16_F16"
+#define INSN_NAME vcvth_n_u16_f16
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint16_t
+#define OUTPUT_TYPE_SIZE 16
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u32_f16_1.c
new file mode 100644
index 0000000..188f60c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u32_f16_1.c
@@ -0,0 +1,100 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] =
+{
+  0.0, -0.0,
+  123.4, -567.8,
+  -34.8, 1024,
+  663.1, 169.1,
+  -4.8, 77.0,
+  -144.5, -56.8,
+
+  (float16_t) -16, (float16_t) -15,
+  (float16_t) -14, (float16_t) -13,
+};
+
+/* Expected results (32-bit hexadecimal representation).  */
+uint32_t expected_1[] =
+{
+  0x00000000,
+  0x00000000,
+  0x000000f6,
+  0x00000000,
+  0x00000000,
+  0x00000800,
+  0x0000052e,
+  0x00000152,
+  0x00000000,
+  0x0000009a,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+};
+
+uint32_t expected_2[] =
+{
+  0x00000000,
+  0x00000000,
+  0x000001ed,
+  0x00000000,
+  0x00000000,
+  0x00001000,
+  0x00000a5c,
+  0x000002a4,
+  0x00000000,
+  0x00000134,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+};
+
+uint32_t expected_3[] =
+{
+  0x00000000,
+  0x00000000,
+  0xffffffff,
+  0x00000000,
+  0x00000000,
+  0xffffffff,
+  0xffffffff,
+  0xffffffff,
+  0x00000000,
+  0xffffffff,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+};
+
+#define TEST_MSG "VCVTH_N_U32_F16"
+#define INSN_NAME vcvth_n_u32_f16
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+#define EXPECTED_3 expected_3
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint32_t
+#define OUTPUT_TYPE_SIZE 32
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+#define SCALAR_3 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u64_f16_1.c
new file mode 100644
index 0000000..cfc33c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u64_f16_1.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 2.5, 100, 7.1, 9.9, 5.0, 9.1, 4.8, 77 };
+uint64_t expected_1[] = { 5, 200, 14, 19, 10, 18, 9, 154 };
+uint64_t expected_2[] = { 10, 400, 28, 39, 20, 36, 19, 308 };
+
+#define TEST_MSG "VCVTH_N_U64_F16"
+#define INSN_NAME vcvth_n_u64_f16
+
+#define INPUT input
+#define EXPECTED_1 expected_1
+#define EXPECTED_2 expected_2
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint64_t
+#define OUTPUT_TYPE_SIZE 64
+
+#define SCALAR_OPERANDS
+#define SCALAR_1 1
+#define SCALAR_2 2
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s16_f16_1.c
new file mode 100644
index 0000000..9965654
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s16_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 };
+int16_t expected[] = { 123, -56, 0, 24, -63, 169, -4, 77 };
+
+#define TEST_MSG "VCVTH_S16_F16"
+#define INSN_NAME vcvth_s16_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s32_f16_1.c
new file mode 100644
index 0000000..6bff954
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s32_f16_1.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] =
+{
+  0.0, -0.0,
+  123.4, -567.8,
+  -34.8, 1024,
+  663.1, 169.1,
+  -4.8, 77.0,
+  -144.5, -56.8,
+
+  (float16_t) -16, (float16_t) -15,
+  (float16_t) -14, (float16_t) -13,
+};
+
+/* Expected results (32-bit hexadecimal representation).  */
+uint32_t expected[] =
+{
+  0x00000000,
+  0x00000000,
+  0x0000007b,
+  0xfffffdc8,
+  0xffffffde,
+  0x00000400,
+  0x00000297,
+  0x000000a9,
+  0xfffffffc,
+  0x0000004d,
+  0xffffff70,
+  0xffffffc8,
+  0xfffffff0,
+  0xfffffff1,
+  0xfffffff2,
+  0xfffffff3,
+};
+
+#define TEST_MSG "VCVTH_S32_F16"
+#define INSN_NAME vcvth_s32_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int32_t
+#define OUTPUT_TYPE_SIZE 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s64_f16_1.c
new file mode 100644
index 0000000..c7b3d17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s64_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 };
+int64_t expected[] = { 123, -56, 0, 24, -63, 169, -4, 77 };
+
+#define TEST_MSG "VCVTH_S64_F16"
+#define INSN_NAME vcvth_s64_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int64_t
+#define OUTPUT_TYPE_SIZE 64
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u16_f16_1.c
new file mode 100644
index 0000000..e3c5d3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u16_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 };
+uint16_t expected[] = { 123, 56, 0, 24, 63, 169, 4, 77 };
+
+#define TEST_MSG "VCVTH_U16_F16"
+#define INSN_NAME vcvth_u16_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u32_f16_1.c
new file mode 100644
index 0000000..d5807d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u32_f16_1.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] =
+{
+  0.0, -0.0,
+  123.4, -567.8,
+  -34.8, 1024,
+  663.1, 169.1,
+  -4.8, 77.0,
+  -144.5, -56.8,
+
+  (float16_t) -16, (float16_t) -15,
+  (float16_t) -14, (float16_t) -13,
+};
+
+/* Expected results (32-bit hexadecimal representation).  */
+uint32_t expected[] =
+{
+  0x00000000,
+  0x00000000,
+  0x0000007b,
+  0x00000000,
+  0x00000000,
+  0x00000400,
+  0x00000297,
+  0x000000a9,
+  0x00000000,
+  0x0000004d,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+};
+
+#define TEST_MSG "VCVTH_U32_F16"
+#define INSN_NAME vcvth_u32_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint32_t
+#define OUTPUT_TYPE_SIZE 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u64_f16_1.c
new file mode 100644
index 0000000..a904e5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u64_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 };
+uint64_t expected[] = { 123, 56, 0, 24, 63, 169, 4, 77 };
+
+#define TEST_MSG "VCVTH_U64_F16"
+#define INSN_NAME vcvth_u64_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint64_t
+#define OUTPUT_TYPE_SIZE 64
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtm_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtm_1.c
new file mode 100644
index 0000000..1c22772
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtm_1.c
@@ -0,0 +1,33 @@
+/* This file tests an intrinsic which currently has only an f16 variant, and
+   which is only available when FP16 arithmetic instructions are supported.  */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+#include <math.h>
+
+/* Expected results.  */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, int, 16, 4) [] = { 0xfff0, 0x5, 0xfff0, 0x5 };
+VECT_VAR_DECL(expected, uint, 16, 4) [] = { 0x0, 0x5, 0x0, 0x5 };
+VECT_VAR_DECL(expected, int, 16, 8) [] = { 0x0, 0x0, 0xf, 0xfff0, 0x0,
+                                           0x0, 0xf, 0xfff0 };
+VECT_VAR_DECL(expected, uint, 16, 8) [] = { 0x0, 0x0, 0xf, 0x0,
+                                            0x0, 0x0, 0xf, 0x0 };
+#endif
+
+/* Expected results with rounding.  */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected_rounding, int, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa };
+VECT_VAR_DECL(expected_rounding, uint, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa };
+VECT_VAR_DECL(expected_rounding, int, 16, 8) [] = { 0x7d, 0x7d, 0x7d, 0x7d,
+                                                    0x7d, 0x7d, 0x7d, 0x7d };
+VECT_VAR_DECL(expected_rounding, uint, 16, 8) [] = { 0x7d, 0x7d, 0x7d, 0x7d,
+                                                     0x7d, 0x7d, 0x7d, 0x7d };
+#endif
+
+#define TEST_MSG "VCVTM/VCVTMQ"
+#define INSN_NAME vcvtm
+
+#include "vcvtX.inc"
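VCVTM converts with rounding toward minus infinity, i.e. floor(): 0.7 goes to 0 but -56.8 goes to -57, as the vcvtmh_* expected values below show (compare VCVTA above, which rounds to nearest). A one-line host-side check (illustration only, not part of the patch):

/* Round toward minus infinity -- floor() is the VCVTM rule.  */
#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* 0.7 -> 0, -56.8 -> -57, -4.3 -> -5.  */
  printf ("%d %d %d\n", (int) floor (0.7), (int) floor (-56.8),
          (int) floor (-4.3));
  return 0;
}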
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s16_f16_1.c
new file mode 100644
index 0000000..ef0132a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s16_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 };
+int16_t expected[] = { 123, -57, 0, 24, -64, 169, -5, 77 };
+
+#define TEST_MSG "VCVTMH_S16_F16"
+#define INSN_NAME vcvtmh_s16_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s32_f16_1.c
new file mode 100644
index 0000000..f4f7b37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s32_f16_1.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] =
+{
+  0.0, -0.0,
+  123.4, -567.8,
+  -34.8, 1024,
+  663.1, 169.1,
+  -4.8, 77.0,
+  -144.5, -56.8,
+
+  (float16_t) -16, (float16_t) -15,
+  (float16_t) -14, (float16_t) -13,
+};
+
+/* Expected results (32-bit hexadecimal representation).  */
+uint32_t expected[] =
+{
+  0x00000000,
+  0x00000000,
+  0x0000007b,
+  0xfffffdc8,
+  0xffffffdd,
+  0x00000400,
+  0x00000297,
+  0x000000a9,
+  0xfffffffb,
+  0x0000004d,
+  0xffffff6f,
+  0xffffffc7,
+  0xfffffff0,
+  0xfffffff1,
+  0xfffffff2,
+  0xfffffff3
+};
+
+#define TEST_MSG "VCVTMH_S32_F16"
+#define INSN_NAME vcvtmh_s32_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int32_t
+#define OUTPUT_TYPE_SIZE 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s64_f16_1.c
new file mode 100644
index 0000000..7b5b16f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s64_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 };
+int64_t expected[] = { 123, -57, 0, 24, -64, 169, -5, 77 };
+
+#define TEST_MSG "VCVTMH_S64_F16"
+#define INSN_NAME vcvtmh_s64_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int64_t
+#define OUTPUT_TYPE_SIZE 64
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u16_f16_1.c
new file mode 100644
index 0000000..db56171
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u16_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 };
+uint16_t expected[] = { 123, 56, 0, 24, 63, 169, 4, 77 };
+
+#define TEST_MSG "VCVTMH_U16_F16"
+#define INSN_NAME vcvtmh_u16_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u32_f16_1.c
new file mode 100644
index 0000000..6cda3b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u32_f16_1.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] =
+{
+  0.0, -0.0,
+  123.4, -567.8,
+  -34.8, 1024,
+  663.1, 169.1,
+  -4.8, 77.0,
+  -144.5, -56.8,
+
+  (float16_t) -16, (float16_t) -15,
+  (float16_t) -14, (float16_t) -13,
+};
+
+/* Expected results (32-bit hexadecimal representation).  */
+uint32_t expected[] =
+{
+  0x00000000,
+  0x00000000,
+  0x0000007b,
+  0x00000000,
+  0x00000000,
+  0x00000400,
+  0x00000297,
+  0x000000a9,
+  0x00000000,
+  0x0000004d,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+};
+
+#define TEST_MSG "VCVTMH_U32_F16"
+#define INSN_NAME vcvtmh_u32_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint32_t
+#define OUTPUT_TYPE_SIZE 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u64_f16_1.c
new file mode 100644
index 0000000..cae69a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u64_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 };
+uint64_t expected[] = { 123, 56, 0, 24, 63, 169, 4, 77 };
+
+#define TEST_MSG "VCVTMH_U64_F16"
+#define INSN_NAME vcvtmh_u64_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE uint64_t
+#define OUTPUT_TYPE_SIZE 64
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s16_f16_1.c
new file mode 100644
index 0000000..dec8d85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s16_f16_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 };
+int16_t expected[] = { 124, -57, 1, 25, -64, 169, -4, 77 };
+
+#define TEST_MSG "VCVTNH_S16_F16"
+#define INSN_NAME vcvtnh_s16_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s32_f16_1.c
new file mode 100644
index 0000000..94c333e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s32_f16_1.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_neon.h>
+
+/* Input values.  */
+float16_t input[] =
+{
+  0.0, -0.0,
+  123.4, -567.8,
+  -34.8, 1024,
+  663.1, 169.1,
+  -4.8, 77.0,
+  -144.5, -56.8,
+
+  (float16_t) -16, (float16_t) -15,
+  (float16_t) -14, (float16_t) -13,
+};
+
+/* Expected results (32-bit hexadecimal representation).  */
+uint32_t expected[] =
+{
+  0x00000000,
+  0x00000000,
+  0x0000007b,
+  0xfffffdc8,
+  0xffffffdd,
+  0x00000400,
+  0x00000297,
+  0x000000a9,
+  0xfffffffb,
+  0x0000004d,
+  0xffffff70,
+  0xffffffc7,
+  0xfffffff0,
+  0xfffffff1,
+  0xfffffff2,
+  0xfffffff3
+};
+
+#define TEST_MSG "VCVTNH_S32_F16"
+#define INSN_NAME vcvtnh_s32_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE int32_t
+#define OUTPUT_TYPE_SIZE 32
+
+/* Include the template for unary scalar operations.  */
+#include "unary_scalar_op.inc"
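VCVTN rounds to nearest with ties to even: -63.5 goes to -64 (the even neighbour) while 0.7 still goes to 1, matching the vcvtnh_* expected values above and below. C99 rint() applies the same rule under the default FE_TONEAREST rounding mode; a host-side check (illustration only, not part of the patch):

/* Round to nearest, ties to even -- rint() in the default mode.  */
#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* 0.7 -> 1, -63.5 -> -64 (tie goes to the even value), 24.6 -> 25.  */
  printf ("%d %d %d\n", (int) rint (0.7), (int) rint (-63.5),
          (int) rint (24.6));
  return 0;
}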
*/ +float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; +int64_t expected[] = { 124, -57, 1, 25, -64, 169, -4, 77 }; + +#define TEST_MSG "VCVTNH_S64_F16" +#define INSN_NAME vcvtnh_s64_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE int64_t +#define OUTPUT_TYPE_SIZE 64 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u16_f16_1.c new file mode 100644 index 0000000..0a95cea --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u16_f16_1.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +/* Input values. */ +float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; +uint16_t expected[] = { 124, 57, 1, 25, 64, 169, 4, 77 }; + +#define TEST_MSG "VCVTNH_u16_F16" +#define INSN_NAME vcvtnh_u16_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u32_f16_1.c new file mode 100644 index 0000000..97d5fba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u32_f16_1.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +/* Input values. */ +float16_t input[] = +{ + 0.0, -0.0, + 123.4, -567.8, + -34.8, 1024, + 663.1, 169.1, + -4.8, 77.0, + -144.5, -56.8, + + (float16_t) -16, (float16_t) -15, + (float16_t) -14, (float16_t) -13, +}; + +/* Expected results (32-bit hexadecimal representation). */ +uint32_t expected[] = +{ + 0x00000000, + 0x00000000, + 0x0000007b, + 0x00000000, + 0x00000000, + 0x00000400, + 0x00000297, + 0x000000a9, + 0x00000000, + 0x0000004d, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, +}; + +#define TEST_MSG "VCVTNH_U32_F16" +#define INSN_NAME vcvtnh_u32_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint32_t +#define OUTPUT_TYPE_SIZE 32 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u64_f16_1.c new file mode 100644 index 0000000..3b1b273 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u64_f16_1.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +/* Input values. 
*/ +float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; +uint64_t expected[] = { 124, 57, 1, 25, 64, 169, 4, 77 }; + +#define TEST_MSG "VCVTNH_u64_F16" +#define INSN_NAME vcvtnh_u64_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint64_t +#define OUTPUT_TYPE_SIZE 64 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtp_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtp_1.c new file mode 100644 index 0000000..7057909 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtp_1.c @@ -0,0 +1,33 @@ +/* This file tests an intrinsic which currently has only an f16 variant and that + is only available when FP16 arithmetic instructions are supported. */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" +#include + +/* Expected results. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, int, 16, 4) [] = { 0xfff1, 0x6, 0xfff1, 0x6 }; +VECT_VAR_DECL(expected, uint, 16, 4) [] = { 0x0, 0x6, 0x0, 0x6 }; +VECT_VAR_DECL(expected, int, 16, 8) [] = { 0x0, 0x0, 0x10, 0xfff1, + 0x0, 0x0, 0x10, 0xfff1 }; +VECT_VAR_DECL(expected, uint, 16, 8) [] = { 0x0, 0x0, 0x10, 0x0, + 0x0, 0x0, 0x10, 0x0 }; +#endif + +/* Expected results with rounding. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_rounding, int, 16, 4) [] = { 0xb, 0xb, 0xb, 0xb }; +VECT_VAR_DECL(expected_rounding, uint, 16, 4) [] = { 0xb, 0xb, 0xb, 0xb }; +VECT_VAR_DECL(expected_rounding, int, 16, 8) [] = { 0x7e, 0x7e, 0x7e, 0x7e, + 0x7e, 0x7e, 0x7e, 0x7e }; +VECT_VAR_DECL(expected_rounding, uint, 16, 8) [] = { 0x7e, 0x7e, 0x7e, 0x7e, + 0x7e, 0x7e, 0x7e, 0x7e }; +#endif + +#define TEST_MSG "VCVTP/VCVTPQ" +#define INSN_NAME vcvtp + +#include "vcvtX.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s16_f16_1.c new file mode 100644 index 0000000..5ff0d22 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s16_f16_1.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +/* Input values. */ +float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; +int16_t expected[] = { 124, -56, 1, 25, -63, 170, -4, 77 }; + +#define TEST_MSG "VCVTPH_S16_F16" +#define INSN_NAME vcvtph_s16_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE int16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s32_f16_1.c new file mode 100644 index 0000000..105d236 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s32_f16_1.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +/* Input values. 
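
   vcvtp* completes the set, rounding toward plus infinity: 123.9 -> 124,
   -56.8 -> -56 and 169.4 -> 170 in the vcvtph_s16 table above.  In the
   same portable sketch, ceilf takes the place of floorf/rintf:

     #include <math.h>
     #include <stdio.h>

     int
     main (void)
     {
       float in[] = { 123.9f, -56.8f, 0.7f, 24.6f, -63.5f, 169.4f, -4.3f, 77.0f };
       for (int i = 0; i < 8; i++)
         // ceilf rounds toward plus infinity, the vcvtp rule.
         printf ("%g -> %d\n", in[i], (int) ceilf (in[i]));
       return 0;
     }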
*/ +float16_t input[] = +{ + 0.0, -0.0, + 123.4, -567.8, + -34.8, 1024, + 663.1, 169.1, + -4.8, 77.0, + -144.5, -56.8, + + (float16_t) -16, (float16_t) -15, + (float16_t) -14, (float16_t) -13, +}; + +/* Expected results (32-bit hexadecimal representation). */ +uint32_t expected[] = +{ + 0x00000000, + 0x00000000, + 0x0000007c, + 0xfffffdc8, + 0xffffffde, + 0x00000400, + 0x00000297, + 0x000000aa, + 0xfffffffc, + 0x0000004d, + 0xffffff70, + 0xffffffc8, + 0xfffffff0, + 0xfffffff1, + 0xfffffff2, + 0xfffffff3 +}; + +#define TEST_MSG "VCVTPH_S32_F16" +#define INSN_NAME vcvtph_s32_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE int32_t +#define OUTPUT_TYPE_SIZE 32 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s64_f16_1.c new file mode 100644 index 0000000..290c5b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s64_f16_1.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +/* Input values. */ +float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; +int64_t expected[] = { 124, -56, 1, 25, -63, 170, -4, 77 }; + +#define TEST_MSG "VCVTPH_S64_F16" +#define INSN_NAME vcvtph_s64_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE int64_t +#define OUTPUT_TYPE_SIZE 64 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u16_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u16_f16_1.c new file mode 100644 index 0000000..e367dad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u16_f16_1.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +/* Input values. */ +float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; +uint16_t expected[] = { 124, 57, 1, 25, 64, 170, 5, 77 }; + +#define TEST_MSG "VCVTPH_u16_F16" +#define INSN_NAME vcvtph_u16_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u32_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u32_f16_1.c new file mode 100644 index 0000000..d66adcd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u32_f16_1.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +/* Input values. */ +float16_t input[] = +{ + 0.0, -0.0, + 123.4, -567.8, + -34.8, 1024, + 663.1, 169.1, + -4.8, 77.0, + -144.5, -56.8, + + (float16_t) -16, (float16_t) -15, + (float16_t) -14, (float16_t) -13, +}; + +/* Expected results (32-bit hexadecimal representation). 
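
   Two details make these 32-bit tables less obvious than the small ones:
   the inputs are quantised to binary16 first, and out-of-range results
   saturate.  (float16_t) -567.8 is exactly -568.0, binary16 having only
   11 significand bits, which is why the signed table above holds
   0xfffffdc8 (-568) rather than -567; and the unsigned conversions
   saturate negative results to 0, giving the runs of 0x00000000 below.
   A sketch of the signed entry, assuming only C99:

     #include <math.h>
     #include <stdint.h>
     #include <stdio.h>

     int
     main (void)
     {
       float q = -568.0f;                   // (float16_t) -567.8 == -568.0
       int32_t r = (int32_t) ceilf (q);     // round toward +Inf, then convert
       printf ("0x%08x\n", (unsigned) r);   // prints 0xfffffdc8
       return 0;
     }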
*/ +uint32_t expected[] = +{ + 0x00000000, + 0x00000000, + 0x0000007c, + 0x00000000, + 0x00000000, + 0x00000400, + 0x00000297, + 0x000000aa, + 0x00000000, + 0x0000004d, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, +}; + +#define TEST_MSG "VCVTPH_U32_F16" +#define INSN_NAME vcvtph_u32_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint32_t +#define OUTPUT_TYPE_SIZE 32 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u64_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u64_f16_1.c new file mode 100644 index 0000000..0229099 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u64_f16_1.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +/* Input values. */ +float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; +uint64_t expected[] = { 124, 57, 1, 25, 64, 170, 5, 77 }; + +#define TEST_MSG "VCVTPH_u64_F16" +#define INSN_NAME vcvtph_u64_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE uint64_t +#define OUTPUT_TYPE_SIZE 64 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdiv_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdiv_f16_1.c new file mode 100644 index 0000000..c0103fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdiv_f16_1.c @@ -0,0 +1,86 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A FP16_C (13.4) +#define B FP16_C (-56.8) +#define C FP16_C (-34.8) +#define D FP16_C (12) +#define E FP16_C (63.1) +#define F FP16_C (19.1) +#define G FP16_C (-4.8) +#define H FP16_C (77) + +#define I FP16_C (0.7) +#define J FP16_C (-78) +#define K FP16_C (11.23) +#define L FP16_C (98) +#define M FP16_C (87.1) +#define N FP16_C (-8) +#define O FP16_C (-1.1) +#define P FP16_C (-9.7) + +/* Expected results for vdiv. */ +VECT_VAR_DECL (expected_div_static, hfloat, 16, 4) [] + = { 0x32CC /* A / E. */, 0xC1F3 /* B / F. */, + 0x4740 /* C / G. */, 0x30FD /* D / H. */ }; + +VECT_VAR_DECL (expected_div_static, hfloat, 16, 8) [] + = { 0x32CC /* A / E. */, 0xC1F3 /* B / F. */, + 0x4740 /* C / G. */, 0x30FD /* D / H. */, + 0x201D /* I / M. */, 0x48E0 /* J / N. */, + 0xC91B /* K / O. */, 0xC90D /* L / P. 
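
   Each constant in this table is the binary16 bit pattern of a quotient,
   computed after both operands have themselves been rounded to binary16.
   On a target where __fp16 arithmetic is usable (which these tests
   already require), one entry can be re-derived -- a sketch, not
   testsuite code:

     #include <stdio.h>
     #include <string.h>

     int
     main (void)
     {
       __fp16 a = 13.4, e = 63.1;        // A and E above
       __fp16 q = a / e;                 // promoted to float, rounded back
       unsigned short bits;
       memcpy (&bits, &q, sizeof bits);  // reinterpret without aliasing UB
       printf ("0x%04X\n", (unsigned) bits);  // prints 0x32CC, i.e. A / E
       return 0;
     }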
*/ }; + +void exec_vdiv_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VDIV (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 4); + DECL_VARIABLE(vsrc_2, float, 16, 4); + VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; + VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H}; + VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); + VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); + + DECL_VARIABLE (vector_res, float, 16, 4) + = vdiv_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4)); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_div_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VDIVQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 8); + DECL_VARIABLE(vsrc_2, float, 16, 8); + VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; + VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P}; + VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); + VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); + + DECL_VARIABLE (vector_res, float, 16, 8) + = vdivq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8)); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_div_static, ""); +} + +int +main (void) +{ + exec_vdiv_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdivh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdivh_f16_1.c new file mode 100644 index 0000000..6a99109 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdivh_f16_1.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +#define INFF __builtin_inf () + +/* Expected results (16-bit hexadecimal representation). */ +uint16_t expected[] = +{ + 0x0000 /* 0.000000 */, + 0x8000 /* -0.000000 */, + 0xb765 /* -0.462158 */, + 0x27ef /* 0.030991 */, + 0x3955 /* 0.666504 */, + 0xccff /* -19.984375 */, + 0xc49a /* -4.601562 */, + 0xb1e3 /* -0.183960 */, + 0x3cd3 /* 1.206055 */, + 0x23f0 /* 0.015503 */, + 0xa9ef /* -0.046356 */, + 0x32f4 /* 0.217285 */, + 0xb036 /* -0.131592 */, + 0x4126 /* 2.574219 */, + 0xcd15 /* -20.328125 */, + 0x537f /* 59.968750 */, + 0x7e00 /* nan */, + 0x7e00 /* nan */ +}; + +#define TEST_MSG "VDIVH_F16" +#define INSN_NAME vdivh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. 
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c index 22d45d5..aef4173 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c @@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00, + 0xcc00, 0xcc00 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, @@ -46,6 +50,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00, + 0xcc00, 0xcc00, + 0xcc00, 0xcc00, + 0xcc00, 0xcc00 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, 0xc1800000, 0xc1800000 }; @@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80, + 0xcb80, 0xcb80 }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, @@ -90,6 +104,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80, + 0xcb80, 0xcb80, + 0xcb80, 0xcb80, + 0xcb80, 0xcb80 }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; @@ -107,6 +127,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 }; VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00, + 0xcb00, 0xcb00 }; +#endif VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, @@ -134,6 +158,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00, + 0xcb00, 0xcb00, + 0xcb00, 0xcb00, + 0xcb00, 0xcb00 }; +#endif VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, 0xc1600000, 0xc1600000 }; @@ -171,6 +201,9 @@ void exec_vdup_vmov (void) TEST_VDUP(, uint, u, 64, 1); TEST_VDUP(, poly, p, 8, 8); TEST_VDUP(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VDUP(, float, f, 
16, 4); +#endif TEST_VDUP(, float, f, 32, 2); TEST_VDUP(q, int, s, 8, 16); @@ -183,8 +216,26 @@ void exec_vdup_vmov (void) TEST_VDUP(q, uint, u, 64, 2); TEST_VDUP(q, poly, p, 8, 16); TEST_VDUP(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VDUP(q, float, f, 16, 8); +#endif TEST_VDUP(q, float, f, 32, 4); +#if defined (FP16_SUPPORTED) + switch (i) { + case 0: + CHECK_RESULTS_NAMED (TEST_MSG, expected0, ""); + break; + case 1: + CHECK_RESULTS_NAMED (TEST_MSG, expected1, ""); + break; + case 2: + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); + break; + default: + abort(); + } +#else switch (i) { case 0: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); @@ -198,6 +249,7 @@ void exec_vdup_vmov (void) default: abort(); } +#endif } /* Do the same tests with vmov. Use the same expected results. */ @@ -216,6 +268,9 @@ void exec_vdup_vmov (void) TEST_VMOV(, uint, u, 64, 1); TEST_VMOV(, poly, p, 8, 8); TEST_VMOV(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VMOV(, float, f, 16, 4); +#endif TEST_VMOV(, float, f, 32, 2); TEST_VMOV(q, int, s, 8, 16); @@ -228,8 +283,26 @@ void exec_vdup_vmov (void) TEST_VMOV(q, uint, u, 64, 2); TEST_VMOV(q, poly, p, 8, 16); TEST_VMOV(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VMOV(q, float, f, 16, 8); +#endif TEST_VMOV(q, float, f, 32, 4); +#if defined (FP16_SUPPORTED) + switch (i) { + case 0: + CHECK_RESULTS_NAMED (TEST_MSG, expected0, ""); + break; + case 1: + CHECK_RESULTS_NAMED (TEST_MSG, expected1, ""); + break; + case 2: + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); + break; + default: + abort(); + } +#else switch (i) { case 0: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); @@ -243,6 +316,8 @@ void exec_vdup_vmov (void) default: abort(); } +#endif + } } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c index ef708dc..5d0dba3 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c @@ -17,6 +17,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80, + 0xca80, 0xca80 }; +#endif VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, @@ -43,10 +47,16 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80, + 0xca80, 0xca80, + 0xca80, 0xca80, + 0xca80, 0xca80 }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; -#define TEST_MSG "VDUP_LANE/VDUP_LANEQ" +#define TEST_MSG "VDUP_LANE/VDUPQ_LANE" void exec_vdup_lane (void) { /* Basic test: vec1=vdup_lane(vec2, lane), then store the result. */ @@ -63,6 +73,9 @@ void exec_vdup_lane (void) clean_results (); TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); +#endif VLOAD(vector, buffer, , float, f, 32, 2); /* Choose lane arbitrarily. 
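
   ("Arbitrarily" meaning the lane indices below carry no special
   significance.)  vec1 = vdup_lane (vec2, L) broadcasts lane L across
   the whole result, so the FP16 case added here,
   TEST_VDUP_LANE(, float, f, 16, 4, 4, 3), picks lane 3 of the reference
   buffer {-16, -15, -14, -13} and yields four copies of -13, i.e. the
   0xca80 pattern in the expected table.  A one-loop scalar model, with
   r and v as hypothetical stand-ins for the vectors:

     for (int i = 0; i < 4; i++)
       r[i] = v[3];   // vdup_lane_f16 (v, 3)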
*/ @@ -76,6 +89,9 @@ void exec_vdup_lane (void) TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0); TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7); TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANE(, float, f, 16, 4, 4, 3); +#endif TEST_VDUP_LANE(, float, f, 32, 2, 2, 1); TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2); @@ -88,9 +104,133 @@ void exec_vdup_lane (void) TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0); TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5); TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3); +#endif TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif + +#if defined (__aarch64__) + +#undef TEST_MSG +#define TEST_MSG "VDUP_LANEQ/VDUPQ_LANEQ" + + /* Expected results for vdup*_laneq tests. */ +VECT_VAR_DECL(expected2,int,8,8) [] = { 0xfd, 0xfd, 0xfd, 0xfd, + 0xfd, 0xfd, 0xfd, 0xfd }; +VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, + 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xca80, 0xca80, + 0xca80, 0xca80 }; +#endif +VECT_VAR_DECL(expected2,int,8,16) [] = { 0xfb, 0xfb, 0xfb, 0xfb, + 0xfb, 0xfb, 0xfb, 0xfb, + 0xfb, 0xfb, 0xfb, 0xfb, + 0xfb, 0xfb, 0xfb, 0xfb }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff7, 0xfff7, 0xfff7, 0xfff7, + 0xfff7, 0xfff7, 0xfff7, 0xfff7 }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff1, 0xfffffff1, + 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, + 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, + 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, + 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xc880, 0xc880, + 0xc880, 0xc880, + 0xc880, 0xc880, + 0xc880, 0xc880 }; +#endif +VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, + 0xc1700000, 0xc1700000 }; + + /* Clean all results for vdup*_laneq tests. */ + clean_results (); + /* Basic test: vec1=vdup_lane(vec2, lane), then store the result. 
*/ +#define TEST_VDUP_LANEQ(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vdup##Q##_laneq_##T2##W(VECT_VAR(vector, T1, W, N2), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* Input vector can only have 64 bits. */ + DECL_VARIABLE_128BITS_VARIANTS(vector); + + clean_results (); + + TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose lane arbitrarily. */ + TEST_VDUP_LANEQ(, int, s, 8, 8, 16, 13); + TEST_VDUP_LANEQ(, int, s, 16, 4, 8, 2); + TEST_VDUP_LANEQ(, int, s, 32, 2, 4, 1); + TEST_VDUP_LANEQ(, int, s, 64, 1, 2, 0); + TEST_VDUP_LANEQ(, uint, u, 8, 8, 16, 15); + TEST_VDUP_LANEQ(, uint, u, 16, 4, 8, 3); + TEST_VDUP_LANEQ(, uint, u, 32, 2, 4, 1); + TEST_VDUP_LANEQ(, uint, u, 64, 1, 2, 0); + TEST_VDUP_LANEQ(, poly, p, 8, 8, 16, 7); + TEST_VDUP_LANEQ(, poly, p, 16, 4, 8, 3); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANEQ(, float, f, 16, 4, 8, 3); +#endif + TEST_VDUP_LANEQ(, float, f, 32, 2, 4, 1); + + TEST_VDUP_LANEQ(q, int, s, 8, 16, 16, 11); + TEST_VDUP_LANEQ(q, int, s, 16, 8, 8, 7); + TEST_VDUP_LANEQ(q, int, s, 32, 4, 4, 1); + TEST_VDUP_LANEQ(q, int, s, 64, 2, 2, 0); + TEST_VDUP_LANEQ(q, uint, u, 8, 16, 16, 5); + TEST_VDUP_LANEQ(q, uint, u, 16, 8, 8, 1); + TEST_VDUP_LANEQ(q, uint, u, 32, 4, 4, 0); + TEST_VDUP_LANEQ(q, uint, u, 64, 2, 2, 0); + TEST_VDUP_LANEQ(q, poly, p, 8, 16, 16, 5); + TEST_VDUP_LANEQ(q, poly, p, 16, 8, 8, 1); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANEQ(q, float, f, 16, 8, 8, 7); +#endif + TEST_VDUP_LANEQ(q, float, f, 32, 4, 4, 1); + + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); +#if defined (FP16_SUPPORTED) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); +#endif + +#endif /* __aarch64__. */ } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c new file mode 100644 index 0000000..c9d553a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c @@ -0,0 +1,137 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define A -16 +#define B -15 +#define C -14 +#define D -13 +#define E -12 +#define F -11 +#define G -10 +#define H -9 + +#define F16_C(a) ((__fp16) a) +#define AF F16_C (A) +#define BF F16_C (B) +#define CF F16_C (C) +#define DF F16_C (D) +#define EF F16_C (E) +#define FF F16_C (F) +#define GF F16_C (G) +#define HF F16_C (H) + +#define S16_C(a) ((int16_t) a) +#define AS S16_C (A) +#define BS S16_C (B) +#define CS S16_C (C) +#define DS S16_C (D) +#define ES S16_C (E) +#define FS S16_C (F) +#define GS S16_C (G) +#define HS S16_C (H) + +#define U16_C(a) ((int16_t) a) +#define AU U16_C (A) +#define BU U16_C (B) +#define CU U16_C (C) +#define DU U16_C (D) +#define EU U16_C (E) +#define FU U16_C (F) +#define GU U16_C (G) +#define HU U16_C (H) + +#define P16_C(a) ((poly16_t) a) +#define AP P16_C (A) +#define BP P16_C (B) +#define CP P16_C (C) +#define DP P16_C (D) +#define EP P16_C (E) +#define FP P16_C (F) +#define GP P16_C (G) +#define HP P16_C (H) + +/* Expected results for vduph_lane. */ +float16_t expected_f16 = AF; +int16_t expected_s16 = DS; +uint16_t expected_u16 = BU; +poly16_t expected_p16 = CP; + +/* Expected results for vduph_laneq. 
*/ +float16_t expected_q_f16 = EF; +int16_t expected_q_s16 = BS; +uint16_t expected_q_u16 = GU; +poly16_t expected_q_p16 = FP; + +void exec_vduph_lane_f16 (void) +{ + /* vduph_lane. */ + DECL_VARIABLE(vsrc, float, 16, 4); + DECL_VARIABLE(vsrc, int, 16, 4); + DECL_VARIABLE(vsrc, uint, 16, 4); + DECL_VARIABLE(vsrc, poly, 16, 4); + VECT_VAR_DECL (buf_src, float, 16, 4) [] = {AF, BF, CF, DF}; + VECT_VAR_DECL (buf_src, int, 16, 4) [] = {AS, BS, CS, DS}; + VECT_VAR_DECL (buf_src, uint, 16, 4) [] = {AU, BU, CU, DU}; + VECT_VAR_DECL (buf_src, poly, 16, 4) [] = {AP, BP, CP, DP}; + VLOAD (vsrc, buf_src, , int, s, 16, 4); + VLOAD (vsrc, buf_src, , float, f, 16, 4); + VLOAD (vsrc, buf_src, , uint, u, 16, 4); + VLOAD (vsrc, buf_src, , poly, p, 16, 4); + + float16_t res_f = vduph_lane_f16 (VECT_VAR (vsrc, float, 16, 4), 0); + if (* (unsigned short *) &res_f != * (unsigned short *) &expected_f16) + abort (); + + int16_t res_s = vduph_lane_s16 (VECT_VAR (vsrc, int, 16, 4), 3); + if (* (unsigned short *) &res_s != * (unsigned short *) &expected_s16) + abort (); + + uint16_t res_u = vduph_lane_u16 (VECT_VAR (vsrc, uint, 16, 4), 1); + if (* (unsigned short *) &res_u != * (unsigned short *) &expected_u16) + abort (); + + poly16_t res_p = vduph_lane_p16 (VECT_VAR (vsrc, poly, 16, 4), 2); + if (* (unsigned short *) &res_p != * (unsigned short *) &expected_p16) + abort (); + + /* vduph_laneq. */ + DECL_VARIABLE(vsrc, float, 16, 8); + DECL_VARIABLE(vsrc, int, 16, 8); + DECL_VARIABLE(vsrc, uint, 16, 8); + DECL_VARIABLE(vsrc, poly, 16, 8); + VECT_VAR_DECL (buf_src, float, 16, 8) [] = {AF, BF, CF, DF, EF, FF, GF, HF}; + VECT_VAR_DECL (buf_src, int, 16, 8) [] = {AS, BS, CS, DS, ES, FS, GS, HS}; + VECT_VAR_DECL (buf_src, uint, 16, 8) [] = {AU, BU, CU, DU, EU, FU, GU, HU}; + VECT_VAR_DECL (buf_src, poly, 16, 8) [] = {AP, BP, CP, DP, EP, FP, GP, HP}; + VLOAD (vsrc, buf_src, q, int, s, 16, 8); + VLOAD (vsrc, buf_src, q, float, f, 16, 8); + VLOAD (vsrc, buf_src, q, uint, u, 16, 8); + VLOAD (vsrc, buf_src, q, poly, p, 16, 8); + + res_f = vduph_laneq_f16 (VECT_VAR (vsrc, float, 16, 8), 4); + if (* (unsigned short *) &res_f != * (unsigned short *) &expected_q_f16) + abort (); + + res_s = vduph_laneq_s16 (VECT_VAR (vsrc, int, 16, 8), 1); + if (* (unsigned short *) &res_s != * (unsigned short *) &expected_q_s16) + abort (); + + res_u = vduph_laneq_u16 (VECT_VAR (vsrc, uint, 16, 8), 6); + if (* (unsigned short *) &res_u != * (unsigned short *) &expected_q_u16) + abort (); + + res_p = vduph_laneq_p16 (VECT_VAR (vsrc, poly, 16, 8), 5); + if (* (unsigned short *) &res_p != * (unsigned short *) &expected_q_p16) + abort (); +} + +int +main (void) +{ + exec_vduph_lane_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c index 98f88a6..908294a 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, @@ -39,6 +43,12 @@ 
VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd, 0x4204cccd, 0x4204cccd }; @@ -60,6 +70,10 @@ void exec_vext (void) clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); +#ifdef FP16_SUPPORTED + VLOAD(vector1, buffer, , float, f, 16, 4); + VLOAD(vector1, buffer, q, float, f, 16, 8); +#endif VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); @@ -74,6 +88,9 @@ void exec_vext (void) VDUP(vector2, , uint, u, 64, 1, 0x88); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif VDUP(vector2, , float, f, 32, 2, 33.6f); VDUP(vector2, q, int, s, 8, 16, 0x11); @@ -86,6 +103,9 @@ void exec_vext (void) VDUP(vector2, q, uint, u, 64, 2, 0x88); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif VDUP(vector2, q, float, f, 32, 4, 33.2f); /* Choose arbitrary extract offsets. */ @@ -99,6 +119,9 @@ void exec_vext (void) TEST_VEXT(, uint, u, 64, 1, 0); TEST_VEXT(, poly, p, 8, 8, 6); TEST_VEXT(, poly, p, 16, 4, 2); +#if defined (FP16_SUPPORTED) + TEST_VEXT(, float, f, 16, 4, 2); +#endif TEST_VEXT(, float, f, 32, 2, 1); TEST_VEXT(q, int, s, 8, 16, 14); @@ -111,9 +134,16 @@ void exec_vext (void) TEST_VEXT(q, uint, u, 64, 2, 1); TEST_VEXT(q, poly, p, 8, 16, 12); TEST_VEXT(q, poly, p, 16, 8, 6); +#if defined (FP16_SUPPORTED) + TEST_VEXT(q, float, f, 16, 8, 7); +#endif TEST_VEXT(q, float, f, 32, 4, 3); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma.c index 8180108..2cf68fe 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma.c @@ -3,11 +3,19 @@ #include "compute-ref-data.h" #ifdef __ARM_FEATURE_FMA + /* Expected results. 
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0x61c6, 0x61c8, 0x61ca, 0x61cc }; +VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0x6435, 0x6436, 0x6437, 0x6438, + 0x6439, 0x643a, 0x643b, 0x643c }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4438ca3d, 0x44390a3d }; -VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8, 0x4486deb8, 0x4486feb8 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8, + 0x4486deb8, 0x4486feb8 }; #ifdef __aarch64__ -VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x408906e1532b8520, 0x40890ee1532b8520 }; +VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x408906e1532b8520, + 0x40890ee1532b8520 }; #endif #define TEST_MSG "VFMA/VFMAQ" @@ -44,6 +52,18 @@ void exec_vfma (void) DECL_VARIABLE(VAR, float, 32, 4); #endif +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector1, float, 16, 4); + DECL_VARIABLE(vector2, float, 16, 4); + DECL_VARIABLE(vector3, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 4); + + DECL_VARIABLE(vector1, float, 16, 8); + DECL_VARIABLE(vector2, float, 16, 8); + DECL_VARIABLE(vector3, float, 16, 8); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif + DECL_VFMA_VAR(vector1); DECL_VFMA_VAR(vector2); DECL_VFMA_VAR(vector3); @@ -52,6 +72,10 @@ void exec_vfma (void) clean_results (); /* Initialize input "vector1" from "buffer". */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD(vector1, buffer, , float, f, 16, 4); + VLOAD(vector1, buffer, q, float, f, 16, 8); +#endif VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); #ifdef __aarch64__ @@ -59,13 +83,21 @@ void exec_vfma (void) #endif /* Choose init value arbitrarily. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector2, , float, f, 16, 4, 9.3f); + VDUP(vector2, q, float, f, 16, 8, 29.7f); +#endif VDUP(vector2, , float, f, 32, 2, 9.3f); VDUP(vector2, q, float, f, 32, 4, 29.7f); #ifdef __aarch64__ VDUP(vector2, q, float, f, 64, 2, 15.8f); #endif - + /* Choose init value arbitrarily. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector3, , float, f, 16, 4, 81.2f); + VDUP(vector3, q, float, f, 16, 8, 36.8f); +#endif VDUP(vector3, , float, f, 32, 2, 81.2f); VDUP(vector3, q, float, f, 32, 4, 36.8f); #ifdef __aarch64__ @@ -73,12 +105,20 @@ void exec_vfma (void) #endif /* Execute the tests. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VFMA(, float, f, 16, 4); + TEST_VFMA(q, float, f, 16, 8); +#endif TEST_VFMA(, float, f, 32, 2); TEST_VFMA(q, float, f, 32, 4); #ifdef __aarch64__ TEST_VFMA(q, float, f, 64, 2); #endif +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); +#endif CHECK_VFMA_RESULTS (TEST_MSG, ""); } #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmah_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmah_f16_1.c new file mode 100644 index 0000000..1ac6b67 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmah_f16_1.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +/* Expected results (16-bit hexadecimal representation). 
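
   vfmah_f16 (a, b, c) computes a + b * c fused, with a single rounding
   at the end.  C99's fmaf expresses the same contract -- note the
   argument order, fmaf (x, y, z) == x * y + z -- so a portable sketch of
   one step is:

     #include <math.h>
     #include <stdio.h>

     int
     main (void)
     {
       // Fused: only the final result is rounded, never the product.
       printf ("%g\n", fmaf (2.0f, 3.0f, 1.0f));   // 1 + 2 * 3 = 7
       return 0;
     }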
*/ +uint16_t expected[] = +{ + 0x0000 /* 0.000000 */, + 0x0000 /* 0.000000 */, + 0x3944 /* 0.658203 */, + 0xcefa /* -27.906250 */, + 0x5369 /* 59.281250 */, + 0x35ba /* 0.357910 */, + 0xc574 /* -5.453125 */, + 0xc5e6 /* -5.898438 */, + 0x3f66 /* 1.849609 */, + 0x5665 /* 102.312500 */, + 0xc02d /* -2.087891 */, + 0x4d79 /* 21.890625 */, + 0x547b /* 71.687500 */, + 0xcdf0 /* -23.750000 */, + 0xc625 /* -6.144531 */, + 0x4cf9 /* 19.890625 */, + 0x7e00 /* nan */, + 0x7e00 /* nan */ +}; + +#define TEST_MSG "VFMAH_F16" +#define INSN_NAME vfmah_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. */ +#include "ternary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_lane_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_lane_f16_1.c new file mode 100644 index 0000000..00c95d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_lane_f16_1.c @@ -0,0 +1,908 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A0 FP16_C (123.4) +#define A1 FP16_C (-5.8) +#define A2 FP16_C (-0.0) +#define A3 FP16_C (10) +#define A4 FP16_C (123412.43) +#define A5 FP16_C (-5.8) +#define A6 FP16_C (90.8) +#define A7 FP16_C (24) + +#define B0 FP16_C (23.4) +#define B1 FP16_C (-5.8) +#define B2 FP16_C (8.9) +#define B3 FP16_C (4.0) +#define B4 FP16_C (3.4) +#define B5 FP16_C (-550.8) +#define B6 FP16_C (-31.8) +#define B7 FP16_C (20000.0) + +/* Expected results for vfma_lane. */ +VECT_VAR_DECL (expected0_static, hfloat, 16, 4) [] + = { 0x613E /* A0 + B0 * B0. */, + 0xD86D /* A1 + B1 * B0. */, + 0x5A82 /* A2 + B2 * B0. */, + 0x567A /* A3 + B3 * B0. */}; + +VECT_VAR_DECL (expected1_static, hfloat, 16, 4) [] + = { 0xCA33 /* A0 + B0 * B1. */, + 0x4EF6 /* A1 + B1 * B1. */, + 0xD274 /* A2 + B2 * B1. */, + 0xCA9A /* A3 + B3 * B1. */ }; + +VECT_VAR_DECL (expected2_static, hfloat, 16, 4) [] + = { 0x5D2F /* A0 + B0 * B2. */, + 0xD32D /* A1 + B1 * B2. */, + 0x54F3 /* A2 + B2 * B2. */, + 0x51B3 /* A3 + B3 * B2. */ }; + +VECT_VAR_DECL (expected3_static, hfloat, 16, 4) [] + = { 0x5AC8 /* A0 + B0 * B3. */, + 0xCF40 /* A1 + B1 * B3. */, + 0x5073 /* A2 + B2 * B3. */, + 0x4E80 /* A3 + B3 * B3. */ }; + +/* Expected results for vfmaq_lane. */ +VECT_VAR_DECL (expected0_static, hfloat, 16, 8) [] + = { 0x613E /* A0 + B0 * B0. */, + 0xD86D /* A1 + B1 * B0. */, + 0x5A82 /* A2 + B2 * B0. */, + 0x567A /* A3 + B3 * B0. */, + 0x7C00 /* A4 + B4 * B0. */, + 0xF24D /* A5 + B5 * B0. */, + 0xE11B /* A6 + B6 * B0. */, + 0x7C00 /* A7 + B7 * B0. */ }; + +VECT_VAR_DECL (expected1_static, hfloat, 16, 8) [] + = { 0xCA33 /* A0 + B0 * B1. */, + 0x4EF6 /* A1 + B1 * B1. */, + 0xD274 /* A2 + B2 * B1. */, + 0xCA9A /* A3 + B3 * B1. */, + 0x7C00 /* A4 + B4 * B1. */, + 0x6A3B /* A5 + B5 * B1. */, + 0x5C4D /* A6 + B6 * B1. */, + 0xFC00 /* A7 + B7 * B1. */ }; + +VECT_VAR_DECL (expected2_static, hfloat, 16, 8) [] + = { 0x5D2F /* A0 + B0 * B2. */, + 0xD32D /* A1 + B1 * B2. */, + 0x54F3 /* A2 + B2 * B2. */, + 0x51B3 /* A3 + B3 * B2. */, + 0x7C00 /* A4 + B4 * B2. */, + 0xECCB /* A5 + B5 * B2. */, + 0xDA01 /* A6 + B6 * B2. */, + 0x7C00 /* A7 + B7 * B2. */ }; + +VECT_VAR_DECL (expected3_static, hfloat, 16, 8) [] + = { 0x5AC8 /* A0 + B0 * B3. 
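
   Throughout these lane tables the pattern is the one the per-element
   comments spell out: r[i] = A<i> + B<i> * B<L>, i.e. lane L of the
   multiplier vector scales every element.  As a scalar model, with a,
   b, v standing in for the three vector operands:

     for (int i = 0; i < lanes; i++)
       r[i] = fmaf (b[i], v[L], a[i]);   // vfma_lane: a[i] + b[i] * v[L]

   The _laneq forms differ only in taking the lane index from a 128-bit
   vector.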
*/, + 0xCF40 /* A1 + B1 * B3. */, + 0x5073 /* A2 + B2 * B3. */, + 0x4E80 /* A3 + B3 * B3. */, + 0x7C00 /* A4 + B4 * B3. */, + 0xE851 /* A5 + B5 * B3. */, + 0xD08C /* A6 + B6 * B3. */, + 0x7C00 /* A7 + B7 * B3. */ }; + +/* Expected results for vfma_laneq. */ +VECT_VAR_DECL (expected0_laneq_static, hfloat, 16, 4) [] + = { 0x613E /* A0 + B0 * B0. */, + 0xD86D /* A1 + B1 * B0. */, + 0x5A82 /* A2 + B2 * B0. */, + 0x567A /* A3 + B3 * B0. */ }; + +VECT_VAR_DECL (expected1_laneq_static, hfloat, 16, 4) [] + = { 0xCA33 /* A0 + B0 * B1. */, + 0x4EF6 /* A1 + B1 * B1. */, + 0xD274 /* A2 + B2 * B1. */, + 0xCA9A /* A3 + B3 * B1. */ }; + +VECT_VAR_DECL (expected2_laneq_static, hfloat, 16, 4) [] + = { 0x5D2F /* A0 + B0 * B2. */, + 0xD32D /* A1 + B1 * B2. */, + 0x54F3 /* A2 + B2 * B2. */, + 0x51B3 /* A3 + B3 * B2. */ }; + +VECT_VAR_DECL (expected3_laneq_static, hfloat, 16, 4) [] + = { 0x5AC8 /* A0 + B0 * B3. */, + 0xCF40 /* A1 + B1 * B3. */, + 0x5073 /* A2 + B2 * B3. */, + 0x4E80 /* A3 + B3 * B3. */ }; + +VECT_VAR_DECL (expected4_laneq_static, hfloat, 16, 4) [] + = { 0x5A58 /* A0 + B0 * B4. */, + 0xCE62 /* A1 + B1 * B4. */, + 0x4F91 /* A2 + B2 * B4. */, + 0x4DE6 /* A3 + B3 * B4. */ }; + +VECT_VAR_DECL (expected5_laneq_static, hfloat, 16, 4) [] + = { 0xF23D /* A0 + B0 * B5. */, + 0x6A3B /* A1 + B1 * B5. */, + 0xECCA /* A2 + B2 * B5. */, + 0xE849 /* A3 + B3 * B5. */ }; + +VECT_VAR_DECL (expected6_laneq_static, hfloat, 16, 4) [] + = { 0xE0DA /* A0 + B0 * B6. */, + 0x5995 /* A1 + B1 * B6. */, + 0xDC6C /* A2 + B2 * B6. */, + 0xD753 /* A3 + B3 * B6. */ }; + +VECT_VAR_DECL (expected7_laneq_static, hfloat, 16, 4) [] + = { 0x7C00 /* A0 + B0 * B7. */, + 0xFC00 /* A1 + B1 * B7. */, + 0x7C00 /* A2 + B2 * B7. */, + 0x7C00 /* A3 + B3 * B7. */ }; + +/* Expected results for vfmaq_laneq. */ +VECT_VAR_DECL (expected0_laneq_static, hfloat, 16, 8) [] + = { 0x613E /* A0 + B0 * B0. */, + 0xD86D /* A1 + B1 * B0. */, + 0x5A82 /* A2 + B2 * B0. */, + 0x567A /* A3 + B3 * B0. */, + 0x7C00 /* A4 + B4 * B0. */, + 0xF24D /* A5 + B5 * B0. */, + 0xE11B /* A6 + B6 * B0. */, + 0x7C00 /* A7 + B7 * B0. */ }; + +VECT_VAR_DECL (expected1_laneq_static, hfloat, 16, 8) [] + = { 0xCA33 /* A0 + B0 * B1. */, + 0x4EF6 /* A1 + B1 * B1. */, + 0xD274 /* A2 + B2 * B1. */, + 0xCA9A /* A3 + B3 * B1. */, + 0x7C00 /* A4 + B4 * B1. */, + 0x6A3B /* A5 + B5 * B1. */, + 0x5C4D /* A6 + B6 * B1. */, + 0xFC00 /* A7 + B7 * B1. */ }; + +VECT_VAR_DECL (expected2_laneq_static, hfloat, 16, 8) [] + = { 0x5D2F /* A0 + B0 * B2. */, + 0xD32D /* A1 + B1 * B2. */, + 0x54F3 /* A2 + B2 * B2. */, + 0x51B3 /* A3 + B3 * B2. */, + 0x7C00 /* A4 + B4 * B2. */, + 0xECCB /* A5 + B5 * B2. */, + 0xDA01 /* A6 + B6 * B2. */, + 0x7C00 /* A7 + B7 * B2. */ }; + +VECT_VAR_DECL (expected3_laneq_static, hfloat, 16, 8) [] + = { 0x5AC8 /* A0 + B0 * B3. */, + 0xCF40 /* A1 + B1 * B3. */, + 0x5073 /* A2 + B2 * B3. */, + 0x4E80 /* A3 + B3 * B3. */, + 0x7C00 /* A4 + B4 * B3. */, + 0xE851 /* A5 + B5 * B3. */, + 0xD08C /* A6 + B6 * B3. */, + 0x7C00 /* A7 + B7 * B3. */ }; + +VECT_VAR_DECL (expected4_laneq_static, hfloat, 16, 8) [] + = { 0x5A58 /* A0 + B0 * B4. */, + 0xCE62 /* A1 + B1 * B4. */, + 0x4F91 /* A2 + B2 * B4. */, + 0x4DE6 /* A3 + B3 * B4. */, + 0x7C00 /* A4 + B4 * B4. */, + 0xE757 /* A5 + B5 * B4. */, + 0xCC54 /* A6 + B6 * B4. */, + 0x7C00 /* A7 + B7 * B4. */ }; + +VECT_VAR_DECL (expected5_laneq_static, hfloat, 16, 8) [] + = { 0xF23D /* A0 + B0 * B5. */, + 0x6A3B /* A1 + B1 * B5. */, + 0xECCA /* A2 + B2 * B5. */, + 0xE849 /* A3 + B3 * B5. */, + 0x7C00 /* A4 + B4 * B5. */, + 0x7C00 /* A5 + B5 * B5. 
*/, + 0x744D /* A6 + B6 * B5. */, + 0xFC00 /* A7 + B7 * B5. */ }; + +VECT_VAR_DECL (expected6_laneq_static, hfloat, 16, 8) [] + = { 0xE0DA /* A0 + B0 * B6. */, + 0x5995 /* A1 + B1 * B6. */, + 0xDC6C /* A2 + B2 * B6. */, + 0xD753 /* A3 + B3 * B6. */, + 0x7C00 /* A4 + B4 * B6. */, + 0x7447 /* A5 + B5 * B6. */, + 0x644E /* A6 + B6 * B6. */, + 0xFC00 /* A7 + B7 * B6. */ }; + +VECT_VAR_DECL (expected7_laneq_static, hfloat, 16, 8) [] + = { 0x7C00 /* A0 + B0 * B7. */, + 0xFC00 /* A1 + B1 * B7. */, + 0x7C00 /* A2 + B2 * B7. */, + 0x7C00 /* A3 + B3 * B7. */, + 0x7C00 /* A4 + B4 * B7. */, + 0xFC00 /* A5 + B5 * B7. */, + 0xFC00 /* A6 + B6 * B7. */, + 0x7C00 /* A7 + B7 * B7. */ }; + +/* Expected results for vfms_lane. */ +VECT_VAR_DECL (expected0_fms_static, hfloat, 16, 4) [] + = { 0xDEA2 /* A0 + (-B0) * B0. */, + 0x5810 /* A1 + (-B1) * B0. */, + 0xDA82 /* A2 + (-B2) * B0. */, + 0xD53A /* A3 + (-B3) * B0. */ }; + +VECT_VAR_DECL (expected1_fms_static, hfloat, 16, 4) [] + = { 0x5C0D /* A0 + (-B0) * B1. */, + 0xD0EE /* A1 + (-B1) * B1. */, + 0x5274 /* A2 + (-B2) * B1. */, + 0x5026 /* A3 + (-B3) * B1. */ }; + +VECT_VAR_DECL (expected2_fms_static, hfloat, 16, 4) [] + = { 0xD54E /* A0 + (-B0) * B2. */, + 0x51BA /* A1 + (-B1) * B2. */, + 0xD4F3 /* A2 + (-B2) * B2. */, + 0xCE66 /* A3 + (-B3) * B2. */ }; + +VECT_VAR_DECL (expected3_fms_static, hfloat, 16, 4) [] + = { 0x4F70 /* A0 + (-B0) * B3. */, + 0x4C5A /* A1 + (-B1) * B3. */, + 0xD073 /* A2 + (-B2) * B3. */, + 0xC600 /* A3 + (-B3) * B3. */ }; + +/* Expected results for vfmsq_lane. */ +VECT_VAR_DECL (expected0_fms_static, hfloat, 16, 8) [] + = { 0xDEA2 /* A0 + (-B0) * B0. */, + 0x5810 /* A1 + (-B1) * B0. */, + 0xDA82 /* A2 + (-B2) * B0. */, + 0xD53A /* A3 + (-B3) * B0. */, + 0x7C00 /* A4 + (-B4) * B0. */, + 0x724B /* A5 + (-B5) * B0. */, + 0x6286 /* A6 + (-B6) * B0. */, + 0xFC00 /* A7 + (-B7) * B0. */ }; + +VECT_VAR_DECL (expected1_fms_static, hfloat, 16, 8) [] + = { 0x5C0D /* A0 + (-B0) * B1. */, + 0xD0EE /* A1 + (-B1) * B1. */, + 0x5274 /* A2 + (-B2) * B1. */, + 0x5026 /* A3 + (-B3) * B1. */, + 0x7C00 /* A4 + (-B4) * B1. */, + 0xEA41 /* A5 + (-B5) * B1. */, + 0xD5DA /* A6 + (-B6) * B1. */, + 0x7C00 /* A7 + (-B7) * B1. */ }; + +VECT_VAR_DECL (expected2_fms_static, hfloat, 16, 8) [] + = { 0xD54E /* A0 + (-B0) * B2. */, + 0x51BA /* A1 + (-B1) * B2. */, + 0xD4F3 /* A2 + (-B2) * B2. */, + 0xCE66 /* A3 + (-B3) * B2. */, + 0x7C00 /* A4 + (-B4) * B2. */, + 0x6CC8 /* A5 + (-B5) * B2. */, + 0x5DD7 /* A6 + (-B6) * B2. */, + 0xFC00 /* A7 + (-B7) * B2. */ }; + +VECT_VAR_DECL (expected3_fms_static, hfloat, 16, 8) [] + = { 0x4F70 /* A0 + (-B0) * B3. */, + 0x4C5A /* A1 + (-B1) * B3. */, + 0xD073 /* A2 + (-B2) * B3. */, + 0xC600 /* A3 + (-B3) * B3. */, + 0x7C00 /* A4 + (-B4) * B3. */, + 0x684B /* A5 + (-B5) * B3. */, + 0x5AD0 /* A6 + (-B6) * B3. */, + 0xFC00 /* A7 + (-B7) * B3. */ }; + +/* Expected results for vfms_laneq. */ +VECT_VAR_DECL (expected0_fms_laneq_static, hfloat, 16, 4) [] + = { 0xDEA2 /* A0 + (-B0) * B0. */, + 0x5810 /* A1 + (-B1) * B0. */, + 0xDA82 /* A2 + (-B2) * B0. */, + 0xD53A /* A3 + (-B3) * B0. */ }; + +VECT_VAR_DECL (expected1_fms_laneq_static, hfloat, 16, 4) [] + = { 0x5C0D /* A0 + (-B0) * B1. */, + 0xD0EE /* A1 + (-B1) * B1. */, + 0x5274 /* A2 + (-B2) * B1. */, + 0x5026 /* A3 + (-B3) * B1. */ }; + +VECT_VAR_DECL (expected2_fms_laneq_static, hfloat, 16, 4) [] + = { 0xD54E /* A0 + (-B0) * B2. */, + 0x51BA /* A1 + (-B1) * B2. */, + 0xD4F3 /* A2 + (-B2) * B2. */, + 0xCE66 /* A3 + (-B3) * B2. 
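
   The fms tables follow the same scheme with the product negated --
   written A + (-B) * B<L> in the comments -- so the scalar model
   becomes, with the same stand-in names:

     r[i] = fmaf (-b[i], v[L], a[i]);   // vfms_lane: a[i] - b[i] * v[L]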
*/ }; + +VECT_VAR_DECL (expected3_fms_laneq_static, hfloat, 16, 4) [] + = { 0x4F70 /* A0 + (-B0) * B3. */, + 0x4C5A /* A1 + (-B1) * B3. */, + 0xD073 /* A2 + (-B2) * B3. */, + 0xC600 /* A3 + (-B3) * B3. */ }; + +VECT_VAR_DECL (expected4_fms_laneq_static, hfloat, 16, 4) [] + = { 0x5179 /* A0 + (-B0) * B4. */, + 0x4AF6 /* A1 + (-B1) * B4. */, + 0xCF91 /* A2 + (-B2) * B4. */, + 0xC334 /* A3 + (-B3) * B4. */ }; + +VECT_VAR_DECL (expected5_fms_laneq_static, hfloat, 16, 4) [] + = { 0x725C /* A0 + (-B0) * B5. */, + 0xEA41 /* A1 + (-B1) * B5. */, + 0x6CCA /* A2 + (-B2) * B5. */, + 0x6853 /* A3 + (-B3) * B5. */ }; + +VECT_VAR_DECL (expected6_fms_laneq_static, hfloat, 16, 4) [] + = { 0x62C7 /* A0 + (-B0) * B6. */, + 0xD9F2 /* A1 + (-B1) * B6. */, + 0x5C6C /* A2 + (-B2) * B6. */, + 0x584A /* A3 + (-B3) * B6. */ }; + +VECT_VAR_DECL (expected7_fms_laneq_static, hfloat, 16, 4) [] + = { 0xFC00 /* A0 + (-B0) * B7. */, + 0x7C00 /* A1 + (-B1) * B7. */, + 0xFC00 /* A2 + (-B2) * B7. */, + 0xFC00 /* A3 + (-B3) * B7. */ }; + +/* Expected results for vfmsq_laneq. */ +VECT_VAR_DECL (expected0_fms_laneq_static, hfloat, 16, 8) [] + = { 0xDEA2 /* A0 + (-B0) * B0. */, + 0x5810 /* A1 + (-B1) * B0. */, + 0xDA82 /* A2 + (-B2) * B0. */, + 0xD53A /* A3 + (-B3) * B0. */, + 0x7C00 /* A4 + (-B4) * B0. */, + 0x724B /* A5 + (-B5) * B0. */, + 0x6286 /* A6 + (-B6) * B0. */, + 0xFC00 /* A7 + (-B7) * B0. */ }; + +VECT_VAR_DECL (expected1_fms_laneq_static, hfloat, 16, 8) [] + = { 0x5C0D /* A0 + (-B0) * B1. */, + 0xD0EE /* A1 + (-B1) * B1. */, + 0x5274 /* A2 + (-B2) * B1. */, + 0x5026 /* A3 + (-B3) * B1. */, + 0x7C00 /* A4 + (-B4) * B1. */, + 0xEA41 /* A5 + (-B5) * B1. */, + 0xD5DA /* A6 + (-B6) * B1. */, + 0x7C00 /* A7 + (-B7) * B1. */ }; + +VECT_VAR_DECL (expected2_fms_laneq_static, hfloat, 16, 8) [] + = { 0xD54E /* A0 + (-B0) * B2. */, + 0x51BA /* A1 + (-B1) * B2. */, + 0xD4F3 /* A2 + (-B2) * B2. */, + 0xCE66 /* A3 + (-B3) * B2. */, + 0x7C00 /* A4 + (-B4) * B2. */, + 0x6CC8 /* A5 + (-B5) * B2. */, + 0x5DD7 /* A6 + (-B6) * B2. */, + 0xFC00 /* A7 + (-B7) * B2. */ }; + +VECT_VAR_DECL (expected3_fms_laneq_static, hfloat, 16, 8) [] + = { 0x4F70 /* A0 + (-B0) * B3. */, + 0x4C5A /* A1 + (-B1) * B3. */, + 0xD073 /* A2 + (-B2) * B3. */, + 0xC600 /* A3 + (-B3) * B3. */, + 0x7C00 /* A4 + (-B4) * B3. */, + 0x684B /* A5 + (-B5) * B3. */, + 0x5AD0 /* A6 + (-B6) * B3. */, + 0xFC00 /* A7 + (-B7) * B3. */ }; + +VECT_VAR_DECL (expected4_fms_laneq_static, hfloat, 16, 8) [] + = { 0x5179 /* A0 + (-B0) * B4. */, + 0x4AF6 /* A1 + (-B1) * B4. */, + 0xCF91 /* A2 + (-B2) * B4. */, + 0xC334 /* A3 + (-B3) * B4. */, + 0x7C00 /* A4 + (-B4) * B4. */, + 0x674C /* A5 + (-B5) * B4. */, + 0x5A37 /* A6 + (-B6) * B4. */, + 0xFC00 /* A7 + (-B7) * B4. */ }; + +VECT_VAR_DECL (expected5_fms_laneq_static, hfloat, 16, 8) [] + = { 0x725C /* A0 + (-B0) * B5. */, + 0xEA41 /* A1 + (-B1) * B5. */, + 0x6CCA /* A2 + (-B2) * B5. */, + 0x6853 /* A3 + (-B3) * B5. */, + 0x7C00 /* A4 + (-B4) * B5. */, + 0xFC00 /* A5 + (-B5) * B5. */, + 0xF441 /* A6 + (-B6) * B5. */, + 0x7C00 /* A7 + (-B7) * B5. */ }; + +VECT_VAR_DECL (expected6_fms_laneq_static, hfloat, 16, 8) [] + = { 0x62C7 /* A0 + (-B0) * B6. */, + 0xD9F2 /* A1 + (-B1) * B6. */, + 0x5C6C /* A2 + (-B2) * B6. */, + 0x584A /* A3 + (-B3) * B6. */, + 0x7C00 /* A4 + (-B4) * B6. */, + 0xF447 /* A5 + (-B5) * B6. */, + 0xE330 /* A6 + (-B6) * B6. */, + 0x7C00 /* A7 + (-B7) * B6. */ }; + +VECT_VAR_DECL (expected7_fms_laneq_static, hfloat, 16, 8) [] + = { 0xFC00 /* A0 + (-B0) * B7. */, + 0x7C00 /* A1 + (-B1) * B7. 
*/, + 0xFC00 /* A2 + (-B2) * B7. */, + 0xFC00 /* A3 + (-B3) * B7. */, + 0x7C00 /* A4 + (-B4) * B7. */, + 0x7C00 /* A5 + (-B5) * B7. */, + 0x7C00 /* A6 + (-B6) * B7. */, + 0xFC00 /* A7 + (-B7) * B7. */ }; + +void exec_vfmas_lane_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VFMA_LANE (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 4); + DECL_VARIABLE(vsrc_2, float, 16, 4); + VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A0, A1, A2, A3}; + VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {B0, B1, B2, B3}; + VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); + VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); + DECL_VARIABLE (vector_res, float, 16, 4) + = vfma_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 0); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 1); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 2); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 3); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMAQ_LANE (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 8); + DECL_VARIABLE(vsrc_2, float, 16, 8); + VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A0, A1, A2, A3, A4, A5, A6, A7}; + VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {B0, B1, B2, B3, B4, B5, B6, B7}; + VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); + VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); + DECL_VARIABLE (vector_res, float, 16, 8) + = vfmaq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 0); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 1); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 2); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 3); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + 
CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMA_LANEQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_3, float, 16, 8); + VECT_VAR_DECL (buf_src_3, float, 16, 8) [] = {B0, B1, B2, B3, B4, B5, B6, B7}; + VLOAD (vsrc_3, buf_src_3, q, float, f, 16, 8); + VECT_VAR (vector_res, float, 16, 4) + = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 0); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 1); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 2); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 3); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 4); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected4_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 5); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected5_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 6); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected6_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 7); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected7_laneq_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMAQ_LANEQ (FP16)" + clean_results (); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 0); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 1); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + 
VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 2); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 3); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 4); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected4_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 5); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected5_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 6); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected6_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 7); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected7_laneq_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMS_LANE (FP16)" + clean_results (); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 0); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_fms_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 1); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_fms_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 2); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_fms_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 3); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_fms_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMSQ_LANE (FP16)" + clean_results (); + + VECT_VAR 
(vector_res, float, 16, 8) + = vfmsq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 0); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_fms_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 1); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_fms_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 2); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_fms_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 3); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_fms_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMS_LANEQ (FP16)" + clean_results (); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 0); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 1); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 2); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 3); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 4); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected4_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 5); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected5_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 6); + vst1_f16 (VECT_VAR (result, 
float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected6_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), + VECT_VAR (vsrc_3, float, 16, 8), 7); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected7_fms_laneq_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMSQ_LANEQ (FP16)" + clean_results (); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 0); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 1); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 2); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 3); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 4); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected4_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 5); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected5_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 6); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected6_fms_laneq_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), + VECT_VAR (vsrc_3, float, 16, 8), 7); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected7_fms_laneq_static, ""); +} + +int +main (void) +{ + exec_vfmas_lane_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_n_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_n_f16_1.c new file mode 100644 index 0000000..f01aefb --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_n_f16_1.c @@ -0,0 +1,469 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A0 FP16_C (123.4) +#define A1 FP16_C (-5.8) +#define A2 FP16_C (-0.0) +#define A3 FP16_C (10) +#define A4 FP16_C (123412.43) +#define A5 FP16_C (-5.8) +#define A6 FP16_C (90.8) +#define A7 FP16_C (24) + +#define B0 FP16_C (23.4) +#define B1 FP16_C (-5.8) +#define B2 FP16_C (8.9) +#define B3 FP16_C (4.0) +#define B4 FP16_C (3.4) +#define B5 FP16_C (-550.8) +#define B6 FP16_C (-31.8) +#define B7 FP16_C (20000.0) + +/* Expected results for vfma_n. */ +VECT_VAR_DECL (expected_fma0_static, hfloat, 16, 4) [] + = { 0x613E /* A0 + B0 * B0. */, + 0xD86D /* A1 + B1 * B0. */, + 0x5A82 /* A2 + B2 * B0. */, + 0x567A /* A3 + B3 * B0. */ }; + +VECT_VAR_DECL (expected_fma1_static, hfloat, 16, 4) [] + = { 0xCA33 /* A0 + B0 * B1. */, + 0x4EF6 /* A1 + B1 * B1. */, + 0xD274 /* A2 + B2 * B1. */, + 0xCA9A /* A3 + B3 * B1. */ }; + +VECT_VAR_DECL (expected_fma2_static, hfloat, 16, 4) [] + = { 0x5D2F /* A0 + B0 * B2. */, + 0xD32D /* A1 + B1 * B2. */, + 0x54F3 /* A2 + B2 * B2. */, + 0x51B3 /* A3 + B3 * B2. */ }; + +VECT_VAR_DECL (expected_fma3_static, hfloat, 16, 4) [] + = { 0x5AC8 /* A0 + B0 * B3. */, + 0xCF40 /* A1 + B1 * B3. */, + 0x5073 /* A2 + B2 * B3. */, + 0x4E80 /* A3 + B3 * B3. */ }; + +VECT_VAR_DECL (expected_fma0_static, hfloat, 16, 8) [] + = { 0x613E /* A0 + B0 * B0. */, + 0xD86D /* A1 + B1 * B0. */, + 0x5A82 /* A2 + B2 * B0. */, + 0x567A /* A3 + B3 * B0. */, + 0x7C00 /* A4 + B4 * B0. */, + 0xF24D /* A5 + B5 * B0. */, + 0xE11B /* A6 + B6 * B0. */, + 0x7C00 /* A7 + B7 * B0. */ }; + +VECT_VAR_DECL (expected_fma1_static, hfloat, 16, 8) [] + = { 0xCA33 /* A0 + B0 * B1. */, + 0x4EF6 /* A1 + B1 * B1. */, + 0xD274 /* A2 + B2 * B1. */, + 0xCA9A /* A3 + B3 * B1. */, + 0x7C00 /* A4 + B4 * B1. */, + 0x6A3B /* A5 + B5 * B1. */, + 0x5C4D /* A6 + B6 * B1. */, + 0xFC00 /* A7 + B7 * B1. */ }; + +VECT_VAR_DECL (expected_fma2_static, hfloat, 16, 8) [] + = { 0x5D2F /* A0 + B0 * B2. */, + 0xD32D /* A1 + B1 * B2. */, + 0x54F3 /* A2 + B2 * B2. */, + 0x51B3 /* A3 + B3 * B2. */, + 0x7C00 /* A4 + B4 * B2. */, + 0xECCB /* A5 + B5 * B2. */, + 0xDA01 /* A6 + B6 * B2. */, + 0x7C00 /* A7 + B7 * B2. */ }; + +VECT_VAR_DECL (expected_fma3_static, hfloat, 16, 8) [] + = { 0x5AC8 /* A0 + B0 * B3. */, + 0xCF40 /* A1 + B1 * B3. */, + 0x5073 /* A2 + B2 * B3. */, + 0x4E80 /* A3 + B3 * B3. */, + 0x7C00 /* A4 + B4 * B3. */, + 0xE851 /* A5 + B5 * B3. */, + 0xD08C /* A6 + B6 * B3. */, + 0x7C00 /* A7 + B7 * B3. */ }; + +VECT_VAR_DECL (expected_fma4_static, hfloat, 16, 8) [] + = { 0x5A58 /* A0 + B0 * B4. */, + 0xCE62 /* A1 + B1 * B4. */, + 0x4F91 /* A2 + B2 * B4. */, + 0x4DE6 /* A3 + B3 * B4. */, + 0x7C00 /* A4 + B4 * B4. */, + 0xE757 /* A5 + B5 * B4. */, + 0xCC54 /* A6 + B6 * B4. */, + 0x7C00 /* A7 + B7 * B4. */ }; + +VECT_VAR_DECL (expected_fma5_static, hfloat, 16, 8) [] + = { 0xF23D /* A0 + B0 * B5. */, + 0x6A3B /* A1 + B1 * B5. */, + 0xECCA /* A2 + B2 * B5. */, + 0xE849 /* A3 + B3 * B5. */, + 0x7C00 /* A4 + B4 * B5. */, + 0x7C00 /* A5 + B5 * B5. */, + 0x744D /* A6 + B6 * B5. */, + 0xFC00 /* A7 + B7 * B5. */ }; + +VECT_VAR_DECL (expected_fma6_static, hfloat, 16, 8) [] + = { 0xE0DA /* A0 + B0 * B6. */, + 0x5995 /* A1 + B1 * B6. */, + 0xDC6C /* A2 + B2 * B6. 
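   (Editor's note: each constant in these tables is the IEEE-754
   binary16 bit pattern of the expected result, computed with the
   inputs first rounded to __fp16 and round-to-nearest throughout.
   A minimal sketch reproducing the first entry of
   expected_fma0_static, assuming an __fp16-capable compiler --
   illustrative only, not part of the test:

     #include <stdio.h>
     #include <string.h>
     int main (void)
     {
       __fp16 a = 123.4, b = 23.4;   // A0 and B0 after FP16 rounding
       __fp16 r = a + b * b;         // A0 + B0 * B0
       unsigned short bits;
       memcpy (&bits, &r, sizeof bits);
       printf ("0x%04X\n", (unsigned) bits);   // prints 0x613E
       return 0;
     }

   0x7C00 and 0xFC00 entries are +Inf and -Inf: binary16 overflows
   once a result moves past +/-65504.)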
*/, + 0xD753 /* A3 + B3 * B6. */, + 0x7C00 /* A4 + B4 * B6. */, + 0x7447 /* A5 + B5 * B6. */, + 0x644E /* A6 + B6 * B6. */, + 0xFC00 /* A7 + B7 * B6. */ }; + +VECT_VAR_DECL (expected_fma7_static, hfloat, 16, 8) [] + = { 0x7C00 /* A0 + B0 * B7. */, + 0xFC00 /* A1 + B1 * B7. */, + 0x7C00 /* A2 + B2 * B7. */, + 0x7C00 /* A3 + B3 * B7. */, + 0x7C00 /* A4 + B4 * B7. */, + 0xFC00 /* A5 + B5 * B7. */, + 0xFC00 /* A6 + B6 * B7. */, + 0x7C00 /* A7 + B7 * B7. */ }; + +/* Expected results for vfms_n. */ +VECT_VAR_DECL (expected_fms0_static, hfloat, 16, 4) [] + = { 0xDEA2 /* A0 + (-B0) * B0. */, + 0x5810 /* A1 + (-B1) * B0. */, + 0xDA82 /* A2 + (-B2) * B0. */, + 0xD53A /* A3 + (-B3) * B0. */ }; + +VECT_VAR_DECL (expected_fms1_static, hfloat, 16, 4) [] + = { 0x5C0D /* A0 + (-B0) * B1. */, + 0xD0EE /* A1 + (-B1) * B1. */, + 0x5274 /* A2 + (-B2) * B1. */, + 0x5026 /* A3 + (-B3) * B1. */ }; + +VECT_VAR_DECL (expected_fms2_static, hfloat, 16, 4) [] + = { 0xD54E /* A0 + (-B0) * B2. */, + 0x51BA /* A1 + (-B1) * B2. */, + 0xD4F3 /* A2 + (-B2) * B2. */, + 0xCE66 /* A3 + (-B3) * B2. */ }; + +VECT_VAR_DECL (expected_fms3_static, hfloat, 16, 4) [] + = { 0x4F70 /* A0 + (-B0) * B3. */, + 0x4C5A /* A1 + (-B1) * B3. */, + 0xD073 /* A2 + (-B2) * B3. */, + 0xC600 /* A3 + (-B3) * B3. */ }; + +VECT_VAR_DECL (expected_fms0_static, hfloat, 16, 8) [] + = { 0xDEA2 /* A0 + (-B0) * B0. */, + 0x5810 /* A1 + (-B1) * B0. */, + 0xDA82 /* A2 + (-B2) * B0. */, + 0xD53A /* A3 + (-B3) * B0. */, + 0x7C00 /* A4 + (-B4) * B0. */, + 0x724B /* A5 + (-B5) * B0. */, + 0x6286 /* A6 + (-B6) * B0. */, + 0xFC00 /* A7 + (-B7) * B0. */ }; + +VECT_VAR_DECL (expected_fms1_static, hfloat, 16, 8) [] + = { 0x5C0D /* A0 + (-B0) * B1. */, + 0xD0EE /* A1 + (-B1) * B1. */, + 0x5274 /* A2 + (-B2) * B1. */, + 0x5026 /* A3 + (-B3) * B1. */, + 0x7C00 /* A4 + (-B4) * B1. */, + 0xEA41 /* A5 + (-B5) * B1. */, + 0xD5DA /* A6 + (-B6) * B1. */, + 0x7C00 /* A7 + (-B7) * B1. */ }; + +VECT_VAR_DECL (expected_fms2_static, hfloat, 16, 8) [] + = { 0xD54E /* A0 + (-B0) * B2. */, + 0x51BA /* A1 + (-B1) * B2. */, + 0xD4F3 /* A2 + (-B2) * B2. */, + 0xCE66 /* A3 + (-B3) * B2. */, + 0x7C00 /* A4 + (-B4) * B2. */, + 0x6CC8 /* A5 + (-B5) * B2. */, + 0x5DD7 /* A6 + (-B6) * B2. */, + 0xFC00 /* A7 + (-B7) * B2. */ }; + +VECT_VAR_DECL (expected_fms3_static, hfloat, 16, 8) [] + = { 0x4F70 /* A0 + (-B0) * B3. */, + 0x4C5A /* A1 + (-B1) * B3. */, + 0xD073 /* A2 + (-B2) * B3. */, + 0xC600 /* A3 + (-B3) * B3. */, + 0x7C00 /* A4 + (-B4) * B3. */, + 0x684B /* A5 + (-B5) * B3. */, + 0x5AD0 /* A6 + (-B6) * B3. */, + 0xFC00 /* A7 + (-B7) * B3. */ }; + +VECT_VAR_DECL (expected_fms4_static, hfloat, 16, 8) [] + = { 0x5179 /* A0 + (-B0) * B4. */, + 0x4AF6 /* A1 + (-B1) * B4. */, + 0xCF91 /* A2 + (-B2) * B4. */, + 0xC334 /* A3 + (-B3) * B4. */, + 0x7C00 /* A4 + (-B4) * B4. */, + 0x674C /* A5 + (-B5) * B4. */, + 0x5A37 /* A6 + (-B6) * B4. */, + 0xFC00 /* A7 + (-B7) * B4. */ }; + +VECT_VAR_DECL (expected_fms5_static, hfloat, 16, 8) [] + = { 0x725C /* A0 + (-B0) * B5. */, + 0xEA41 /* A1 + (-B1) * B5. */, + 0x6CCA /* A2 + (-B2) * B5. */, + 0x6853 /* A3 + (-B3) * B5. */, + 0x7C00 /* A4 + (-B4) * B5. */, + 0xFC00 /* A5 + (-B5) * B5. */, + 0xF441 /* A6 + (-B6) * B5. */, + 0x7C00 /* A7 + (-B7) * B5. */ }; + +VECT_VAR_DECL (expected_fms6_static, hfloat, 16, 8) [] + = { 0x62C7 /* A0 + (-B0) * B6. */, + 0xD9F2 /* A1 + (-B1) * B6. */, + 0x5C6C /* A2 + (-B2) * B6. */, + 0x584A /* A3 + (-B3) * B6. */, + 0x7C00 /* A4 + (-B4) * B6. */, + 0xF447 /* A5 + (-B5) * B6. */, + 0xE330 /* A6 + (-B6) * B6. 
*/, + 0x7C00 /* A7 + (-B7) * B6. */ }; + +VECT_VAR_DECL (expected_fms7_static, hfloat, 16, 8) [] + = { 0xFC00 /* A0 + (-B0) * B7. */, + 0x7C00 /* A1 + (-B1) * B7. */, + 0xFC00 /* A2 + (-B2) * B7. */, + 0xFC00 /* A3 + (-B3) * B7. */, + 0x7C00 /* A4 + (-B4) * B7. */, + 0x7C00 /* A5 + (-B5) * B7. */, + 0x7C00 /* A6 + (-B6) * B7. */, + 0xFC00 /* A7 + (-B7) * B7. */ }; + +void exec_vfmas_n_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VFMA_N (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 4); + DECL_VARIABLE(vsrc_2, float, 16, 4); + VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A0, A1, A2, A3}; + VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {B0, B1, B2, B3}; + VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); + VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); + DECL_VARIABLE (vector_res, float, 16, 4) + = vfma_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), B0); + + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fma0_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), B1); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fma1_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), B2); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fma2_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vfma_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), B3); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fma3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMAQ_N (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 8); + DECL_VARIABLE(vsrc_2, float, 16, 8); + VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A0, A1, A2, A3, A4, A5, A6, A7}; + VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {B0, B1, B2, B3, B4, B5, B6, B7}; + VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); + VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); + DECL_VARIABLE (vector_res, float, 16, 8) + = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), B0); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma0_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), B1); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma1_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), B2); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma2_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), B3); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma3_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = 
vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+		   VECT_VAR (vsrc_2, float, 16, 8), B4);
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+	     VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma4_static, "");
+
+  VECT_VAR (vector_res, float, 16, 8)
+    = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+		   VECT_VAR (vsrc_2, float, 16, 8), B5);
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+	     VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma5_static, "");
+
+  VECT_VAR (vector_res, float, 16, 8)
+    = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+		   VECT_VAR (vsrc_2, float, 16, 8), B6);
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+	     VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma6_static, "");
+
+  VECT_VAR (vector_res, float, 16, 8)
+    = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+		   VECT_VAR (vsrc_2, float, 16, 8), B7);
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+	     VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma7_static, "");
+
+#undef TEST_MSG
+#define TEST_MSG "VFMS_N (FP16)"
+  clean_results ();
+
+  VECT_VAR (vector_res, float, 16, 4)
+    = vfms_n_f16 (VECT_VAR (vsrc_1, float, 16, 4),
+		  VECT_VAR (vsrc_2, float, 16, 4), B0);
+
+  vst1_f16 (VECT_VAR (result, float, 16, 4),
+	    VECT_VAR (vector_res, float, 16, 4));
+
+  CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fms0_static, "");
+
+  VECT_VAR (vector_res, float, 16, 4)
+    = vfms_n_f16 (VECT_VAR (vsrc_1, float, 16, 4),
+		  VECT_VAR (vsrc_2, float, 16, 4), B1);
+  vst1_f16 (VECT_VAR (result, float, 16, 4),
+	    VECT_VAR (vector_res, float, 16, 4));
+
+  CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fms1_static, "");
+
+  VECT_VAR (vector_res, float, 16, 4)
+    = vfms_n_f16 (VECT_VAR (vsrc_1, float, 16, 4),
+		  VECT_VAR (vsrc_2, float, 16, 4), B2);
+  vst1_f16 (VECT_VAR (result, float, 16, 4),
+	    VECT_VAR (vector_res, float, 16, 4));
+
+  CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fms2_static, "");
+
+  VECT_VAR (vector_res, float, 16, 4)
+    = vfms_n_f16 (VECT_VAR (vsrc_1, float, 16, 4),
+		  VECT_VAR (vsrc_2, float, 16, 4), B3);
+  vst1_f16 (VECT_VAR (result, float, 16, 4),
+	    VECT_VAR (vector_res, float, 16, 4));
+
+  CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fms3_static, "");
+
+#undef TEST_MSG
+#define TEST_MSG "VFMSQ_N (FP16)"
+  clean_results ();
+
+  VECT_VAR (vector_res, float, 16, 8)
+    = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+		   VECT_VAR (vsrc_2, float, 16, 8), B0);
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+	     VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms0_static, "");
+
+  VECT_VAR (vector_res, float, 16, 8)
+    = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+		   VECT_VAR (vsrc_2, float, 16, 8), B1);
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+	     VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms1_static, "");
+
+  VECT_VAR (vector_res, float, 16, 8)
+    = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+		   VECT_VAR (vsrc_2, float, 16, 8), B2);
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+	     VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms2_static, "");
+
+  VECT_VAR (vector_res, float, 16, 8)
+    = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+		   VECT_VAR (vsrc_2, float, 16, 8), B3);
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+	     VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16,
expected_fms3_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), B4); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms4_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), B5); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms5_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), B6); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms6_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), B7); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms7_static, ""); +} + +int +main (void) +{ + exec_vfmas_n_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmash_lane_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmash_lane_f16_1.c new file mode 100644 index 0000000..ea751da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmash_lane_f16_1.c @@ -0,0 +1,143 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A0 FP16_C (123.4) +#define B0 FP16_C (-5.8) +#define C0 FP16_C (-3.8) +#define D0 FP16_C (10) + +#define A1 FP16_C (12.4) +#define B1 FP16_C (-5.8) +#define C1 FP16_C (90.8) +#define D1 FP16_C (24) + +#define A2 FP16_C (23.4) +#define B2 FP16_C (-5.8) +#define C2 FP16_C (8.9) +#define D2 FP16_C (4) + +#define E0 FP16_C (3.4) +#define F0 FP16_C (-55.8) +#define G0 FP16_C (-31.8) +#define H0 FP16_C (2) + +#define E1 FP16_C (123.4) +#define F1 FP16_C (-5.8) +#define G1 FP16_C (-3.8) +#define H1 FP16_C (102) + +#define E2 FP16_C (4.9) +#define F2 FP16_C (-15.8) +#define G2 FP16_C (39.8) +#define H2 FP16_C (49) + +extern void abort (); + +float16_t src1[8] = { A0, B0, C0, D0, E0, F0, G0, H0 }; +float16_t src2[8] = { A1, B1, C1, D1, E1, F1, G1, H1 }; +VECT_VAR_DECL (src3, float, 16, 4) [] = { A2, B2, C2, D2 }; +VECT_VAR_DECL (src3, float, 16, 8) [] = { A2, B2, C2, D2, E2, F2, G2, H2 }; + +/* Expected results for vfmah_lane_f16. */ +uint16_t expected[4] = { 0x5E76 /* A0 + A1 * A2. */, + 0x4EF6 /* B0 + B1 * B2. */, + 0x6249 /* C0 + C1 * C2. */, + 0x56A0 /* D0 + D1 * D2. */ }; + +/* Expected results for vfmah_laneq_f16. */ +uint16_t expected_laneq[8] = { 0x5E76 /* A0 + A1 * A2. */, + 0x4EF6 /* B0 + B1 * B2. */, + 0x6249 /* C0 + C1 * C2. */, + 0x56A0 /* D0 + D1 * D2. */, + 0x60BF /* E0 + E1 * E2. */, + 0x507A /* F0 + F1 * F2. */, + 0xD9B9 /* G0 + G1 * G2. */, + 0x6CE2 /* H0 + H1 * H2. */ }; + +/* Expected results for vfmsh_lane_f16. */ +uint16_t expected_fms[4] = { 0xD937 /* A0 + -A1 * A2. */, + 0xD0EE /* B0 + -B1 * B2. */, + 0xE258 /* C0 + -C1 * C2. */, + 0xD560 /* D0 + -D1 * D2. */ }; + +/* Expected results for vfmsh_laneq_f16. */ +uint16_t expected_fms_laneq[8] = { 0xD937 /* A0 + -A1 * A2. 
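   (Editor's note: these scalar-by-lane tests compare results bit for
   bit by type-punning the float16_t return value -- see the
   CHECK_LANE macros below.  Informally, entry N above is

     expected_fms[N] == bits of (src1[N] + (-src2[N]) * src3[N])

   computed as a fused FP16 multiply-subtract.  The bitwise compare is
   deliberate: an FP compare would treat +0.0 and -0.0 as equal and
   cannot match NaN patterns.)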
*/,
+				   0xD0EE /* B0 + -B1 * B2. */,
+				   0xE258 /* C0 + -C1 * C2. */,
+				   0xD560 /* D0 + -D1 * D2. */,
+				   0xE0B2 /* E0 + -E1 * E2. */,
+				   0xD89C /* F0 + -F1 * F2. */,
+				   0x5778 /* G0 + -G1 * G2. */,
+				   0xECE1 /* H0 + -H1 * H2. */ };
+
+void exec_vfmash_lane_f16 (void)
+{
+#define CHECK_LANE(N) \
+  ret = vfmah_lane_f16 (src1[N], src2[N], VECT_VAR (vsrc3, float, 16, 4), N);\
+  if (*(uint16_t *) &ret != expected[N])\
+    abort ();
+
+  DECL_VARIABLE(vsrc3, float, 16, 4);
+  VLOAD (vsrc3, src3, , float, f, 16, 4);
+  float16_t ret;
+  CHECK_LANE(0)
+  CHECK_LANE(1)
+  CHECK_LANE(2)
+  CHECK_LANE(3)
+
+#undef CHECK_LANE
+#define CHECK_LANE(N) \
+  ret = vfmah_laneq_f16 (src1[N], src2[N], VECT_VAR (vsrc3, float, 16, 8), N);\
+  if (*(uint16_t *) &ret != expected_laneq[N]) \
+    abort ();
+
+  DECL_VARIABLE(vsrc3, float, 16, 8);
+  VLOAD (vsrc3, src3, q, float, f, 16, 8);
+  CHECK_LANE(0)
+  CHECK_LANE(1)
+  CHECK_LANE(2)
+  CHECK_LANE(3)
+  CHECK_LANE(4)
+  CHECK_LANE(5)
+  CHECK_LANE(6)
+  CHECK_LANE(7)
+
+#undef CHECK_LANE
+#define CHECK_LANE(N) \
+  ret = vfmsh_lane_f16 (src1[N], src2[N], VECT_VAR (vsrc3, float, 16, 4), N);\
+  if (*(uint16_t *) &ret != expected_fms[N])\
+    abort ();
+
+  CHECK_LANE(0)
+  CHECK_LANE(1)
+  CHECK_LANE(2)
+  CHECK_LANE(3)
+
+#undef CHECK_LANE
+#define CHECK_LANE(N) \
+  ret = vfmsh_laneq_f16 (src1[N], src2[N], VECT_VAR (vsrc3, float, 16, 8), N);\
+  if (*(uint16_t *) &ret != expected_fms_laneq[N]) \
+    abort ();
+
+  CHECK_LANE(0)
+  CHECK_LANE(1)
+  CHECK_LANE(2)
+  CHECK_LANE(3)
+  CHECK_LANE(4)
+  CHECK_LANE(5)
+  CHECK_LANE(6)
+  CHECK_LANE(7)
+}
+
+int
+main (void)
+{
+  exec_vfmash_lane_f16 ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms.c
index 02bef09..555654d 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms.c
@@ -4,10 +4,17 @@
 #ifdef __ARM_FEATURE_FMA
 /* Expected results. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xe206, 0xe204, 0xe202, 0xe200 };
+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xe455, 0xe454, 0xe453, 0xe452,
+					      0xe451, 0xe450, 0xe44f, 0xe44e };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc440ca3d, 0xc4408a3d };
-VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc48a9eb8, 0xc48a7eb8, 0xc48a5eb8, 0xc48a3eb8 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc48a9eb8, 0xc48a7eb8,
+					   0xc48a5eb8, 0xc48a3eb8 };
 #ifdef __aarch64__
-VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0xc08a06e1532b8520, 0xc089fee1532b8520 };
+VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0xc08a06e1532b8520,
+					   0xc089fee1532b8520 };
 #endif
 
 #define TEST_MSG "VFMS/VFMSQ"
@@ -44,6 +51,18 @@ void exec_vfms (void)
   DECL_VARIABLE(VAR, float, 32, 4);
 #endif
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  DECL_VARIABLE(vector1, float, 16, 4);
+  DECL_VARIABLE(vector2, float, 16, 4);
+  DECL_VARIABLE(vector3, float, 16, 4);
+  DECL_VARIABLE(vector_res, float, 16, 4);
+
+  DECL_VARIABLE(vector1, float, 16, 8);
+  DECL_VARIABLE(vector2, float, 16, 8);
+  DECL_VARIABLE(vector3, float, 16, 8);
+  DECL_VARIABLE(vector_res, float, 16, 8);
+#endif
+
   DECL_VFMS_VAR(vector1);
   DECL_VFMS_VAR(vector2);
   DECL_VFMS_VAR(vector3);
@@ -52,6 +71,10 @@ void exec_vfms (void)
   clean_results ();
 
   /* Initialize input "vector1" from "buffer".
*/
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VLOAD(vector1, buffer, , float, f, 16, 4);
+  VLOAD(vector1, buffer, q, float, f, 16, 8);
+#endif
   VLOAD(vector1, buffer, , float, f, 32, 2);
   VLOAD(vector1, buffer, q, float, f, 32, 4);
 #ifdef __aarch64__
@@ -59,13 +82,21 @@ void exec_vfms (void)
 #endif
 
   /* Choose init value arbitrarily. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VDUP(vector2, , float, f, 16, 4, 9.3f);
+  VDUP(vector2, q, float, f, 16, 8, 29.7f);
+#endif
   VDUP(vector2, , float, f, 32, 2, 9.3f);
   VDUP(vector2, q, float, f, 32, 4, 29.7f);
 #ifdef __aarch64__
   VDUP(vector2, q, float, f, 64, 2, 15.8f);
 #endif
-
+
   /* Choose init value arbitrarily. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VDUP(vector3, , float, f, 16, 4, 81.2f);
+  VDUP(vector3, q, float, f, 16, 8, 36.8f);
+#endif
   VDUP(vector3, , float, f, 32, 2, 81.2f);
   VDUP(vector3, q, float, f, 32, 4, 36.8f);
 #ifdef __aarch64__
@@ -73,12 +104,20 @@
 #endif
 
   /* Execute the tests. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VFMS(, float, f, 16, 4);
+  TEST_VFMS(q, float, f, 16, 8);
+#endif
   TEST_VFMS(, float, f, 32, 2);
   TEST_VFMS(q, float, f, 32, 4);
 #ifdef __aarch64__
   TEST_VFMS(q, float, f, 64, 2);
 #endif
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
+#endif
   CHECK_VFMS_RESULTS (TEST_MSG, "");
 }
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmsh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmsh_f16_1.c
new file mode 100644
index 0000000..77021be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmsh_f16_1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+/* Expected results (16-bit hexadecimal representation). */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x42af /* 3.341797 */,
+  0x5043 /* 34.093750 */,
+  0xccd2 /* -19.281250 */,
+  0x3712 /* 0.441895 */,
+  0x3acc /* 0.849609 */,
+  0x4848 /* 8.562500 */,
+  0xcc43 /* -17.046875 */,
+  0xd65c /* -101.750000 */,
+  0x4185 /* 2.759766 */,
+  0xcd39 /* -20.890625 */,
+  0xd45b /* -69.687500 */,
+  0x5241 /* 50.031250 */,
+  0xc675 /* -6.457031 */,
+  0x4d07 /* 20.109375 */,
+  0x7c00 /* inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VFMSH_F16"
+#define INSN_NAME vfmsh_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for ternary scalar operations. */
+#include "ternary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
index 830603d..80f8bec 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
@@ -7,6 +7,10 @@
 #define HAS_FLOAT_VARIANT
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#define HAS_FLOAT16_VARIANT
+#endif
+
 /* Expected results.
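   (Editor's note on NaN handling: for VMAX/VMIN the Advanced SIMD
   rule is that a NaN in either operand yields the default quiet NaN,
   hence the 0x7e00 entries in the special-value tables below.
   VMAXNM/VMINNM further down instead follow IEEE-754 maxNum/minNum
   and return the non-NaN operand, informally:

     vmax per lane:   (NaN, 1.0) -> NaN   // 0x7e00
     vmaxnm per lane: (NaN, 1.0) -> 1.0   // 0x3c00

   which is why the two families carry different expected_nan tables.)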
*/
 VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
 				       0xf4, 0xf5, 0xf6, 0xf7 };
@@ -16,6 +20,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
 					0xf4, 0xf5, 0xf6, 0xf7 };
 VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 };
 VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcbc0, 0xcb80, 0xcb00, 0xca80 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 };
 VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4,
 					0xf4, 0xf5, 0xf6, 0xf7,
@@ -33,10 +40,36 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3,
 					 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
 VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
 					 0xfffffff2, 0xfffffff3 };
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcb40, 0xcb40, 0xcb00, 0xca80,
+					      0xca00, 0xc980, 0xc900, 0xc880 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000,
 					   0xc1600000, 0xc1500000 };
 
 /* Expected results with special FP values. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00,
+						  0x7e00, 0x7e00,
+						  0x7e00, 0x7e00,
+						  0x7e00, 0x7e00 };
+VECT_VAR_DECL(expected_mnan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00,
+						   0x7e00, 0x7e00,
+						   0x7e00, 0x7e00,
+						   0x7e00, 0x7e00 };
+VECT_VAR_DECL(expected_inf, hfloat, 16, 8) [] = { 0x7c00, 0x7c00,
+						  0x7c00, 0x7c00,
+						  0x7c00, 0x7c00,
+						  0x7c00, 0x7c00 };
+VECT_VAR_DECL(expected_minf, hfloat, 16, 8) [] = { 0x3c00, 0x3c00,
+						   0x3c00, 0x3c00,
+						   0x3c00, 0x3c00,
+						   0x3c00, 0x3c00 };
+VECT_VAR_DECL(expected_zero1, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
+						    0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_zero2, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
+						    0x0, 0x0, 0x0, 0x0 };
+#endif
 VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
 					       0x7fc00000, 0x7fc00000 };
 VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxh_f16_1.c
new file mode 100644
index 0000000..182463e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxh_f16_1.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_fp16.h>
+
+/* Input values. */
+#define A 123.4
+#define B -567.8
+#define C -34.8
+#define D 1024
+#define E 663.1
+#define F 169.1
+#define G -4.8
+#define H 77
+
+float16_t input_1[] = { A, B, C, D };
+float16_t input_2[] = { E, F, G, H };
+float16_t expected[] = { E, F, G, D };
+
+#define TEST_MSG "VMAXH_F16"
+#define INSN_NAME vmaxh_f16
+
+#define INPUT_1 input_1
+#define INPUT_2 input_2
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for binary scalar operations.
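   (Editor's note: binary_scalar_op.inc is a shared driver; judging
   from the macro names it supplies a main that expands to roughly
   the following loop -- a sketch, not the template's literal text:

     int i;
     for (i = 0; i < NUM_TESTS; i++)   // NUM_TESTS: illustrative name
       {
	 OUTPUT_TYPE ret = INSN_NAME (INPUT_1[i], INPUT_2[i]);
	 if (memcmp (&ret, &EXPECTED[i], sizeof ret) != 0)
	   abort ();
       }

   so each new test only has to provide inputs, expected bit patterns
   and INSN_NAME, as done above.)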
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnm_1.c new file mode 100644 index 0000000..e546bd5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnm_1.c @@ -0,0 +1,47 @@ +/* This file tests an intrinsic which currently has only an f16 variant and that + is only available when FP16 arithmetic instructions are supported. */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmaxnm +#define TEST_MSG "VMAXNM/VMAXNMQ" + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#define HAS_FLOAT16_VARIANT +#endif + +/* Expected results. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcbc0, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcb40, 0xcb40, 0xcb00, 0xca80, + 0xca00, 0xc980, 0xc900, 0xc880 }; +#endif + +/* Expected results with special FP values. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x3c00, 0x3c00, + 0x3c00, 0x3c00, + 0x3c00, 0x3c00, + 0x3c00, 0x3c00 }; +VECT_VAR_DECL(expected_mnan, hfloat, 16, 8) [] = { 0x3c00, 0x3c00, + 0x3c00, 0x3c00, + 0x3c00, 0x3c00, + 0x3c00, 0x3c00 }; +VECT_VAR_DECL(expected_inf, hfloat, 16, 8) [] = { 0x7c00, 0x7c00, + 0x7c00, 0x7c00, + 0x7c00, 0x7c00, + 0x7c00, 0x7c00 }; +VECT_VAR_DECL(expected_minf, hfloat, 16, 8) [] = { 0x3c00, 0x3c00, + 0x3c00, 0x3c00, + 0x3c00, 0x3c00, + 0x3c00, 0x3c00 }; +VECT_VAR_DECL(expected_zero1, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected_zero2, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif + +#include "binary_op_float.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmh_f16_1.c new file mode 100644 index 0000000..4db4b84 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmh_f16_1.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +#define INFF __builtin_inf () + +/* Expected results (16-bit hexadecimal representation). */ +uint16_t expected[] = +{ + 0x3c00 /* 1.000000 */, + 0x3c00 /* 1.000000 */, + 0x4000 /* 2.000000 */, + 0x5640 /* 100.000000 */, + 0x4f80 /* 30.000000 */, + 0x3666 /* 0.399902 */, + 0x3800 /* 0.500000 */, + 0x3d52 /* 1.330078 */, + 0xc64d /* -6.300781 */, + 0x4d00 /* 20.000000 */, + 0x355d /* 0.335205 */, + 0x409a /* 2.300781 */, + 0x3c00 /* 1.000000 */, + 0x4a91 /* 13.132812 */, + 0x34f6 /* 0.310059 */, + 0x4d00 /* 20.000000 */, + 0x7c00 /* inf */, + 0x7c00 /* inf */ +}; + +#define TEST_MSG "VMAXNMH_F16" +#define INSN_NAME vmaxnmh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. 
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmv_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmv_f16_1.c new file mode 100644 index 0000000..ce9872f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmv_f16_1.c @@ -0,0 +1,131 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A0 FP16_C (34.8) +#define B0 FP16_C (__builtin_nanf ("")) +#define C0 FP16_C (-__builtin_nanf ("")) +#define D0 FP16_C (0.0) + +#define A1 FP16_C (1025.8) +#define B1 FP16_C (13.4) +#define C1 FP16_C (__builtin_nanf ("")) +#define D1 FP16_C (10) +#define E1 FP16_C (-0.0) +#define F1 FP16_C (-__builtin_nanf ("")) +#define G1 FP16_C (0.0) +#define H1 FP16_C (10) + +/* Expected results for vmaxnmv. */ +uint16_t expect = 0x505A /* A0. */; +uint16_t expect_alt = 0x6402 /* A1. */; + +void exec_vmaxnmv_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VMAXNMV (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 4); + VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A0, B0, C0, D0}; + VLOAD (vsrc, buf_src, , float, f, 16, 4); + float16_t vector_res = vmaxnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + + VECT_VAR_DECL (buf_src1, float, 16, 4) [] = {B0, A0, C0, D0}; + VLOAD (vsrc, buf_src1, , float, f, 16, 4); + vector_res = vmaxnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + + VECT_VAR_DECL (buf_src2, float, 16, 4) [] = {B0, C0, A0, D0}; + VLOAD (vsrc, buf_src2, , float, f, 16, 4); + vector_res = vmaxnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + + VECT_VAR_DECL (buf_src3, float, 16, 4) [] = {B0, C0, D0, A0}; + VLOAD (vsrc, buf_src3, , float, f, 16, 4); + vector_res = vmaxnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + +#undef TEST_MSG +#define TEST_MSG "VMAXNMVQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 8); + VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A1, B1, C1, D1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src, q, float, f, 16, 8); + vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src1, float, 16, 8) [] = {B1, A1, C1, D1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src1, q, float, f, 16, 8); + vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src2, float, 16, 8) [] = {B1, C1, A1, D1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src2, q, float, f, 16, 8); + vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src3, float, 16, 8) [] = {B1, C1, D1, A1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src3, q, float, f, 16, 8); + vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src4, float, 16, 8) [] = {B1, C1, D1, E1, A1, F1, G1, H1}; + VLOAD (vsrc, buf_src4, q, float, f, 16, 8); + vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL 
(buf_src5, float, 16, 8) [] = {B1, C1, D1, E1, F1, A1, G1, H1}; + VLOAD (vsrc, buf_src5, q, float, f, 16, 8); + vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src6, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, A1, H1}; + VLOAD (vsrc, buf_src6, q, float, f, 16, 8); + vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src7, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, H1, A1}; + VLOAD (vsrc, buf_src7, q, float, f, 16, 8); + vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); +} + +int +main (void) +{ + exec_vmaxnmv_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxv_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxv_f16_1.c new file mode 100644 index 0000000..39c4897 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxv_f16_1.c @@ -0,0 +1,131 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A0 FP16_C (123.4) +#define B0 FP16_C (-567.8) +#define C0 FP16_C (34.8) +#define D0 FP16_C (0.0) + +#define A1 FP16_C (1025.8) +#define B1 FP16_C (13.4) +#define C1 FP16_C (-567.8) +#define D1 FP16_C (10) +#define E1 FP16_C (-0.0) +#define F1 FP16_C (567.8) +#define G1 FP16_C (0.0) +#define H1 FP16_C (10) + +/* Expected results for vmaxv. */ +uint16_t expect = 0x57B6 /* A0. */; +uint16_t expect_alt = 0x6402 /* A1. 
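   (Editor's note: 0x6402 is binary16 for 1025.8 after rounding --
   with only 11 significand bits the value lands on 1026.0:

     sign 0, exponent 2^10 (biased 25 = 0b11001), fraction 2/1024
     => 0 11001 0000000010 = 0x6402

   The test body below then moves the maximum A1 through every lane
   position in turn, so the across-vector reduction is exercised for
   each source lane.)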
*/; + +void exec_vmaxv_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VMAXV (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 4); + VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A0, B0, C0, D0}; + VLOAD (vsrc, buf_src, , float, f, 16, 4); + float16_t vector_res = vmaxv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + + VECT_VAR_DECL (buf_src1, float, 16, 4) [] = {B0, A0, C0, D0}; + VLOAD (vsrc, buf_src1, , float, f, 16, 4); + vector_res = vmaxv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + + VECT_VAR_DECL (buf_src2, float, 16, 4) [] = {B0, C0, A0, D0}; + VLOAD (vsrc, buf_src2, , float, f, 16, 4); + vector_res = vmaxv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + + VECT_VAR_DECL (buf_src3, float, 16, 4) [] = {B0, C0, D0, A0}; + VLOAD (vsrc, buf_src3, , float, f, 16, 4); + vector_res = vmaxv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + +#undef TEST_MSG +#define TEST_MSG "VMAXVQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 8); + VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A1, B1, C1, D1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src, q, float, f, 16, 8); + vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src1, float, 16, 8) [] = {B1, A1, C1, D1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src1, q, float, f, 16, 8); + vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src2, float, 16, 8) [] = {B1, C1, A1, D1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src2, q, float, f, 16, 8); + vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src3, float, 16, 8) [] = {B1, C1, D1, A1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src3, q, float, f, 16, 8); + vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src4, float, 16, 8) [] = {B1, C1, D1, E1, A1, F1, G1, H1}; + VLOAD (vsrc, buf_src4, q, float, f, 16, 8); + vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src5, float, 16, 8) [] = {B1, C1, D1, E1, F1, A1, G1, H1}; + VLOAD (vsrc, buf_src5, q, float, f, 16, 8); + vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src6, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, A1, H1}; + VLOAD (vsrc, buf_src6, q, float, f, 16, 8); + vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src7, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, H1, A1}; + VLOAD (vsrc, buf_src7, q, float, f, 16, 8); + vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); +} + +int +main (void) +{ + exec_vmaxv_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c index 8ad2703..4ee3c1e 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c @@ -7,6 +7,10 @@ #define 
HAS_FLOAT_VARIANT
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#define HAS_FLOAT16_VARIANT
+#endif
+
 /* Expected results. */
 VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 				       0xf3, 0xf3, 0xf3, 0xf3 };
@@ -16,6 +20,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					0xf3, 0xf3, 0xf3, 0xf3 };
 VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 };
 VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcc00, 0xcbc0, 0xcbc0, 0xcbc0 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 };
 VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					0xf4, 0xf4, 0xf4, 0xf4,
@@ -31,11 +38,41 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					 0xf9, 0xf9, 0xf9, 0xf9 };
 VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2,
 					 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, 0xcb40, 0xcb40,
+					      0xcb40, 0xcb40, 0xcb40, 0xcb40 };
+#endif
 VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
 					 0xfffffff1, 0xfffffff1 };
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
 					   0xc1680000, 0xc1680000 };
 
 /* Expected results with special FP values. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00,
+						  0x7e00, 0x7e00,
+						  0x7e00, 0x7e00,
+						  0x7e00, 0x7e00 };
+VECT_VAR_DECL(expected_mnan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00,
+						   0x7e00, 0x7e00,
+						   0x7e00, 0x7e00,
+						   0x7e00, 0x7e00 };
+VECT_VAR_DECL(expected_inf, hfloat, 16, 8) [] = { 0x3c00, 0x3c00,
+						  0x3c00, 0x3c00,
+						  0x3c00, 0x3c00,
+						  0x3c00, 0x3c00 };
+VECT_VAR_DECL(expected_minf, hfloat, 16, 8) [] = { 0xfc00, 0xfc00,
+						   0xfc00, 0xfc00,
+						   0xfc00, 0xfc00,
+						   0xfc00, 0xfc00 };
+VECT_VAR_DECL(expected_zero1, hfloat, 16, 8) [] = { 0x8000, 0x8000,
+						    0x8000, 0x8000,
+						    0x8000, 0x8000,
+						    0x8000, 0x8000 };
+VECT_VAR_DECL(expected_zero2, hfloat, 16, 8) [] = { 0x8000, 0x8000,
+						    0x8000, 0x8000,
+						    0x8000, 0x8000,
+						    0x8000, 0x8000 };
+#endif
 VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
 					       0x7fc00000, 0x7fc00000 };
 VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminh_f16_1.c
new file mode 100644
index 0000000..d8efbca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminh_f16_1.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_fp16.h>
+
+/* Input values. */
+#define A 123.4
+#define B -567.8
+#define C -34.8
+#define D 1024
+#define E 663.1
+#define F 169.1
+#define G -4.8
+#define H 77
+
+float16_t input_1[] = { A, B, C, D };
+float16_t input_2[] = { E, F, G, H };
+float16_t expected[] = { A, B, C, H };
+
+#define TEST_MSG "VMINH_F16"
+#define INSN_NAME vminh_f16
+
+#define INPUT_1 input_1
+#define INPUT_2 input_2
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for binary scalar operations.
*/
+#include "binary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnm_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnm_1.c
new file mode 100644
index 0000000..975fc56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnm_1.c
@@ -0,0 +1,51 @@
+/* This file tests an intrinsic that currently has only an f16 variant
+   and is available only when FP16 arithmetic instructions are
+   supported.  */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vminnm
+#define TEST_MSG "VMINNM/VMINNMQ"
+
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#define HAS_FLOAT16_VARIANT
+#endif
+
+/* Expected results. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcc00, 0xcbc0, 0xcbc0, 0xcbc0 };
+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, 0xcb40, 0xcb40,
+					      0xcb40, 0xcb40, 0xcb40, 0xcb40 };
+#endif
+
+/* Expected results with special FP values. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x3c00, 0x3c00,
+						  0x3c00, 0x3c00,
+						  0x3c00, 0x3c00,
+						  0x3c00, 0x3c00 };
+VECT_VAR_DECL(expected_mnan, hfloat, 16, 8) [] = { 0x3c00, 0x3c00,
+						   0x3c00, 0x3c00,
+						   0x3c00, 0x3c00,
+						   0x3c00, 0x3c00 };
+VECT_VAR_DECL(expected_inf, hfloat, 16, 8) [] = { 0x3c00, 0x3c00,
+						  0x3c00, 0x3c00,
+						  0x3c00, 0x3c00,
+						  0x3c00, 0x3c00 };
+VECT_VAR_DECL(expected_minf, hfloat, 16, 8) [] = { 0xfc00, 0xfc00,
+						   0xfc00, 0xfc00,
+						   0xfc00, 0xfc00,
+						   0xfc00, 0xfc00 };
+VECT_VAR_DECL(expected_zero1, hfloat, 16, 8) [] = { 0x8000, 0x8000,
+						    0x8000, 0x8000,
+						    0x8000, 0x8000,
+						    0x8000, 0x8000 };
+VECT_VAR_DECL(expected_zero2, hfloat, 16, 8) [] = { 0x8000, 0x8000,
+						    0x8000, 0x8000,
+						    0x8000, 0x8000,
+						    0x8000, 0x8000 };
+#endif
+
+#include "binary_op_float.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmh_f16_1.c
new file mode 100644
index 0000000..f6b0216
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmh_f16_1.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+#define INFF __builtin_inf ()
+
+/* Expected results (16-bit hexadecimal representation). */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0xc454 /* -4.328125 */,
+  0x4233 /* 3.099609 */,
+  0x4d00 /* 20.000000 */,
+  0xa51f /* -0.020004 */,
+  0xc09a /* -2.300781 */,
+  0xc73b /* -7.230469 */,
+  0xc79a /* -7.601562 */,
+  0x34f6 /* 0.310059 */,
+  0xc73b /* -7.230469 */,
+  0x3800 /* 0.500000 */,
+  0xc79a /* -7.601562 */,
+  0x451a /* 5.101562 */,
+  0xc64d /* -6.300781 */,
+  0x3556 /* 0.333496 */,
+  0xfc00 /* -inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VMINNMH_F16"
+#define INSN_NAME vminnmh_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for binary scalar operations.
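   (Editor's note: VMINNM implements IEEE-754 minNum, so a single
   quiet-NaN operand is ignored -- per lane, minnm(NaN, 1.0) = 1.0,
   giving the 0x3c00 entries in expected_nan/expected_mnan above.
   Signed zeros are ordered as well, which is why expected_zero1 and
   expected_zero2 both hold 0x8000:

     minnm (+0.0, -0.0) == minnm (-0.0, +0.0) == -0.0   // 0x8000

   VMAXNM earlier in the patch is the mirror image, using maxNum.)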
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmv_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmv_f16_1.c
new file mode 100644
index 0000000..b7c5101
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmv_f16_1.c
@@ -0,0 +1,131 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
+/* { dg-add-options arm_v8_2a_fp16_neon } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FP16_C(a) ((__fp16) a)
+#define A0 FP16_C (-567.8)
+#define B0 FP16_C (__builtin_nanf (""))
+#define C0 FP16_C (34.8)
+#define D0 FP16_C (-__builtin_nanf (""))
+
+#define A1 FP16_C (-567.8)
+#define B1 FP16_C (1025.8)
+#define C1 FP16_C (-__builtin_nanf (""))
+#define D1 FP16_C (10)
+#define E1 FP16_C (-0.0)
+#define F1 FP16_C (__builtin_nanf (""))
+#define G1 FP16_C (0.0)
+#define H1 FP16_C (10)
+
+/* Expected results for vminnmv.  */
+uint16_t expect = 0xE070 /* A0. */;
+uint16_t expect_alt = 0xE070 /* A1. */;
+
+void exec_vminnmv_f16 (void)
+{
+#undef TEST_MSG
+#define TEST_MSG "VMINNMV (FP16)"
+  clean_results ();
+
+  DECL_VARIABLE(vsrc, float, 16, 4);
+  VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A0, B0, C0, D0};
+  VLOAD (vsrc, buf_src, , float, f, 16, 4);
+  float16_t vector_res = vminnmv_f16 (VECT_VAR (vsrc, float, 16, 4));
+
+  if (* (uint16_t *) &vector_res != expect)
+    abort ();
+
+  VECT_VAR_DECL (buf_src1, float, 16, 4) [] = {B0, A0, C0, D0};
+  VLOAD (vsrc, buf_src1, , float, f, 16, 4);
+  vector_res = vminnmv_f16 (VECT_VAR (vsrc, float, 16, 4));
+
+  if (* (uint16_t *) &vector_res != expect)
+    abort ();
+
+  VECT_VAR_DECL (buf_src2, float, 16, 4) [] = {B0, C0, A0, D0};
+  VLOAD (vsrc, buf_src2, , float, f, 16, 4);
+  vector_res = vminnmv_f16 (VECT_VAR (vsrc, float, 16, 4));
+
+  if (* (uint16_t *) &vector_res != expect)
+    abort ();
+
+  VECT_VAR_DECL (buf_src3, float, 16, 4) [] = {B0, C0, D0, A0};
+  VLOAD (vsrc, buf_src3, , float, f, 16, 4);
+  vector_res = vminnmv_f16 (VECT_VAR (vsrc, float, 16, 4));
+
+  if (* (uint16_t *) &vector_res != expect)
+    abort ();
+
+#undef TEST_MSG
+#define TEST_MSG "VMINNMVQ (FP16)"
+  clean_results ();
+
+  DECL_VARIABLE(vsrc, float, 16, 8);
+  VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A1, B1, C1, D1, E1, F1, G1, H1};
+  VLOAD (vsrc, buf_src, q, float, f, 16, 8);
+  vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8));
+
+  if (* (uint16_t *) &vector_res != expect_alt)
+    abort ();
+
+  VECT_VAR_DECL (buf_src1, float, 16, 8) [] = {B1, A1, C1, D1, E1, F1, G1, H1};
+  VLOAD (vsrc, buf_src1, q, float, f, 16, 8);
+  vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8));
+
+  if (* (uint16_t *) &vector_res != expect_alt)
+    abort ();
+
+  VECT_VAR_DECL (buf_src2, float, 16, 8) [] = {B1, C1, A1, D1, E1, F1, G1, H1};
+  VLOAD (vsrc, buf_src2, q, float, f, 16, 8);
+  vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8));
+
+  if (* (uint16_t *) &vector_res != expect_alt)
+    abort ();
+
+  VECT_VAR_DECL (buf_src3, float, 16, 8) [] = {B1, C1, D1, A1, E1, F1, G1, H1};
+  VLOAD (vsrc, buf_src3, q, float, f, 16, 8);
+  vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8));
+
+  if (* (uint16_t *) &vector_res != expect_alt)
+    abort ();
+
+  VECT_VAR_DECL (buf_src4, float, 16, 8) [] = {B1, C1, D1, E1, A1, F1, G1, H1};
+  VLOAD (vsrc, buf_src4, q, float, f, 16, 8);
+  vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8));
+
+  if (* (uint16_t *) &vector_res != expect_alt)
+    abort ();
+
+  VECT_VAR_DECL (buf_src5, float, 16, 8) [] = {B1, C1, D1, E1, F1, A1, G1, H1};
+  VLOAD (vsrc, buf_src5, q, float, f, 16, 8);
+  vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8));
+
+  if (* (uint16_t *) &vector_res != expect_alt)
+    abort ();
+
+  VECT_VAR_DECL (buf_src6, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, A1, H1};
+  VLOAD (vsrc, buf_src6, q, float, f, 16, 8);
+  vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8));
+
+  if (* (uint16_t *) &vector_res != expect_alt)
+    abort ();
+
+  VECT_VAR_DECL (buf_src7, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, H1, A1};
+  VLOAD (vsrc, buf_src7, q, float, f, 16, 8);
+  vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8));
+
+  if (* (uint16_t *) &vector_res != expect_alt)
+    abort ();
+}
+
+int
+main (void)
+{
+  exec_vminnmv_f16 ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminv_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminv_f16_1.c
new file mode 100644
index 0000000..c454a53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminv_f16_1.c
@@ -0,0 +1,131 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
+/* { dg-add-options arm_v8_2a_fp16_neon } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FP16_C(a) ((__fp16) a)
+#define A0 FP16_C (-567.8)
+#define B0 FP16_C (123.4)
+#define C0 FP16_C (34.8)
+#define D0 FP16_C (0.0)
+
+#define A1 FP16_C (-567.8)
+#define B1 FP16_C (1025.8)
+#define C1 FP16_C (13.4)
+#define D1 FP16_C (10)
+#define E1 FP16_C (-0.0)
+#define F1 FP16_C (567.8)
+#define G1 FP16_C (0.0)
+#define H1 FP16_C (10)
+
+/* Expected results for vminv.  */
+uint16_t expect = 0xE070 /* A0. */;
+uint16_t expect_alt = 0xE070 /* A1.
*/; + +void exec_vminv_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VMINV (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 4); + VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A0, B0, C0, D0}; + VLOAD (vsrc, buf_src, , float, f, 16, 4); + float16_t vector_res = vminv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + + VECT_VAR_DECL (buf_src1, float, 16, 4) [] = {B0, A0, C0, D0}; + VLOAD (vsrc, buf_src1, , float, f, 16, 4); + vector_res = vminv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + + VECT_VAR_DECL (buf_src2, float, 16, 4) [] = {B0, C0, A0, D0}; + VLOAD (vsrc, buf_src2, , float, f, 16, 4); + vector_res = vminv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + + VECT_VAR_DECL (buf_src3, float, 16, 4) [] = {B0, C0, D0, A0}; + VLOAD (vsrc, buf_src3, , float, f, 16, 4); + vector_res = vminv_f16 (VECT_VAR (vsrc, float, 16, 4)); + + if (* (uint16_t *) &vector_res != expect) + abort (); + +#undef TEST_MSG +#define TEST_MSG "VMINVQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 8); + VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A1, B1, C1, D1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src, q, float, f, 16, 8); + vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src1, float, 16, 8) [] = {B1, A1, C1, D1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src1, q, float, f, 16, 8); + vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src2, float, 16, 8) [] = {B1, C1, A1, D1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src2, q, float, f, 16, 8); + vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src3, float, 16, 8) [] = {B1, C1, D1, A1, E1, F1, G1, H1}; + VLOAD (vsrc, buf_src3, q, float, f, 16, 8); + vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src4, float, 16, 8) [] = {B1, C1, D1, E1, A1, F1, G1, H1}; + VLOAD (vsrc, buf_src4, q, float, f, 16, 8); + vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src5, float, 16, 8) [] = {B1, C1, D1, E1, F1, A1, G1, H1}; + VLOAD (vsrc, buf_src5, q, float, f, 16, 8); + vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src6, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, A1, H1}; + VLOAD (vsrc, buf_src6, q, float, f, 16, 8); + vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); + + VECT_VAR_DECL (buf_src7, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, H1, A1}; + VLOAD (vsrc, buf_src7, q, float, f, 16, 8); + vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); + + if (* (uint16_t *) &vector_res != expect_alt) + abort (); +} + +int +main (void) +{ + exec_vminv_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c index 63f0d8d..c5fe31a 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c @@ -13,6 +13,10 @@ 
VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf }; VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff9a0, 0xfffffa06 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc0, 0x84, 0x48, 0xc, 0xd0, 0x94, 0x58, 0x1c }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xe02a, 0xdfcf, + 0xdf4a, 0xdec4 }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc4053333, 0xc3f9c000 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0x90, 0x7, 0x7e, 0xf5, 0x6c, 0xe3, 0x5a, 0xd1, @@ -34,6 +38,10 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0xca, 0x34, 0x9e, 0xc8, 0x62, 0x9c, 0x36, 0x30, 0x9a, 0x64, 0xce, 0x98, 0x32, 0xcc, 0x66 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xe63a, 0xe5d6, 0xe573, 0xe50f, + 0xe4ac, 0xe448, 0xe3c8, 0xe301 }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4c73333, 0xc4bac000, 0xc4ae4ccd, 0xc4a1d999 }; @@ -78,6 +86,17 @@ void FNNAME (INSN_NAME) (void) DECL_VMUL(poly, 8, 16); DECL_VMUL(float, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector1, float, 16, 4); + DECL_VARIABLE(vector1, float, 16, 8); + + DECL_VARIABLE(vector2, float, 16, 4); + DECL_VARIABLE(vector2, float, 16, 8); + + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif + clean_results (); /* Initialize input "vector1" from "buffer". */ @@ -97,6 +116,10 @@ void FNNAME (INSN_NAME) (void) VLOAD(vector1, buffer, q, uint, u, 32, 4); VLOAD(vector1, buffer, q, poly, p, 8, 16); VLOAD(vector1, buffer, q, float, f, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD(vector1, buffer, , float, f, 16, 4); + VLOAD(vector1, buffer, q, float, f, 16, 8); +#endif /* Choose init value arbitrarily. */ VDUP(vector2, , int, s, 8, 8, 0x11); @@ -115,6 +138,10 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, q, uint, u, 32, 4, 0xCC); VDUP(vector2, q, poly, p, 8, 16, 0xAA); VDUP(vector2, q, float, f, 32, 4, 99.6f); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector2, , float, f, 16, 4, 33.3f); + VDUP(vector2, q, float, f, 16, 8, 99.6f); +#endif /* Execute the tests. 
*/ TEST_VMUL(INSN_NAME, , int, s, 8, 8); @@ -133,6 +160,10 @@ void FNNAME (INSN_NAME) (void) TEST_VMUL(INSN_NAME, q, uint, u, 32, 4); TEST_VMUL(INSN_NAME, q, poly, p, 8, 16); TEST_VMUL(INSN_NAME, q, float, f, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VMUL(INSN_NAME, , float, f, 16, 4); + TEST_VMUL(INSN_NAME, q, float, f, 16, 8); +#endif CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); @@ -150,6 +181,10 @@ void FNNAME (INSN_NAME) (void) CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c index 978cd9b..e6cf4d7 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c @@ -7,6 +7,9 @@ VECT_VAR_DECL(expected,int,16,4) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc }; VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 }; VECT_VAR_DECL(expected,uint,16,4) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c }; VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffface0, 0xffffb212 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xddb3, 0xdd58, 0xdcfd, 0xdca1 }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b66666, 0xc3ab0000 }; VECT_VAR_DECL(expected,int,16,8) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc, 0xffd0, 0xffd4, 0xffd8, 0xffdc }; @@ -16,6 +19,10 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c, 0xccd0, 0xd114, 0xd558, 0xd99c }; VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffface0, 0xffffb212, 0xffffb744, 0xffffbc76 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xddb3, 0xdd58, 0xdcfd, 0xdca1, + 0xdc46, 0xdbd6, 0xdb20, 0xda69 }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc3b66666, 0xc3ab0000, 0xc39f9999, 0xc3943333 }; @@ -45,11 +52,20 @@ void exec_vmul_lane (void) DECL_VMUL(vector); DECL_VMUL(vector_res); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif DECL_VARIABLE(vector2, int, 16, 4); DECL_VARIABLE(vector2, int, 32, 2); DECL_VARIABLE(vector2, uint, 16, 4); DECL_VARIABLE(vector2, uint, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector2, float, 16, 4); +#endif DECL_VARIABLE(vector2, float, 32, 2); clean_results (); @@ -59,11 +75,17 @@ void exec_vmul_lane (void) VLOAD(vector, buffer, , int, s, 32, 2); VLOAD(vector, buffer, , uint, u, 16, 4); VLOAD(vector, buffer, , uint, u, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD(vector, buffer, , float, f, 16, 4); +#endif VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, int, s, 16, 8); VLOAD(vector, buffer, q, int, s, 32, 4); VLOAD(vector, buffer, q, uint, u, 16, 8); VLOAD(vector, buffer, q, uint, u, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif VLOAD(vector, buffer, q, float, f, 32, 4); /* Initialize vector2. 
*/ @@ -71,6 +93,9 @@ void exec_vmul_lane (void) VDUP(vector2, , int, s, 32, 2, 0x22); VDUP(vector2, , uint, u, 16, 4, 0x444); VDUP(vector2, , uint, u, 32, 2, 0x532); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector2, , float, f, 16, 4, 22.8f); +#endif VDUP(vector2, , float, f, 32, 2, 22.8f); /* Choose lane arbitrarily. */ @@ -78,22 +103,34 @@ void exec_vmul_lane (void) TEST_VMUL_LANE(, int, s, 32, 2, 2, 1); TEST_VMUL_LANE(, uint, u, 16, 4, 4, 2); TEST_VMUL_LANE(, uint, u, 32, 2, 2, 1); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VMUL_LANE(, float, f, 16, 4, 4, 1); +#endif TEST_VMUL_LANE(, float, f, 32, 2, 2, 1); TEST_VMUL_LANE(q, int, s, 16, 8, 4, 2); TEST_VMUL_LANE(q, int, s, 32, 4, 2, 0); TEST_VMUL_LANE(q, uint, u, 16, 8, 4, 2); TEST_VMUL_LANE(q, uint, u, 32, 4, 2, 1); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VMUL_LANE(q, float, f, 16, 8, 4, 0); +#endif TEST_VMUL_LANE(q, float, f, 32, 4, 2, 0); CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ""); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); +#endif CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane_f16_1.c new file mode 100644 index 0000000..1719d56 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane_f16_1.c @@ -0,0 +1,454 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A FP16_C (13.4) +#define B FP16_C (-56.8) +#define C FP16_C (-34.8) +#define D FP16_C (12) +#define E FP16_C (63.1) +#define F FP16_C (19.1) +#define G FP16_C (-4.8) +#define H FP16_C (77) + +#define I FP16_C (0.7) +#define J FP16_C (-78) +#define K FP16_C (11.23) +#define L FP16_C (98) +#define M FP16_C (87.1) +#define N FP16_C (-8) +#define O FP16_C (-1.1) +#define P FP16_C (-9.7) + +/* Expected results for vmul_lane. */ +VECT_VAR_DECL (expected0_static, hfloat, 16, 4) [] + = { 0x629B /* A * E. */, + 0xEB00 /* B * E. */, + 0xE84A /* C * E. */, + 0x61EA /* D * E. */ }; + +VECT_VAR_DECL (expected1_static, hfloat, 16, 4) [] + = { 0x5BFF /* A * F. */, + 0xE43D /* B * F. */, + 0xE131 /* C * F. */, + 0x5B29 /* D * F. */ }; + +VECT_VAR_DECL (expected2_static, hfloat, 16, 4) [] + = { 0xD405 /* A * G. */, + 0x5C43 /* B * G. */, + 0x5939 /* C * G. */, + 0xD334 /* D * G. */ }; + +VECT_VAR_DECL (expected3_static, hfloat, 16, 4) [] + = { 0x6408 /* A * H. */, + 0xEC46 /* B * H. */, + 0xE93C /* C * H. */, + 0x6338 /* D * H. */ }; + +/* Expected results for vmulq_lane. */ +VECT_VAR_DECL (expected0_static, hfloat, 16, 8) [] + = { 0x629B /* A * E. */, + 0xEB00 /* B * E. */, + 0xE84A /* C * E. */, + 0x61EA /* D * E. */, + 0x5186 /* I * E. 
*/, + 0xECCE /* J * E. */, + 0x6189 /* K * E. */, + 0x6E0A /* L * E. */ }; + +VECT_VAR_DECL (expected1_static, hfloat, 16, 8) [] + = { 0x5BFF /* A * F. */, + 0xE43D /* B * F. */, + 0xE131 /* C * F. */, + 0x5B29 /* D * F. */, + 0x4AAF /* I * F. */, + 0xE5D1 /* J * F. */, + 0x5AB3 /* K * F. */, + 0x674F /* L * F. */ }; + +VECT_VAR_DECL (expected2_static, hfloat, 16, 8) [] + = { 0xD405 /* A * G. */, + 0x5C43 /* B * G. */, + 0x5939 /* C * G. */, + 0xD334 /* D * G. */, + 0xC2B9 /* I * G. */, + 0x5DDA /* J * G. */, + 0xD2BD /* K * G. */, + 0xDF5A /* L * G. */ }; + +VECT_VAR_DECL (expected3_static, hfloat, 16, 8) [] + = { 0x6408 /* A * H. */, + 0xEC46 /* B * H. */, + 0xE93C /* C * H. */, + 0x6338 /* D * H. */, + 0x52BD /* I * H. */, + 0xEDDE /* J * H. */, + 0x62C1 /* K * H. */, + 0x6F5E /* L * H. */ }; + +/* Expected results for vmul_laneq. */ +VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 4) [] + = { 0x629B /* A * E. */, + 0xEB00 /* B * E. */, + 0xE84A /* C * E. */, + 0x61EA /* D * E. */ }; + +VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 4) [] + = { 0x5BFF /* A * F. */, + 0xE43D /* B * F. */, + 0xE131 /* C * F. */, + 0x5B29 /* D * F. */ }; + +VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 4) [] + = { 0xD405 /* A * G. */, + 0x5C43 /* B * G. */, + 0x5939 /* C * G. */, + 0xD334 /* D * G. */ }; + +VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 4) [] + = { 0x6408 /* A * H. */, + 0xEC46 /* B * H. */, + 0xE93C /* C * H. */, + 0x6338 /* D * H. */ }; + +VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 4) [] + = { 0x648F /* A * M. */, + 0xECD5 /* B * M. */, + 0xE9ED /* C * M. */, + 0x6416 /* D * M. */ }; + +VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 4) [] + = { 0xD6B3 /* A * N. */, + 0x5F1A /* B * N. */, + 0x5C5A /* C * N. */, + 0xD600 /* D * N. */ }; + +VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 4) [] + = { 0xCB5E /* A * O. */, + 0x53CF /* B * O. */, + 0x50C9 /* C * O. */, + 0xCA99 /* D * O. */ }; + +VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 4) [] + = { 0xD810 /* A * P. */, + 0x604F /* B * P. */, + 0x5D47 /* C * P. */, + 0xD747 /* D * P. */ }; + +/* Expected results for vmulq_laneq. */ +VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 8) [] + = { 0x629B /* A * E. */, + 0xEB00 /* B * E. */, + 0xE84A /* C * E. */, + 0x61EA /* D * E. */, + 0x5186 /* I * E. */, + 0xECCE /* J * E. */, + 0x6189 /* K * E. */, + 0x6E0A /* L * E. */ }; + +VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 8) [] + = { 0x5BFF /* A * F. */, + 0xE43D /* B * F. */, + 0xE131 /* C * F. */, + 0x5B29 /* D * F. */, + 0x4AAF /* I * F. */, + 0xE5D1 /* J * F. */, + 0x5AB3 /* K * F. */, + 0x674F /* L * F. */ }; + +VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 8) [] + = { 0xD405 /* A * G. */, + 0x5C43 /* B * G. */, + 0x5939 /* C * G. */, + 0xD334 /* D * G. */, + 0xC2B9 /* I * G. */, + 0x5DDA /* J * G. */, + 0xD2BD /* K * G. */, + 0xDF5A /* L * G. */ }; + +VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 8) [] + = { 0x6408 /* A * H. */, + 0xEC46 /* B * H. */, + 0xE93C /* C * H. */, + 0x6338 /* D * H. */, + 0x52BD /* I * H. */, + 0xEDDE /* J * H. */, + 0x62C1 /* K * H. */, + 0x6F5E /* L * H. */ }; + +VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 8) [] + = { 0x648F /* A * M. */, + 0xECD5 /* B * M. */, + 0xE9ED /* C * M. */, + 0x6416 /* D * M. */, + 0x53A0 /* I * M. */, + 0xEEA3 /* J * M. */, + 0x63A4 /* K * M. */, + 0x702B /* L * M. */ }; + +VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 8) [] + = { 0xD6B3 /* A * N. */, + 0x5F1A /* B * N. */, + 0x5C5A /* C * N. */, + 0xD600 /* D * N. 
*/, + 0xC59A /* I * N. */, + 0x60E0 /* J * N. */, + 0xD59D /* K * N. */, + 0xE220 /* L * N. */ }; + +VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 8) [] + = { 0xCB5E /* A * O. */, + 0x53CF /* B * O. */, + 0x50C9 /* C * O. */, + 0xCA99 /* D * O. */, + 0xBA29 /* I * O. */, + 0x555C /* J * O. */, + 0xCA2C /* K * O. */, + 0xD6BC /* L * O. */ }; + +VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 8) [] + = { 0xD810 /* A * P. */, + 0x604F /* B * P. */, + 0x5D47 /* C * P. */, + 0xD747 /* D * P. */, + 0xC6CB /* I * P. */, + 0x61EA /* J * P. */, + 0xD6CF /* K * P. */, + 0xE36E /* L * P. */ }; + +void exec_vmul_lane_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VMUL_LANE (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 4); + DECL_VARIABLE(vsrc_2, float, 16, 4); + VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; + VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H}; + VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); + VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); + DECL_VARIABLE (vector_res, float, 16, 4) + = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 0); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 1); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 2); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 3); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VMULQ_LANE (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 8); + VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; + VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); + DECL_VARIABLE (vector_res, float, 16, 8) + = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 0); + + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 1); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 2); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 3); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_static, ""); + +#undef 
TEST_MSG +#define TEST_MSG "VMUL_LANEQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_2, float, 16, 8); + VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P}; + VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); + VECT_VAR (vector_res, float, 16, 4) + = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 0); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq0_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 1); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq1_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 2); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq2_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 3); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq3_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 4); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq4_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 5); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq5_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 6); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq6_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 7); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq7_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VMULQ_LANEQ (FP16)" + clean_results (); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 0); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq0_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 1); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq1_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 2); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq2_static, ""); + + VECT_VAR (vector_res, 
float, 16, 8) + = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 3); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq3_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 4); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq4_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 5); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq5_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 6); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq6_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 7); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq7_static, ""); +} + +int +main (void) +{ + exec_vmul_lane_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c index be0ee65..16f7dac 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c @@ -7,6 +7,9 @@ VECT_VAR_DECL(expected,int,16,4) [] = { 0xfef0, 0xff01, 0xff12, 0xff23 }; VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 }; VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfcd0, 0xfd03, 0xfd36, 0xfd69 }; VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffbc0, 0xfffffc04 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xdd93, 0xdd3a, 0xdce1, 0xdc87 }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b26666, 0xc3a74000 }; VECT_VAR_DECL(expected,int,16,8) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf, 0xfc04, 0xfc59, 0xfcae, 0xfd03 }; @@ -16,6 +19,10 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf890, 0xf907, 0xf97e, 0xf9f5, 0xfa6c, 0xfae3, 0xfb5a, 0xfbd1 }; VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff780, 0xfffff808, 0xfffff890, 0xfffff918 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xe58e, 0xe535, 0xe4dc, 0xe483, + 0xe42a, 0xe3a3, 0xe2f2, 0xe240 }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4b1cccd, 0xc4a6b000, 0xc49b9333, 0xc4907667 }; @@ -50,6 +57,13 @@ void FNNAME (INSN_NAME) (void) DECL_VMUL(vector); DECL_VMUL(vector_res); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif + clean_results (); /* Initialize vector from pre-initialized values. 
   */
@@ -57,11 +71,17 @@
   VLOAD(vector, buffer, , int, s, 32, 2);
   VLOAD(vector, buffer, , uint, u, 16, 4);
   VLOAD(vector, buffer, , uint, u, 32, 2);
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VLOAD(vector, buffer, , float, f, 16, 4);
+#endif
   VLOAD(vector, buffer, , float, f, 32, 2);
   VLOAD(vector, buffer, q, int, s, 16, 8);
   VLOAD(vector, buffer, q, int, s, 32, 4);
   VLOAD(vector, buffer, q, uint, u, 16, 8);
   VLOAD(vector, buffer, q, uint, u, 32, 4);
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VLOAD(vector, buffer, q, float, f, 16, 8);
+#endif
   VLOAD(vector, buffer, q, float, f, 32, 4);
 
   /* Choose multiplier arbitrarily.  */
@@ -69,22 +89,34 @@
   TEST_VMUL_N(, int, s, 32, 2, 0x22);
   TEST_VMUL_N(, uint, u, 16, 4, 0x33);
   TEST_VMUL_N(, uint, u, 32, 2, 0x44);
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VMUL_N(, float, f, 16, 4, 22.3f);
+#endif
   TEST_VMUL_N(, float, f, 32, 2, 22.3f);
   TEST_VMUL_N(q, int, s, 16, 8, 0x55);
   TEST_VMUL_N(q, int, s, 32, 4, 0x66);
   TEST_VMUL_N(q, uint, u, 16, 8, 0x77);
   TEST_VMUL_N(q, uint, u, 32, 4, 0x88);
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VMUL_N(q, float, f, 16, 8, 88.9f);
+#endif
   TEST_VMUL_N(q, float, f, 32, 4, 88.9f);
 
   CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, "");
   CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
   CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, "");
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
+#endif
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
   CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, "");
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
   CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, "");
   CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
+#endif
   CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, "");
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_f16_1.c
new file mode 100644
index 0000000..09684d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_f16_1.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+#define INFF __builtin_inf ()
+
+/* Expected results (16-bit hexadecimal representation).  */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0xc854 /* -8.656250 */,
+  0x5cd8 /* 310.000000 */,
+  0x60b0 /* 600.000000 */,
+  0xa019 /* -0.008003 */,
+  0xbc9a /* -1.150391 */,
+  0xc8cf /* -9.617188 */,
+  0x51fd /* 47.906250 */,
+  0x4634 /* 6.203125 */,
+  0xc0d9 /* -2.423828 */,
+  0x3c9a /* 1.150391 */,
+  0xc79a /* -7.601562 */,
+  0x5430 /* 67.000000 */,
+  0xbfd0 /* -1.953125 */,
+  0x46ac /* 6.671875 */,
+  0xfc00 /* -inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VMULH_F16"
+#define INSN_NAME vmulh_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for binary scalar operations.  */
+#include "binary_scalar_op.inc"
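These scalar tests only define INSN_NAME, EXPECTED and the input/output types before pulling in binary_scalar_op.inc. The template itself is not shown in this patch, so the following is only a much-reduced sketch of the pattern such a template implements: drive a scalar operation over input arrays and compare the raw binary16 result bits against the expected table. The helper name and the _Float16 typedef are illustrative assumptions (assuming a compiler that provides _Float16, e.g. recent GCC on AArch64), not the template's actual contents.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef _Float16 float16_t;   /* assumption: _Float16 is available */

/* Stand-in for the intrinsic under test; vmulh_f16 maps to a plain
   half-precision multiply.  */
static float16_t
insn_under_test (float16_t a, float16_t b)
{
  return a * b;
}

int
main (void)
{
  float16_t input_1[] = { (float16_t) 2.0f, (float16_t) -4.5f };
  float16_t input_2[] = { (float16_t) 3.0f, (float16_t) 0.25f };
  /* Expected raw binary16 bit patterns: 6.0 and -1.125.  */
  uint16_t expected[] = { 0x4600, 0xbc80 };

  for (unsigned int i = 0; i < 2; i++)
    {
      float16_t ret = insn_under_test (input_1[i], input_2[i]);
      uint16_t bits;
      memcpy (&bits, &ret, sizeof bits);   /* compare exact bits */
      if (bits != expected[i])
        abort ();
    }
  return 0;
}

Comparing bits rather than values is deliberate: it distinguishes +0.0 from -0.0 and catches rounding differences that a floating-point equality test would hide.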
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_lane_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_lane_f16_1.c
new file mode 100644
index 0000000..4cd5c37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_lane_f16_1.c
@@ -0,0 +1,90 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_neon } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FP16_C(a) ((__fp16) a)
+#define A FP16_C (13.4)
+#define B FP16_C (-56.8)
+#define C FP16_C (-34.8)
+#define D FP16_C (12)
+#define E FP16_C (63.1)
+#define F FP16_C (19.1)
+#define G FP16_C (-4.8)
+#define H FP16_C (77)
+
+#define I FP16_C (0.7)
+#define J FP16_C (-78)
+#define K FP16_C (11.23)
+#define L FP16_C (98)
+#define M FP16_C (87.1)
+#define N FP16_C (-8)
+#define O FP16_C (-1.1)
+#define P FP16_C (-9.7)
+
+extern void abort ();
+
+float16_t src1[8] = { A, B, C, D, I, J, K, L };
+VECT_VAR_DECL (src2, float, 16, 4) [] = { E, F, G, H };
+VECT_VAR_DECL (src2, float, 16, 8) [] = { E, F, G, H, M, N, O, P };
+
+/* Expected results for vmulh_lane.  */
+uint16_t expected[4] = { 0x629B /* A * E. */, 0xE43D /* B * F. */,
+                         0x5939 /* C * G. */, 0x6338 /* D * H. */ };
+
+/* Expected results for vmulh_laneq.  */
+uint16_t expected_laneq[8] = { 0x629B /* A * E. */,
+                               0xE43D /* B * F. */,
+                               0x5939 /* C * G. */,
+                               0x6338 /* D * H. */,
+                               0x53A0 /* I * M. */,
+                               0x60E0 /* J * N. */,
+                               0xCA2C /* K * O. */,
+                               0xE36E /* L * P. */ };
+
+void exec_vmulh_lane_f16 (void)
+{
+#define CHECK_LANE(N)\
+  ret = vmulh_lane_f16 (src1[N], VECT_VAR (vsrc2, float, 16, 4), N);\
+  if (*(uint16_t *) &ret != expected[N])\
+    abort ();
+
+  DECL_VARIABLE(vsrc2, float, 16, 4);
+  VLOAD (vsrc2, src2, , float, f, 16, 4);
+  float16_t ret;
+
+  CHECK_LANE(0)
+  CHECK_LANE(1)
+  CHECK_LANE(2)
+  CHECK_LANE(3)
+
+#undef CHECK_LANE
+#define CHECK_LANE(N)\
+  ret = vmulh_laneq_f16 (src1[N], VECT_VAR (vsrc2, float, 16, 8), N);\
+  if (*(uint16_t *) &ret != expected_laneq[N])\
+    abort ();
+
+  DECL_VARIABLE(vsrc2, float, 16, 8);
+  VLOAD (vsrc2, src2, q, float, f, 16, 8);
+
+  CHECK_LANE(0)
+  CHECK_LANE(1)
+  CHECK_LANE(2)
+  CHECK_LANE(3)
+  CHECK_LANE(4)
+  CHECK_LANE(5)
+  CHECK_LANE(6)
+  CHECK_LANE(7)
+}
+
+int
+main (void)
+{
+  exec_vmulh_lane_f16 ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_f16_1.c
new file mode 100644
index 0000000..51bbead
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_f16_1.c
@@ -0,0 +1,84 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
+/* { dg-add-options arm_v8_2a_fp16_neon } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FP16_C(a) ((__fp16) a)
+#define A FP16_C (13.4)
+#define B FP16_C (__builtin_inff ())
+#define C FP16_C (-34.8)
+#define D FP16_C (-__builtin_inff ())
+#define E FP16_C (63.1)
+#define F FP16_C (0.0)
+#define G FP16_C (-4.8)
+#define H FP16_C (0.0)
+
+#define I FP16_C (0.7)
+#define J FP16_C (-__builtin_inff ())
+#define K FP16_C (11.23)
+#define L FP16_C (98)
+#define M FP16_C (87.1)
+#define N FP16_C (-0.0)
+#define O FP16_C (-1.1)
+#define P FP16_C (7)
+
+/* Expected results for vmulx.  */
+VECT_VAR_DECL (expected_static, hfloat, 16, 4) []
+  = { 0x629B /* A * E.
*/, 0x4000 /* FP16_C (2.0f). */, + 0x5939 /* C * G. */, 0xC000 /* FP16_C (-2.0f). */ }; + +VECT_VAR_DECL (expected_static, hfloat, 16, 8) [] + = { 0x629B /* A * E. */, 0x4000 /* FP16_C (2.0f). */, + 0x5939 /* C * G. */, 0xC000 /* FP16_C (-2.0f). */, + 0x53A0 /* I * M. */, 0x4000 /* FP16_C (2.0f). */, + 0xCA2C /* K * O. */, 0x615C /* L * P. */ }; + +void exec_vmulx_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VMULX (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 4); + DECL_VARIABLE(vsrc_2, float, 16, 4); + VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; + VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H}; + VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); + VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); + DECL_VARIABLE (vector_res, float, 16, 4) + = vmulx_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4)); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VMULXQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 8); + DECL_VARIABLE(vsrc_2, float, 16, 8); + VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; + VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P}; + VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); + VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); + DECL_VARIABLE (vector_res, float, 16, 8) + = vmulxq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8)); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_static, ""); +} + +int +main (void) +{ + exec_vmulx_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_lane_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_lane_f16_1.c new file mode 100644 index 0000000..f90a36d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_lane_f16_1.c @@ -0,0 +1,452 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A FP16_C (13.4) +#define B FP16_C (__builtin_inff ()) +#define C FP16_C (-34.8) +#define D FP16_C (-__builtin_inff ()) +#define E FP16_C (-0.0) +#define F FP16_C (19.1) +#define G FP16_C (-4.8) +#define H FP16_C (0.0) + +#define I FP16_C (0.7) +#define J FP16_C (-78) +#define K FP16_C (-__builtin_inff ()) +#define L FP16_C (98) +#define M FP16_C (87.1) +#define N FP16_C (-8) +#define O FP16_C (-1.1) +#define P FP16_C (-0.0) + +/* Expected results for vmulx_lane. */ +VECT_VAR_DECL (expected0_static, hfloat, 16, 4) [] + = { 0x8000 /* A * E. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* C * E. */, + 0x4000 /* FP16_C (2.0f). */ }; + +VECT_VAR_DECL (expected1_static, hfloat, 16, 4) [] + = { 0x5BFF /* A * F. */, + 0x7C00 /* B * F. */, + 0xE131 /* C * F. */, + 0xFC00 /* D * F. */ }; + +VECT_VAR_DECL (expected2_static, hfloat, 16, 4) [] + = { 0xD405 /* A * G. */, + 0xFC00 /* B * G. */, + 0x5939 /* C * G. */, + 0x7C00 /* D * G. */ }; + +VECT_VAR_DECL (expected3_static, hfloat, 16, 4) [] + = { 0x0000 /* A * H. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* C * H. */, + 0xC000 /* FP16_C (-2.0f). */ }; + +/* Expected results for vmulxq_lane. 
*/ +VECT_VAR_DECL (expected0_static, hfloat, 16, 8) [] + = { 0x8000 /* A * E. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* C * E. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* I * E. */, + 0x0000 /* J * E. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* L * E. */ }; + +VECT_VAR_DECL (expected1_static, hfloat, 16, 8) [] + = { 0x5BFF /* A * F. */, + 0x7C00 /* B * F. */, + 0xE131 /* C * F. */, + 0xFC00 /* D * F. */, + 0x4AAF /* I * F. */, + 0xE5D1 /* J * F. */, + 0xFC00 /* K * F. */, + 0x674F /* L * F. */ }; + +VECT_VAR_DECL (expected2_static, hfloat, 16, 8) [] + = { 0xD405 /* A * G. */, + 0xFC00 /* B * G. */, + 0x5939 /* C * G. */, + 0x7C00 /* D * G. */, + 0xC2B9 /* I * G. */, + 0x5DDA /* J * G. */, + 0x7C00 /* K * G. */, + 0xDF5A /* L * G. */ }; + +VECT_VAR_DECL (expected3_static, hfloat, 16, 8) [] + = { 0x0000 /* A * H. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* C * H. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* I * H. */, + 0x8000 /* J * H. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* L * H. */}; + +/* Expected results for vmulx_laneq. */ +VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 4) [] + = { 0x8000 /* A * E. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* C * E. */, + 0x4000 /* FP16_C (2.0f). */ }; + +VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 4) [] + = { 0x5BFF /* A * F. */, + 0x7C00 /* B * F. */, + 0xE131 /* C * F. */, + 0xFC00 /* D * F. */ }; + +VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 4) [] + = { 0xD405 /* A * G. */, + 0xFC00 /* B * G. */, + 0x5939 /* C * G. */, + 0x7C00 /* D * G. */ }; + +VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 4) [] + = { 0x0000 /* A * H. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* C * H. */, + 0xC000 /* FP16_C (-2.0f). */ }; + +VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 4) [] + = { 0x648F /* A * M. */, + 0x7C00 /* B * M. */, + 0xE9ED /* C * M. */, + 0xFC00 /* D * M. */ }; + +VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 4) [] + = { 0xD6B3 /* A * N. */, + 0xFC00 /* B * N. */, + 0x5C5A /* C * N. */, + 0x7C00 /* D * N. */ }; + +VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 4) [] + = { 0xCB5E /* A * O. */, + 0xFC00 /* B * O. */, + 0x50C9 /* C * O. */, + 0x7C00 /* D * O. */ }; + +VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 4) [] + = { 0x8000 /* A * P. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* C * P. */, + 0x4000 /* FP16_C (2.0f). */ }; + +VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 8) [] + = { 0x8000 /* A * E. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* C * E. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* I * E. */, + 0x0000 /* J * E. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* L * E. */ }; + +VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 8) [] + = { 0x5BFF /* A * F. */, + 0x7C00 /* B * F. */, + 0xE131 /* C * F. */, + 0xFC00 /* D * F. */, + 0x4AAF /* I * F. */, + 0xE5D1 /* J * F. */, + 0xFC00 /* K * F. */, + 0x674F /* L * F. */ }; + +VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 8) [] + = { 0xD405 /* A * G. */, + 0xFC00 /* B * G. */, + 0x5939 /* C * G. */, + 0x7C00 /* D * G. */, + 0xC2B9 /* I * G. */, + 0x5DDA /* J * G. */, + 0x7C00 /* K * G. */, + 0xDF5A /* L * G. */ }; + +VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 8) [] + = { 0x0000 /* A * H. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* C * H. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* I * H. */, + 0x8000 /* J * H. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* L * H. */ }; + +VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 8) [] + = { 0x648F /* A * M. */, + 0x7C00 /* B * M. 
*/, + 0xE9ED /* C * M. */, + 0xFC00 /* D * M. */, + 0x53A0 /* I * M. */, + 0xEEA3 /* J * M. */, + 0xFC00 /* K * M. */, + 0x702B /* L * M. */ }; + +VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 8) [] + = { 0xD6B3 /* A * N. */, + 0xFC00 /* B * N. */, + 0x5C5A /* C * N. */, + 0x7C00 /* D * N. */, + 0xC59A /* I * N. */, + 0x60E0 /* J * N. */, + 0x7C00 /* K * N. */, + 0xE220 /* L * N. */ }; + +VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 8) [] + = { 0xCB5E /* A * O. */, + 0xFC00 /* B * O. */, + 0x50C9 /* C * O. */, + 0x7C00 /* D * O. */, + 0xBA29 /* I * O. */, + 0x555C /* J * O. */, + 0x7C00 /* K * O. */, + 0xD6BC /* L * O. */ }; + +VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 8) [] + = { 0x8000 /* A * P. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* C * P. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* I * P. */, + 0x0000 /* J * P. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* L * P. */ }; + +void exec_vmulx_lane_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VMULX_LANE (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 4); + DECL_VARIABLE(vsrc_2, float, 16, 4); + VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; + VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H}; + VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); + VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); + DECL_VARIABLE (vector_res, float, 16, 4) + = vmulx_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 0); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 1); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 2); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 4), 3); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VMULXQ_LANE (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 16, 8); + VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; + VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); + DECL_VARIABLE (vector_res, float, 16, 8) + = vmulxq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 0); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 1); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 2); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, 
PRIx16, expected2_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 4), 3); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VMULX_LANEQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc_2, float, 16, 8); + VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P}; + VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); + VECT_VAR (vector_res, float, 16, 4) + = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 0); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq0_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 1); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq1_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 2); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq2_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 3); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq3_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 4); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq4_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 5); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq5_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 6); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq6_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), + VECT_VAR (vsrc_2, float, 16, 8), 7); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq7_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VMULXQ_LANEQ (FP16)" + clean_results (); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 0); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq0_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 1); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, 
expected_laneq1_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 2); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq2_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 3); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq3_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 4); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq4_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 5); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq5_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 6); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq6_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), + VECT_VAR (vsrc_2, float, 16, 8), 7); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq7_static, ""); +} + +int +main (void) +{ + exec_vmulx_lane_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_n_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_n_f16_1.c new file mode 100644 index 0000000..140647b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_n_f16_1.c @@ -0,0 +1,177 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A FP16_C (13.4) +#define B FP16_C (__builtin_inff ()) +#define C FP16_C (-34.8) +#define D FP16_C (-__builtin_inff ()) +#define E FP16_C (-0.0) +#define F FP16_C (19.1) +#define G FP16_C (-4.8) +#define H FP16_C (0.0) + +float16_t elemE = E; +float16_t elemF = F; +float16_t elemG = G; +float16_t elemH = H; + +#define I FP16_C (0.7) +#define J FP16_C (-78) +#define K FP16_C (11.23) +#define L FP16_C (98) +#define M FP16_C (87.1) +#define N FP16_C (-8) +#define O FP16_C (-1.1) +#define P FP16_C (-9.7) + +/* Expected results for vmulx_n. */ +VECT_VAR_DECL (expected0_static, hfloat, 16, 4) [] + = { 0x8000 /* A * E. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* C * E. */, + 0x4000 /* FP16_C (2.0f). */ }; + +VECT_VAR_DECL (expected1_static, hfloat, 16, 4) [] + = { 0x5BFF /* A * F. */, + 0x7C00 /* B * F. */, + 0xE131 /* C * F. */, + 0xFC00 /* D * F. */ }; + +VECT_VAR_DECL (expected2_static, hfloat, 16, 4) [] + = { 0xD405 /* A * G. */, + 0xFC00 /* B * G. */, + 0x5939 /* C * G. */, + 0x7C00 /* D * G. */ }; + +VECT_VAR_DECL (expected3_static, hfloat, 16, 4) [] + = { 0x0000 /* A * H. 
*/, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* C * H. */, + 0xC000 /* FP16_C (-2.0f). */ }; + +VECT_VAR_DECL (expected0_static, hfloat, 16, 8) [] + = { 0x8000 /* A * E. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* C * E. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* I * E. */, + 0x0000 /* J * E. */, + 0x8000 /* K * E. */, + 0x8000 /* L * E. */ }; + +VECT_VAR_DECL (expected1_static, hfloat, 16, 8) [] + = { 0x5BFF /* A * F. */, + 0x7C00 /* B * F. */, + 0xE131 /* C * F. */, + 0xFC00 /* D * F. */, + 0x4AAF /* I * F. */, + 0xE5D1 /* J * F. */, + 0x5AB3 /* K * F. */, + 0x674F /* L * F. */ }; + +VECT_VAR_DECL (expected2_static, hfloat, 16, 8) [] + = { 0xD405 /* A * G. */, + 0xFC00 /* B * G. */, + 0x5939 /* C * G. */, + 0x7C00 /* D * G. */, + 0xC2B9 /* I * G. */, + 0x5DDA /* J * G. */, + 0xD2BD /* K * G. */, + 0xDF5A /* L * G. */ }; + +VECT_VAR_DECL (expected3_static, hfloat, 16, 8) [] + = { 0x0000 /* A * H. */, + 0x4000 /* FP16_C (2.0f). */, + 0x8000 /* C * H. */, + 0xC000 /* FP16_C (-2.0f). */, + 0x0000 /* I * H. */, + 0x8000 /* J * H. */, + 0x0000 /* K * H. */, + 0x0000 /* L * H. */ }; + +void exec_vmulx_n_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VMULX_N (FP16)" + clean_results (); + + DECL_VARIABLE (vsrc_1, float, 16, 4); + VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; + VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); + DECL_VARIABLE (vector_res, float, 16, 4) + = vmulx_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), elemE); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), elemF); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), elemG); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_static, ""); + + VECT_VAR (vector_res, float, 16, 4) + = vmulx_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), elemH); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VMULXQ_N (FP16)" + clean_results (); + + DECL_VARIABLE (vsrc_1, float, 16, 8); + VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; + VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); + DECL_VARIABLE (vector_res, float, 16, 8) + = vmulxq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), elemE); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), elemF); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), elemG); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_static, ""); + + VECT_VAR (vector_res, float, 16, 8) + = vmulxq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), elemH); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 
8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_static, "");
+}
+
+int
+main (void)
+{
+  exec_vmulx_n_f16 ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_f16_1.c
new file mode 100644
index 0000000..66c744c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_f16_1.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_fp16.h>
+
+/* Input values.  */
+#define A 13.4
+#define B __builtin_inff ()
+#define C -34.8
+#define D -__builtin_inff ()
+#define E 63.1
+#define F 0.0
+#define G -4.8
+#define H 0.0
+
+#define I 0.7
+#define J -__builtin_inff ()
+#define K 11.23
+#define L 98
+#define M 87.1
+#define N -0.0
+#define O -1.1
+#define P 7
+
+float16_t input_1[] = { A, B, C, D, I, J, K, L };
+float16_t input_2[] = { E, F, G, H, M, N, O, P };
+uint16_t expected[] = { 0x629B /* A * E. */,
+                        0x4000 /* FP16_C (2.0f). */,
+                        0x5939 /* C * G. */,
+                        0xC000 /* FP16_C (-2.0f). */,
+                        0x53A0 /* I * M. */,
+                        0x4000 /* FP16_C (2.0f). */,
+                        0xCA2C /* K * O. */,
+                        0x615C /* L * P. */ };
+
+#define TEST_MSG "VMULXH_F16"
+#define INSN_NAME vmulxh_f16
+
+#define INPUT_1 input_1
+#define INPUT_2 input_2
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for binary scalar operations.  */
+#include "binary_scalar_op.inc"
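The 0x4000 and 0xC000 (that is, +2.0 and -2.0) entries in the vmulx expected tables come from the FMULX special case: where an ordinary multiply of ±0 by ±infinity would produce a NaN, FMULX returns ±2.0, with the sign given by the exclusive-or of the operand signs. The following standalone reference model in double precision is my illustration of that rule, not code from the patch.

#include <math.h>
#include <stdio.h>

/* Reference model of FMULX: as FMUL, except (+/-0 x +/-Inf) yields
   +/-2.0 instead of NaN, the sign being the XOR of the input signs.  */
static double
fmulx_model (double a, double b)
{
  if ((a == 0.0 && isinf (b)) || (isinf (a) && b == 0.0))
    return ((!!signbit (a)) ^ (!!signbit (b))) ? -2.0 : 2.0;
  return a * b;
}

int
main (void)
{
  /* B * F in vmulx_f16_1.c above: +Inf * 0.0 gives +2.0 (0x4000).  */
  printf ("%f\n", fmulx_model (INFINITY, 0.0));
  /* D * F: -Inf * 0.0 gives -2.0 (0xC000).  */
  printf ("%f\n", fmulx_model (-INFINITY, 0.0));
  return 0;
}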
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_lane_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_lane_f16_1.c
new file mode 100644
index 0000000..90a5be8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_lane_f16_1.c
@@ -0,0 +1,91 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_neon } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FP16_C(a) ((__fp16) a)
+#define A FP16_C (13.4)
+#define B FP16_C (__builtin_inff ())
+#define C FP16_C (-34.8)
+#define D FP16_C (-__builtin_inff ())
+#define E FP16_C (63.1)
+#define F FP16_C (0.0)
+#define G FP16_C (-4.8)
+#define H FP16_C (0.0)
+
+#define I FP16_C (0.7)
+#define J FP16_C (-__builtin_inff ())
+#define K FP16_C (11.23)
+#define L FP16_C (98)
+#define M FP16_C (87.1)
+#define N FP16_C (-0.0)
+#define O FP16_C (-1.1)
+#define P FP16_C (7)
+
+extern void abort ();
+
+float16_t src1[8] = { A, B, C, D, I, J, K, L };
+VECT_VAR_DECL (src2, float, 16, 4) [] = { E, F, G, H };
+VECT_VAR_DECL (src2, float, 16, 8) [] = { E, F, G, H, M, N, O, P };
+
+/* Expected results for vmulxh_lane. */
+uint16_t expected[4] = { 0x629B /* A * E. */,
+                         0x4000 /* FP16_C (2.0f). */,
+                         0x5939 /* C * G. */,
+                         0xC000 /* FP16_C (-2.0f). */ };
+
+/* Expected results for vmulxh_laneq. */
+uint16_t expected_laneq[8] = { 0x629B /* A * E. */,
+                               0x4000 /* FP16_C (2.0f). */,
+                               0x5939 /* C * G. */,
+                               0xC000 /* FP16_C (-2.0f). */,
+                               0x53A0 /* I * M. */,
+                               0x4000 /* FP16_C (2.0f). */,
+                               0xCA2C /* K * O. */,
+                               0x615C /* L * P. */ };
+
+void exec_vmulxh_lane_f16 (void)
+{
+#define CHECK_LANE(N)\
+  ret = vmulxh_lane_f16 (src1[N], VECT_VAR (vsrc2, float, 16, 4), N);\
+  if (*(uint16_t *) &ret != expected[N])\
+    abort ();
+
+  DECL_VARIABLE(vsrc2, float, 16, 4);
+  VLOAD (vsrc2, src2, , float, f, 16, 4);
+  float16_t ret;
+
+  CHECK_LANE(0)
+  CHECK_LANE(1)
+  CHECK_LANE(2)
+  CHECK_LANE(3)
+
+#undef CHECK_LANE
+#define CHECK_LANE(N)\
+  ret = vmulxh_laneq_f16 (src1[N], VECT_VAR (vsrc2, float, 16, 8), N);\
+  if (*(uint16_t *) &ret != expected_laneq[N])\
+    abort ();
+
+  DECL_VARIABLE(vsrc2, float, 16, 8);
+  VLOAD (vsrc2, src2, q, float, f, 16, 8);
+
+  CHECK_LANE(0)
+  CHECK_LANE(1)
+  CHECK_LANE(2)
+  CHECK_LANE(3)
+  CHECK_LANE(4)
+  CHECK_LANE(5)
+  CHECK_LANE(6)
+  CHECK_LANE(7)
+}
+
+int
+main (void)
+{
+  exec_vmulxh_lane_f16 ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c
index 78f17ed..7bd9d55 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c
@@ -21,24 +21,53 @@ VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd };
 
 /* Expected results for float32 variants. Needs to be separated since
    the generic test function does not test floating-point versions. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected_float16, hfloat, 16, 4) [] = { 0xc09a, 0xc09a,
+                                                      0xc09a, 0xc09a };
+VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0xc2cd, 0xc2cd,
+                                                      0xc2cd, 0xc2cd,
+                                                      0xc2cd, 0xc2cd,
+                                                      0xc2cd, 0xc2cd };
+#endif
 VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0xc0133333, 0xc0133333 };
 VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0xc059999a, 0xc059999a,
                                                    0xc059999a, 0xc059999a };
 
 void exec_vneg_f32(void)
 {
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  DECL_VARIABLE(vector, float, 16, 4);
+  DECL_VARIABLE(vector, float, 16, 8);
+#endif
   DECL_VARIABLE(vector, float, 32, 2);
   DECL_VARIABLE(vector, float, 32, 4);
+
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  DECL_VARIABLE(vector_res, float, 16, 4);
+  DECL_VARIABLE(vector_res, float, 16, 8);
+#endif
   DECL_VARIABLE(vector_res, float, 32, 2);
   DECL_VARIABLE(vector_res, float, 32, 4);
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VDUP(vector, , float, f, 16, 4, 2.3f);
+  VDUP(vector, q, float, f, 16, 8, 3.4f);
+#endif
   VDUP(vector, , float, f, 32, 2, 2.3f);
   VDUP(vector, q, float, f, 32, 4, 3.4f);
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_UNARY_OP(INSN_NAME, , float, f, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, q, float, f, 16, 8);
+#endif
   TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2);
   TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4);
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_float16, "");
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, "");
+#endif
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, "");
   CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, "");
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vnegh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vnegh_f16_1.c
new file mode 100644
index 0000000..421d827
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vnegh_f16_1.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+uint16_t expected[] =
+{
+  0x8000 /* -0.000000 */,
+  0x0000 /* 
0.000000 */, + 0xc000 /* -2.000000 */, + 0xc233 /* -3.099609 */, + 0xcd00 /* -20.000000 */, + 0xb666 /* -0.399902 */, + 0x409a /* 2.300781 */, + 0xbd52 /* -1.330078 */, + 0x479a /* 7.601562 */, + 0xb4f6 /* -0.310059 */, + 0xb55d /* -0.335205 */, + 0xb800 /* -0.500000 */, + 0xbc00 /* -1.000000 */, + 0xca91 /* -13.132812 */, + 0x464d /* 6.300781 */, + 0xcd00 /* -20.000000 */, + 0xfc00 /* -inf */, + 0x7c00 /* inf */ +}; + +#define TEST_MSG "VNEGH_F16" +#define INSN_NAME vnegh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc index c1b7235..a9b0c62 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc @@ -21,6 +21,9 @@ void FNNAME (INSN_NAME) (void) DECL_VARIABLE(vector, uint, 8, 8); DECL_VARIABLE(vector, uint, 16, 4); DECL_VARIABLE(vector, uint, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); +#endif DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector_res, int, 8, 8); @@ -29,6 +32,9 @@ void FNNAME (INSN_NAME) (void) DECL_VARIABLE(vector_res, uint, 8, 8); DECL_VARIABLE(vector_res, uint, 16, 4); DECL_VARIABLE(vector_res, uint, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector_res, float, 16, 4); +#endif DECL_VARIABLE(vector_res, float, 32, 2); clean_results (); @@ -40,6 +46,9 @@ void FNNAME (INSN_NAME) (void) VLOAD(vector, buffer, , uint, u, 8, 8); VLOAD(vector, buffer, , uint, u, 16, 4); VLOAD(vector, buffer, , uint, u, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VLOAD(vector, buffer, , float, f, 16, 4); +#endif VLOAD(vector, buffer, , float, f, 32, 2); /* Apply a binary operator named INSN_NAME. 
*/
@@ -49,6 +58,9 @@ void FNNAME (INSN_NAME) (void)
   TEST_VPXXX(INSN_NAME, uint, u, 8, 8);
   TEST_VPXXX(INSN_NAME, uint, u, 16, 4);
   TEST_VPXXX(INSN_NAME, uint, u, 32, 2);
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VPXXX(INSN_NAME, float, f, 16, 4);
+#endif
   TEST_VPXXX(INSN_NAME, float, f, 32, 2);
 
   CHECK(TEST_MSG, int, 8, 8, PRIx32, expected, "");
@@ -57,6 +69,9 @@ void FNNAME (INSN_NAME) (void)
   CHECK(TEST_MSG, uint, 8, 8, PRIx32, expected, "");
   CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, "");
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
+#endif
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c
index 5ddfd3d..f1bbe09 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c
@@ -14,6 +14,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed,
                                         0xe1, 0xe5, 0xe9, 0xed };
 VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 };
 VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe1, 0xffffffe1 };
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcfc0, 0xcec0, 0xcfc0, 0xcec0 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1f80000, 0xc1f80000 };
 
 #include "vpXXX.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c
index f27a9a9..c962114 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c
@@ -15,6 +15,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7,
                                         0xf1, 0xf3, 0xf5, 0xf7 };
 VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0xfff3 };
 VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcb80, 0xca80, 0xcb80, 0xca80 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
 
 #include "vpXXX.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmin.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmin.c
index a7cb696..7c75cf5 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmin.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmin.c
@@ -15,6 +15,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6,
                                         0xf0, 0xf2, 0xf4, 0xf6 };
 VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0xfff0, 0xfff2 };
 VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, 0xcc00, 0xcb00 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
 
 #include "vpXXX.inc"
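In the vpminmaxnm_f16_1.c test added next, two of the inputs (B and H) are quiet NaNs, yet every expected minimum and maximum is an ordinary number: vpminnm/vpmaxnm implement the IEEE 754-2008 minNum/maxNum rule, which returns the numeric operand when exactly one input is a quiet NaN. A reference sketch of the pairwise reduction in plain C99 (vpminnm_ref is an illustrative name, not a testsuite helper; C99 fminf applies the same quiet-NaN rule):

#include <math.h>

/* Pairwise minnm: lane i of the low half of the result is the
   minNum of lanes 2i and 2i+1 of the first source; the high half
   comes likewise from the second source.  fminf already ignores a
   single quiet NaN, matching the FMINNM instruction.  */
static void
vpminnm_ref (const float *a, const float *b, float *res, int lanes)
{
  for (int i = 0; i < lanes / 2; i++)
    {
      res[i] = fminf (a[2 * i], a[2 * i + 1]);
      res[lanes / 2 + i] = fminf (b[2 * i], b[2 * i + 1]);
    }
}

With src1 = {A, B, C, D} this gives { minNum(A, NaN), minNum(C, D) } = { A, C } for the low half, which is why 0x57B6 (A) and 0xD05A (C) open expected_min_static.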
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpminmaxnm_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpminmaxnm_f16_1.c
new file mode 100644
index 0000000..c8df677
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpminmaxnm_f16_1.c
@@ -0,0 +1,114 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
+/* { dg-add-options arm_v8_2a_fp16_neon } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FP16_C(a) ((__fp16) a)
+#define A FP16_C (123.4)
+#define B FP16_C (__builtin_nanf ("")) /* NaN */
+#define C FP16_C (-34.8)
+#define D FP16_C (1024)
+#define E FP16_C (663.1)
+#define F FP16_C (169.1)
+#define G FP16_C (-4.8)
+#define H FP16_C (-__builtin_nanf ("")) /* NaN */
+
+#define I FP16_C (0.7)
+#define J FP16_C (-78)
+#define K FP16_C (101.23)
+#define L FP16_C (-1098)
+#define M FP16_C (870.1)
+#define N FP16_C (-8781)
+#define O FP16_C (__builtin_inff ()) /* +Inf */
+#define P FP16_C (-__builtin_inff ()) /* -Inf */
+
+
+/* Expected results for vpminnm. */
+VECT_VAR_DECL (expected_min_static, hfloat, 16, 4) []
+  = { 0x57B6 /* A. */, 0xD05A /* C. */, 0x5949 /* F. */, 0xC4CD /* G. */ };
+
+VECT_VAR_DECL (expected_min_static, hfloat, 16, 8) []
+  = { 0x57B6 /* A. */, 0xD05A /* C. */, 0xD4E0 /* J. */, 0xE44A /* L. */,
+      0x5949 /* F. */, 0xC4CD /* G. */, 0xF04A /* N. */, 0xFC00 /* P. */ };
+
+/* Expected results for vpmaxnm. */
+VECT_VAR_DECL (expected_max_static, hfloat, 16, 4) []
+  = { 0x57B6 /* A. */, 0x6400 /* D. */, 0x612E /* E. */, 0xC4CD /* G. */ };
+
+VECT_VAR_DECL (expected_max_static, hfloat, 16, 8) []
+  = { 0x57B6 /* A. */, 0x6400 /* D. */, 0x399A /* I. */, 0x5654 /* K. */,
+      0x612E /* E. */, 0xC4CD /* G. */, 0x62CC /* M. */, 0x7C00 /* O. */ };
+
+void exec_vpminmaxnm_f16 (void)
+{
+#undef TEST_MSG
+#define TEST_MSG "VPMINNM (FP16)"
+  clean_results ();
+
+  DECL_VARIABLE(vsrc_1, float, 16, 4);
+  DECL_VARIABLE(vsrc_2, float, 16, 4);
+  VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D};
+  VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H};
+  VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4);
+  VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4);
+  DECL_VARIABLE (vector_res, float, 16, 4)
+    = vpminnm_f16 (VECT_VAR (vsrc_1, float, 16, 4),
+                   VECT_VAR (vsrc_2, float, 16, 4));
+  vst1_f16 (VECT_VAR (result, float, 16, 4),
+            VECT_VAR (vector_res, float, 16, 4));
+
+  CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_min_static, "");
+
+#undef TEST_MSG
+#define TEST_MSG "VPMINNMQ (FP16)"
+  clean_results ();
+
+  DECL_VARIABLE(vsrc_1, float, 16, 8);
+  DECL_VARIABLE(vsrc_2, float, 16, 8);
+  VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L};
+  VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P};
+  VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8);
+  VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8);
+  DECL_VARIABLE (vector_res, float, 16, 8)
+    = vpminnmq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+                    VECT_VAR (vsrc_2, float, 16, 8));
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+             VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_min_static, "");
+
+#undef TEST_MSG
+#define TEST_MSG "VPMAXNM (FP16)"
+  clean_results ();
+
+  VECT_VAR (vector_res, float, 16, 4)
+    = vpmaxnm_f16 (VECT_VAR (vsrc_1, float, 16, 4),
+                   VECT_VAR (vsrc_2, float, 16, 4));
+  vst1_f16 (VECT_VAR (result, float, 16, 4),
+            VECT_VAR (vector_res, float, 16, 4));
+
+  CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_max_static, "");
+
+#undef TEST_MSG
+#define TEST_MSG "VPMAXNMQ (FP16)"
+  clean_results ();
+
+  VECT_VAR (vector_res, float, 16, 8)
+    = vpmaxnmq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
+                    VECT_VAR (vsrc_2, float, 16, 8));
+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
+             VECT_VAR (vector_res, float, 16, 8));
+
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_max_static, "");
+}
+
+int
+main (void)
+{
+  exec_vpminmaxnm_f16 ();
+  return 0;
+}
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpe.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpe.c index 55b45b7..cd6a17f 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpe.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpe.c @@ -7,6 +7,14 @@ VECT_VAR_DECL(expected_positive,uint,32,2) [] = { 0xffffffff, 0xffffffff }; VECT_VAR_DECL(expected_positive,uint,32,4) [] = { 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_positive, hfloat, 16, 4) [] = { 0x3834, 0x3834, + 0x3834, 0x3834 }; +VECT_VAR_DECL(expected_positive, hfloat, 16, 8) [] = { 0x2018, 0x2018, + 0x2018, 0x2018, + 0x2018, 0x2018, + 0x2018, 0x2018 }; +#endif VECT_VAR_DECL(expected_positive,hfloat,32,2) [] = { 0x3f068000, 0x3f068000 }; VECT_VAR_DECL(expected_positive,hfloat,32,4) [] = { 0x3c030000, 0x3c030000, 0x3c030000, 0x3c030000 }; @@ -15,24 +23,56 @@ VECT_VAR_DECL(expected_positive,hfloat,32,4) [] = { 0x3c030000, 0x3c030000, VECT_VAR_DECL(expected_negative,uint,32,2) [] = { 0x80000000, 0x80000000 }; VECT_VAR_DECL(expected_negative,uint,32,4) [] = { 0xee800000, 0xee800000, 0xee800000, 0xee800000 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_negative, hfloat, 16, 4) [] = { 0xae64, 0xae64, + 0xae64, 0xae64 }; +VECT_VAR_DECL(expected_negative, hfloat, 16, 8) [] = { 0xa018, 0xa018, + 0xa018, 0xa018, + 0xa018, 0xa018, + 0xa018, 0xa018 }; +#endif VECT_VAR_DECL(expected_negative,hfloat,32,2) [] = { 0xbdcc8000, 0xbdcc8000 }; VECT_VAR_DECL(expected_negative,hfloat,32,4) [] = { 0xbc030000, 0xbc030000, 0xbc030000, 0xbc030000 }; /* Expected results with FP special values (NaN, infinity). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0x7e00, 0x7e00, + 0x7e00, 0x7e00 }; +VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; /* Expected results with FP special values (zero, large value). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0x7c00, 0x7c00, + 0x7c00, 0x7c00 }; +VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x7f800000, 0x7f800000 }; VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; /* Expected results with FP special values (-0, -infinity). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_fp3, hfloat, 16, 4) [] = { 0xfc00, 0xfc00, + 0xfc00, 0xfc00}; +VECT_VAR_DECL(expected_fp3, hfloat, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +#endif VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; /* Expected results with FP special large negative value. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_fp4, hfloat, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +#endif VECT_VAR_DECL(expected_fp4,hfloat,32,2) [] = { 0x80000000, 0x80000000 }; #define TEST_MSG "VRECPE/VRECPEQ" @@ -50,11 +90,19 @@ void exec_vrecpe(void) /* No need for 64 bits variants. 
*/ DECL_VARIABLE(vector, uint, 32, 2); DECL_VARIABLE(vector, uint, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); +#endif DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector, float, 32, 4); DECL_VARIABLE(vector_res, uint, 32, 2); DECL_VARIABLE(vector_res, uint, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif DECL_VARIABLE(vector_res, float, 32, 2); DECL_VARIABLE(vector_res, float, 32, 4); @@ -62,88 +110,165 @@ void exec_vrecpe(void) /* Choose init value arbitrarily, positive. */ VDUP(vector, , uint, u, 32, 2, 0x12345678); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, 1.9f); +#endif VDUP(vector, , float, f, 32, 2, 1.9f); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, q, float, f, 16, 8, 125.0f); +#endif VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); VDUP(vector, q, float, f, 32, 4, 125.0f); /* Apply the operator. */ TEST_VRECPE(, uint, u, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRECPE(, float, f, 16, 4); +#endif TEST_VRECPE(, float, f, 32, 2); TEST_VRECPE(q, uint, u, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRECPE(q, float, f, 16, 8); +#endif TEST_VRECPE(q, float, f, 32, 4); #define CMT " (positive input)" CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_positive, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_positive, CMT); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_positive, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_positive, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_positive, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_positive, CMT); /* Choose init value arbitrarily,negative. */ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, -10.0f); +#endif VDUP(vector, , float, f, 32, 2, -10.0f); VDUP(vector, q, uint, u, 32, 4, 0x89081234); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, q, float, f, 16, 8, -125.0f); +#endif VDUP(vector, q, float, f, 32, 4, -125.0f); /* Apply the operator. */ TEST_VRECPE(, uint, u, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRECPE(, float, f, 16, 4); +#endif TEST_VRECPE(, float, f, 32, 2); TEST_VRECPE(q, uint, u, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRECPE(q, float, f, 16, 8); +#endif TEST_VRECPE(q, float, f, 32, 4); #undef CMT #define CMT " (negative input)" CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_negative, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_negative, CMT); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_negative, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_negative, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_negative, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_negative, CMT); /* Test FP variants with special input values (NaN, infinity). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, NAN); + VDUP(vector, q, float, f, 16, 8, HUGE_VALF); +#endif VDUP(vector, , float, f, 32, 2, NAN); VDUP(vector, q, float, f, 32, 4, HUGE_VALF); /* Apply the operator. 
*/
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VRECPE(, float, f, 16, 4);
+  TEST_VRECPE(q, float, f, 16, 8);
+#endif
   TEST_VRECPE(, float, f, 32, 2);
   TEST_VRECPE(q, float, f, 32, 4);
 
 #undef CMT
 #define CMT " FP special (NaN, infinity)"
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT);
+#endif
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT);
   CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT);
 
   /* Test FP variants with special input values (zero, large value). */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VDUP(vector, , float, f, 16, 4, 0.0f);
+  VDUP(vector, q, float, f, 16, 8, 8.97229e37f /*9.0e37f*/);
+#endif
   VDUP(vector, , float, f, 32, 2, 0.0f);
   VDUP(vector, q, float, f, 32, 4, 8.97229e37f /*9.0e37f*/);
 
   /* Apply the operator. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VRECPE(, float, f, 16, 4);
+  TEST_VRECPE(q, float, f, 16, 8);
+#endif
   TEST_VRECPE(, float, f, 32, 2);
   TEST_VRECPE(q, float, f, 32, 4);
 
 #undef CMT
 #define CMT " FP special (zero, large value)"
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT);
+#endif
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT);
   CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT);
 
   /* Test FP variants with special input values (-0, -infinity). */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VDUP(vector, , float, f, 16, 4, -0.0f);
+  VDUP(vector, q, float, f, 16, 8, -HUGE_VALF);
+#endif
   VDUP(vector, , float, f, 32, 2, -0.0f);
   VDUP(vector, q, float, f, 32, 4, -HUGE_VALF);
 
   /* Apply the operator. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VRECPE(, float, f, 16, 4);
+  TEST_VRECPE(q, float, f, 16, 8);
+#endif
   TEST_VRECPE(, float, f, 32, 2);
   TEST_VRECPE(q, float, f, 32, 4);
 
 #undef CMT
 #define CMT " FP special (-0, -infinity)"
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp3, CMT);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp3, CMT);
+#endif
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT);
   CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT);
 
   /* Test FP variants with special input values (large negative value). */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VDUP(vector, , float, f, 16, 4, -9.0e37f);
+#endif
   VDUP(vector, , float, f, 32, 2, -9.0e37f);
 
   /* Apply the operator. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VRECPE(, float, f, 16, 4);
+#endif
   TEST_VRECPE(, float, f, 32, 2);
 
 #undef CMT
 #define CMT " FP special (large negative value)"
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp4, CMT);
+#endif
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp4, CMT);
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpeh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpeh_f16_1.c
new file mode 100644
index 0000000..3740d6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpeh_f16_1.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_fp16.h>
+
+/* Input values. */
+#define A 123.4
+#define B 567.8
+#define C 34.8
+#define D 1024
+#define E 663.1
+#define F 144.0
+#define G 4.8
+#define H 77
+
+#define RECP_A 0x2028 /* 1/A. */
+#define RECP_B 0x1734 /* 1/B. */
+#define RECP_C 0x275C /* 1/C. */
+#define RECP_D 0x13FC /* 1/D. */
+#define RECP_E 0x162C /* 1/E. */
+#define RECP_F 0x1F18 /* 1/F. */
+#define RECP_G 0x32A8 /* 1/G. */
+#define RECP_H 0x22A4 /* 1/H. */
+
+float16_t input[] = { A, B, C, D, E, F, G, H };
+uint16_t expected[] = { RECP_A, RECP_B, RECP_C, RECP_D,
+                        RECP_E, RECP_F, RECP_G, RECP_H };
+
+#define TEST_MSG "VRECPEH_F16"
+#define INSN_NAME vrecpeh_f16
+
+#define INPUT input
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations. */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecps.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecps.c
index 0e41947..b06da22 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecps.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecps.c
@@ -4,22 +4,51 @@
 #include <math.h>
 
 /* Expected results with positive input. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xd70c, 0xd70c, 0xd70c, 0xd70c };
+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcedc, 0xcedc, 0xcedc, 0xcedc,
+                                              0xcedc, 0xcedc, 0xcedc, 0xcedc };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2e19eb7, 0xc2e19eb7 };
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1db851f, 0xc1db851f,
                                            0xc1db851f, 0xc1db851f };
 
 /* Expected results with FP special values (NaN). */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0x7e00, 0x7e00,
+                                                  0x7e00, 0x7e00 };
+VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x7e00, 0x7e00,
+                                                  0x7e00, 0x7e00,
+                                                  0x7e00, 0x7e00,
+                                                  0x7e00, 0x7e00 };
+#endif
 VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 };
 VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
                                                0x7fc00000, 0x7fc00000 };
 
 /* Expected results with FP special values (infinity, 0) and normal
    values. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0xfc00, 0xfc00,
+                                                  0xfc00, 0xfc00 };
+VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x4000, 0x4000,
+                                                  0x4000, 0x4000,
+                                                  0x4000, 0x4000,
+                                                  0x4000, 0x4000 };
+#endif
 VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0xff800000, 0xff800000 };
 VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x40000000, 0x40000000,
                                                0x40000000, 0x40000000 };
 
 /* Expected results with FP special values (infinity, 0). */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL(expected_fp3, hfloat, 16, 4) [] = { 0x4000, 0x4000,
+                                                  0x4000, 0x4000 };
+VECT_VAR_DECL(expected_fp3, hfloat, 16, 8) [] = { 0x4000, 0x4000,
+                                                  0x4000, 0x4000,
+                                                  0x4000, 0x4000,
+                                                  0x4000, 0x4000 };
+#endif
 VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0x40000000, 0x40000000 };
 VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x40000000, 0x40000000,
                                                0x40000000, 0x40000000 };
@@ -38,74 +67,143 @@ void exec_vrecps(void)
                  VECT_VAR(vector_res, T1, W, N))
 
   /* No need for integer variants. 
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); +#endif DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector, float, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector2, float, 16, 4); + DECL_VARIABLE(vector2, float, 16, 8); +#endif DECL_VARIABLE(vector2, float, 32, 2); DECL_VARIABLE(vector2, float, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif DECL_VARIABLE(vector_res, float, 32, 2); DECL_VARIABLE(vector_res, float, 32, 4); clean_results (); /* Choose init value arbitrarily. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, 12.9f); + VDUP(vector, q, float, f, 16, 8, 9.2f); +#endif VDUP(vector, , float, f, 32, 2, 12.9f); VDUP(vector, q, float, f, 32, 4, 9.2f); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector2, , float, f, 16, 4, 8.9f); + VDUP(vector2, q, float, f, 16, 8, 3.2f); +#endif VDUP(vector2, , float, f, 32, 2, 8.9f); VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Apply the operator. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRECPS(, float, f, 16, 4); + TEST_VRECPS(q, float, f, 16, 8); +#endif TEST_VRECPS(, float, f, 32, 2); TEST_VRECPS(q, float, f, 32, 4); #define CMT " (positive input)" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); /* Test FP variants with special input values (NaN). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, NAN); + VDUP(vector2, q, float, f, 16, 8, NAN); +#endif VDUP(vector, , float, f, 32, 2, NAN); VDUP(vector2, q, float, f, 32, 4, NAN); /* Apply the operator. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRECPS(, float, f, 16, 4); + TEST_VRECPS(q, float, f, 16, 8); +#endif TEST_VRECPS(, float, f, 32, 2); TEST_VRECPS(q, float, f, 32, 4); #undef CMT #define CMT " FP special (NaN)" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); /* Test FP variants with special input values (infinity, 0). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, HUGE_VALF); + VDUP(vector, q, float, f, 16, 8, 0.0f); + VDUP(vector2, q, float, f, 16, 8, 3.2f); /* Restore a normal value. */ +#endif VDUP(vector, , float, f, 32, 2, HUGE_VALF); VDUP(vector, q, float, f, 32, 4, 0.0f); VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Restore a normal value. */ + /* Apply the operator. 
*/
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VRECPS(, float, f, 16, 4);
+  TEST_VRECPS(q, float, f, 16, 8);
+#endif
   TEST_VRECPS(, float, f, 32, 2);
   TEST_VRECPS(q, float, f, 32, 4);
 
 #undef CMT
 #define CMT " FP special (infinity, 0) and normal value"
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT);
+#endif
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT);
   CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT);
 
   /* Test FP variants with only special input values (infinity, 0). */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VDUP(vector, , float, f, 16, 4, HUGE_VALF);
+  VDUP(vector, q, float, f, 16, 8, 0.0f);
+  VDUP(vector2, , float, f, 16, 4, 0.0f);
+  VDUP(vector2, q, float, f, 16, 8, HUGE_VALF);
+#endif
   VDUP(vector, , float, f, 32, 2, HUGE_VALF);
   VDUP(vector, q, float, f, 32, 4, 0.0f);
   VDUP(vector2, , float, f, 32, 2, 0.0f);
   VDUP(vector2, q, float, f, 32, 4, HUGE_VALF);
+
   /* Apply the operator. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VRECPS(, float, f, 16, 4);
+  TEST_VRECPS(q, float, f, 16, 8);
+#endif
   TEST_VRECPS(, float, f, 32, 2);
   TEST_VRECPS(q, float, f, 32, 4);
 
 #undef CMT
 #define CMT " FP special (infinity, 0)"
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp3, CMT);
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp3, CMT);
+#endif
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT);
   CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT);
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpsh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpsh_f16_1.c
new file mode 100644
index 0000000..3e6b24e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpsh_f16_1.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_fp16.h>
+
+/* Input values. */
+#define A 12.4
+#define B -5.8
+#define C -3.8
+#define D 10
+#define E 66.1
+#define F 16.1
+#define G -4.8
+#define H -77
+
+#define I 0.7
+#define J -78
+#define K 10.23
+#define L 98
+#define M 87
+#define N -87.81
+#define O -1.1
+#define P 47.8
+
+float16_t input_1[] = { A, B, C, D, I, J, K, L };
+float16_t input_2[] = { E, F, G, H, M, N, O, P };
+uint16_t expected[] = { 0xE264 /* 2.0f - A * E. */,
+                        0x55F6 /* 2.0f - B * F. */,
+                        0xCC10 /* 2.0f - C * G. */,
+                        0x6208 /* 2.0f - D * H. */,
+                        0xD35D /* 2.0f - I * M. */,
+                        0xEEB0 /* 2.0f - J * N. */,
+                        0x4A9F /* 2.0f - K * O. */,
+                        0xEC93 /* 2.0f - L * P. */ };
+
+#define TEST_MSG "VRECPSH_F16"
+#define INSN_NAME vrecpsh_f16
+
+#define INPUT_1 input_1
+#define INPUT_2 input_2
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for binary scalar operations. */
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpxh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpxh_f16_1.c new file mode 100644 index 0000000..fc02b6b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpxh_f16_1.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include + +/* Input values. */ + +float16_t input[] = { 123.4, 567.8, 34.8, 1024, 663.1, 144.0, 4.8, 77 }; +/* Expected results are calculated by: + for (index = 0; index < 8; index++) + { + uint16_t src_cast = * (uint16_t *) &src[index]; + * (uint16_t *) &expected[index] = + (src_cast & 0x8000) | (~src_cast & 0x7C00); + } */ +uint16_t expected[8] = { 0x2800, 0x1C00, 0x2C00, 0x1800, + 0x1C00, 0x2400, 0x3800, 0x2800 }; + +#define TEST_MSG "VRECPXH_F16" +#define INSN_NAME vrecpxh_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c index 3b574da..0c01318 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c @@ -63,6 +63,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 }; VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00, + 0xcb80, 0xcc00 }; +#endif VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 }; VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, @@ -86,6 +90,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8 }; VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00, + 0xcb80, 0xcc00, + 0xc880, 0xc900, + 0xc980, 0xca00 }; +#endif VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000, 0xc1500000, 0xc1600000 }; @@ -104,6 +114,10 @@ void exec_vrev (void) /* Initialize input "vector" from "buffer". 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c
index 3b574da..0c01318 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c
@@ -63,6 +63,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 };
 VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
                                                0xf3, 0xf2, 0xf1, 0xf0 };
 VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00,
+                                                      0xcb80, 0xcc00 };
+#endif
 VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 };
 VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
                                                0xf3, 0xf2, 0xf1, 0xf0,
@@ -86,6 +90,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
                                                 0xfb, 0xfa, 0xf9, 0xf8 };
 VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0,
                                                 0xfff7, 0xfff6, 0xfff5, 0xfff4 };
+#if defined (FP16_SUPPORTED)
+VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00,
+                                                      0xcb80, 0xcc00,
+                                                      0xc880, 0xc900,
+                                                      0xc980, 0xca00 };
+#endif
 VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000,
                                                   0xc1500000, 0xc1600000 };
 
@@ -104,6 +114,10 @@ void exec_vrev (void)
   /* Initialize input "vector" from "buffer". */
   TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+#if defined (FP16_SUPPORTED)
+  VLOAD (vector, buffer, , float, f, 16, 4);
+  VLOAD (vector, buffer, q, float, f, 16, 8);
+#endif
   VLOAD(vector, buffer, , float, f, 32, 2);
   VLOAD(vector, buffer, q, float, f, 32, 4);
 
@@ -187,6 +201,12 @@ void exec_vrev (void)
   CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, "");
   CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, "");
 
+#if defined (FP16_SUPPORTED)
+  TEST_VREV (, float, f, 16, 4, 64);
+  TEST_VREV (q, float, f, 16, 8, 64);
+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_vrev64, "");
+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_vrev64, "");
+#endif
   TEST_VREV(, float, f, 32, 2, 64);
   TEST_VREV(q, float, f, 32, 4, 64);
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_vrev64, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c
index d97a3a2..fe6715f 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c
@@ -6,6 +6,14 @@
 #include "compute-ref-data.h"
 
 /* Expected results. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80 };
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80,
+                                               0xca00, 0xc980,
+                                               0xc900, 0xc880 };
+#endif
 VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
                                                0xc1600000, 0xc1500000 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc
index 629240d..bb4a6ba 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc
@@ -17,20 +17,40 @@ void FNNAME (INSN) (void)
 #define TEST_VRND(Q, T1, T2, W, N) \
   TEST_VRND1 (INSN, Q, T1, T2, W, N)
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  DECL_VARIABLE(vector, float, 16, 4);
+  DECL_VARIABLE(vector, float, 16, 8);
+#endif
   DECL_VARIABLE (vector, float, 32, 2);
   DECL_VARIABLE (vector, float, 32, 4);
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  DECL_VARIABLE(vector_res, float, 16, 4);
+  DECL_VARIABLE(vector_res, float, 16, 8);
+#endif
   DECL_VARIABLE (vector_res, float, 32, 2);
   DECL_VARIABLE (vector_res, float, 32, 4);
 
   clean_results ();
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  VLOAD (vector, buffer, , float, f, 16, 4);
+  VLOAD (vector, buffer, q, float, f, 16, 8);
+#endif
   VLOAD (vector, buffer, , float, f, 32, 2);
   VLOAD (vector, buffer, q, float, f, 32, 4);
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  TEST_VRND ( , float, f, 16, 4);
+  TEST_VRND (q, float, f, 16, 8);
+#endif
   TEST_VRND ( , float, f, 32, 2);
   TEST_VRND (q, float, f, 32, 4);
 
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+  CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected, "");
+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected, "");
+#endif
   CHECK_FP (TEST_MSG, float, 32, 2, PRIx32, expected, "");
   CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected, "");
 }
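The expected, hfloat, 16 tables in vrnd/vrndX.inc above and in the vrnda/vrndm/vrndn/vrndp/vrndx tests that follow are raw IEEE 754 binary16 bit patterns: 0xcc00 is -16.0, 0xcb80 is -15.0, 0xca80 is -13.0, and so on. For auditing individual entries by hand, here is a self-contained float-to-binary16 converter with round-to-nearest-even (a hand-checking sketch, not the testsuite's own machinery):

#include <stdint.h>
#include <string.h>

/* Convert an IEEE binary32 value to a binary16 bit pattern with
   round-to-nearest-even.  Handles normals, subnormals, Inf and NaN.  */
static uint16_t
float_to_f16_bits (float f)
{
  uint32_t u;
  memcpy (&u, &f, sizeof u);
  uint16_t sign = (u >> 16) & 0x8000;
  int32_t exp = (int32_t) ((u >> 23) & 0xFF) - 127 + 15; /* Rebias.  */
  uint32_t mant = u & 0x7FFFFF;

  if (((u >> 23) & 0xFF) == 0xFF)          /* Inf or NaN.  */
    return sign | 0x7C00 | (mant ? 0x200 : 0);
  if (exp >= 0x1F)                         /* Too big: overflow to Inf.  */
    return sign | 0x7C00;
  if (exp <= 0)                            /* Half subnormal or zero.  */
    {
      if (exp < -10)
        return sign;
      mant |= 0x800000;                    /* Make the implicit bit explicit.  */
      uint32_t shift = 14 - exp;
      uint32_t half = mant >> shift;
      uint32_t rem = mant & ((1u << shift) - 1);
      uint32_t mid = 1u << (shift - 1);
      if (rem > mid || (rem == mid && (half & 1)))
        half++;
      return sign | (uint16_t) half;
    }
  uint32_t half = ((uint32_t) exp << 10) | (mant >> 13);
  uint32_t rem = mant & 0x1FFF;
  if (rem > 0x1000 || (rem == 0x1000 && (half & 1)))
    half++;                                /* Carry may bump the exponent.  */
  return sign | (uint16_t) half;
}

For example, float_to_f16_bits (-13.0f) returns 0xCA80, the last lane of the 4-lane expected table used by the rounding tests.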
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnda.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnda.c
index ff2bdc0..9c0f7ff 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnda.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnda.c
@@ -6,6 +6,14 @@
 #include "compute-ref-data.h"
 
 /* Expected results. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80 };
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80,
+                                               0xca00, 0xc980,
+                                               0xc900, 0xc880 };
+#endif
 VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
                                                0xc1600000, 0xc1500000 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndah_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndah_f16_1.c
new file mode 100644
index 0000000..bcf47f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndah_f16_1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+/* Expected results (16-bit hexadecimal representation). */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x4000 /* 2.000000 */,
+  0x4200 /* 3.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x0000 /* 0.000000 */,
+  0xc000 /* -2.000000 */,
+  0x3c00 /* 1.000000 */,
+  0xc800 /* -8.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x4a80 /* 13.000000 */,
+  0xc600 /* -6.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x7c00 /* inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VRNDAH_F16"
+#define INSN_NAME vrndah_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations. */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndh_f16_1.c
new file mode 100644
index 0000000..3c4649e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndh_f16_1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+/* Expected results (16-bit hexadecimal representation). */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x4000 /* 2.000000 */,
+  0x4200 /* 3.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x0000 /* 0.000000 */,
+  0xc000 /* -2.000000 */,
+  0x3c00 /* 1.000000 */,
+  0xc700 /* -7.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x4a80 /* 13.000000 */,
+  0xc600 /* -6.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x7c00 /* inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VRNDH_F16"
+#define INSN_NAME vrndh_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations. */
*/ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndi_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndi_f16_1.c new file mode 100644 index 0000000..7a4620b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndi_f16_1.c @@ -0,0 +1,71 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A FP16_C (123.4) +#define RNDI_A 0x57B0 /* FP16_C (123). */ +#define B FP16_C (-567.5) +#define RNDI_B 0xE070 /* FP16_C (-568). */ +#define C FP16_C (-34.8) +#define RNDI_C 0xD060 /* FP16_C (-35). */ +#define D FP16_C (1024) +#define RNDI_D 0x6400 /* FP16_C (1024). */ +#define E FP16_C (663.1) +#define RNDI_E 0x612E /* FP16_C (663). */ +#define F FP16_C (169.1) +#define RNDI_F 0x5948 /* FP16_C (169). */ +#define G FP16_C (-4.8) +#define RNDI_G 0xC500 /* FP16_C (-5). */ +#define H FP16_C (77.5) +#define RNDI_H 0x54E0 /* FP16_C (78). */ + +/* Expected results for vrndi. */ +VECT_VAR_DECL (expected_static, hfloat, 16, 4) [] + = { RNDI_A, RNDI_B, RNDI_C, RNDI_D }; + +VECT_VAR_DECL (expected_static, hfloat, 16, 8) [] + = { RNDI_A, RNDI_B, RNDI_C, RNDI_D, RNDI_E, RNDI_F, RNDI_G, RNDI_H }; + +void exec_vrndi_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VRNDI (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 4); + VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A, B, C, D}; + VLOAD (vsrc, buf_src, , float, f, 16, 4); + DECL_VARIABLE (vector_res, float, 16, 4) + = vrndi_f16 (VECT_VAR (vsrc, float, 16, 4)); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VRNDIQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 8); + VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A, B, C, D, E, F, G, H}; + VLOAD (vsrc, buf_src, q, float, f, 16, 8); + DECL_VARIABLE (vector_res, float, 16, 8) + = vrndiq_f16 (VECT_VAR (vsrc, float, 16, 8)); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_static, ""); +} + +int +main (void) +{ + exec_vrndi_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndih_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndih_f16_1.c new file mode 100644 index 0000000..4a7b721 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndih_f16_1.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +/* Expected results (16-bit hexadecimal representation). 
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x4000 /* 2.000000 */,
+  0x4200 /* 3.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x0000 /* 0.000000 */,
+  0xc000 /* -2.000000 */,
+  0x3c00 /* 1.000000 */,
+  0xc800 /* -8.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x4a80 /* 13.000000 */,
+  0xc600 /* -6.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x7c00 /* inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VRNDIH_F16"
+#define INSN_NAME vrndih_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations. */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndm.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndm.c
index eae9f61..9bfaffc 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndm.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndm.c
@@ -6,6 +6,14 @@
 #include "compute-ref-data.h"
 
 /* Expected results. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80 };
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80,
+                                               0xca00, 0xc980,
+                                               0xc900, 0xc880 };
+#endif
 VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
                                                0xc1600000, 0xc1500000 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndmh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndmh_f16_1.c
new file mode 100644
index 0000000..9af357d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndmh_f16_1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+/* Expected results (16-bit hexadecimal representation). */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x4000 /* 2.000000 */,
+  0x4200 /* 3.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x0000 /* 0.000000 */,
+  0xc200 /* -3.000000 */,
+  0x3c00 /* 1.000000 */,
+  0xc800 /* -8.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x4a80 /* 13.000000 */,
+  0xc700 /* -7.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x7c00 /* inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VRNDMH_F16"
+#define INSN_NAME vrndmh_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations. */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndn.c
index c6c707d..52b9942 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndn.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndn.c
@@ -6,6 +6,14 @@
 #include "compute-ref-data.h"
 
 /* Expected results. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80 };
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80,
+                                               0xca00, 0xc980,
+                                               0xc900, 0xc880 };
+#endif
 VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
                                                0xc1600000, 0xc1500000 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndnh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndnh_f16_1.c
new file mode 100644
index 0000000..eb4b27d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndnh_f16_1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+/* Expected results (16-bit hexadecimal representation). */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x4000 /* 2.000000 */,
+  0x4200 /* 3.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x0000 /* 0.000000 */,
+  0xc000 /* -2.000000 */,
+  0x3c00 /* 1.000000 */,
+  0xc800 /* -8.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x4a80 /* 13.000000 */,
+  0xc600 /* -6.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x7c00 /* inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VRNDNH_F16"
+#define INSN_NAME vrndnh_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations. */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndp.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndp.c
index e94eb6b..2e888b9 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndp.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndp.c
@@ -6,6 +6,14 @@
 #include "compute-ref-data.h"
 
 /* Expected results. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80 };
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80,
+                                               0xca00, 0xc980,
+                                               0xc900, 0xc880 };
+#endif
 VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
                                                0xc1600000, 0xc1500000 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndph_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndph_f16_1.c
new file mode 100644
index 0000000..3fa9749
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndph_f16_1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+/* Expected results (16-bit hexadecimal representation). */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x4000 /* 2.000000 */,
+  0x4400 /* 4.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x3c00 /* 1.000000 */,
+  0xc000 /* -2.000000 */,
+  0x4000 /* 2.000000 */,
+  0xc700 /* -7.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x4b00 /* 14.000000 */,
+  0xc600 /* -6.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x7c00 /* inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VRNDPH_F16"
+#define INSN_NAME vrndph_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations. */
+#include "unary_scalar_op.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndx.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndx.c
index 0d2a63e..400ddf8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndx.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndx.c
@@ -6,6 +6,14 @@
 #include "compute-ref-data.h"
 
 /* Expected results. */
+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80 };
+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
+                                               0xcb00, 0xca80,
+                                               0xca00, 0xc980,
+                                               0xc900, 0xc880 };
+#endif
 VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
                                                0xc1600000, 0xc1500000 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndxh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndxh_f16_1.c
new file mode 100644
index 0000000..eb4b27d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndxh_f16_1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+#include <arm_fp16.h>
+
+/* Expected results (16-bit hexadecimal representation). */
+uint16_t expected[] =
+{
+  0x0000 /* 0.000000 */,
+  0x8000 /* -0.000000 */,
+  0x4000 /* 2.000000 */,
+  0x4200 /* 3.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x0000 /* 0.000000 */,
+  0xc000 /* -2.000000 */,
+  0x3c00 /* 1.000000 */,
+  0xc800 /* -8.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x0000 /* 0.000000 */,
+  0x3c00 /* 1.000000 */,
+  0x4a80 /* 13.000000 */,
+  0xc600 /* -6.000000 */,
+  0x4d00 /* 20.000000 */,
+  0x7c00 /* inf */,
+  0xfc00 /* -inf */
+};
+
+#define TEST_MSG "VRNDXH_F16"
+#define INSN_NAME vrndxh_f16
+
+#define EXPECTED expected
+
+#define INPUT_TYPE float16_t
+#define OUTPUT_TYPE float16_t
+#define OUTPUT_TYPE_SIZE 16
+
+/* Include the template for unary scalar operations. 
*/ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrte.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrte.c index 0291ec0..77e2210 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrte.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrte.c @@ -7,6 +7,11 @@ VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; VECT_VAR_DECL(expected,uint,32,4) [] = { 0x9c800000, 0x9c800000, 0x9c800000, 0x9c800000 }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0x324c, 0x324c, 0x324c, 0x324c }; +VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0x3380, 0x3380, 0x3380, 0x3380, + 0x3380, 0x3380, 0x3380, 0x3380 }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3e498000, 0x3e498000 }; VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3e700000, 0x3e700000, 0x3e700000, 0x3e700000 }; @@ -22,17 +27,39 @@ VECT_VAR_DECL(expected_2,uint,32,4) [] = { 0xed000000, 0xed000000, 0xed000000, 0xed000000 }; /* Expected results with FP special inputs values (NaNs, ...). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0x7e00, 0x7e00, + 0x7e00, 0x7e00 }; +VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x7c00, 0x7c00, + 0x7c00, 0x7c00, + 0x7c00, 0x7c00, + 0x7c00, 0x7c00 }; +#endif VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; /* Expected results with FP special inputs values (negative, infinity). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0x7e00, 0x7e00, + 0x7e00, 0x7e00 }; +VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; /* Expected results with FP special inputs values (-0, -infinity). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_fp3, hfloat, 16, 4) [] = { 0xfc00, 0xfc00, + 0xfc00, 0xfc00 }; +VECT_VAR_DECL(expected_fp3, hfloat, 16, 8) [] = { 0x7e00, 0x7e00, + 0x7e00, 0x7e00, + 0x7e00, 0x7e00, + 0x7e00, 0x7e00 }; +#endif VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; @@ -50,32 +77,60 @@ void exec_vrsqrte(void) VECT_VAR(vector_res, T1, W, N)) DECL_VARIABLE(vector, uint, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); +#endif DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector, uint, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 8); +#endif DECL_VARIABLE(vector, float, 32, 4); DECL_VARIABLE(vector_res, uint, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector_res, float, 16, 4); +#endif DECL_VARIABLE(vector_res, float, 32, 2); DECL_VARIABLE(vector_res, uint, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector_res, float, 16, 8); +#endif DECL_VARIABLE(vector_res, float, 32, 4); clean_results (); /* Choose init value arbitrarily. 
*/ VDUP(vector, , uint, u, 32, 2, 0x12345678); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, 25.799999f); +#endif VDUP(vector, , float, f, 32, 2, 25.799999f); VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, q, float, f, 16, 8, 18.2f); +#endif VDUP(vector, q, float, f, 32, 4, 18.2f); /* Apply the operator. */ TEST_VRSQRTE(, uint, u, 32, 2); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRSQRTE(, float, f, 16, 4); +#endif TEST_VRSQRTE(, float, f, 32, 2); TEST_VRSQRTE(q, uint, u, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRSQRTE(q, float, f, 16, 8); +#endif TEST_VRSQRTE(q, float, f, 32, 4); #define CMT "" CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); @@ -110,42 +165,78 @@ void exec_vrsqrte(void) /* Test FP variants with special input values (NaNs, ...). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, NAN); + VDUP(vector, q, float, f, 16, 8, 0.0f); +#endif VDUP(vector, , float, f, 32, 2, NAN); VDUP(vector, q, float, f, 32, 4, 0.0f); /* Apply the operator. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRSQRTE(, float, f, 16, 4); + TEST_VRSQRTE(q, float, f, 16, 8); +#endif TEST_VRSQRTE(, float, f, 32, 2); TEST_VRSQRTE(q, float, f, 32, 4); #undef CMT #define CMT " FP special (NaN, 0)" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); /* Test FP variants with special input values (negative, infinity). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, -1.0f); + VDUP(vector, q, float, f, 16, 8, HUGE_VALF); +#endif VDUP(vector, , float, f, 32, 2, -1.0f); VDUP(vector, q, float, f, 32, 4, HUGE_VALF); /* Apply the operator. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRSQRTE(, float, f, 16, 4); + TEST_VRSQRTE(q, float, f, 16, 8); +#endif TEST_VRSQRTE(, float, f, 32, 2); TEST_VRSQRTE(q, float, f, 32, 4); #undef CMT #define CMT " FP special (negative, infinity)" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); /* Test FP variants with special input values (-0, -infinity). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, -0.0f); + VDUP(vector, q, float, f, 16, 8, -HUGE_VALF); +#endif VDUP(vector, , float, f, 32, 2, -0.0f); VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); /* Apply the operator. 
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRSQRTE(, float, f, 16, 4); + TEST_VRSQRTE(q, float, f, 16, 8); +#endif TEST_VRSQRTE(, float, f, 32, 2); TEST_VRSQRTE(q, float, f, 32, 4); #undef CMT #define CMT " FP special (-0, -infinity)" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp3, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp3, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrteh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrteh_f16_1.c new file mode 100644 index 0000000..7c0e619 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrteh_f16_1.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include <arm_fp16.h> + +/* Input values. */ +float16_t input[] = { 123.4, 67.8, 34.8, 24.0, 66.1, 144.0, 4.8, 77.0 }; +uint16_t expected[] = { 0x2DC4 /* FP16_C (1/__builtin_sqrtf (123.4)). */, + 0x2FC8 /* FP16_C (1/__builtin_sqrtf (67.8)). */, + 0x316C /* FP16_C (1/__builtin_sqrtf (34.8)). */, + 0x3288 /* FP16_C (1/__builtin_sqrtf (24.0)). */, + 0x2FDC /* FP16_C (1/__builtin_sqrtf (66.1)). */, + 0x2D54 /* FP16_C (1/__builtin_sqrtf (144.0)). */, + 0x3750 /* FP16_C (1/__builtin_sqrtf (4.8)). */, + 0x2F48 /* FP16_C (1/__builtin_sqrtf (77.0)). */ }; + +#define TEST_MSG "VRSQRTEH_F16" +#define INSN_NAME vrsqrteh_f16 + +#define INPUT input +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrts.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrts.c index 4531026..06626e4 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrts.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrts.c @@ -4,22 +4,51 @@ #include <math.h> /* Expected results. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xd3cb, 0xd3cb, 0xd3cb, 0xd3cb }; +VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xc726, 0xc726, 0xc726, 0xc726, + 0xc726, 0xc726, 0xc726, 0xc726 }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2796b84, 0xc2796b84 }; VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc0e4a3d8, 0xc0e4a3d8, 0xc0e4a3d8, 0xc0e4a3d8 }; /* Expected results with input=NaN. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_nan, hfloat, 16, 4) [] = { 0x7e00, 0x7e00, + 0x7e00, 0x7e00 }; +VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00, + 0x7e00, 0x7e00, + 0x7e00, 0x7e00, + 0x7e00, 0x7e00 }; +#endif VECT_VAR_DECL(expected_nan,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; /* Expected results with FP special inputs values (infinity, 0).
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0xfc00, 0xfc00, + 0xfc00, 0xfc00 }; +VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x3e00, 0x3e00, + 0x3e00, 0x3e00, + 0x3e00, 0x3e00, + 0x3e00, 0x3e00 }; +#endif VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000 }; /* Expected results with only FP special inputs values (infinity, 0). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0x3e00, 0x3e00, + 0x3e00, 0x3e00 }; +VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x3e00, 0x3e00, + 0x3e00, 0x3e00, + 0x3e00, 0x3e00, + 0x3e00, 0x3e00 }; +#endif VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x3fc00000, 0x3fc00000 }; VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000 }; @@ -38,75 +67,143 @@ void exec_vrsqrts(void) VECT_VAR(vector_res, T1, W, N)) /* No need for integer variants. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); +#endif DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector, float, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector2, float, 16, 4); + DECL_VARIABLE(vector2, float, 16, 8); +#endif DECL_VARIABLE(vector2, float, 32, 2); DECL_VARIABLE(vector2, float, 32, 4); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif DECL_VARIABLE(vector_res, float, 32, 2); DECL_VARIABLE(vector_res, float, 32, 4); clean_results (); /* Choose init value arbitrarily. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, 12.9f); + VDUP(vector, q, float, f, 16, 8, 9.1f); +#endif VDUP(vector, , float, f, 32, 2, 12.9f); VDUP(vector, q, float, f, 32, 4, 9.1f); +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector2, , float, f, 16, 4, 9.9f); + VDUP(vector2, q, float, f, 16, 8, 1.9f); +#endif VDUP(vector2, , float, f, 32, 2, 9.9f); VDUP(vector2, q, float, f, 32, 4, 1.9f); /* Apply the operator. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRSQRTS(, float, f, 16, 4); + TEST_VRSQRTS(q, float, f, 16, 8); +#endif TEST_VRSQRTS(, float, f, 32, 2); TEST_VRSQRTS(q, float, f, 32, 4); #define CMT "" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); /* Test FP variants with special input values (NaN). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, NAN); + VDUP(vector2, q, float, f, 16, 8, NAN); +#endif VDUP(vector, , float, f, 32, 2, NAN); VDUP(vector2, q, float, f, 32, 4, NAN); /* Apply the operator. 
*/ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRSQRTS(, float, f, 16, 4); + TEST_VRSQRTS(q, float, f, 16, 8); +#endif TEST_VRSQRTS(, float, f, 32, 2); TEST_VRSQRTS(q, float, f, 32, 4); #undef CMT #define CMT " FP special (NAN) and normal values" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_nan, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_nan, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_nan, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, CMT); /* Test FP variants with special input values (infinity, 0). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, HUGE_VALF); + VDUP(vector, q, float, f, 16, 8, 0.0f); + /* Restore a normal value in vector2. */ + VDUP(vector2, q, float, f, 16, 8, 3.2f); +#endif VDUP(vector, , float, f, 32, 2, HUGE_VALF); VDUP(vector, q, float, f, 32, 4, 0.0f); /* Restore a normal value in vector2. */ VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Apply the operator. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRSQRTS(, float, f, 16, 4); + TEST_VRSQRTS(q, float, f, 16, 8); +#endif TEST_VRSQRTS(, float, f, 32, 2); TEST_VRSQRTS(q, float, f, 32, 4); #undef CMT #define CMT " FP special (infinity, 0) and normal values" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); /* Test FP variants with only special input values (infinity, 0). */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + VDUP(vector, , float, f, 16, 4, HUGE_VALF); + VDUP(vector, q, float, f, 16, 8, 0.0f); + VDUP(vector2, , float, f, 16, 4, -0.0f); + VDUP(vector2, q, float, f, 16, 8, HUGE_VALF); +#endif VDUP(vector, , float, f, 32, 2, HUGE_VALF); VDUP(vector, q, float, f, 32, 4, 0.0f); VDUP(vector2, , float, f, 32, 2, -0.0f); VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); /* Apply the operator. */ +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + TEST_VRSQRTS(, float, f, 16, 4); + TEST_VRSQRTS(q, float, f, 16, 8); +#endif TEST_VRSQRTS(, float, f, 32, 2); TEST_VRSQRTS(q, float, f, 32, 4); #undef CMT #define CMT " only FP special (infinity, 0)" +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT); +#endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrtsh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrtsh_f16_1.c new file mode 100644 index 0000000..a9753a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrtsh_f16_1.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include <arm_fp16.h> + +/* Input values. */ +#define A 12.4 +#define B -5.8 +#define C -3.8 +#define D 10 +#define E 66.1 +#define F 16.1 +#define G -4.8 +#define H -77 + +#define I 0.7 +#define J -78 +#define K 10.23 +#define L 98 +#define M 87 +#define N -87.81 +#define O -1.1 +#define P 47.8 + +float16_t input_1[] = { A, B, C, D, I, J, K, L }; +float16_t input_2[] = { E, F, G, H, M, N, O, P }; +uint16_t expected[] = { 0xDE62 /* (3.0f + (-A) * E) / 2.0f. */, + 0x5206 /* (3.0f + (-B) * F) / 2.0f. */, + 0xC7A0 /* (3.0f + (-C) * G) / 2.0f. */, + 0x5E0A /* (3.0f + (-D) * H) / 2.0f. */, + 0xCF3D /* (3.0f + (-I) * M) / 2.0f. */, + 0xEAB0 /* (3.0f + (-J) * N) / 2.0f. */, + 0x471F /* (3.0f + (-K) * O) / 2.0f. */, + 0xE893 /* (3.0f + (-L) * P) / 2.0f. */ }; + +#define TEST_MSG "VRSQRTSH_F16" +#define INSN_NAME vrsqrtsh_f16 + +#define INPUT_1 input_1 +#define INPUT_2 input_2 +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. */ +#include "binary_scalar_op.inc"
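For reference, each expected[] entry in vrsqrtsh_f16_1.c above is the VRSQRTS step (3 - a*b) / 2, evaluated after the inputs have been narrowed to binary16 and rounded back to binary16. A minimal host-side sketch, outside the patch (fp16_bits is an illustrative helper and assumes GCC's IEEE-format __fp16, as on the targets this series enables), reproducing the first entry:

#include <stdio.h>
#include <string.h>

static unsigned short
fp16_bits (float x)
{
  __fp16 h = (__fp16) x;   /* Single rounding to IEEE binary16.  */
  unsigned short bits;
  memcpy (&bits, &h, sizeof bits);
  return bits;
}

int
main (void)
{
  /* First lane: A = 12.4 and E = 66.1, narrowed to fp16 exactly as the
     float16_t input arrays in the test do.  */
  float a = (__fp16) 12.4f, e = (__fp16) 66.1f;
  /* VRSQRTS step, matching the comment on expected[0].  */
  printf ("0x%04X\n", fp16_bits ((3.0f + (-a) * e) / 2.0f));   /* 0xDE62 */
  return 0;
}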
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc index b55a205..ad5bf31 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc @@ -53,9 +53,17 @@ void FNNAME (INSN_NAME) (void) DECL_VSHUFFLE(float, 32, 4) DECL_ALL_VSHUFFLE(); +#if defined (FP16_SUPPORTED) + DECL_VSHUFFLE (float, 16, 4); + DECL_VSHUFFLE (float, 16, 8); +#endif /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); +#if defined (FP16_SUPPORTED) + VLOAD (vector1, buffer, , float, f, 16, 4); + VLOAD (vector1, buffer, q, float, f, 16, 8); +#endif VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); @@ -68,6 +76,9 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, , uint, u, 32, 2, 0x77); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif VDUP(vector2, , float, f, 32, 2, 33.6f); VDUP(vector2, q, int, s, 8, 16, 0x11); @@ -78,8 +89,11 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, q, uint, u, 32, 4, 0x77); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif VDUP(vector2, q, float, f, 32, 4, 33.8f); - + #define TEST_ALL_VSHUFFLE(INSN) \ TEST_VSHUFFLE(INSN, , int, s, 8, 8); \ TEST_VSHUFFLE(INSN, , int, s, 16, 4); \ @@ -100,6 +114,10 @@ void FNNAME (INSN_NAME) (void) TEST_VSHUFFLE(INSN, q, poly, p, 16, 8); \ TEST_VSHUFFLE(INSN, q, float, f, 32, 4) +#define TEST_VSHUFFLE_FP16(INSN) \ + TEST_VSHUFFLE(INSN, , float, f, 16, 4); \ + TEST_VSHUFFLE(INSN, q, float, f, 16, 8); + #define TEST_ALL_EXTRA_CHUNKS() \ TEST_EXTRA_CHUNK(int, 8, 8, 1); \ TEST_EXTRA_CHUNK(int, 16, 4, 1); \ @@ -143,17 +161,37 @@ void FNNAME (INSN_NAME) (void) CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ - } \ + } + +#define CHECK_RESULTS_VSHUFFLE_FP16(test_name,EXPECTED,comment) \ + { \ + CHECK_FP (test_name, float, 16, 4, PRIx16, EXPECTED, comment); \ + CHECK_FP (test_name, float, 16, 8, PRIx16, EXPECTED, comment); \ + } clean_results (); /* Execute the tests. */ TEST_ALL_VSHUFFLE(INSN_NAME); +#if defined (FP16_SUPPORTED) + TEST_VSHUFFLE_FP16 (INSN_NAME); +#endif CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)"); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected0, "(chunk 0)"); +#endif TEST_ALL_EXTRA_CHUNKS(); +#if defined (FP16_SUPPORTED) + TEST_EXTRA_CHUNK (float, 16, 4, 1); + TEST_EXTRA_CHUNK (float, 16, 8, 1); +#endif + CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)"); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected1, "(chunk 1)"); +#endif } int main (void)
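A note on the magic constant in the vshuffle changes: 14.6f is used for the FP16 VDUPs because its binary16 encoding is 0x4b4d, the filler pattern the FP16 expected tables check for (the in-line comment "14.6f is 0x4b4d" says as much). A quick encoding sketch, assuming round-to-nearest and normal positive values only (to_half is illustrative, not a library function):

#include <stdio.h>

static unsigned short
to_half (float x)
{
  /* Normalize into [1.0, 2.0) and track the unbiased exponent.  */
  int e = 0;
  while (x >= 2.0f) { x /= 2.0f; e++; }
  while (x < 1.0f)  { x *= 2.0f; e--; }
  /* 10-bit mantissa, bias-15 exponent; mantissa overflow on rounding
     is ignored for brevity.  */
  unsigned m = (unsigned) ((x - 1.0f) * 1024.0f + 0.5f);
  return (unsigned short) (((e + 15) << 10) | m);
}

int
main (void)
{
  printf ("0x%04X\n", to_half (14.6f));   /* Prints 0x4B4D.  */
  return 0;
}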
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrt_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrt_f16_1.c new file mode 100644 index 0000000..82249a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrt_f16_1.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define FP16_C(a) ((__fp16) a) +#define A FP16_C (123.4) +#define B FP16_C (567.8) +#define C FP16_C (34.8) +#define D FP16_C (1024) +#define E FP16_C (663.1) +#define F FP16_C (144.0) +#define G FP16_C (4.8) +#define H FP16_C (77) + +#define SQRT_A 0x498E /* FP16_C (__builtin_sqrtf (123.4)). */ +#define SQRT_B 0x4DF5 /* FP16_C (__builtin_sqrtf (567.8)). */ +#define SQRT_C 0x45E6 /* FP16_C (__builtin_sqrtf (34.8)). */ +#define SQRT_D 0x5000 /* FP16_C (__builtin_sqrtf (1024)). */ +#define SQRT_E 0x4E70 /* FP16_C (__builtin_sqrtf (663.1)). */ +#define SQRT_F 0x4A00 /* FP16_C (__builtin_sqrtf (144.0)). */ +#define SQRT_G 0x4062 /* FP16_C (__builtin_sqrtf (4.8)). */ +#define SQRT_H 0x4863 /* FP16_C (__builtin_sqrtf (77)). */ + +/* Expected results for vsqrt. */ +VECT_VAR_DECL (expected_static, hfloat, 16, 4) [] + = { SQRT_A, SQRT_B, SQRT_C, SQRT_D }; + +VECT_VAR_DECL (expected_static, hfloat, 16, 8) [] + = { SQRT_A, SQRT_B, SQRT_C, SQRT_D, SQRT_E, SQRT_F, SQRT_G, SQRT_H }; + +void exec_vsqrt_f16 (void) +{ +#undef TEST_MSG +#define TEST_MSG "VSQRT (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 4); + VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A, B, C, D}; + VLOAD (vsrc, buf_src, , float, f, 16, 4); + DECL_VARIABLE (vector_res, float, 16, 4) + = vsqrt_f16 (VECT_VAR (vsrc, float, 16, 4)); + vst1_f16 (VECT_VAR (result, float, 16, 4), + VECT_VAR (vector_res, float, 16, 4)); + + CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VSQRTQ (FP16)" + clean_results (); + + DECL_VARIABLE(vsrc, float, 16, 8); + VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A, B, C, D, E, F, G, H}; + VLOAD (vsrc, buf_src, q, float, f, 16, 8); + DECL_VARIABLE (vector_res, float, 16, 8) + = vsqrtq_f16 (VECT_VAR (vsrc, float, 16, 8)); + vst1q_f16 (VECT_VAR (result, float, 16, 8), + VECT_VAR (vector_res, float, 16, 8)); + + CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_static, ""); +} + +int +main (void) +{ + exec_vsqrt_f16 (); + return 0; +}
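The SQRT_A..SQRT_H constants above are simply square roots computed in single precision and narrowed to binary16, as their comments state. A sketch to regenerate them, outside the patch (assumes a host compiler with IEEE-format __fp16):

#include <math.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  const float in[] = { 123.4f, 567.8f, 34.8f, 1024.0f,
                       663.1f, 144.0f, 4.8f, 77.0f };
  for (int i = 0; i < 8; i++)
    {
      __fp16 h = (__fp16) sqrtf (in[i]);   /* FP16_C (__builtin_sqrtf (x)).  */
      unsigned short bits;
      memcpy (&bits, &h, sizeof bits);
      printf ("0x%04X\n", bits);   /* 0x498E, 0x4DF5, 0x45E6, ...  */
    }
  return 0;
}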
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrth_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrth_f16_1.c new file mode 100644 index 0000000..7d03827 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrth_f16_1.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include <arm_fp16.h> + +/* Expected results (16-bit hexadecimal representation). */ +uint16_t expected[] = +{ + 0x0000 /* 0.000000 */, + 0x8000 /* -0.000000 */, + 0x3da8 /* 1.414062 */, + 0x3f0b /* 1.760742 */, + 0x4479 /* 4.472656 */, + 0x390f /* 0.632324 */, + 0x7e00 /* nan */, + 0x3c9d /* 1.153320 */, + 0x7e00 /* nan */, + 0x3874 /* 0.556641 */, + 0x38a2 /* 0.579102 */, + 0x39a8 /* 0.707031 */, + 0x3c00 /* 1.000000 */, + 0x433f /* 3.623047 */, + 0x7e00 /* nan */, + 0x4479 /* 4.472656 */, + 0x7c00 /* inf */, + 0x7e00 /* nan */ +}; + +#define TEST_MSG "VSQRTH_F16" +#define INSN_NAME vsqrth_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for unary scalar operations. */ +#include "unary_scalar_op.inc"
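Reading these tables is easier with the binary16 layout in mind: sign (1 bit), exponent (5 bits, bias 15), mantissa (10 bits). A decoding sketch for normal numbers (half_to_float is illustrative only), applied to the 0x3da8 entry above:

#include <stdio.h>

static float
half_to_float (unsigned short h)   /* Normal numbers only.  */
{
  int sign = h >> 15;
  int exp  = (h >> 10) & 0x1f;   /* Biased exponent.  */
  int mant = h & 0x3ff;
  float v = 1.0f + mant / 1024.0f;
  for (int i = 15; i < exp; i++) v *= 2.0f;
  for (int i = exp; i < 15; i++) v /= 2.0f;
  return sign ? -v : v;
}

int
main (void)
{
  /* 0x3da8 -> 1.4140625, shown as 1.414062 in the table above.  */
  printf ("%.7f\n", half_to_float (0x3da8));
  return 0;
}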
*/ +#include "unary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c index 1a108d5..19d1fd2 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c @@ -44,6 +44,14 @@ VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffed, VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0xc00ccccd, 0xc00ccccd }; VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0xc00ccccc, 0xc00ccccc, 0xc00ccccc, 0xc00ccccc }; +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +VECT_VAR_DECL(expected_float16, hfloat, 16, 4) [] = { 0xc066, 0xc066, + 0xc066, 0xc066 }; +VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0xc067, 0xc067, + 0xc067, 0xc067, + 0xc067, 0xc067, + 0xc067, 0xc067 }; +#endif void exec_vsub_f32(void) { @@ -67,4 +75,27 @@ void exec_vsub_f32(void) CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, ""); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, ""); + +#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); + + DECL_VARIABLE(vector2, float, 16, 4); + DECL_VARIABLE(vector2, float, 16, 8); + + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); + + VDUP(vector, , float, f, 16, 4, 2.3f); + VDUP(vector, q, float, f, 16, 8, 3.4f); + + VDUP(vector2, , float, f, 16, 4, 4.5f); + VDUP(vector2, q, float, f, 16, 8, 5.6f); + + TEST_BINARY_OP(INSN_NAME, , float, f, 16, 4); + TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); + + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_float16, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, ""); +#endif } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubh_f16_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubh_f16_1.c new file mode 100644 index 0000000..a7aba11 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubh_f16_1.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include + +#define INFF __builtin_inf () + +/* Expected results (16-bit hexadecimal representation). */ +uint16_t expected[] = +{ + 0xbc00 /* -1.000000 */, + 0xbc00 /* -1.000000 */, + 0x4654 /* 6.328125 */, + 0xd60e /* -96.875000 */, + 0xc900 /* -10.000000 */, + 0x36b8 /* 0.419922 */, + 0xc19a /* -2.800781 */, + 0x4848 /* 8.562500 */, + 0xbd34 /* -1.300781 */, + 0xccec /* -19.687500 */, + 0x4791 /* 7.566406 */, + 0xbf34 /* -1.800781 */, + 0x484d /* 8.601562 */, + 0x4804 /* 8.031250 */, + 0xc69c /* -6.609375 */, + 0x4ceb /* 19.671875 */, + 0x7c00 /* inf */, + 0xfc00 /* -inf */ +}; + +#define TEST_MSG "VSUB_F16" +#define INSN_NAME vsubh_f16 + +#define EXPECTED expected + +#define INPUT_TYPE float16_t +#define OUTPUT_TYPE float16_t +#define OUTPUT_TYPE_SIZE 16 + +/* Include the template for binary scalar operations. 
*/ +#include "binary_scalar_op.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c index 2c4a09c..ea2d8d8 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c @@ -15,6 +15,10 @@ VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11, 0xf2, 0xf3, 0x11, 0x11, @@ -36,6 +40,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66, 0xfff2, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, + 0x4b4d, 0x4b4d, + 0xcb00, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0x42073333, 0x42073333 }; @@ -51,6 +61,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11, 0xfa, 0xfb, 0x11, 0x11, @@ -72,6 +86,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, 0xfe, 0xff, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, 0xfff6, 0xfff7, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980, + 0x4b4d, 0x4b4d, + 0xc900, 0xc880, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000, 0x42073333, 0x42073333 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c new file mode 100644 index 0000000..63f820f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c @@ -0,0 +1,263 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x11, 0xf2, 0x11, + 0xf4, 0x11, 0xf6, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0x22, 0xfff2, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0x55, 0xf2, 0x55, + 0xf4, 0x55, 0xf6, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0x66, 0xfff2, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0x55, 0xf2, 0x55, + 0xf4, 0x55, 0xf6, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0x66, 0xfff2, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0x4b4d, + 0xcb00, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0x11, 0xf2, 0x11, + 0xf4, 0x11, 0xf6, 0x11, + 0xf8, 0x11, 0xfa, 0x11, + 0xfc, 0x11, 0xfe, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0x22, 0xfff2, 0x22, + 0xfff4, 0x22, 0xfff6, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0x33, + 0xfffffff2, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, + 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0x55, 0xf2, 0x55, + 0xf4, 0x55, 0xf6, 0x55, + 0xf8, 0x55, 0xfa, 0x55, + 0xfc, 0x55, 0xfe, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0x66, 0xfff2, 0x66, + 0xfff4, 0x66, 0xfff6, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0x77, + 0xfffffff2, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, + 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0x55, 0xf2, 0x55, + 0xf4, 0x55, 0xf6, 0x55, + 0xf8, 0x55, 0xfa, 0x55, + 0xfc, 0x55, 0xfe, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0x66, 0xfff2, 0x66, + 0xfff4, 0x66, 0xfff6, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0x4b4d, + 0xcb00, 0x4b4d, + 0xca00, 0x4b4d, + 0xc900, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0x42073333, + 0xc1600000, 0x42073333 }; + +#define TEST_MSG "VTRN1" +void exec_vtrn_half (void) +{ +#define TEST_VTRN(PART, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vtrn##PART##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VTRN1(Q, T1, T2, W, N) TEST_VTRN(1, Q, T1, T2, W, N) + + /* Input vector can only have 64 bits. */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE(vector, float, 64, 2); + DECL_VARIABLE(vector2, float, 64, 2); + + DECL_VARIABLE_ALL_VARIANTS(vector_res); + DECL_VARIABLE(vector_res, float, 64, 2); + + clean_results (); + /* We don't have vtrn1_T64x1, so set expected to the clean value. */ + CLEAN(expected, int, 64, 1); + CLEAN(expected, uint, 64, 1); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + VLOAD(vector, buffer, q, float, f, 64, 2); + + /* Choose arbitrary initialization values. 
*/ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , poly, p, 8, 8, 0x55); + VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif + VDUP(vector2, , float, f, 32, 2, 33.6f); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + VDUP(vector2, q, poly, p, 8, 16, 0x55); + VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif + VDUP(vector2, q, float, f, 32, 4, 33.8f); + VDUP(vector2, q, float, f, 64, 2, 33.8f); + + TEST_VTRN1(, int, s, 8, 8); + TEST_VTRN1(, int, s, 16, 4); + TEST_VTRN1(, int, s, 32, 2); + TEST_VTRN1(, uint, u, 8, 8); + TEST_VTRN1(, uint, u, 16, 4); + TEST_VTRN1(, uint, u, 32, 2); + TEST_VTRN1(, poly, p, 8, 8); + TEST_VTRN1(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VTRN1(, float, f, 16, 4); +#endif + TEST_VTRN1(, float, f, 32, 2); + + TEST_VTRN1(q, int, s, 8, 16); + TEST_VTRN1(q, int, s, 16, 8); + TEST_VTRN1(q, int, s, 32, 4); + TEST_VTRN1(q, int, s, 64, 2); + TEST_VTRN1(q, uint, u, 8, 16); + TEST_VTRN1(q, uint, u, 16, 8); + TEST_VTRN1(q, uint, u, 32, 4); + TEST_VTRN1(q, uint, u, 64, 2); + TEST_VTRN1(q, poly, p, 8, 16); + TEST_VTRN1(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VTRN1(q, float, f, 16, 8); +#endif + TEST_VTRN1(q, float, f, 32, 4); + TEST_VTRN1(q, float, f, 64, 2); + +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else + CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif + +#undef TEST_MSG +#define TEST_MSG "VTRN2" + +#define TEST_VTRN2(Q, T1, T2, W, N) TEST_VTRN(2, Q, T1, T2, W, N) + +/* Expected results. 
*/ +VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf1, 0x11, 0xf3, 0x11, + 0xf5, 0x11, 0xf7, 0x11 }; +VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff1, 0x22, 0xfff3, 0x22 }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf1, 0x55, 0xf3, 0x55, + 0xf5, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff1, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf1, 0x55, 0xf3, 0x55, + 0xf5, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff1, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb80, 0x4b4d, + 0xca80, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf1, 0x11, 0xf3, 0x11, + 0xf5, 0x11, 0xf7, 0x11, + 0xf9, 0x11, 0xfb, 0x11, + 0xfd, 0x11, 0xff, 0x11 }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff1, 0x22, 0xfff3, 0x22, + 0xfff5, 0x22, 0xfff7, 0x22 }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff1, 0x33, + 0xfffffff3, 0x33 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff1, + 0x44 }; +VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf1, 0x55, 0xf3, 0x55, + 0xf5, 0x55, 0xf7, 0x55, + 0xf9, 0x55, 0xfb, 0x55, + 0xfd, 0x55, 0xff, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff1, 0x66, 0xfff3, 0x66, + 0xfff5, 0x66, 0xfff7, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff1, 0x77, + 0xfffffff3, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff1, + 0x88 }; +VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf1, 0x55, 0xf3, 0x55, + 0xf5, 0x55, 0xf7, 0x55, + 0xf9, 0x55, 0xfb, 0x55, + 0xfd, 0x55, 0xff, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0x66, 0xfff3, 0x66, + 0xfff5, 0x66, 0xfff7, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb80, 0x4b4d, + 0xca80, 0x4b4d, + 0xc980, 0x4b4d, + 0xc880, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0x42073333, + 0xc1500000, 0x42073333 }; + clean_results (); + CLEAN(expected2, int, 64, 1); + CLEAN(expected2, uint, 64, 1); + + TEST_VTRN2(, int, s, 8, 8); + TEST_VTRN2(, int, s, 16, 4); + TEST_VTRN2(, int, s, 32, 2); + TEST_VTRN2(, uint, u, 8, 8); + TEST_VTRN2(, uint, u, 16, 4); + TEST_VTRN2(, uint, u, 32, 2); + TEST_VTRN2(, poly, p, 8, 8); + TEST_VTRN2(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VTRN2(, float, f, 16, 4); +#endif + TEST_VTRN2(, float, f, 32, 2); + + TEST_VTRN2(q, int, s, 8, 16); + TEST_VTRN2(q, int, s, 16, 8); + TEST_VTRN2(q, int, s, 32, 4); + TEST_VTRN2(q, int, s, 64, 2); + TEST_VTRN2(q, uint, u, 8, 16); + TEST_VTRN2(q, uint, u, 16, 8); + TEST_VTRN2(q, uint, u, 32, 4); + TEST_VTRN2(q, uint, u, 64, 2); + TEST_VTRN2(q, poly, p, 8, 16); + TEST_VTRN2(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VTRN2(q, float, f, 16, 8); +#endif + TEST_VTRN2(q, float, f, 32, 4); + TEST_VTRN2(q, float, f, 64, 2); + + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); +#if defined (FP16_SUPPORTED) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); +#endif +} + +int main (void) +{ + exec_vtrn_half (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c index ab6e576..43b49ca 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c @@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, + 0xcb00, 0xca80 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, @@ -48,6 +52,12 @@ VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, + 0xcb00, 0xca80, + 0xca00, 0xc980, + 0xc900, 0xc880 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; @@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, @@ -84,6 +98,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333, 0x42073333, 0x42073333 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c new file mode 100644 index 0000000..8706f24 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c @@ -0,0 +1,259 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results.
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff2, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, + 0x4b4d, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0xf8, 0xfa, 0xfc, 0xfe, + 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, + 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff2, + 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, + 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0xf8, 0xfa, 0xfc, 0xfe, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, + 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff2, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, + 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0xf8, 0xfa, 0xfc, 0xfe, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, + 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb00, 0xca00, 0xc900, + 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1600000, + 0x42073333, 0x42073333 }; + +#define TEST_MSG "VUZP1" +void exec_vuzp_half (void) +{ +#define TEST_VUZP(PART, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vuzp##PART##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VUZP1(Q, T1, T2, W, N) TEST_VUZP(1, Q, T1, T2, W, N) + + /* Input vector can only have 64 bits. */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE(vector, float, 64, 2); + DECL_VARIABLE(vector2, float, 64, 2); + + DECL_VARIABLE_ALL_VARIANTS(vector_res); + DECL_VARIABLE(vector_res, float, 64, 2); + + clean_results (); + /* We don't have vuzp1_T64x1, so set expected to the clean value. */ + CLEAN(expected, int, 64, 1); + CLEAN(expected, uint, 64, 1); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + VLOAD(vector, buffer, q, float, f, 64, 2); + + /* Choose arbitrary initialization values. 
*/ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , poly, p, 8, 8, 0x55); + VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif + VDUP(vector2, , float, f, 32, 2, 33.6f); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + VDUP(vector2, q, poly, p, 8, 16, 0x55); + VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif + VDUP(vector2, q, float, f, 32, 4, 33.8f); + VDUP(vector2, q, float, f, 64, 2, 33.8f); + + TEST_VUZP1(, int, s, 8, 8); + TEST_VUZP1(, int, s, 16, 4); + TEST_VUZP1(, int, s, 32, 2); + TEST_VUZP1(, uint, u, 8, 8); + TEST_VUZP1(, uint, u, 16, 4); + TEST_VUZP1(, uint, u, 32, 2); + TEST_VUZP1(, poly, p, 8, 8); + TEST_VUZP1(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VUZP1(, float, f, 16, 4); +#endif + TEST_VUZP1(, float, f, 32, 2); + + TEST_VUZP1(q, int, s, 8, 16); + TEST_VUZP1(q, int, s, 16, 8); + TEST_VUZP1(q, int, s, 32, 4); + TEST_VUZP1(q, int, s, 64, 2); + TEST_VUZP1(q, uint, u, 8, 16); + TEST_VUZP1(q, uint, u, 16, 8); + TEST_VUZP1(q, uint, u, 32, 4); + TEST_VUZP1(q, uint, u, 64, 2); + TEST_VUZP1(q, poly, p, 8, 16); + TEST_VUZP1(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VUZP1(q, float, f, 16, 8); +#endif + TEST_VUZP1(q, float, f, 32, 4); + TEST_VUZP1(q, float, f, 64, 2); + +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else + CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif + +#undef TEST_MSG +#define TEST_MSG "VUZP2" + +#define TEST_VUZP2(Q, T1, T2, W, N) TEST_VUZP(2, Q, T1, T2, W, N) + +/* Expected results. 
*/ +VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff1, 0xfff3, 0x22, 0x22 }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb80, 0xca80, + 0x4b4d, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0xf9, 0xfb, 0xfd, 0xff, + 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, + 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff1, 0xfffffff3, + 0x33, 0x33 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff1, + 0x44 }; +VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0xf9, 0xfb, 0xfd, 0xff, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, + 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff1, 0xfffffff3, 0x77, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff1, + 0x88 }; +VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0xf9, 0xfb, 0xfd, 0xff, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, + 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb80, 0xca80, 0xc980, 0xc880, + 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d + }; +#endif +VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1500000, + 0x42073333, 0x42073333 }; + + clean_results (); + CLEAN(expected2, int, 64, 1); + CLEAN(expected2, uint, 64, 1); + + TEST_VUZP2(, int, s, 8, 8); + TEST_VUZP2(, int, s, 16, 4); + TEST_VUZP2(, int, s, 32, 2); + TEST_VUZP2(, uint, u, 8, 8); + TEST_VUZP2(, uint, u, 16, 4); + TEST_VUZP2(, uint, u, 32, 2); + TEST_VUZP2(, poly, p, 8, 8); + TEST_VUZP2(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VUZP2(, float, f, 16, 4); +#endif + TEST_VUZP2(, float, f, 32, 2); + + TEST_VUZP2(q, int, s, 8, 16); + TEST_VUZP2(q, int, s, 16, 8); + TEST_VUZP2(q, int, s, 32, 4); + TEST_VUZP2(q, int, s, 64, 2); + TEST_VUZP2(q, uint, u, 8, 16); + TEST_VUZP2(q, uint, u, 16, 8); + TEST_VUZP2(q, uint, u, 32, 4); + TEST_VUZP2(q, uint, u, 64, 2); + TEST_VUZP2(q, poly, p, 8, 16); + TEST_VUZP2(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VUZP2(q, float, f, 16, 8); +#endif + TEST_VUZP2(q, float, f, 32, 4); + TEST_VUZP2(q, float, f, 64, 2); + + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); +#if defined (FP16_SUPPORTED) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); +#endif +} + +int main (void) +{ + exec_vuzp_half (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c index b5fe516..20f4f5d 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c @@ -18,6 +18,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, 0xf1, 0xf5, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11, 0xf1, 0xf9, 0x11, 0x11, @@ -41,6 +45,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, 0xf3, 0xfb, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, 0xfff1, 0xfff5, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00, + 0x4b4d, 0x4b4d, + 0xcb80, 0xc980, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000, 0x42073333, 0x42073333 }; @@ -59,6 +69,10 @@ VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, 0xf3, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11, 0xf5, 0xfd, 0x11, 0x11, @@ -82,6 +96,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, 0xf7, 0xff, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, 0xfff3, 0xfff7, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900, + 0x4b4d, 0x4b4d, + 0xca80, 0xc880, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1500000, 0x42073333, 0x42073333 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c new file mode 100644 index 0000000..619d6b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c @@ -0,0 +1,263 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results.
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x11, 0xf1, 0x11, + 0xf2, 0x11, 0xf3, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0x22, 0xfff1, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0x55, 0xf1, 0x55, + 0xf2, 0x55, 0xf3, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0x66, 0xfff1, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0x55, 0xf1, 0x55, + 0xf2, 0x55, 0xf3, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0x66, 0xfff1, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0x4b4d, + 0xcb80, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0x11, 0xf1, 0x11, + 0xf2, 0x11, 0xf3, 0x11, + 0xf4, 0x11, 0xf5, 0x11, + 0xf6, 0x11, 0xf7, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0x22, 0xfff1, 0x22, + 0xfff2, 0x22, 0xfff3, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0x33, + 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, + 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0x55, 0xf1, 0x55, + 0xf2, 0x55, 0xf3, 0x55, + 0xf4, 0x55, 0xf5, 0x55, + 0xf6, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0x66, 0xfff1, 0x66, + 0xfff2, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0x77, + 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, + 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0x55, 0xf1, 0x55, + 0xf2, 0x55, 0xf3, 0x55, + 0xf4, 0x55, 0xf5, 0x55, + 0xf6, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0x66, 0xfff1, 0x66, + 0xfff2, 0x66, 0xfff3, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0x4b4d, + 0xcb80, 0x4b4d, + 0xcb00, 0x4b4d, + 0xca80, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0x42073333, + 0xc1700000, 0x42073333 }; + +#define TEST_MSG "VZIP1" +void exec_vzip_half (void) +{ +#define TEST_VZIP(PART, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vzip##PART##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VZIP1(Q, T1, T2, W, N) TEST_VZIP(1, Q, T1, T2, W, N) + + /* Input vector can only have 64 bits. */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE(vector, float, 64, 2); + DECL_VARIABLE(vector2, float, 64, 2); + + DECL_VARIABLE_ALL_VARIANTS(vector_res); + DECL_VARIABLE(vector_res, float, 64, 2); + + clean_results (); + /* We don't have vzip1_T64x1, so set expected to the clean value. */ + CLEAN(expected, int, 64, 1); + CLEAN(expected, uint, 64, 1); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + VLOAD(vector, buffer, q, float, f, 64, 2); + + /* Choose arbitrary initialization values. 
*/ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , poly, p, 8, 8, 0x55); + VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif + VDUP(vector2, , float, f, 32, 2, 33.6f); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + VDUP(vector2, q, poly, p, 8, 16, 0x55); + VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif + VDUP(vector2, q, float, f, 32, 4, 33.8f); + VDUP(vector2, q, float, f, 64, 2, 33.8f); + + TEST_VZIP1(, int, s, 8, 8); + TEST_VZIP1(, int, s, 16, 4); + TEST_VZIP1(, int, s, 32, 2); + TEST_VZIP1(, uint, u, 8, 8); + TEST_VZIP1(, uint, u, 16, 4); + TEST_VZIP1(, uint, u, 32, 2); + TEST_VZIP1(, poly, p, 8, 8); + TEST_VZIP1(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VZIP1(, float, f, 16, 4); +#endif + TEST_VZIP1(, float, f, 32, 2); + + TEST_VZIP1(q, int, s, 8, 16); + TEST_VZIP1(q, int, s, 16, 8); + TEST_VZIP1(q, int, s, 32, 4); + TEST_VZIP1(q, int, s, 64, 2); + TEST_VZIP1(q, uint, u, 8, 16); + TEST_VZIP1(q, uint, u, 16, 8); + TEST_VZIP1(q, uint, u, 32, 4); + TEST_VZIP1(q, uint, u, 64, 2); + TEST_VZIP1(q, poly, p, 8, 16); + TEST_VZIP1(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VZIP1(q, float, f, 16, 8); +#endif + TEST_VZIP1(q, float, f, 32, 4); + TEST_VZIP1(q, float, f, 64, 2); + +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else + CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif + +#undef TEST_MSG +#define TEST_MSG "VZIP2" + +#define TEST_VZIP2(Q, T1, T2, W, N) TEST_VZIP(2, Q, T1, T2, W, N) + +/* Expected results. 
*/ +VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf4, 0x11, 0xf5, 0x11, + 0xf6, 0x11, 0xf7, 0x11 }; +VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0x22, 0xfff3, 0x22 }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf4, 0x55, 0xf5, 0x55, + 0xf6, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff2, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf4, 0x55, 0xf5, 0x55, + 0xf6, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0x4b4d, + 0xca80, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf8, 0x11, 0xf9, 0x11, + 0xfa, 0x11, 0xfb, 0x11, + 0xfc, 0x11, 0xfd, 0x11, + 0xfe, 0x11, 0xff, 0x11 }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff4, 0x22, 0xfff5, 0x22, + 0xfff6, 0x22, 0xfff7, 0x22 }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff2, 0x33, + 0xfffffff3, 0x33 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff1, + 0x44 }; +VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf8, 0x55, 0xf9, 0x55, + 0xfa, 0x55, 0xfb, 0x55, + 0xfc, 0x55, 0xfd, 0x55, + 0xfe, 0x55, 0xff, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff4, 0x66, 0xfff5, 0x66, + 0xfff6, 0x66, 0xfff7, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff2, 0x77, + 0xfffffff3, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff1, + 0x88 }; +VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf8, 0x55, 0xf9, 0x55, + 0xfa, 0x55, 0xfb, 0x55, + 0xfc, 0x55, 0xfd, 0x55, + 0xfe, 0x55, 0xff, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff4, 0x66, 0xfff5, 0x66, + 0xfff6, 0x66, 0xfff7, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xca00, 0x4b4d, + 0xc980, 0x4b4d, + 0xc900, 0x4b4d, + 0xc880, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0x42073333, + 0xc1500000, 0x42073333 }; + clean_results (); + CLEAN(expected2, int, 64, 1); + CLEAN(expected2, uint, 64, 1); + + TEST_VZIP2(, int, s, 8, 8); + TEST_VZIP2(, int, s, 16, 4); + TEST_VZIP2(, int, s, 32, 2); + TEST_VZIP2(, uint, u, 8, 8); + TEST_VZIP2(, uint, u, 16, 4); + TEST_VZIP2(, uint, u, 32, 2); + TEST_VZIP2(, poly, p, 8, 8); + TEST_VZIP2(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VZIP2(, float, f, 16, 4); +#endif + TEST_VZIP2(, float, f, 32, 2); + + TEST_VZIP2(q, int, s, 8, 16); + TEST_VZIP2(q, int, s, 16, 8); + TEST_VZIP2(q, int, s, 32, 4); + TEST_VZIP2(q, int, s, 64, 2); + TEST_VZIP2(q, uint, u, 8, 16); + TEST_VZIP2(q, uint, u, 16, 8); + TEST_VZIP2(q, uint, u, 32, 4); + TEST_VZIP2(q, uint, u, 64, 2); + TEST_VZIP2(q, poly, p, 8, 16); + TEST_VZIP2(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VZIP2(q, float, f, 16, 8); +#endif + TEST_VZIP2(q, float, f, 32, 4); + TEST_VZIP2(q, float, f, 64, 2); + + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); +#if defined (FP16_SUPPORTED) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); +#endif +} + +int main (void) +{ + exec_vzip_half (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c 
b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
new file mode 100644
index 0000000..b88f43f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
@@ -0,0 +1,105 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
+/* { dg-options "-O2 -ffast-math" } */
+/* { dg-add-options arm_v8_2a_fp16_neon } */
+
+/* Test instructions generated for half-precision arithmetic.  */
+
+typedef __fp16 float16_t;
+typedef __simd64_float16_t float16x4_t;
+typedef __simd128_float16_t float16x8_t;
+
+typedef short int16x4_t __attribute__ ((vector_size (8)));
+typedef short int int16x8_t __attribute__ ((vector_size (16)));
+
+float16_t
+fp16_abs (float16_t a)
+{
+  return (a < 0) ? -a : a;
+}
+
+#define TEST_UNOP(NAME, OPERATOR, TY) \
+  TY test_##NAME##_##TY (TY a) \
+  { \
+    return OPERATOR (a); \
+  }
+
+#define TEST_BINOP(NAME, OPERATOR, TY) \
+  TY test_##NAME##_##TY (TY a, TY b) \
+  { \
+    return a OPERATOR b; \
+  }
+
+#define TEST_CMP(NAME, OPERATOR, RTY, TY) \
+  RTY test_##NAME##_##TY (TY a, TY b) \
+  { \
+    return a OPERATOR b; \
+  }
+
+/* Scalars.  */
+
+TEST_UNOP (neg, -, float16_t)
+TEST_UNOP (abs, fp16_abs, float16_t)
+
+TEST_BINOP (add, +, float16_t)
+TEST_BINOP (sub, -, float16_t)
+TEST_BINOP (mult, *, float16_t)
+TEST_BINOP (div, /, float16_t)
+
+TEST_CMP (equal, ==, int, float16_t)
+TEST_CMP (unequal, !=, int, float16_t)
+TEST_CMP (lessthan, <, int, float16_t)
+TEST_CMP (greaterthan, >, int, float16_t)
+TEST_CMP (lessthanequal, <=, int, float16_t)
+TEST_CMP (greaterthanequal, >=, int, float16_t)
+
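To make the macro machinery above concrete, this is roughly what one scalar instantiation expands to; a sketch for illustration only, where the function name is simply what the token pasting in TEST_CMP produces:

    /* Approximate expansion of TEST_CMP (equal, ==, int, float16_t).  */
    int
    test_equal_float16_t (float16_t a, float16_t b)
    {
      /* On this target the compare itself is widened to single
	 precision, which is what the vcmp.f32 scans below count.  */
      return a == b;
    }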
*/ + +TEST_UNOP (neg, -, float16x8_t) + +TEST_BINOP (add, +, float16x8_t) +TEST_BINOP (sub, -, float16x8_t) +TEST_BINOP (mult, *, float16x8_t) +TEST_BINOP (div, /, float16x8_t) + +TEST_CMP (equal, ==, int16x8_t, float16x8_t) +TEST_CMP (unequal, !=, int16x8_t, float16x8_t) +TEST_CMP (lessthan, <, int16x8_t, float16x8_t) +TEST_CMP (greaterthan, >, int16x8_t, float16x8_t) +TEST_CMP (lessthanequal, <=, int16x8_t, float16x8_t) +TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t) + +/* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } } */ + +/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ +/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ +/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ +/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ +/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */ +/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */ + +/* { dg-final { scan-assembler-not {vadd\.f32} } } */ +/* { dg-final { scan-assembler-not {vsub\.f32} } } */ +/* { dg-final { scan-assembler-not {vmul\.f32} } } */ +/* { dg-final { scan-assembler-not {vdiv\.f32} } } */ +/* { dg-final { scan-assembler-not {vcmp\.f16} } } */ +/* { dg-final { scan-assembler-not {vcmpe\.f16} } } */ diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c new file mode 100644 index 0000000..c9639a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +/* Test ARMv8.2 FP16 conversions. 
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c
new file mode 100644
index 0000000..c9639a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c
@@ -0,0 +1,101 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+/* Test ARMv8.2 FP16 conversions.  */
+#include <arm_fp16.h>
+
+float
+f16_to_f32 (__fp16 a)
+{
+  return (float)a;
+}
+
+float
+f16_to_pf32 (__fp16* a)
+{
+  return (float)*a;
+}
+
+short
+f16_to_s16 (__fp16 a)
+{
+  return (short)a;
+}
+
+short
+pf16_to_s16 (__fp16* a)
+{
+  return (short)*a;
+}
+
+/* { dg-final { scan-assembler-times {vcvtb\.f32\.f16\ts[0-9]+, s[0-9]+} 4 } } */
+
+__fp16
+f32_to_f16 (float a)
+{
+  return (__fp16)a;
+}
+
+void
+f32_to_pf16 (__fp16* x, float a)
+{
+  *x = (__fp16)a;
+}
+
+__fp16
+s16_to_f16 (short a)
+{
+  return (__fp16)a;
+}
+
+void
+s16_to_pf16 (__fp16* x, short a)
+{
+  *x = (__fp16)a;
+}
+
+/* { dg-final { scan-assembler-times {vcvtb\.f16\.f32\ts[0-9]+, s[0-9]+} 4 } } */
+
+float
+s16_to_f32 (short a)
+{
+  return (float)a;
+}
+
+/* { dg-final { scan-assembler-times {vcvt\.f32\.s32\ts[0-9]+, s[0-9]+} 3 } } */
+
+short
+f32_to_s16 (float a)
+{
+  return (short)a;
+}
+
+/* { dg-final { scan-assembler-times {vcvt\.s32\.f32\ts[0-9]+, s[0-9]+} 3 } } */
+
+unsigned short
+f32_to_u16 (float a)
+{
+  return (unsigned short)a;
+}
+
+/* { dg-final { scan-assembler-times {vcvt\.u32\.f32\ts[0-9]+, s[0-9]+} 1 } } */
+
+short
+f64_to_s16 (double a)
+{
+  return (short)a;
+}
+
+/* { dg-final { scan-assembler-times {vcvt\.s32\.f64\ts[0-9]+, d[0-9]+} 1 } } */
+
+unsigned short
+f64_to_u16 (double a)
+{
+  return (unsigned short)a;
+}
+
+/* { dg-final { scan-assembler-times {vcvt\.u32\.f64\ts[0-9]+, d[0-9]+} 1 } } */
+
+
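The move test that follows checks half-precision loads, stores, selects, and compares. A minimal sketch of the select pattern it looks for (the helper name is hypothetical; it mirrors test_select_7 below): when the ARMv8 VSEL instructions and FP16 scalar support are available, a compare-and-pick may become a single conditional select after a widened compare:

    __fp16
    pick_greater (__fp16 a, __fp16 b)
    {
      /* May compile to vcmpe.f32 followed by vselgt.f16.  */
      return (a > b) ? a : b;
    }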
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
new file mode 100644
index 0000000..bb4e68f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
@@ -0,0 +1,165 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+__fp16
+test_load_1 (__fp16* a)
+{
+  return *a;
+}
+
+__fp16
+test_load_2 (__fp16* a, int i)
+{
+  return a[i];
+}
+
+/* { dg-final { scan-assembler-times {vld1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]+\]} 2 } } */
+
+void
+test_store_1 (__fp16* a, __fp16 b)
+{
+  *a = b;
+}
+
+void
+test_store_2 (__fp16* a, int i, __fp16 b)
+{
+  a[i] = b;
+}
+
+/* { dg-final { scan-assembler-times {vst1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]+\]} 2 } } */
+
+void
+test_load_store_1 (__fp16* a, int i, __fp16* b)
+{
+  a[i] = b[i];
+}
+
+__fp16
+test_load_store_2 (__fp16* a, int i, __fp16* b)
+{
+  a[i] = b[i + 2];
+  return a[i];
+}
+/* { dg-final { scan-assembler-times {ldrh\tr[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {strh\tr[0-9]+} 2 } } */
+
+__fp16
+test_select_1 (int sel, __fp16 a, __fp16 b)
+{
+  if (sel)
+    return a;
+  else
+    return b;
+}
+
+__fp16
+test_select_2 (int sel, __fp16 a, __fp16 b)
+{
+  return sel ? a : b;
+}
+
+__fp16
+test_select_3 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a == b) ? b : c;
+}
+
+__fp16
+test_select_4 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a != b) ? b : c;
+}
+
+__fp16
+test_select_5 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a < b) ? b : c;
+}
+
+__fp16
+test_select_6 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a <= b) ? b : c;
+}
+
+__fp16
+test_select_7 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a > b) ? b : c;
+}
+
+__fp16
+test_select_8 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a >= b) ? b : c;
+}
+
+/* { dg-final { scan-assembler-times {vseleq\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vselgt\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {vselge\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+/* { dg-final { scan-assembler-times {vmov\.f16\ts[0-9]+, r[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vmov\.f16\tr[0-9]+, s[0-9]+} 4 } } */
+
+int
+test_compare_1 (__fp16 a, __fp16 b)
+{
+  if (a == b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_ne (__fp16 a, __fp16 b)
+{
+  if (a != b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_2 (__fp16 a, __fp16 b)
+{
+  if (a > b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_3 (__fp16 a, __fp16 b)
+{
+  if (a >= b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_4 (__fp16 a, __fp16 b)
+{
+  if (a < b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_5 (__fp16 a, __fp16 b)
+{
+  if (a <= b)
+    return -1;
+  else
+    return 0;
+}
+
+/* { dg-final { scan-assembler-not {vcmp\.f16} } } */
+/* { dg-final { scan-assembler-not {vcmpe\.f16} } } */
+
+/* { dg-final { scan-assembler-times {vcmp\.f32} 4 } } */
+/* { dg-final { scan-assembler-times {vcmpe\.f32} 8 } } */
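The NEON intrinsics test below drives each new FP16 vector intrinsic once per vector width and counts the resulting instructions. A hedged usage sketch (assumes arm_neon.h built with __ARM_FEATURE_FP16_VECTOR_ARITHMETIC; the function name is illustrative):

    #include <arm_neon.h>

    float16x4_t
    fma_example (float16x4_t acc, float16x4_t x, float16x4_t y)
    {
      /* Computes acc + x * y in one fused vfma.f16 on a d register.  */
      return vfma_f16 (acc, x, y);
    }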
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-1.c
new file mode 100644
index 0000000..968efae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-1.c
@@ -0,0 +1,490 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_2a_fp16_neon } */
+
+/* Test instructions generated for the FP16 vector intrinsics.  */
+
+#include <arm_neon.h>
+
+#define MSTRCAT(L, str) L##str
+
+#define UNOP_TEST(insn) \
+  float16x4_t \
+  MSTRCAT (test_##insn, _16x4) (float16x4_t a) \
+  { \
+    return MSTRCAT (insn, _f16) (a); \
+  } \
+  float16x8_t \
+  MSTRCAT (test_##insn, _16x8) (float16x8_t a) \
+  { \
+    return MSTRCAT (insn, q_f16) (a); \
+  }
+
+#define BINOP_TEST(insn) \
+  float16x4_t \
+  MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b) \
+  { \
+    return MSTRCAT (insn, _f16) (a, b); \
+  } \
+  float16x8_t \
+  MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b) \
+  { \
+    return MSTRCAT (insn, q_f16) (a, b); \
+  }
+
+#define BINOP_LANE_TEST(insn, I) \
+  float16x4_t \
+  MSTRCAT (test_##insn##_lane, _16x4) (float16x4_t a, float16x4_t b) \
+  { \
+    return MSTRCAT (insn, _lane_f16) (a, b, I); \
+  } \
+  float16x8_t \
+  MSTRCAT (test_##insn##_lane, _16x8) (float16x8_t a, float16x4_t b) \
+  { \
+    return MSTRCAT (insn, q_lane_f16) (a, b, I); \
+  }
+
+#define BINOP_LANEQ_TEST(insn, I) \
+  float16x4_t \
+  MSTRCAT (test_##insn##_laneq, _16x4) (float16x4_t a, float16x8_t b) \
+  { \
+    return MSTRCAT (insn, _laneq_f16) (a, b, I); \
+  } \
+  float16x8_t \
+  MSTRCAT (test_##insn##_laneq, _16x8) (float16x8_t a, float16x8_t b) \
+  { \
+    return MSTRCAT (insn, q_laneq_f16) (a, b, I); \
+  }
+
+#define BINOP_N_TEST(insn) \
+  float16x4_t \
+  MSTRCAT (test_##insn##_n, _16x4) (float16x4_t a, float16_t b) \
+  { \
+    return MSTRCAT (insn, _n_f16) (a, b); \
+  } \
+  float16x8_t \
+  MSTRCAT (test_##insn##_n, _16x8) (float16x8_t a, float16_t b) \
+  { \
+    return MSTRCAT (insn, q_n_f16) (a, b); \
+  }
+
+#define TERNOP_TEST(insn) \
+  float16_t \
+  MSTRCAT (test_##insn, _16) (float16_t a, float16_t b, float16_t c) \
+  { \
+    return MSTRCAT (insn, h_f16) (a, b, c); \
+  } \
+  float16x4_t \
+  MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b, \
+				float16x4_t c) \
+  { \
+    return MSTRCAT (insn, _f16) (a, b, c); \
+  } \
+  float16x8_t \
+  MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b, \
+				float16x8_t c) \
+  { \
+    return MSTRCAT (insn, q_f16) (a, b, c); \
+  }
+
+#define VCMP1_TEST(insn) \
+  uint16x4_t \
+  MSTRCAT (test_##insn, _16x4) (float16x4_t a) \
+  { \
+    return MSTRCAT (insn, _f16) (a); \
+  } \
+  uint16x8_t \
+  MSTRCAT (test_##insn, _16x8) (float16x8_t a) \
+  { \
+    return MSTRCAT (insn, q_f16) (a); \
+  }
+
+#define VCMP2_TEST(insn) \
+  uint16x4_t \
+  MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b) \
+  { \
+    return MSTRCAT (insn, _f16) (a, b); \
+  } \
+  uint16x8_t \
+  MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b) \
+  { \
+    return MSTRCAT (insn, q_f16) (a, b); \
+  }
+
+#define VCVT_TEST(insn, TY, TO, FR) \
+  MSTRCAT (TO, 16x4_t) \
+  MSTRCAT (test_##insn, TY) (MSTRCAT (FR, 16x4_t) a) \
+  { \
+    return MSTRCAT (insn, TY) (a); \
+  } \
+  MSTRCAT (TO, 16x8_t) \
+  MSTRCAT (test_##insn##_q, TY) (MSTRCAT (FR, 16x8_t) a) \
+  { \
+    return MSTRCAT (insn, q##TY) (a); \
+  }
+
+#define VCVT_N_TEST(insn, TY, TO, FR) \
+  MSTRCAT (TO, 16x4_t) \
+  MSTRCAT (test_##insn##_n, TY) (MSTRCAT (FR, 16x4_t) a) \
+  { \
+    return MSTRCAT (insn, _n##TY) (a, 1); \
+  } \
+  MSTRCAT (TO, 16x8_t) \
+  MSTRCAT (test_##insn##_n_q, TY) (MSTRCAT (FR, 16x8_t) a) \
+  { \
+    return MSTRCAT (insn, q_n##TY) (a, 1); \
+  }
+
+VCMP1_TEST (vceqz)
+/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
+/* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
+
+VCMP1_TEST (vcgtz)
+/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
+/* { dg-final { scan-assembler-times {vcgt\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
+
+VCMP1_TEST (vcgez)
+/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
+/* { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
+
+VCMP1_TEST (vcltz)
+/* { dg-final { scan-assembler-times {vclt\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
+/* { dg-final { scan-assembler-times {vclt\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
+
+VCMP1_TEST (vclez)
+/* { dg-final { scan-assembler-times {vcle\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
+/* { dg-final { scan-assembler-times {vcle\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
+
+VCVT_TEST (vcvt, _f16_s16, float, int)
+VCVT_N_TEST (vcvt, _f16_s16, float, int)
+/* { dg-final { scan-assembler-times {vcvt\.f16\.s16\td[0-9]+, d[0-9]+} 2 } }
+   { dg-final { scan-assembler-times {vcvt\.f16\.s16\tq[0-9]+, q[0-9]+} 2 } }
+   { dg-final { scan-assembler-times {vcvt\.f16\.s16\td[0-9]+, d[0-9]+, #1} 1 } }
+   { dg-final { scan-assembler-times {vcvt\.f16\.s16\tq[0-9]+, q[0-9]+, #1} 1 } } */
+
+VCVT_TEST (vcvt, _f16_u16, float, uint)
+VCVT_N_TEST (vcvt, _f16_u16, float, uint)
+/* { dg-final { scan-assembler-times {vcvt\.f16\.u16\td[0-9]+, d[0-9]+} 2 } }
+   { dg-final { scan-assembler-times {vcvt\.f16\.u16\tq[0-9]+, q[0-9]+} 2 } }
+   { dg-final { scan-assembler-times {vcvt\.f16\.u16\td[0-9]+, d[0-9]+, #1} 1 } }
+   { dg-final { scan-assembler-times {vcvt\.f16\.u16\tq[0-9]+, q[0-9]+, #1} 1 } } */
+
+VCVT_TEST (vcvt, _s16_f16, int, float)
+VCVT_N_TEST (vcvt, _s16_f16, int, float)
+/* { dg-final { scan-assembler-times {vcvt\.s16\.f16\td[0-9]+, d[0-9]+} 2 } }
+   { dg-final { scan-assembler-times {vcvt\.s16\.f16\tq[0-9]+, q[0-9]+} 2 } }
+   { dg-final { scan-assembler-times {vcvt\.s16\.f16\td[0-9]+, d[0-9]+, #1} 1 } }
+   { dg-final { scan-assembler-times {vcvt\.s16\.f16\tq[0-9]+, q[0-9]+, #1} 1 } } */
+
+VCVT_TEST (vcvt, _u16_f16, uint, float)
+VCVT_N_TEST (vcvt, _u16_f16, uint, float) +/* { dg-final { scan-assembler-times {vcvt\.u16\.f16\td[0-9]+, d[0-9]+} 2 } } + { dg-final { scan-assembler-times {vcvt\.u16\.f16\tq[0-9]+, q[0-9]+} 2 } } + { dg-final { scan-assembler-times {vcvt\.u16\.f16\td[0-9]+, d[0-9]+, #1} 1 } } + { dg-final { scan-assembler-times {vcvt\.u16\.f16\tq[0-9]+, q[0-9]+, #1} 1 } } */ + +VCVT_TEST (vcvta, _s16_f16, int, float) +/* { dg-final { scan-assembler-times {vcvta\.s16\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcvta\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } } +*/ + +VCVT_TEST (vcvta, _u16_f16, uint, float) +/* { dg-final { scan-assembler-times {vcvta\.u16\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcvta\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } } +*/ + +VCVT_TEST (vcvtm, _s16_f16, int, float) +/* { dg-final { scan-assembler-times {vcvtm\.s16\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcvtm\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } } +*/ + +VCVT_TEST (vcvtm, _u16_f16, uint, float) +/* { dg-final { scan-assembler-times {vcvtm\.u16\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcvtm\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } } +*/ + +VCVT_TEST (vcvtn, _s16_f16, int, float) +/* { dg-final { scan-assembler-times {vcvtn\.s16\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcvtn\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } } +*/ + +VCVT_TEST (vcvtn, _u16_f16, uint, float) +/* { dg-final { scan-assembler-times {vcvtn\.u16\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcvtn\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } } +*/ + +VCVT_TEST (vcvtp, _s16_f16, int, float) +/* { dg-final { scan-assembler-times {vcvtp\.s16\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcvtp\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } } +*/ + +VCVT_TEST (vcvtp, _u16_f16, uint, float) +/* { dg-final { scan-assembler-times {vcvtp\.u16\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcvtp\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } } +*/ + +UNOP_TEST (vabs) +/* { dg-final { scan-assembler-times {vabs\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vabs\.f16\tq[0-9]+, q[0-9]+} 1 } } */ + +UNOP_TEST (vneg) +/* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */ + +UNOP_TEST (vrecpe) +/* { dg-final { scan-assembler-times {vrecpe\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vrecpe\.f16\tq[0-9]+, q[0-9]+} 1 } } */ + +UNOP_TEST (vrnd) +/* { dg-final { scan-assembler-times {vrintz\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vrintz\.f16\tq[0-9]+, q[0-9]+} 1 } } */ + +UNOP_TEST (vrnda) +/* { dg-final { scan-assembler-times {vrinta\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vrinta\.f16\tq[0-9]+, q[0-9]+} 1 } } */ + +UNOP_TEST (vrndm) +/* { dg-final { scan-assembler-times {vrintm\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vrintm\.f16\tq[0-9]+, q[0-9]+} 1 } } */ + +UNOP_TEST (vrndn) +/* { dg-final { scan-assembler-times {vrintn\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vrintn\.f16\tq[0-9]+, q[0-9]+} 1 } } */ + +UNOP_TEST (vrndp) +/* { dg-final { scan-assembler-times {vrintp\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vrintp\.f16\tq[0-9]+, q[0-9]+} 1 } } */ + +UNOP_TEST (vrndx) +/* { dg-final { scan-assembler-times {vrintx\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vrintx\.f16\tq[0-9]+, q[0-9]+} 
1 } } */ + +UNOP_TEST (vrsqrte) +/* { dg-final { scan-assembler-times {vrsqrte\.f16\td[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vrsqrte\.f16\tq[0-9]+, q[0-9]+} 1 } } */ + +BINOP_TEST (vadd) +/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +BINOP_TEST (vabd) +/* { dg-final { scan-assembler-times {vabd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vabd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +VCMP2_TEST (vcage) +/* { dg-final { scan-assembler-times {vacge\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vacge\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +VCMP2_TEST (vcagt) +/* { dg-final { scan-assembler-times {vacgt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vacgt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +VCMP2_TEST (vcale) +/* { dg-final { scan-assembler-times {vacle\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vacle\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +VCMP2_TEST (vcalt) +/* { dg-final { scan-assembler-times {vaclt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vaclt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +VCMP2_TEST (vceq) +/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +VCMP2_TEST (vcge) +/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +VCMP2_TEST (vcgt) +/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcgt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +VCMP2_TEST (vcle) +/* { dg-final { scan-assembler-times {vcle\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vcle\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +VCMP2_TEST (vclt) +/* { dg-final { scan-assembler-times {vclt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vclt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +BINOP_TEST (vmax) +/* { dg-final { scan-assembler-times {vmax\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vmax\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +BINOP_TEST (vmin) +/* { dg-final { scan-assembler-times {vmin\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vmin\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +BINOP_TEST (vmaxnm) +/* { dg-final { scan-assembler-times {vmaxnm\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vmaxnm\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +BINOP_TEST (vminnm) +/* { dg-final { scan-assembler-times {vminnm\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } + { dg-final { scan-assembler-times {vminnm\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +BINOP_TEST (vmul) +/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 3 } } + { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +BINOP_LANE_TEST (vmul, 2) +/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[2\]} 1 } } + { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[2\]} 1 } } */ +BINOP_N_TEST (vmul) +/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\]} 1 } } + { dg-final { 
scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\]} 1 } } */
+
+float16x4_t
+test_vpadd_16x4 (float16x4_t a, float16x4_t b)
+{
+  return vpadd_f16 (a, b);
+}
+/* { dg-final { scan-assembler-times {vpadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
+
+float16x4_t
+test_vpmax_16x4 (float16x4_t a, float16x4_t b)
+{
+  return vpmax_f16 (a, b);
+}
+/* { dg-final { scan-assembler-times {vpmax\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
+
+float16x4_t
+test_vpmin_16x4 (float16x4_t a, float16x4_t b)
+{
+  return vpmin_f16 (a, b);
+}
+/* { dg-final { scan-assembler-times {vpmin\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
+
+BINOP_TEST (vsub)
+/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
+   { dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
+
+BINOP_TEST (vrecps)
+/* { dg-final { scan-assembler-times {vrecps\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
+   { dg-final { scan-assembler-times {vrecps\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
+
+BINOP_TEST (vrsqrts)
+/* { dg-final { scan-assembler-times {vrsqrts\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
+   { dg-final { scan-assembler-times {vrsqrts\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
+
+TERNOP_TEST (vfma)
+/* { dg-final { scan-assembler-times {vfma\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
+   { dg-final { scan-assembler-times {vfma\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
+
+TERNOP_TEST (vfms)
+/* { dg-final { scan-assembler-times {vfms\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
+   { dg-final { scan-assembler-times {vfms\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
+
+float16x4_t
+test_vmov_n_f16 (float16_t a)
+{
+  return vmov_n_f16 (a);
+}
+
+float16x4_t
+test_vdup_n_f16 (float16_t a)
+{
+  return vdup_n_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, r[0-9]+} 2 } } */
+
+float16x8_t
+test_vmovq_n_f16 (float16_t a)
+{
+  return vmovq_n_f16 (a);
+}
+
+float16x8_t
+test_vdupq_n_f16 (float16_t a)
+{
+  return vdupq_n_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, r[0-9]+} 2 } } */
+
+float16x4_t
+test_vdup_lane_f16 (float16x4_t a)
+{
+  return vdup_lane_f16 (a, 1);
+}
+/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, d[0-9]+\[1\]} 1 } } */
+
+float16x8_t
+test_vdupq_lane_f16 (float16x4_t a)
+{
+  return vdupq_lane_f16 (a, 1);
+}
+/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, d[0-9]+\[1\]} 1 } } */
+
+float16x4_t
+test_vext_f16 (float16x4_t a, float16x4_t b)
+{
+  return vext_f16 (a, b, 1);
+}
+/* { dg-final { scan-assembler-times {vext\.16\td[0-9]+, d[0-9]+, d[0-9]+, #1} 1 } } */
+
+float16x8_t
+test_vextq_f16 (float16x8_t a, float16x8_t b)
+{
+  return vextq_f16 (a, b, 1);
+}
+/* { dg-final { scan-assembler-times {vext\.16\tq[0-9]+, q[0-9]+, q[0-9]+, #1} 1 } } */
+
+UNOP_TEST (vrev64)
+/* { dg-final { scan-assembler-times {vrev64\.16\td[0-9]+, d[0-9]+} 1 } }
+   { dg-final { scan-assembler-times {vrev64\.16\tq[0-9]+, q[0-9]+} 1 } } */
+
+float16x4_t
+test_vbsl16x4 (uint16x4_t a, float16x4_t b, float16x4_t c)
+{
+  return vbsl_f16 (a, b, c);
+}
+/* { dg-final { scan-assembler-times {vbsl\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
+
+float16x8_t
+test_vbslq16x8 (uint16x8_t a, float16x8_t b, float16x8_t c)
+{
+  return vbslq_f16 (a, b, c);
+}
+/* { dg-final { scan-assembler-times {vbsl\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
+
+float16x4x2_t
+test_vzip16x4 (float16x4_t a, float16x4_t b)
+{
+  return vzip_f16 (a, b);
+}
+/* { dg-final { scan-assembler-times {vzip\.16\td[0-9]+, d[0-9]+} 1 } } */
+
+float16x8x2_t
+test_vzipq16x8 (float16x8_t a, float16x8_t b)
+{
+  return vzipq_f16 (a, b);
+}
+/* { dg-final { scan-assembler-times {vzip\.16\tq[0-9]+, q[0-9]+} 1 } } */
+
+float16x4x2_t
+test_vuzp16x4 (float16x4_t a, float16x4_t b)
+{
+  return vuzp_f16 (a, b);
+}
+/* { dg-final { scan-assembler-times {vuzp\.16\td[0-9]+, d[0-9]+} 1 } } */
+
+float16x8x2_t
+test_vuzpq16x8 (float16x8_t a, float16x8_t b)
+{
+  return vuzpq_f16 (a, b);
+}
+/* { dg-final { scan-assembler-times {vuzp\.16\tq[0-9]+, q[0-9]+} 1 } } */
+
+float16x4x2_t
+test_vtrn16x4 (float16x4_t a, float16x4_t b)
+{
+  return vtrn_f16 (a, b);
+}
+/* { dg-final { scan-assembler-times {vtrn\.16\td[0-9]+, d[0-9]+} 1 } } */
+
+float16x8x2_t
+test_vtrnq16x8 (float16x8_t a, float16x8_t b)
+{
+  return vtrnq_f16 (a, b);
+}
+/* { dg-final { scan-assembler-times {vtrn\.16\tq[0-9]+, q[0-9]+} 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-1.c
new file mode 100644
index 0000000..2eddb76
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-1.c
@@ -0,0 +1,203 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+/* Test instructions generated for the FP16 scalar intrinsics.  */
+#include <arm_fp16.h>
+
+#define MSTRCAT(L, str) L##str
+
+#define UNOP_TEST(insn) \
+  float16_t \
+  MSTRCAT (test_##insn, 16) (float16_t a) \
+  { \
+    return MSTRCAT (insn, h_f16) (a); \
+  }
+
+#define BINOP_TEST(insn) \
+  float16_t \
+  MSTRCAT (test_##insn, 16) (float16_t a, float16_t b) \
+  { \
+    return MSTRCAT (insn, h_f16) (a, b); \
+  }
+
+#define TERNOP_TEST(insn) \
+  float16_t \
+  MSTRCAT (test_##insn, 16) (float16_t a, float16_t b, float16_t c) \
+  { \
+    return MSTRCAT (insn, h_f16) (a, b, c); \
+  }
+
+float16_t
+test_vcvth_f16_s32 (int32_t a)
+{
+  return vcvth_f16_s32 (a);
+}
+
+float16_t
+test_vcvth_n_f16_s32 (int32_t a)
+{
+  return vcvth_n_f16_s32 (a, 1);
+}
+/* { dg-final { scan-assembler-times {vcvt\.f16\.s32\ts[0-9]+, s[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vcvt\.f16\.s32\ts[0-9]+, s[0-9]+, #1} 1 } } */
+
+float16_t
+test_vcvth_f16_u32 (uint32_t a)
+{
+  return vcvth_f16_u32 (a);
+}
+
+float16_t
+test_vcvth_n_f16_u32 (uint32_t a)
+{
+  return vcvth_n_f16_u32 (a, 1);
+}
+
+/* { dg-final { scan-assembler-times {vcvt\.f16\.u32\ts[0-9]+, s[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vcvt\.f16\.u32\ts[0-9]+, s[0-9]+, #1} 1 } } */
+
+uint32_t
+test_vcvth_u32_f16 (float16_t a)
+{
+  return vcvth_u32_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vcvt\.u32\.f16\ts[0-9]+, s[0-9]+} 2 } } */
+
+uint32_t
+test_vcvth_n_u32_f16 (float16_t a)
+{
+  return vcvth_n_u32_f16 (a, 1);
+}
+/* { dg-final { scan-assembler-times {vcvt\.u32\.f16\ts[0-9]+, s[0-9]+, #1} 1 } } */
+
+int32_t
+test_vcvth_s32_f16 (float16_t a)
+{
+  return vcvth_s32_f16 (a);
+}
+
+int32_t
+test_vcvth_n_s32_f16 (float16_t a)
+{
+  return vcvth_n_s32_f16 (a, 1);
+}
+
+/* { dg-final { scan-assembler-times {vcvt\.s32\.f16\ts[0-9]+, s[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vcvt\.s32\.f16\ts[0-9]+, s[0-9]+, #1} 1 } } */
+
+int32_t
+test_vcvtah_s32_f16 (float16_t a)
+{
+  return vcvtah_s32_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vcvta\.s32\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+uint32_t
+test_vcvtah_u32_f16 (float16_t a)
+{
+  return vcvtah_u32_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vcvta\.u32\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+int32_t
+test_vcvtmh_s32_f16 (float16_t a)
+{
+  return vcvtmh_s32_f16 (a);
+}
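For reference, the permute intrinsics exercised at the end of the file above pair up lanes as follows; the lane values are purely illustrative, not part of the test:

    /* With a = {a0,a1,a2,a3} and b = {b0,b1,b2,b3}:
       vzip_f16 (a, b) -> val[0] = {a0,b0,a1,b1}, val[1] = {a2,b2,a3,b3}
       vuzp_f16 (a, b) -> val[0] = {a0,a2,b0,b2}, val[1] = {a1,a3,b1,b3}
       vtrn_f16 (a, b) -> val[0] = {a0,b0,a2,b2}, val[1] = {a1,b1,a3,b3}  */
    float16x4x2_t
    zip_example (float16x4_t a, float16x4_t b)
    {
      return vzip_f16 (a, b);	/* One vzip.16 on d registers.  */
    }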
+/* { dg-final { scan-assembler-times {vcvtm\.s32\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+uint32_t
+test_vcvtmh_u32_f16 (float16_t a)
+{
+  return vcvtmh_u32_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vcvtm\.u32\.f16\ts[0-9]+, s[0-9]+} 1 } }
+ */
+
+int32_t
+test_vcvtnh_s32_f16 (float16_t a)
+{
+  return vcvtnh_s32_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vcvtn\.s32\.f16\ts[0-9]+, s[0-9]+} 1 } }
+ */
+
+uint32_t
+test_vcvtnh_u32_f16 (float16_t a)
+{
+  return vcvtnh_u32_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vcvtn\.u32\.f16\ts[0-9]+, s[0-9]+} 1 } }
+ */
+
+int32_t
+test_vcvtph_s32_f16 (float16_t a)
+{
+  return vcvtph_s32_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vcvtp\.s32\.f16\ts[0-9]+, s[0-9]+} 1 } }
+ */
+
+uint32_t
+test_vcvtph_u32_f16 (float16_t a)
+{
+  return vcvtph_u32_f16 (a);
+}
+/* { dg-final { scan-assembler-times {vcvtp\.u32\.f16\ts[0-9]+, s[0-9]+} 1 } }
+ */
+
+UNOP_TEST (vabs)
+/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+UNOP_TEST (vneg)
+/* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+UNOP_TEST (vrnd)
+/* { dg-final { scan-assembler-times {vrintz\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+UNOP_TEST (vrndi)
+/* { dg-final { scan-assembler-times {vrintr\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+UNOP_TEST (vrnda)
+/* { dg-final { scan-assembler-times {vrinta\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+UNOP_TEST (vrndm)
+/* { dg-final { scan-assembler-times {vrintm\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+UNOP_TEST (vrndn)
+/* { dg-final { scan-assembler-times {vrintn\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+UNOP_TEST (vrndp)
+/* { dg-final { scan-assembler-times {vrintp\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+UNOP_TEST (vrndx)
+/* { dg-final { scan-assembler-times {vrintx\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+UNOP_TEST (vsqrt)
+/* { dg-final { scan-assembler-times {vsqrt\.f16\ts[0-9]+, s[0-9]+} 1 } } */
+
+BINOP_TEST (vadd)
+/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+BINOP_TEST (vdiv)
+/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+BINOP_TEST (vmaxnm)
+/* { dg-final { scan-assembler-times {vmaxnm\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+BINOP_TEST (vminnm)
+/* { dg-final { scan-assembler-times {vminnm\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+BINOP_TEST (vmul)
+/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+BINOP_TEST (vsub)
+/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+TERNOP_TEST (vfma)
+/* { dg-final { scan-assembler-times {vfma\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+TERNOP_TEST (vfms)
+/* { dg-final { scan-assembler-times {vfms\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
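The scalar test above also covers the fixed-point converts. As a reminder of their semantics (a sketch assuming ACLE behaviour; the helper name is hypothetical), vcvth_n_f16_s32 (x, n) converts x and scales the result by 2^-n:

    float16_t
    halve (int32_t x)
    {
      /* Converts x to half precision and divides by 2 (n == 1),
	 in one vcvt.f16.s32 with a #1 fracbits operand.  */
      return vcvth_n_f16_s32 (x, 1);
    }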
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-2.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-2.c
new file mode 100644
index 0000000..fa4828d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-2.c
@@ -0,0 +1,71 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
+/* { dg-options "-O2 -std=c11" } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+/* Test compiler use of FP16 instructions.  */
+#include <arm_fp16.h>
+
+float16_t
+test_mov_imm_1 (float16_t a)
+{
+  return 1.0;
+}
+
+float16_t
+test_mov_imm_2 (float16_t a)
+{
+  float16_t b = 1.0;
+  return b;
+}
+
+float16_t
+test_vmov_imm_3 (float16_t a)
+{
+  float16_t b = 1.0;
+  return vaddh_f16 (a, b);
+}
+
+float16_t
+test_vmov_imm_4 (float16_t a)
+{
+  return vaddh_f16 (a, 1.0);
+}
+
+/* { dg-final { scan-assembler-times {vmov\.f16\ts[0-9]+, #1\.0e\+0} 4 } }
+   { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 2 } } */
+
+float16_t
+test_vmla_1 (float16_t a, float16_t b, float16_t c)
+{
+  return vaddh_f16 (vmulh_f16 (a, b), c);
+}
+/* { dg-final { scan-assembler-times {vmla\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+float16_t
+test_vmla_2 (float16_t a, float16_t b, float16_t c)
+{
+  return vsubh_f16 (vmulh_f16 (vnegh_f16 (a), b), c);
+}
+/* { dg-final { scan-assembler-times {vnmla\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
+float16_t
+test_vmls_1 (float16_t a, float16_t b, float16_t c)
+{
+  return vsubh_f16 (c, vmulh_f16 (a, b));
+}
+
+float16_t
+test_vmls_2 (float16_t a, float16_t b, float16_t c)
+{
+  return vsubh_f16 (a, vmulh_f16 (b, c));
+}
+/* { dg-final { scan-assembler-times {vmls\.f16} 2 } } */
+
+float16_t
+test_vnmls_1 (float16_t a, float16_t b, float16_t c)
+{
+  return vsubh_f16 (vmulh_f16 (a, b), c);
+}
+/* { dg-final { scan-assembler-times {vnmls\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/attr-fp16-arith-1.c b/gcc/testsuite/gcc.target/arm/attr-fp16-arith-1.c
new file mode 100644
index 0000000..a93d30f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/attr-fp16-arith-1.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
+/* Reset fpu to a value compatible with the next pragmas.  */
+#pragma GCC target ("fpu=vfp")
+
+#pragma GCC push_options
+#pragma GCC target ("fpu=fp-armv8")
+
+#ifndef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
+#error __ARM_FEATURE_FP16_SCALAR_ARITHMETIC not defined.
+#endif
+
+#pragma GCC push_options
+#pragma GCC target ("fpu=neon-fp-armv8")
+
+#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#error __ARM_FEATURE_FP16_VECTOR_ARITHMETIC not defined.
+#endif
+
+#ifndef __ARM_NEON
+#error __ARM_NEON not defined.
+#endif
+
+#if !defined (__ARM_FP) || !(__ARM_FP & 0x2)
+#error Invalid value for __ARM_FP
+#endif
+
+#include "arm_neon.h"
+
+float16_t
+foo (float16x4_t b)
+{
+  float16x4_t a = {2.0, 3.0, 4.0, 5.0};
+  float16x4_t res = vadd_f16 (a, b);
+
+  return res[0];
+}
+
+/* { dg-final { scan-assembler "vadd\\.f16\td\[0-9\]+, d\[0-9\]+" } } */
+
+#pragma GCC pop_options
+
+/* Check that the FP version is correctly reset to mfpu=fp-armv8.  */
+
+#if !defined (__ARM_FP) || !(__ARM_FP & 0x2)
+#error __ARM_FP should record FP16 support.
+#endif
+
+#pragma GCC pop_options
+
+/* Check that the FP version is correctly reset to mfpu=vfp.  */
+
+#if !defined (__ARM_FP) || (__ARM_FP & 0x2)
+#error Unexpected value for __ARM_FP.
+#endif diff --git a/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c b/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c index 9bf3fc0..b91168d 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c +++ b/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c @@ -16,6 +16,6 @@ F (__fp16 a, __fp16 b, __fp16 c) return c; } -/* { dg-final { scan-assembler-times {vmov\tr[0-9]+, s[0-2]} 2 } } */ -/* { dg-final { scan-assembler-times {vmov.f32\ts1, s0} 1 } } */ -/* { dg-final { scan-assembler-times {vmov\ts0, r[0-9]+} 2 } } */ +/* { dg-final { scan-assembler {vmov(\.f16)?\tr[0-9]+, s[0-9]+} } } */ +/* { dg-final { scan-assembler {vmov(\.f32)?\ts1, s0} } } */ +/* { dg-final { scan-assembler {vmov(\.f16)?\ts0, r[0-9]+} } } */ diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-1.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-1.c index 3abcd94..0845e88 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-1.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-1.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative" } */ __fp16 xx = 0.0; diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-10.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-10.c index 2e3d31f..a8772a1 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-10.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-10.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative -pedantic -std=gnu99" } */ #include diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-11.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-11.c index 62a7a3d..1cb3d2c 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-11.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-11.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative -pedantic -std=gnu99" } */ #include diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-12.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-12.c index 09586e9..3c3bd2f 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-12.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-12.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative" } */ float xx __attribute__((mode(HF))) = 0.0; diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-2.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-2.c index b7fe99d..8a45f1f 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-2.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-2.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative" } */ /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c index f325a84..e786a51 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative" } */ /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-4.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-4.c 
index 4b9b331..cfeb61a 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-4.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-4.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative" } */ /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-5.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-5.c index 458f507..3b741ae 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-5.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-5.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative" } */ /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-6.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-6.c index dbb4a99..abffff5 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-6.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-6.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative" } */ /* This number is the maximum value representable in the alternative diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-7.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-7.c index 40940a6..c339f19 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-7.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-7.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative -pedantic" } */ /* This number overflows the range of the alternative encoding. 
Since this diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-8.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-8.c index cbc0a39..deeb5cd 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-8.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-8.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative" } */ /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-9.c b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-9.c index 6487c8d..f9f5654 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-alt-9.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-alt-9.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_alternative_ok } */ /* { dg-options "-mfp16-format=alternative" } */ /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-none-1.c b/gcc/testsuite/gcc.target/arm/fp16-compile-none-1.c index e912505..9472249 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-none-1.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-none-1.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_none_ok } */ /* { dg-options "-mfp16-format=none" } */ /* __fp16 type name is not recognized unless you explicitly enable it diff --git a/gcc/testsuite/gcc.target/arm/fp16-compile-none-2.c b/gcc/testsuite/gcc.target/arm/fp16-compile-none-2.c index eb7eef5..9ec21e5 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-compile-none-2.c +++ b/gcc/testsuite/gcc.target/arm/fp16-compile-none-2.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_fp16_none_ok } */ /* { dg-options "-mfp16-format=none" } */ /* mode(HF) attributes are not recognized unless you explicitly enable diff --git a/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c b/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c index f50b447..1c15b61 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c +++ b/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c @@ -3,6 +3,7 @@ from double to __fp16. 
 */
 /* { dg-do run } */
+/* { dg-require-effective-target arm_fp16_alternative_ok } */
 /* { dg-options "-mfp16-format=alternative" } */
 
 #include <stdlib.h>
diff --git a/gcc/testsuite/gcc.target/arm/short-vfp-1.c b/gcc/testsuite/gcc.target/arm/short-vfp-1.c
new file mode 100644
index 0000000..d96c763
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/short-vfp-1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_vfp_ok } */
+/* { dg-options "-mfpu=vfp" } */
+
+int
+test_sisf (float x)
+{
+  return (int)x;
+}
+
+short
+test_hisf (float x)
+{
+  return (short)x;
+}
+
+float
+test_sfsi (int x)
+{
+  return (float)x;
+}
+
+float
+test_sfhi (short x)
+{
+  return (float)x;
+}
+
+short
+test_hisi (int x)
+{
+  return (short)x;
+}
+
+int
+test_sihi (short x)
+{
+  return (int)x;
+}
+
+/* { dg-final { scan-assembler-times {vcvt\.s32\.f32\ts[0-9]+,s[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vcvt\.f32\.s32\ts[0-9]+,s[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vmov\tr[0-9]+,s[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vmov\ts[0-9]+,r[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {sxth\tr[0-9]+,r[0-9]+} 2 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 5895923..0a60e08 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2938,6 +2938,28 @@ proc add_options_for_arm_v8_1a_neon { flags } {
     return "$flags $et_arm_v8_1a_neon_flags -march=armv8.1-a"
 }
 
+# Add the options needed for ARMv8.2 with the scalar FP16 extension.
+# Also adds the ARMv8 FP options for ARM and for AArch64.
+
+proc add_options_for_arm_v8_2a_fp16_scalar { flags } {
+    if { ! [check_effective_target_arm_v8_2a_fp16_scalar_ok] } {
+	return "$flags"
+    }
+    global et_arm_v8_2a_fp16_scalar_flags
+    return "$flags $et_arm_v8_2a_fp16_scalar_flags"
+}
+
+# Add the options needed for ARMv8.2 with the FP16 extension.  Also adds
+# the ARMv8 NEON options for ARM and for AArch64.
+
+proc add_options_for_arm_v8_2a_fp16_neon { flags } {
+    if { ! [check_effective_target_arm_v8_2a_fp16_neon_ok] } {
+	return "$flags"
+    }
+    global et_arm_v8_2a_fp16_neon_flags
+    return "$flags $et_arm_v8_2a_fp16_neon_flags"
+}
+
 proc add_options_for_arm_crc { flags } {
     if { ! [check_effective_target_arm_crc_ok] } {
         return "$flags"
@@ -3079,6 +3101,65 @@ proc add_options_for_arm_neon_fp16 { flags } {
     return "$flags $et_arm_neon_fp16_flags"
 }
 
+# Return 1 if this is an ARM target supporting the FP16 alternative
+# format.  Some multilibs may be incompatible with the options needed.  Also
+# set et_arm_neon_fp16_flags to the best options to add.
+
+proc check_effective_target_arm_fp16_alternative_ok_nocache { } {
+    global et_arm_neon_fp16_flags
+    set et_arm_neon_fp16_flags ""
+    if { [check_effective_target_arm32] } {
+	foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
+		       "-mfpu=neon-fp16 -mfloat-abi=softfp"} {
+	    if { [check_no_compiler_messages_nocache \
+		      arm_fp16_alternative_ok object {
+		#if !defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+		#error __ARM_FP16_FORMAT_ALTERNATIVE not defined
+		#endif
+	    } "$flags -mfp16-format=alternative"] } {
+		set et_arm_neon_fp16_flags "$flags -mfp16-format=alternative"
+		return 1
+	    }
+	}
+    }
+
+    return 0
+}
+
+proc check_effective_target_arm_fp16_alternative_ok { } {
+    return [check_cached_effective_target arm_fp16_alternative_ok \
+		check_effective_target_arm_fp16_alternative_ok_nocache]
+}
+
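A minimal consumer of the new arm_fp16_alternative_ok effective target might look like the following sketch (not part of the patch; it simply shows how the require/options pair from the tests above fits together):

    /* { dg-do compile } */
    /* { dg-require-effective-target arm_fp16_alternative_ok } */
    /* { dg-options "-mfp16-format=alternative" } */

    /* Stored using the ARM alternative half-precision encoding.  */
    __fp16 one = 1.0;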
+# Return 1 if this is an ARM target that supports specifying the "none"
+# FP16 format.  Some multilibs may be incompatible with the options needed.
+
+proc check_effective_target_arm_fp16_none_ok_nocache { } {
+    if { [check_effective_target_arm32] } {
+	foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
+		       "-mfpu=neon-fp16 -mfloat-abi=softfp"} {
+	    if { [check_no_compiler_messages_nocache \
+		      arm_fp16_none_ok object {
+		#if defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+		#error __ARM_FP16_FORMAT_ALTERNATIVE defined
+		#endif
+		#if defined (__ARM_FP16_FORMAT_IEEE)
+		#error __ARM_FP16_FORMAT_IEEE defined
+		#endif
+	    } "$flags -mfp16-format=none"] } {
+		return 1
+	    }
+	}
+    }
+
+    return 0
+}
+
+proc check_effective_target_arm_fp16_none_ok { } {
+    return [check_cached_effective_target arm_fp16_none_ok \
+		check_effective_target_arm_fp16_none_ok_nocache]
+}
+
 # Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
 # -mfloat-abi=softfp or equivalent options.  Some multilibs may be
 # incompatible with these options.  Also set et_arm_v8_neon_flags to the
@@ -3279,6 +3360,7 @@ foreach { armfunc armflag armdef } {
	v4 "-march=armv4 -marm" __ARM_ARCH_4__
	v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__
	v8a "-march=armv8-a" __ARM_ARCH_8A__
	v8_1a "-march=armv8.1a" __ARM_ARCH_8A__
+	v8_2a "-march=armv8.2a" __ARM_ARCH_8A__
	v8m_base "-march=armv8-m.base -mthumb" __ARM_ARCH_8M_BASE__
	v8m_main "-march=armv8-m.main -mthumb" __ARM_ARCH_8M_MAIN__ } {
    eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
@@ -3524,6 +3606,76 @@ proc check_effective_target_arm_v8_1a_neon_ok { } {
		check_effective_target_arm_v8_1a_neon_ok_nocache]
 }
 
+# Return 1 if the target supports ARMv8.2 scalar FP16 arithmetic
+# instructions, 0 otherwise.  The test is valid for ARM and for AArch64.
+# Record the command line options needed.
+
+proc check_effective_target_arm_v8_2a_fp16_scalar_ok_nocache { } {
+    global et_arm_v8_2a_fp16_scalar_flags
+    set et_arm_v8_2a_fp16_scalar_flags ""
+
+    if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } {
+	return 0;
+    }
+
+    # Iterate through sets of options to find the compiler flags that
+    # need to be added to the -march option.
+    foreach flags {"" "-mfpu=fp-armv8" "-mfloat-abi=softfp" \
+		       "-mfpu=fp-armv8 -mfloat-abi=softfp"} {
+	if { [check_no_compiler_messages_nocache \
+		  arm_v8_2a_fp16_scalar_ok object {
+	    #if !defined (__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
+	    #error "__ARM_FEATURE_FP16_SCALAR_ARITHMETIC not defined"
+	    #endif
+	} "$flags -march=armv8.2-a+fp16"] } {
+	    set et_arm_v8_2a_fp16_scalar_flags "$flags -march=armv8.2-a+fp16"
+	    return 1
+	}
+    }
+
+    return 0;
+}
+
+proc check_effective_target_arm_v8_2a_fp16_scalar_ok { } {
+    return [check_cached_effective_target arm_v8_2a_fp16_scalar_ok \
+		check_effective_target_arm_v8_2a_fp16_scalar_ok_nocache]
+}
+
+# Return 1 if the target supports ARMv8.2 Adv.SIMD FP16 arithmetic
+# instructions, 0 otherwise.  The test is valid for ARM and for AArch64.
+# Record the command line options needed.
+
+proc check_effective_target_arm_v8_2a_fp16_neon_ok_nocache { } {
+    global et_arm_v8_2a_fp16_neon_flags
+    set et_arm_v8_2a_fp16_neon_flags ""
+
+    if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } {
+	return 0;
+    }
+
+    # Iterate through sets of options to find the compiler flags that
+    # need to be added to the -march option.
+ foreach flags {"" "-mfpu=neon-fp-armv8" "-mfloat-abi=softfp" \ + "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} { + if { [check_no_compiler_messages_nocache \ + arm_v8_2a_fp16_neon_ok object { + #if !defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #error "__ARM_FEATURE_FP16_VECTOR_ARITHMETIC not defined" + #endif + } "$flags -march=armv8.2-a+fp16"] } { + set et_arm_v8_2a_fp16_neon_flags "$flags -march=armv8.2-a+fp16" + return 1 + } + } + + return 0; +} + +proc check_effective_target_arm_v8_2a_fp16_neon_ok { } { + return [check_cached_effective_target arm_v8_2a_fp16_neon_ok \ + check_effective_target_arm_v8_2a_fp16_neon_ok_nocache] +} + # Return 1 if the target supports executing ARMv8 NEON instructions, 0 # otherwise. @@ -3586,6 +3738,81 @@ proc check_effective_target_arm_v8_1a_neon_hw { } { } [add_options_for_arm_v8_1a_neon ""]] } +# Return 1 if the target supports executing floating point instructions from +# ARMv8.2 with the FP16 extension, 0 otherwise. The test is valid for ARM and +# for AArch64. + +proc check_effective_target_arm_v8_2a_fp16_scalar_hw { } { + if { ![check_effective_target_arm_v8_2a_fp16_scalar_ok] } { + return 0; + } + return [check_runtime arm_v8_2a_fp16_scalar_hw_available { + int + main (void) + { + __fp16 a = 1.0; + __fp16 result; + + #ifdef __ARM_ARCH_ISA_A64 + + asm ("fabs %h0, %h1" + : "=w"(result) + : "w"(a) + : /* No clobbers. */); + + #else + + asm ("vabs.f16 %0, %1" + : "=w"(result) + : "w"(a) + : /* No clobbers. */); + + #endif + + return (result == 1.0) ? 0 : 1; + } + } [add_options_for_arm_v8_2a_fp16_scalar ""]] +} + +# Return 1 if the target supports executing Adv.SIMD instructions from ARMv8.2 +# with the FP16 extension, 0 otherwise. The test is valid for ARM and for +# AArch64. + +proc check_effective_target_arm_v8_2a_fp16_neon_hw { } { + if { ![check_effective_target_arm_v8_2a_fp16_neon_ok] } { + return 0; + } + return [check_runtime arm_v8_2a_fp16_neon_hw_available { + int + main (void) + { + #ifdef __ARM_ARCH_ISA_A64 + + __Float16x4_t a = {1.0, -1.0, 1.0, -1.0}; + __Float16x4_t result; + + asm ("fabs %0.4h, %1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers. */); + + #else + + __simd64_float16_t a = {1.0, -1.0, 1.0, -1.0}; + __simd64_float16_t result; + + asm ("vabs.f16 %P0, %P1" + : "=w"(result) + : "w"(a) + : /* No clobbers. */); + + #endif + + return (result[0] == 1.0) ? 0 : 1; + } + } [add_options_for_arm_v8_2a_fp16_neon ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { -- 2.7.4