From 8f3edb0732491c053d821445c6ce46b1ca52b1a6 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Mon, 10 Oct 2016 13:45:17 +0000 Subject: [PATCH] [2/4] ARMv8.2-A testsuite for new data movement intrinsics gcc/testsuite/ * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (FP16_SUPPORTED): Enable AArch64. * gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Add support for vdup*_laneq. * gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c: New. * gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c: New. * gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c: New. * gcc.target/aarch64/advsimd-intrinsics/vzip_half.c: New. From-SVN: r240922 --- gcc/testsuite/ChangeLog | 11 + .../aarch64/advsimd-intrinsics/arm-neon-ref.h | 16 +- .../aarch64/advsimd-intrinsics/vdup_lane.c | 119 +++++++++- .../aarch64/advsimd-intrinsics/vduph_lane.c | 137 +++++++++++ .../aarch64/advsimd-intrinsics/vtrn_half.c | 263 +++++++++++++++++++++ .../aarch64/advsimd-intrinsics/vuzp_half.c | 259 ++++++++++++++++++++ .../aarch64/advsimd-intrinsics/vzip_half.c | 263 +++++++++++++++++++++ 7 files changed, 1053 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c56f494a..faa53e3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2016-10-10 Jiong Wang + + * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (FP16_SUPPORTED): + Enable AArch64. + * gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Add support for + vdup*_laneq. + * gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c: New. + * gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c: New. + * gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c: New. + * gcc.target/aarch64/advsimd-intrinsics/vzip_half.c: New. + 2016-10-10 Matthew Wahab Jiong Wang diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h index 1297137..4621415 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h @@ -17,9 +17,8 @@ extern void *memcpy(void *, const void *, size_t); extern size_t strlen(const char *); /* Helper macro to select FP16 tests. */ -#if (!defined (__aarch64__) \ - && (defined (__ARM_FP16_FORMAT_IEEE) \ - || defined (__ARM_FP16_FORMAT_ALTERNATIVE))) +#if (defined (__ARM_FP16_FORMAT_IEEE) \ + || defined (__ARM_FP16_FORMAT_ALTERNATIVE)) #define FP16_SUPPORTED (1) #else #undef FP16_SUPPORTED @@ -520,17 +519,6 @@ static void clean_results (void) /* Helpers to initialize vectors. */ #define VDUP(VAR, Q, T1, T2, W, N, V) \ VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V) -#if (defined (__aarch64__) \ - && (defined (__ARM_FP16_FORMAT_IEEE) \ - || defined (__ARM_FP16_FORMAT_ALTERNATIVE))) -/* Work around that there is no vdup_n_f16 intrinsic. 
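   The deleted block below emulated vdup_n_f16 by spilling the scalar
   and reloading it with vld1_dup_f16, roughly (sketch):

     float16_t f = 2.5;
     float16x4_t v = vld1_dup_f16 (&f);    same effect as vdup_n_f16 (2.5)

   The emulation is removed here because, with the rest of this series
   in place, AArch64 provides vdup_n_f16 itself, which is also why
   FP16_SUPPORTED above no longer excludes __aarch64__.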
*/ -#define vdup_n_f16(VAL) \ - __extension__ \ - ({ \ - float16_t f = VAL; \ - vld1_dup_f16(&f); \ - }) -#endif #define VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \ VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c index c4b8f14..5d0dba3 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c @@ -56,7 +56,7 @@ VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80, VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; -#define TEST_MSG "VDUP_LANE/VDUP_LANEQ" +#define TEST_MSG "VDUP_LANE/VDUPQ_LANE" void exec_vdup_lane (void) { /* Basic test: vec1=vdup_lane(vec2, lane), then store the result. */ @@ -114,6 +114,123 @@ void exec_vdup_lane (void) #else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); #endif + +#if defined (__aarch64__) + +#undef TEST_MSG +#define TEST_MSG "VDUP_LANEQ/VDUPQ_LANEQ" + + /* Expected results for vdup*_laneq tests. */ +VECT_VAR_DECL(expected2,int,8,8) [] = { 0xfd, 0xfd, 0xfd, 0xfd, + 0xfd, 0xfd, 0xfd, 0xfd }; +VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, + 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xca80, 0xca80, + 0xca80, 0xca80 }; +#endif +VECT_VAR_DECL(expected2,int,8,16) [] = { 0xfb, 0xfb, 0xfb, 0xfb, + 0xfb, 0xfb, 0xfb, 0xfb, + 0xfb, 0xfb, 0xfb, 0xfb, + 0xfb, 0xfb, 0xfb, 0xfb }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff7, 0xfff7, 0xfff7, 0xfff7, + 0xfff7, 0xfff7, 0xfff7, 0xfff7 }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff1, 0xfffffff1, + 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, + 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, + 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5, + 0xf5, 0xf5, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, + 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xc880, 0xc880, + 0xc880, 0xc880, + 0xc880, 0xc880, + 0xc880, 0xc880 }; +#endif +VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, + 0xc1700000, 0xc1700000 }; + + /* Clean all results for vdup*_laneq tests. 
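   The vdup<q>_laneq_<t> forms take a 128-bit input, read the requested
   lane and replicate it across every lane of the result.  With the
   reference buffer { 0xf0, 0xf1, ..., 0xff }, for example (sketch):

     int8x16_t q = vld1q_s8 (buffer);
     int8x8_t  r = vdup_laneq_s8 (q, 13);    every lane of r is 0xfd

   which is the value recorded in the expected2 tables above.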
*/ + clean_results (); + /* Basic test: vec1=vdup_lane(vec2, lane), then store the result. */ +#define TEST_VDUP_LANEQ(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vdup##Q##_laneq_##T2##W(VECT_VAR(vector, T1, W, N2), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* Input vector can only have 64 bits. */ + DECL_VARIABLE_128BITS_VARIANTS(vector); + + clean_results (); + + TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose lane arbitrarily. */ + TEST_VDUP_LANEQ(, int, s, 8, 8, 16, 13); + TEST_VDUP_LANEQ(, int, s, 16, 4, 8, 2); + TEST_VDUP_LANEQ(, int, s, 32, 2, 4, 1); + TEST_VDUP_LANEQ(, int, s, 64, 1, 2, 0); + TEST_VDUP_LANEQ(, uint, u, 8, 8, 16, 15); + TEST_VDUP_LANEQ(, uint, u, 16, 4, 8, 3); + TEST_VDUP_LANEQ(, uint, u, 32, 2, 4, 1); + TEST_VDUP_LANEQ(, uint, u, 64, 1, 2, 0); + TEST_VDUP_LANEQ(, poly, p, 8, 8, 16, 7); + TEST_VDUP_LANEQ(, poly, p, 16, 4, 8, 3); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANEQ(, float, f, 16, 4, 8, 3); +#endif + TEST_VDUP_LANEQ(, float, f, 32, 2, 4, 1); + + TEST_VDUP_LANEQ(q, int, s, 8, 16, 16, 11); + TEST_VDUP_LANEQ(q, int, s, 16, 8, 8, 7); + TEST_VDUP_LANEQ(q, int, s, 32, 4, 4, 1); + TEST_VDUP_LANEQ(q, int, s, 64, 2, 2, 0); + TEST_VDUP_LANEQ(q, uint, u, 8, 16, 16, 5); + TEST_VDUP_LANEQ(q, uint, u, 16, 8, 8, 1); + TEST_VDUP_LANEQ(q, uint, u, 32, 4, 4, 0); + TEST_VDUP_LANEQ(q, uint, u, 64, 2, 2, 0); + TEST_VDUP_LANEQ(q, poly, p, 8, 16, 16, 5); + TEST_VDUP_LANEQ(q, poly, p, 16, 8, 8, 1); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANEQ(q, float, f, 16, 8, 8, 7); +#endif + TEST_VDUP_LANEQ(q, float, f, 32, 4, 4, 1); + + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); +#if defined (FP16_SUPPORTED) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); +#endif + +#endif /* __aarch64__. */ } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c new file mode 100644 index 0000000..c9d553a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c @@ -0,0 +1,137 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define A -16 +#define B -15 +#define C -14 +#define D -13 +#define E -12 +#define F -11 +#define G -10 +#define H -9 + +#define F16_C(a) ((__fp16) a) +#define AF F16_C (A) +#define BF F16_C (B) +#define CF F16_C (C) +#define DF F16_C (D) +#define EF F16_C (E) +#define FF F16_C (F) +#define GF F16_C (G) +#define HF F16_C (H) + +#define S16_C(a) ((int16_t) a) +#define AS S16_C (A) +#define BS S16_C (B) +#define CS S16_C (C) +#define DS S16_C (D) +#define ES S16_C (E) +#define FS S16_C (F) +#define GS S16_C (G) +#define HS S16_C (H) + +#define U16_C(a) ((int16_t) a) +#define AU U16_C (A) +#define BU U16_C (B) +#define CU U16_C (C) +#define DU U16_C (D) +#define EU U16_C (E) +#define FU U16_C (F) +#define GU U16_C (G) +#define HU U16_C (H) + +#define P16_C(a) ((poly16_t) a) +#define AP P16_C (A) +#define BP P16_C (B) +#define CP P16_C (C) +#define DP P16_C (D) +#define EP P16_C (E) +#define FP P16_C (F) +#define GP P16_C (G) +#define HP P16_C (H) + +/* Expected results for vduph_lane. 
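   vduph_lane_<t> returns the scalar held in one lane of a 64-bit
   vector; vduph_laneq_<t> does the same for a 128-bit vector.  The
   checks below boil down to (sketch):

     int16x4_t v = { AS, BS, CS, DS };
     int16_t   x = vduph_lane_s16 (v, 3);    x is DS

   hence expected_s16 is DS, and likewise for the other, arbitrarily
   chosen lane indices.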
*/ +float16_t expected_f16 = AF; +int16_t expected_s16 = DS; +uint16_t expected_u16 = BU; +poly16_t expected_p16 = CP; + +/* Expected results for vduph_laneq. */ +float16_t expected_q_f16 = EF; +int16_t expected_q_s16 = BS; +uint16_t expected_q_u16 = GU; +poly16_t expected_q_p16 = FP; + +void exec_vduph_lane_f16 (void) +{ + /* vduph_lane. */ + DECL_VARIABLE(vsrc, float, 16, 4); + DECL_VARIABLE(vsrc, int, 16, 4); + DECL_VARIABLE(vsrc, uint, 16, 4); + DECL_VARIABLE(vsrc, poly, 16, 4); + VECT_VAR_DECL (buf_src, float, 16, 4) [] = {AF, BF, CF, DF}; + VECT_VAR_DECL (buf_src, int, 16, 4) [] = {AS, BS, CS, DS}; + VECT_VAR_DECL (buf_src, uint, 16, 4) [] = {AU, BU, CU, DU}; + VECT_VAR_DECL (buf_src, poly, 16, 4) [] = {AP, BP, CP, DP}; + VLOAD (vsrc, buf_src, , int, s, 16, 4); + VLOAD (vsrc, buf_src, , float, f, 16, 4); + VLOAD (vsrc, buf_src, , uint, u, 16, 4); + VLOAD (vsrc, buf_src, , poly, p, 16, 4); + + float16_t res_f = vduph_lane_f16 (VECT_VAR (vsrc, float, 16, 4), 0); + if (* (unsigned short *) &res_f != * (unsigned short *) &expected_f16) + abort (); + + int16_t res_s = vduph_lane_s16 (VECT_VAR (vsrc, int, 16, 4), 3); + if (* (unsigned short *) &res_s != * (unsigned short *) &expected_s16) + abort (); + + uint16_t res_u = vduph_lane_u16 (VECT_VAR (vsrc, uint, 16, 4), 1); + if (* (unsigned short *) &res_u != * (unsigned short *) &expected_u16) + abort (); + + poly16_t res_p = vduph_lane_p16 (VECT_VAR (vsrc, poly, 16, 4), 2); + if (* (unsigned short *) &res_p != * (unsigned short *) &expected_p16) + abort (); + + /* vduph_laneq. */ + DECL_VARIABLE(vsrc, float, 16, 8); + DECL_VARIABLE(vsrc, int, 16, 8); + DECL_VARIABLE(vsrc, uint, 16, 8); + DECL_VARIABLE(vsrc, poly, 16, 8); + VECT_VAR_DECL (buf_src, float, 16, 8) [] = {AF, BF, CF, DF, EF, FF, GF, HF}; + VECT_VAR_DECL (buf_src, int, 16, 8) [] = {AS, BS, CS, DS, ES, FS, GS, HS}; + VECT_VAR_DECL (buf_src, uint, 16, 8) [] = {AU, BU, CU, DU, EU, FU, GU, HU}; + VECT_VAR_DECL (buf_src, poly, 16, 8) [] = {AP, BP, CP, DP, EP, FP, GP, HP}; + VLOAD (vsrc, buf_src, q, int, s, 16, 8); + VLOAD (vsrc, buf_src, q, float, f, 16, 8); + VLOAD (vsrc, buf_src, q, uint, u, 16, 8); + VLOAD (vsrc, buf_src, q, poly, p, 16, 8); + + res_f = vduph_laneq_f16 (VECT_VAR (vsrc, float, 16, 8), 4); + if (* (unsigned short *) &res_f != * (unsigned short *) &expected_q_f16) + abort (); + + res_s = vduph_laneq_s16 (VECT_VAR (vsrc, int, 16, 8), 1); + if (* (unsigned short *) &res_s != * (unsigned short *) &expected_q_s16) + abort (); + + res_u = vduph_laneq_u16 (VECT_VAR (vsrc, uint, 16, 8), 6); + if (* (unsigned short *) &res_u != * (unsigned short *) &expected_q_u16) + abort (); + + res_p = vduph_laneq_p16 (VECT_VAR (vsrc, poly, 16, 8), 5); + if (* (unsigned short *) &res_p != * (unsigned short *) &expected_q_p16) + abort (); +} + +int +main (void) +{ + exec_vduph_lane_f16 (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c new file mode 100644 index 0000000..63f820f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c @@ -0,0 +1,263 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. 
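   vtrn1 interleaves the even-numbered lanes of its two inputs, vtrn2
   the odd-numbered ones.  Here vector holds { 0xf0, 0xf1, ... } and
   vector2 is filled with a per-type constant, so for int8x8 (sketch):

     int8x8_t r = vtrn1_s8 (vector, vector2);
     r is { 0xf0, 0x11, 0xf2, 0x11, 0xf4, 0x11, 0xf6, 0x11 }

   which is the first row below.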
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x11, 0xf2, 0x11, + 0xf4, 0x11, 0xf6, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0x22, 0xfff2, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0x55, 0xf2, 0x55, + 0xf4, 0x55, 0xf6, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0x66, 0xfff2, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0x55, 0xf2, 0x55, + 0xf4, 0x55, 0xf6, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0x66, 0xfff2, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0x4b4d, + 0xcb00, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0x11, 0xf2, 0x11, + 0xf4, 0x11, 0xf6, 0x11, + 0xf8, 0x11, 0xfa, 0x11, + 0xfc, 0x11, 0xfe, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0x22, 0xfff2, 0x22, + 0xfff4, 0x22, 0xfff6, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0x33, + 0xfffffff2, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, + 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0x55, 0xf2, 0x55, + 0xf4, 0x55, 0xf6, 0x55, + 0xf8, 0x55, 0xfa, 0x55, + 0xfc, 0x55, 0xfe, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0x66, 0xfff2, 0x66, + 0xfff4, 0x66, 0xfff6, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0x77, + 0xfffffff2, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, + 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0x55, 0xf2, 0x55, + 0xf4, 0x55, 0xf6, 0x55, + 0xf8, 0x55, 0xfa, 0x55, + 0xfc, 0x55, 0xfe, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0x66, 0xfff2, 0x66, + 0xfff4, 0x66, 0xfff6, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0x4b4d, + 0xcb00, 0x4b4d, + 0xca00, 0x4b4d, + 0xc900, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0x42073333, + 0xc1600000, 0x42073333 }; + +#define TEST_MSG "VTRN1" +void exec_vtrn_half (void) +{ +#define TEST_VTRN(PART, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vtrn##PART##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VTRN1(Q, T1, T2, W, N) TEST_VTRN(1, Q, T1, T2, W, N) + + /* Input vector can only have 64 bits. */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE(vector, float, 64, 2); + DECL_VARIABLE(vector2, float, 64, 2); + + DECL_VARIABLE_ALL_VARIANTS(vector_res); + DECL_VARIABLE(vector_res, float, 64, 2); + + clean_results (); + /* We don't have vtrn1_T64x1, so set expected to the clean value. */ + CLEAN(expected, int, 64, 1); + CLEAN(expected, uint, 64, 1); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + VLOAD(vector, buffer, q, float, f, 64, 2); + + /* Choose arbitrary initialization values. 
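   For the FP16 variants the constant is 14.6f.  Its half-precision
   encoding can be checked by hand: 14.6 = 1.825 * 2^3, so the biased
   exponent is 3 + 15 = 18 (binary 10010) and the mantissa field is
   round (0.825 * 1024) = 845 (binary 1101001101), giving the 0x4b4d
   quoted in the expected tables.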
*/ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , poly, p, 8, 8, 0x55); + VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif + VDUP(vector2, , float, f, 32, 2, 33.6f); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + VDUP(vector2, q, poly, p, 8, 16, 0x55); + VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif + VDUP(vector2, q, float, f, 32, 4, 33.8f); + VDUP(vector2, q, float, f, 64, 2, 33.8f); + + TEST_VTRN1(, int, s, 8, 8); + TEST_VTRN1(, int, s, 16, 4); + TEST_VTRN1(, int, s, 32, 2); + TEST_VTRN1(, uint, u, 8, 8); + TEST_VTRN1(, uint, u, 16, 4); + TEST_VTRN1(, uint, u, 32, 2); + TEST_VTRN1(, poly, p, 8, 8); + TEST_VTRN1(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VTRN1(, float, f, 16, 4); +#endif + TEST_VTRN1(, float, f, 32, 2); + + TEST_VTRN1(q, int, s, 8, 16); + TEST_VTRN1(q, int, s, 16, 8); + TEST_VTRN1(q, int, s, 32, 4); + TEST_VTRN1(q, int, s, 64, 2); + TEST_VTRN1(q, uint, u, 8, 16); + TEST_VTRN1(q, uint, u, 16, 8); + TEST_VTRN1(q, uint, u, 32, 4); + TEST_VTRN1(q, uint, u, 64, 2); + TEST_VTRN1(q, poly, p, 8, 16); + TEST_VTRN1(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VTRN1(q, float, f, 16, 8); +#endif + TEST_VTRN1(q, float, f, 32, 4); + TEST_VTRN1(q, float, f, 64, 2); + +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else + CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif + +#undef TEST_MSG +#define TEST_MSG "VTRN2" + +#define TEST_VTRN2(Q, T1, T2, W, N) TEST_VTRN(2, Q, T1, T2, W, N) + +/* Expected results. 
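   Same layout as the VTRN1 tables, but starting from the odd-numbered
   input lanes (sketch):

     int8x8_t r = vtrn2_s8 (vector, vector2);
     r is { 0xf1, 0x11, 0xf3, 0x11, 0xf5, 0x11, 0xf7, 0x11 }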
*/ +VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf1, 0x11, 0xf3, 0x11, + 0xf5, 0x11, 0xf7, 0x11 }; +VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff1, 0x22, 0xfff3, 0x22 }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf1, 0x55, 0xf3, 0x55, + 0xf5, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff1, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf1, 0x55, 0xf3, 0x55, + 0xf5, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff1, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb80, 0x4b4d, + 0xca80, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf1, 0x11, 0xf3, 0x11, + 0xf5, 0x11, 0xf7, 0x11, + 0xf9, 0x11, 0xfb, 0x11, + 0xfd, 0x11, 0xff, 0x11 }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff1, 0x22, 0xfff3, 0x22, + 0xfff5, 0x22, 0xfff7, 0x22 }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff1, 0x33, + 0xfffffff3, 0x33 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff1, + 0x44 }; +VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf1, 0x55, 0xf3, 0x55, + 0xf5, 0x55, 0xf7, 0x55, + 0xf9, 0x55, 0xfb, 0x55, + 0xfd, 0x55, 0xff, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff1, 0x66, 0xfff3, 0x66, + 0xfff5, 0x66, 0xfff7, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff1, 0x77, + 0xfffffff3, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff1, + 0x88 }; +VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf1, 0x55, 0xf3, 0x55, + 0xf5, 0x55, 0xf7, 0x55, + 0xf9, 0x55, 0xfb, 0x55, + 0xfd, 0x55, 0xff, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0x66, 0xfff3, 0x66, + 0xfff5, 0x66, 0xfff7, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb80, 0x4b4d, + 0xca80, 0x4b4d, + 0xc980, 0x4b4d, + 0xc880, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0x42073333, + 0xc1500000, 0x42073333 }; + clean_results (); + CLEAN(expected2, int, 64, 1); + CLEAN(expected2, uint, 64, 1); + + TEST_VTRN2(, int, s, 8, 8); + TEST_VTRN2(, int, s, 16, 4); + TEST_VTRN2(, int, s, 32, 2); + TEST_VTRN2(, uint, u, 8, 8); + TEST_VTRN2(, uint, u, 16, 4); + TEST_VTRN2(, uint, u, 32, 2); + TEST_VTRN2(, poly, p, 8, 8); + TEST_VTRN2(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VTRN2(, float, f, 16, 4); +#endif + TEST_VTRN2(, float, f, 32, 2); + + TEST_VTRN2(q, int, s, 8, 16); + TEST_VTRN2(q, int, s, 16, 8); + TEST_VTRN2(q, int, s, 32, 4); + TEST_VTRN2(q, int, s, 64, 2); + TEST_VTRN2(q, uint, u, 8, 16); + TEST_VTRN2(q, uint, u, 16, 8); + TEST_VTRN2(q, uint, u, 32, 4); + TEST_VTRN2(q, uint, u, 64, 2); + TEST_VTRN2(q, poly, p, 8, 16); + TEST_VTRN2(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VTRN2(q, float, f, 16, 8); +#endif + TEST_VTRN2(q, float, f, 32, 4); + TEST_VTRN2(q, float, f, 64, 2); + + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); +#if defined (FP16_SUPPORTED) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); +#endif +} + +int main (void) +{ + exec_vtrn_half (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c new file mode 100644 index 0000000..8706f24 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c @@ -0,0 +1,259 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff2, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, + 0x4b4d, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0xf8, 0xfa, 0xfc, 0xfe, + 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, + 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff2, + 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, + 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0xf8, 0xfa, 0xfc, 0xfe, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, + 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff2, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, + 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0xf8, 0xfa, 0xfc, 0xfe, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, + 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb00, 0xca00, 0xc900, + 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1600000, + 0x42073333, 0x42073333 }; + +#define TEST_MSG "VUZP1" +void exec_vuzp_half (void) +{ +#define TEST_VUZP(PART, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vuzp##PART##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VUZP1(Q, T1, T2, W, N) TEST_VUZP(1, Q, T1, T2, W, N) + + /* Input vector can only have 64 bits. */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE(vector, float, 64, 2); + DECL_VARIABLE(vector2, float, 64, 2); + + DECL_VARIABLE_ALL_VARIANTS(vector_res); + DECL_VARIABLE(vector_res, float, 64, 2); + + clean_results (); + /* We don't have vuzp1_T64x1, so set expected to the clean value. 
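   For the variants that do exist, vuzp1 concatenates the even-numbered
   lanes of both inputs and vuzp2 the odd-numbered ones, so for int8x8
   (sketch):

     int8x8_t r = vuzp1_s8 (vector, vector2);
     r is { 0xf0, 0xf2, 0xf4, 0xf6, 0x11, 0x11, 0x11, 0x11 }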
*/ + CLEAN(expected, int, 64, 1); + CLEAN(expected, uint, 64, 1); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + VLOAD(vector, buffer, q, float, f, 64, 2); + + /* Choose arbitrary initialization values. */ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , poly, p, 8, 8, 0x55); + VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif + VDUP(vector2, , float, f, 32, 2, 33.6f); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + VDUP(vector2, q, poly, p, 8, 16, 0x55); + VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif + VDUP(vector2, q, float, f, 32, 4, 33.8f); + VDUP(vector2, q, float, f, 64, 2, 33.8f); + + TEST_VUZP1(, int, s, 8, 8); + TEST_VUZP1(, int, s, 16, 4); + TEST_VUZP1(, int, s, 32, 2); + TEST_VUZP1(, uint, u, 8, 8); + TEST_VUZP1(, uint, u, 16, 4); + TEST_VUZP1(, uint, u, 32, 2); + TEST_VUZP1(, poly, p, 8, 8); + TEST_VUZP1(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VUZP1(, float, f, 16, 4); +#endif + TEST_VUZP1(, float, f, 32, 2); + + TEST_VUZP1(q, int, s, 8, 16); + TEST_VUZP1(q, int, s, 16, 8); + TEST_VUZP1(q, int, s, 32, 4); + TEST_VUZP1(q, int, s, 64, 2); + TEST_VUZP1(q, uint, u, 8, 16); + TEST_VUZP1(q, uint, u, 16, 8); + TEST_VUZP1(q, uint, u, 32, 4); + TEST_VUZP1(q, uint, u, 64, 2); + TEST_VUZP1(q, poly, p, 8, 16); + TEST_VUZP1(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VUZP1(q, float, f, 16, 8); +#endif + TEST_VUZP1(q, float, f, 32, 4); + TEST_VUZP1(q, float, f, 64, 2); + +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else + CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif + +#undef TEST_MSG +#define TEST_MSG "VUZP2" + +#define TEST_VUZP2(Q, T1, T2, W, N) TEST_VUZP(2, Q, T1, T2, W, N) + +/* Expected results. 
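   As for VUZP1, but built from the odd-numbered lanes (sketch):

     int8x8_t r = vuzp2_s8 (vector, vector2);
     r is { 0xf1, 0xf3, 0xf5, 0xf7, 0x11, 0x11, 0x11, 0x11 }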
*/ +VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff1, 0xfff3, 0x22, 0x22 }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb80, 0xca80, + 0x4b4d, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0xf9, 0xfb, 0xfd, 0xff, + 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, + 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff1, 0xfffffff3, + 0x33, 0x33 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff1, + 0x44 }; +VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0xf9, 0xfb, 0xfd, 0xff, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, + 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff1, 0xfffffff3, 0x77, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff1, + 0x88 }; +VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0xf9, 0xfb, 0xfd, 0xff, + 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, + 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb80, 0xca80, 0xc980, 0xc880, + 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d + }; +#endif +VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1500000, + 0x42073333, 0x42073333 }; + + clean_results (); + CLEAN(expected2, int, 64, 1); + CLEAN(expected2, uint, 64, 1); + + TEST_VUZP2(, int, s, 8, 8); + TEST_VUZP2(, int, s, 16, 4); + TEST_VUZP2(, int, s, 32, 2); + TEST_VUZP2(, uint, u, 8, 8); + TEST_VUZP2(, uint, u, 16, 4); + TEST_VUZP2(, uint, u, 32, 2); + TEST_VUZP2(, poly, p, 8, 8); + TEST_VUZP2(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VUZP2(, float, f, 16, 4); +#endif + TEST_VUZP2(, float, f, 32, 2); + + TEST_VUZP2(q, int, s, 8, 16); + TEST_VUZP2(q, int, s, 16, 8); + TEST_VUZP2(q, int, s, 32, 4); + TEST_VUZP2(q, int, s, 64, 2); + TEST_VUZP2(q, uint, u, 8, 16); + TEST_VUZP2(q, uint, u, 16, 8); + TEST_VUZP2(q, uint, u, 32, 4); + TEST_VUZP2(q, uint, u, 64, 2); + TEST_VUZP2(q, poly, p, 8, 16); + TEST_VUZP2(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VUZP2(q, float, f, 16, 8); +#endif + TEST_VUZP2(q, float, f, 32, 4); + TEST_VUZP2(q, float, f, 64, 2); + + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); +#if defined (FP16_SUPPORTED) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); +#endif +} + +int main (void) +{ + exec_vuzp_half (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c new file mode 100644 index 0000000..619d6b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c @@ -0,0 +1,263 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x11, 0xf1, 0x11, + 0xf2, 0x11, 0xf3, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0x22, 0xfff1, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0x55, 0xf1, 0x55, + 0xf2, 0x55, 0xf3, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0x66, 0xfff1, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0x55, 0xf1, 0x55, + 0xf2, 0x55, 0xf3, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0x66, 0xfff1, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0x4b4d, + 0xcb80, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0x11, 0xf1, 0x11, + 0xf2, 0x11, 0xf3, 0x11, + 0xf4, 0x11, 0xf5, 0x11, + 0xf6, 0x11, 0xf7, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0x22, 0xfff1, 0x22, + 0xfff2, 0x22, 0xfff3, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0x33, + 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, + 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0x55, 0xf1, 0x55, + 0xf2, 0x55, 0xf3, 0x55, + 0xf4, 0x55, 0xf5, 0x55, + 0xf6, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0x66, 0xfff1, 0x66, + 0xfff2, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0x77, + 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, + 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0x55, 0xf1, 0x55, + 0xf2, 0x55, 0xf3, 0x55, + 0xf4, 0x55, 0xf5, 0x55, + 0xf6, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0x66, 0xfff1, 0x66, + 0xfff2, 0x66, 0xfff3, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0x4b4d, + 0xcb80, 0x4b4d, + 0xcb00, 0x4b4d, + 0xca80, 0x4b4d }; +#endif +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0x42073333, + 0xc1700000, 0x42073333 }; + +#define TEST_MSG "VZIP1" +void exec_vzip_half (void) +{ +#define TEST_VZIP(PART, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vzip##PART##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VZIP1(Q, T1, T2, W, N) TEST_VZIP(1, Q, T1, T2, W, N) + + /* Input vector can only have 64 bits. */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE(vector, float, 64, 2); + DECL_VARIABLE(vector2, float, 64, 2); + + DECL_VARIABLE_ALL_VARIANTS(vector_res); + DECL_VARIABLE(vector_res, float, 64, 2); + + clean_results (); + /* We don't have vzip1_T64x1, so set expected to the clean value. 
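   For the variants that do exist, vzip1 interleaves the low halves of
   its two inputs and vzip2 the high halves, so for int8x8 (sketch):

     int8x8_t r = vzip1_s8 (vector, vector2);
     r is { 0xf0, 0x11, 0xf1, 0x11, 0xf2, 0x11, 0xf3, 0x11 }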
*/ + CLEAN(expected, int, 64, 1); + CLEAN(expected, uint, 64, 1); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + VLOAD(vector, buffer, q, float, f, 64, 2); + + /* Choose arbitrary initialization values. */ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , poly, p, 8, 8, 0x55); + VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif + VDUP(vector2, , float, f, 32, 2, 33.6f); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + VDUP(vector2, q, poly, p, 8, 16, 0x55); + VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif + VDUP(vector2, q, float, f, 32, 4, 33.8f); + VDUP(vector2, q, float, f, 64, 2, 33.8f); + + TEST_VZIP1(, int, s, 8, 8); + TEST_VZIP1(, int, s, 16, 4); + TEST_VZIP1(, int, s, 32, 2); + TEST_VZIP1(, uint, u, 8, 8); + TEST_VZIP1(, uint, u, 16, 4); + TEST_VZIP1(, uint, u, 32, 2); + TEST_VZIP1(, poly, p, 8, 8); + TEST_VZIP1(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VZIP1(, float, f, 16, 4); +#endif + TEST_VZIP1(, float, f, 32, 2); + + TEST_VZIP1(q, int, s, 8, 16); + TEST_VZIP1(q, int, s, 16, 8); + TEST_VZIP1(q, int, s, 32, 4); + TEST_VZIP1(q, int, s, 64, 2); + TEST_VZIP1(q, uint, u, 8, 16); + TEST_VZIP1(q, uint, u, 16, 8); + TEST_VZIP1(q, uint, u, 32, 4); + TEST_VZIP1(q, uint, u, 64, 2); + TEST_VZIP1(q, poly, p, 8, 16); + TEST_VZIP1(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VZIP1(q, float, f, 16, 8); +#endif + TEST_VZIP1(q, float, f, 32, 4); + TEST_VZIP1(q, float, f, 64, 2); + +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else + CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif + +#undef TEST_MSG +#define TEST_MSG "VZIP2" + +#define TEST_VZIP2(Q, T1, T2, W, N) TEST_VZIP(2, Q, T1, T2, W, N) + +/* Expected results. 
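   As for VZIP1, but built from the high halves of the inputs (sketch):

     int8x8_t r = vzip2_s8 (vector, vector2);
     r is { 0xf4, 0x11, 0xf5, 0x11, 0xf6, 0x11, 0xf7, 0x11 }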
*/ +VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf4, 0x11, 0xf5, 0x11, + 0xf6, 0x11, 0xf7, 0x11 }; +VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0x22, 0xfff3, 0x22 }; +VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf4, 0x55, 0xf5, 0x55, + 0xf6, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff2, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf4, 0x55, 0xf5, 0x55, + 0xf6, 0x55, 0xf7, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0x66, 0xfff3, 0x66 }; +VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0x4b4d, + 0xca80, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf8, 0x11, 0xf9, 0x11, + 0xfa, 0x11, 0xfb, 0x11, + 0xfc, 0x11, 0xfd, 0x11, + 0xfe, 0x11, 0xff, 0x11 }; +VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff4, 0x22, 0xfff5, 0x22, + 0xfff6, 0x22, 0xfff7, 0x22 }; +VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff2, 0x33, + 0xfffffff3, 0x33 }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff1, + 0x44 }; +VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf8, 0x55, 0xf9, 0x55, + 0xfa, 0x55, 0xfb, 0x55, + 0xfc, 0x55, 0xfd, 0x55, + 0xfe, 0x55, 0xff, 0x55 }; +VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff4, 0x66, 0xfff5, 0x66, + 0xfff6, 0x66, 0xfff7, 0x66 }; +VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff2, 0x77, + 0xfffffff3, 0x77 }; +VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff1, + 0x88 }; +VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf8, 0x55, 0xf9, 0x55, + 0xfa, 0x55, 0xfb, 0x55, + 0xfc, 0x55, 0xfd, 0x55, + 0xfe, 0x55, 0xff, 0x55 }; +VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff4, 0x66, 0xfff5, 0x66, + 0xfff6, 0x66, 0xfff7, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xca00, 0x4b4d, + 0xc980, 0x4b4d, + 0xc900, 0x4b4d, + 0xc880, 0x4b4d }; +#endif +VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0x42073333, + 0xc1500000, 0x42073333 }; + clean_results (); + CLEAN(expected2, int, 64, 1); + CLEAN(expected2, uint, 64, 1); + + TEST_VZIP2(, int, s, 8, 8); + TEST_VZIP2(, int, s, 16, 4); + TEST_VZIP2(, int, s, 32, 2); + TEST_VZIP2(, uint, u, 8, 8); + TEST_VZIP2(, uint, u, 16, 4); + TEST_VZIP2(, uint, u, 32, 2); + TEST_VZIP2(, poly, p, 8, 8); + TEST_VZIP2(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VZIP2(, float, f, 16, 4); +#endif + TEST_VZIP2(, float, f, 32, 2); + + TEST_VZIP2(q, int, s, 8, 16); + TEST_VZIP2(q, int, s, 16, 8); + TEST_VZIP2(q, int, s, 32, 4); + TEST_VZIP2(q, int, s, 64, 2); + TEST_VZIP2(q, uint, u, 8, 16); + TEST_VZIP2(q, uint, u, 16, 8); + TEST_VZIP2(q, uint, u, 32, 4); + TEST_VZIP2(q, uint, u, 64, 2); + TEST_VZIP2(q, poly, p, 8, 16); + TEST_VZIP2(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VZIP2(q, float, f, 16, 8); +#endif + TEST_VZIP2(q, float, f, 32, 4); + TEST_VZIP2(q, float, f, 64, 2); + + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); +#if defined (FP16_SUPPORTED) + CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); +#endif +} + +int main (void) +{ + exec_vzip_half (); + return 0; +} -- 2.7.4