From a0a31ab837c92fef41ce7bfec59c579d232da7fa Mon Sep 17 00:00:00 2001 From: yroux Date: Thu, 4 Dec 2014 12:54:40 +0000 Subject: [PATCH] gcc/ 2014-12-04 Yvan Roux Backport from trunk r214952. 2014-09-05 Alan Lawrence * config/aarch64/arm_neon.h (__GET_HIGH): New macro. (vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16, vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64, vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64): Remove temporary __asm__ and reimplement. gcc/testsuite 2014-12-04 Yvan Roux Backport from trunk r214950. 2014-09-05 Alan Lawrence * gcc.target/aarch64/vget_high_1.c: New test. * gcc.target/aarch64/vget_low_1.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@218356 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.linaro | 11 ++ gcc/config/aarch64/arm_neon.h | 211 +++++++++---------------- gcc/testsuite/ChangeLog.linaro | 8 + gcc/testsuite/gcc.target/aarch64/vget_high_1.c | 60 +++++++ gcc/testsuite/gcc.target/aarch64/vget_low_1.c | 60 +++++++ 5 files changed, 218 insertions(+), 132 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_low_1.c diff --git a/gcc/ChangeLog.linaro b/gcc/ChangeLog.linaro index a19bb0f..b1e2d1a 100644 --- a/gcc/ChangeLog.linaro +++ b/gcc/ChangeLog.linaro @@ -1,5 +1,16 @@ 2014-12-04 Yvan Roux + Backport from trunk r214952. + 2014-09-05 Alan Lawrence + + * config/aarch64/arm_neon.h (__GET_HIGH): New macro. + (vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16, + vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64, + vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64): + Remove temporary __asm__ and reimplement. + +2014-12-04 Yvan Roux + Backport from trunk r214948, r214949. 2014-09-05 Alan Lawrence diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index fbd7713..1cb08f3 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -4297,6 +4297,85 @@ vget_low_u64 (uint64x2_t __a) #undef __GET_LOW +#define __GET_HIGH(__TYPE) \ + uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ + uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \ + return vreinterpret_##__TYPE##_u64 (hi); + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_high_f32 (float32x4_t __a) +{ + __GET_HIGH (f32); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_high_f64 (float64x2_t __a) +{ + __GET_HIGH (f64); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_high_p8 (poly8x16_t __a) +{ + __GET_HIGH (p8); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_high_p16 (poly16x8_t __a) +{ + __GET_HIGH (p16); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_high_s8 (int8x16_t __a) +{ + __GET_HIGH (s8); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_high_s16 (int16x8_t __a) +{ + __GET_HIGH (s16); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_high_s32 (int32x4_t __a) +{ + __GET_HIGH (s32); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_high_s64 (int64x2_t __a) +{ + __GET_HIGH (s64); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_high_u8 (uint8x16_t __a) +{ + __GET_HIGH (u8); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_high_u16 (uint16x8_t __a) +{ + __GET_HIGH (u16); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_high_u32 (uint32x4_t __a) +{ + __GET_HIGH (u32); +} + +#undef __GET_HIGH + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_high_u64 (uint64x2_t __a) +{ + return vcreate_u64 (vgetq_lane_u64 (__a, 1)); +} + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { @@ -5740,138 +5819,6 @@ vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vget_high_f32 (float32x4_t a) -{ - float32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vget_high_f64 (float64x2_t a) -{ - float64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vget_high_p8 (poly8x16_t a) -{ - poly8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vget_high_p16 (poly16x8_t a) -{ - poly16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vget_high_s8 (int8x16_t a) -{ - int8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vget_high_s16 (int16x8_t a) -{ - int16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vget_high_s32 (int32x4_t a) -{ - int32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vget_high_s64 (int64x2_t a) -{ - int64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vget_high_u8 (uint8x16_t a) -{ - uint8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vget_high_u16 (uint16x8_t a) -{ - uint16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vget_high_u32 (uint32x4_t a) -{ - uint32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vget_high_u64 (uint64x2_t a) -{ - uint64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vhsub_s8 (int8x8_t a, int8x8_t b) { diff --git a/gcc/testsuite/ChangeLog.linaro b/gcc/testsuite/ChangeLog.linaro index 9738183..d81a4e4 100644 --- a/gcc/testsuite/ChangeLog.linaro +++ b/gcc/testsuite/ChangeLog.linaro @@ -1,5 +1,13 @@ 2014-12-04 Yvan Roux + Backport from trunk r214950. + 2014-09-05 Alan Lawrence + + * gcc.target/aarch64/vget_high_1.c: New test. + * gcc.target/aarch64/vget_low_1.c: Likewise. + +2014-12-04 Yvan Roux + Backport from trunk r214948. 2014-09-05 Alan Lawrence diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_1.c b/gcc/testsuite/gcc.target/aarch64/vget_high_1.c new file mode 100644 index 0000000..4cb872d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_1.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -std=c99" } */ + +#include + +extern void abort (void); + +#define VARIANTS(VARIANT) \ +VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8) \ +VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16) \ +VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32) \ +VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64) \ +VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \ +VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \ +VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \ +VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \ +VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \ +VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64) + + +#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ +int \ +test_vget_low_ ##SUFFIX (BASETYPE *data) \ +{ \ + BASETYPE temp [NUM64]; \ + TYPE128 vec = vld1q_##SUFFIX (data); \ + TYPE64 high = vget_high_##SUFFIX (vec); \ + vst1_##SUFFIX (temp, high); \ + for (int i = 0; i < NUM64; i++) \ + if (temp[i] != data[i + NUM64]) \ + return 1; \ + return 0; \ +} + +VARIANTS (TESTMETH) + +#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ + if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0) \ + abort (); + +int +main (int argc, char **argv) +{ + uint8_t uint8_t_data[16] = + { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 }; + uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 }; + uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 }; + uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL }; + int8_t int8_t_data[16] = + { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 }; + int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000}; + int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 }; + int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; + float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; + float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 }; + + VARIANTS (CHECK); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/vget_low_1.c b/gcc/testsuite/gcc.target/aarch64/vget_low_1.c new file mode 100644 index 0000000..f8016ef --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vget_low_1.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -std=c99" } */ + +#include + +extern void abort (void); + +#define VARIANTS(VARIANT) \ +VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8) \ +VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16) \ +VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32) \ +VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64) \ +VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \ +VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \ +VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \ +VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \ +VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \ +VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64) + + +#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ +int \ +test_vget_low_ ##SUFFIX (BASETYPE *data) \ +{ \ + BASETYPE temp [NUM64]; \ + TYPE128 vec = vld1q_##SUFFIX (data); \ + TYPE64 low = vget_low_##SUFFIX (vec); \ + vst1_##SUFFIX (temp, low); \ + for (int i = 0; i < NUM64; i++) \ + if (temp[i] != data[i]) \ + return 1; \ + return 0; \ +} + +VARIANTS (TESTMETH) + +#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ + if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0) \ + abort (); + +int +main (int argc, char **argv) +{ + uint8_t uint8_t_data[16] = + { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 }; + uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 }; + uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 }; + uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL }; + int8_t int8_t_data[16] = + { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 }; + int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000}; + int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 }; + int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; + float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; + float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 }; + + VARIANTS (CHECK); + + return 0; +} -- 2.7.4