From 0bd30463a21bf84cd104740939b321cbc2f5ef30 Mon Sep 17 00:00:00 2001 From: jgreenhalgh Date: Thu, 31 Jul 2014 16:45:38 +0000 Subject: [PATCH] [AArch64] arm_neon.h - add vpaddd_f64, vpaddd_s64, vpaddd_u64 intrinsics gcc/ * config/aarch64/arm_neon.h (vpadd_<8,16,32,64>): Move to correct alphabetical position. (vpaddd_f64): Rewrite using builtins. (vpaddd_s64): Move to correct alphabetical position. (vpaddd_u64): New. gcc/testsuite/ * gcc.target/aarch64/scalar_intrinsics.c (test_vpaddd_f64): New. (test_vpaddd_s64): Likewise. (test_vpaddd_s64): Likewise. * gcc.target/aarch64/simd/vpaddd_f64: New. * gcc.target/aarch64/simd/vpaddd_s64: New. * gcc.target/aarch64/simd/vpaddd_u64: New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@213382 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 8 ++ gcc/config/aarch64/arm_neon.h | 115 +++++++++++---------- gcc/testsuite/ChangeLog | 9 ++ .../gcc.target/aarch64/scalar_intrinsics.c | 17 ++- gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c | 27 +++++ gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c | 27 +++++ gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c | 27 +++++ 7 files changed, 173 insertions(+), 57 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 27ec98e..706fedc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2014-07-31 James Greenhalgh + + * config/aarch64/arm_neon.h (vpadd_<8,16,32,64>): Move to + correct alphabetical position. + (vpaddd_f64): Rewrite using builtins. + (vpaddd_s64): Move to correct alphabetical position. + (vpaddd_u64): New. + 2014-07-31 Oleg Endo PR target/61844 diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 66968e8..7e6aba7 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -9233,56 +9233,6 @@ vpadd_f32 (float32x2_t a, float32x2_t b) return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vpadd_s8 (int8x8_t __a, int8x8_t __b) -{ - return __builtin_aarch64_addpv8qi (__a, __b); -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vpadd_s16 (int16x4_t __a, int16x4_t __b) -{ - return __builtin_aarch64_addpv4hi (__a, __b); -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vpadd_s32 (int32x2_t __a, int32x2_t __b) -{ - return __builtin_aarch64_addpv2si (__a, __b); -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vpadd_u8 (uint8x8_t __a, uint8x8_t __b) -{ - return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, - (int8x8_t) __b); -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vpadd_u16 (uint16x4_t __a, uint16x4_t __b) -{ - return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, - (int16x4_t) __b); -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vpadd_u32 (uint32x2_t __a, uint32x2_t __b) -{ - return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, - (int32x2_t) __b); -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpaddd_f64 (float64x2_t a) -{ - float64_t result; - __asm__ ("faddp %d0,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vpaddl_s8 (int8x8_t a) { @@ -12563,12 +12513,6 @@ vaddlv_u32 (uint32x2_t a) return result; } -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vpaddd_s64 (int64x2_t __a) -{ - return __builtin_aarch64_addpdi (__a); -} - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) { @@ -19230,6 +19174,65 @@ vnegq_s64 (int64x2_t __a) return -__a; } +/* vpadd */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_addpv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_addpv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_addpv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpaddd_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_splus_v2df (__a), 0); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vpaddd_s64 (int64x2_t __a) +{ + return __builtin_aarch64_addpdi (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vpaddd_u64 (uint64x2_t __a) +{ + return __builtin_aarch64_addpdi ((int64x2_t) __a); +} + /* vqabs */ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1932ab1..a0c768f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2014-07-31 James Greenhalgh + + * gcc.target/aarch64/scalar_intrinsics.c (test_vpaddd_f64): New. + (test_vpaddd_s64): Likewise. + (test_vpaddd_s64): Likewise. + * gcc.target/aarch64/simd/vpaddd_f64: New. + * gcc.target/aarch64/simd/vpaddd_s64: New. + * gcc.target/aarch64/simd/vpaddd_u64: New. + 2014-07-31 Charles Baylis PR target/61948 diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c index 624348e..0e288f2 100644 --- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c @@ -293,13 +293,28 @@ test_vtstd_u64 (uint64_t a, uint64_t b) return res; } -/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */ +/* { dg-final { scan-assembler-times "\\tfaddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */ +float64_t +test_vpaddd_f64 (float64x2_t a) +{ + return vpaddd_f64 (a); +} + +/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 2 } } */ + +int64_t test_vpaddd_s64 (int64x2_t a) { return vpaddd_s64 (a); } +uint64_t +test_vpaddd_u64 (uint64x2_t a) +{ + return vpaddd_u64 (a); +} + /* { dg-final { scan-assembler-times "\\tuqadd\\td\[0-9\]+" 1 } } */ uint64_t diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c b/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c new file mode 100644 index 0000000..041da8e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c @@ -0,0 +1,27 @@ +/* Test the vpaddd_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +#define SIZE 6 + +extern void abort (void); + +float64_t in[SIZE] = { -4.0, 4.0, -2.0, 2.0, -1.0, 1.0 }; + +int +main (void) +{ + int i; + + for (i = 0; i < SIZE / 2; ++i) + if (vpaddd_f64 (vld1q_f64 (in + 2 * i)) != 0.0) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler "faddp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c b/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c new file mode 100644 index 0000000..44714d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c @@ -0,0 +1,27 @@ +/* Test the vpaddd_s64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +#define SIZE 6 + +extern void abort (void); + +int64_t in[SIZE] = { -4l, 4l, -2l, 2l, -1l, 1l }; + +int +main (void) +{ + int i; + + for (i = 0; i < SIZE / 2; ++i) + if (vpaddd_s64 (vld1q_s64 (in + 2 * i)) != 0) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c b/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c new file mode 100644 index 0000000..013ca00 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c @@ -0,0 +1,27 @@ +/* Test the vpaddd_u64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +#define SIZE 6 + +extern void abort (void); + +uint64_t in[SIZE] = { 4ul, 4ul, 2ul, 2ul, 1ul, 1ul }; + +int +main (void) +{ + int i; + + for (i = 0; i < SIZE / 2; ++i) + if (vpaddd_u64 (vld1q_u64 (in + 2 * i)) != 2 * in[2 * i]) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ -- 2.7.4