From 73729a342be3488c4801d6c6e496f58e0107303a Mon Sep 17 00:00:00 2001
From: yroux
Date: Wed, 16 Jul 2014 15:04:27 +0000
Subject: [PATCH] gcc/

2014-07-16  Yvan Roux

	Backport from trunk r209943.
	2014-04-30  Alan Lawrence

	* config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8,
	vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32,
	vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32,
	vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32,
	vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8,
	vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16,
	vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16,
	vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle.

gcc/testsuite/

2014-07-16  Yvan Roux

	Backport from trunk r209940, r209943, r209947.
	2014-04-30  Alan Lawrence

	* gcc.target/arm/simd/vuzpqf32_1.c: New file.
	* gcc.target/arm/simd/vuzpqp16_1.c: New file.
	* gcc.target/arm/simd/vuzpqp8_1.c: New file.
	* gcc.target/arm/simd/vuzpqs16_1.c: New file.
	* gcc.target/arm/simd/vuzpqs32_1.c: New file.
	* gcc.target/arm/simd/vuzpqs8_1.c: New file.
	* gcc.target/arm/simd/vuzpqu16_1.c: New file.
	* gcc.target/arm/simd/vuzpqu32_1.c: New file.
	* gcc.target/arm/simd/vuzpqu8_1.c: New file.
	* gcc.target/arm/simd/vuzpf32_1.c: New file.
	* gcc.target/arm/simd/vuzpp16_1.c: New file.
	* gcc.target/arm/simd/vuzpp8_1.c: New file.
	* gcc.target/arm/simd/vuzps16_1.c: New file.
	* gcc.target/arm/simd/vuzps32_1.c: New file.
	* gcc.target/arm/simd/vuzps8_1.c: New file.
	* gcc.target/arm/simd/vuzpu16_1.c: New file.
	* gcc.target/arm/simd/vuzpu32_1.c: New file.
	* gcc.target/arm/simd/vuzpu8_1.c: New file.

	2014-04-30  Alan Lawrence

	* gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2.
	* gcc.target/aarch64/vuzpu32_1.c: Likewise.
	* gcc.target/aarch64/vuzpf32_1.c: Likewise.

	2014-04-30  Alan Lawrence

	* gcc.target/aarch64/simd/vuzpf32_1.c: New file.
	* gcc.target/aarch64/simd/vuzpf32.x: New file.
	* gcc.target/aarch64/simd/vuzpp16_1.c: New file.
	* gcc.target/aarch64/simd/vuzpp16.x: New file.
	* gcc.target/aarch64/simd/vuzpp8_1.c: New file.
	* gcc.target/aarch64/simd/vuzpp8.x: New file.
	* gcc.target/aarch64/simd/vuzpqf32_1.c: New file.
	* gcc.target/aarch64/simd/vuzpqf32.x: New file.
	* gcc.target/aarch64/simd/vuzpqp16_1.c: New file.
	* gcc.target/aarch64/simd/vuzpqp16.x: New file.
	* gcc.target/aarch64/simd/vuzpqp8_1.c: New file.
	* gcc.target/aarch64/simd/vuzpqp8.x: New file.
	* gcc.target/aarch64/simd/vuzpqs16_1.c: New file.
	* gcc.target/aarch64/simd/vuzpqs16.x: New file.
	* gcc.target/aarch64/simd/vuzpqs32_1.c: New file.
	* gcc.target/aarch64/simd/vuzpqs32.x: New file.
	* gcc.target/aarch64/simd/vuzpqs8_1.c: New file.
	* gcc.target/aarch64/simd/vuzpqs8.x: New file.
	* gcc.target/aarch64/simd/vuzpqu16_1.c: New file.
	* gcc.target/aarch64/simd/vuzpqu16.x: New file.
	* gcc.target/aarch64/simd/vuzpqu32_1.c: New file.
	* gcc.target/aarch64/simd/vuzpqu32.x: New file.
	* gcc.target/aarch64/simd/vuzpqu8_1.c: New file.
	* gcc.target/aarch64/simd/vuzpqu8.x: New file.
	* gcc.target/aarch64/simd/vuzps16_1.c: New file.
	* gcc.target/aarch64/simd/vuzps16.x: New file.
	* gcc.target/aarch64/simd/vuzps32_1.c: New file.
	* gcc.target/aarch64/simd/vuzps32.x: New file.
	* gcc.target/aarch64/simd/vuzps8_1.c: New file.
	* gcc.target/aarch64/simd/vuzps8.x: New file.
	* gcc.target/aarch64/simd/vuzpu16_1.c: New file.
	* gcc.target/aarch64/simd/vuzpu16.x: New file.
	* gcc.target/aarch64/simd/vuzpu32_1.c: New file.
	* gcc.target/aarch64/simd/vuzpu32.x: New file.
	* gcc.target/aarch64/simd/vuzpu8_1.c: New file.
	* gcc.target/aarch64/simd/vuzpu8.x: New file.

git-svn-id: svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@212665 138bc75d-0d04-0410-961f-82ee72b054a4
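For reference, each of the 42 intrinsics is rewritten following the same
pattern. Below is a minimal standalone sketch of that pattern (the helper
names are illustrative only, not from the patch), using vuzp1_s32 and
assuming an AArch64 target where arm_neon.h is available:

    #include <arm_neon.h>

    /* Before: a temporary inline-asm implementation.  It always emits
       UZP1, but the permute is opaque to GCC's optimizers.  */
    static inline int32x2_t
    vuzp1_s32_asm (int32x2_t a, int32x2_t b)
    {
      int32x2_t result;
      __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
               : "=w"(result)
               : "w"(a), "w"(b)
               : /* No clobbers */);
      return result;
    }

    /* After: a generic vector permute.  The mask selects the even lanes
       of the concatenation {a, b}; on big endian (__AARCH64EB__) the
       architectural lane numbering is reversed, hence the second mask.
       The same UZP1 instruction is selected, but the shuffle is now
       visible to constant folding and permute combination.  */
    static inline int32x2_t
    vuzp1_s32_shuffle (int32x2_t a, int32x2_t b)
    {
    #ifdef __AARCH64EB__
      return __builtin_shuffle (a, b, (uint32x2_t) {3, 1});
    #else
      return __builtin_shuffle (a, b, (uint32x2_t) {0, 2});
    #endif
    }

Note that on two-lane vectors uzp1 (a, b) yields {a[0], b[0]}, which is
the same permute as zip1; this is why the vuzps32_1.c, vuzpu32_1.c and
vuzpf32_1.c tests below are updated to expect zip1/zip2 rather than
uzp1/uzp2.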
---
 gcc/ChangeLog.linaro                               |   14 +
 gcc/config/aarch64/arm_neon.h                      | 1335 ++++++++++----
 gcc/testsuite/ChangeLog.linaro                     |   69 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x    |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c  |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x    |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c  |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x     |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c   |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x   |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x   |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x    |   27 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c  |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x   |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x   |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x    |   27 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c  |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x   |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x   |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x    |   27 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c  |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x    |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c  |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x    |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c  |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x     |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c   |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x    |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c  |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x    |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c  |   11 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x     |   26 +
 gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c   |   11 +
 gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c      |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c      |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c       |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c     |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c     |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c      |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c     |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c     |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c      |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c     |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c     |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c      |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c      |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c      |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c       |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c      |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c      |   12 +
 gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c       |   12 +
 57 files changed, 1621 insertions(+), 682 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c
diff --git a/gcc/ChangeLog.linaro b/gcc/ChangeLog.linaro
index 7516a19..e6e17c5
100644 --- a/gcc/ChangeLog.linaro +++ b/gcc/ChangeLog.linaro @@ -1,3 +1,17 @@ +2014-07-16 Yvan Roux + + Backport from trunk r209943. + 2014-04-30 Alan Lawrence + + * config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8, + vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32, + vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32, + vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32, + vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8, + vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16, + vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16, + vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle. + 2014-06-26 Yvan Roux * LINARO-VERSION: Bump version. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index e69351c..e1b5d62 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -13199,467 +13199,6 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b) : /* No clobbers */); return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vuzp1_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vuzp1_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vuzp1_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vuzp1_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vuzp1_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vuzp1_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vuzp1_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vuzp1_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vuzp1_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vuzp1q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("uzp1 
%0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vuzp1q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vuzp1q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vuzp1q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vuzp1q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vuzp1q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vuzp1q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vuzp1q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vuzp1q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vuzp1q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vuzp1q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vuzp1q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vuzp2_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vuzp2_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ 
((__always_inline__)) -vuzp2_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vuzp2_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vuzp2_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vuzp2_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vuzp2_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vuzp2_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vuzp2_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vuzp2q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vuzp2q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vuzp2q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vuzp2q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vuzp2q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vuzp2q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vuzp2q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - 
return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vuzp2q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vuzp2q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vuzp2q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vuzp2q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vuzp2q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} /* End of temporary inline asm implementations. */ @@ -24838,407 +24377,839 @@ vst4q_f64 (float64_t * __a, float64x2x4_t val) __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vsubd_s64 (int64x1_t __a, int64x1_t __b) { - return __a - __b; + return __a - __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsubd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a - __b; +} + +/* vtbx1 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (8)); + int8x8_t __tbl = vtbl1_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); +} + +/* vtbx3 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (24)); + int8x8_t __tbl = vtbl3_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); +} + +/* vtrn */ + 
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vtrn_f32 (float32x2_t a, float32x2_t b) +{ + return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vtrn_p8 (poly8x8_t a, poly8x8_t b) +{ + return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vtrn_p16 (poly16x4_t a, poly16x4_t b) +{ + return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vtrn_s8 (int8x8_t a, int8x8_t b) +{ + return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vtrn_s16 (int16x4_t a, int16x4_t b) +{ + return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vtrn_s32 (int32x2_t a, int32x2_t b) +{ + return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)}; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vtrn_u8 (uint8x8_t a, uint8x8_t b) +{ + return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vtrn_u16 (uint16x4_t a, uint16x4_t b) +{ + return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vtrn_u32 (uint32x2_t a, uint32x2_t b) +{ + return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_f32 (float32x4_t a, float32x4_t b) +{ + return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_p8 (poly8x16_t a, poly8x16_t b) +{ + return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_p16 (poly16x8_t a, poly16x8_t b) +{ + return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_s8 (int8x16_t a, int8x16_t b) +{ + return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_s16 (int16x8_t a, int16x8_t b) +{ + return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_s32 (int32x4_t a, int32x4_t b) +{ + return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_u8 (uint8x16_t a, uint8x16_t b) +{ + return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_u16 (uint16x8_t a, uint16x8_t b) +{ + return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)}; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_u32 (uint32x4_t a, uint32x4_t b) +{ + return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; +} + +/* vtst */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) 
__builtin_aarch64_cmtstv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_s64 (int64x1_t __a, int64x1_t __b) +{ + return (__a & __b) ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (__a & __b) ? -1ll : 0ll; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtstq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vsubd_u64 (uint64x1_t __a, uint64x1_t __b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_u8 (uint8x16_t __a, uint8x16_t __b) { - return __a - __b; + return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a, + (int8x16_t) __b); } -/* vtbx1 */ - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_u16 (uint16x8_t __a, uint16x8_t __b) { - uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), - vmov_n_u8 (8)); - int8x8_t __tbl = vtbl1_s8 (__tab, __idx); - - return vbsl_s8 (__mask, __tbl, __r); + return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a, + (int16x8_t) __b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_u32 (uint32x4_t __a, uint32x4_t __b) { - uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); - uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); + return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a, + (int32x4_t) __b); +} - return vbsl_u8 (__mask, __tbl, __r); +__extension__ static __inline 
uint64x2_t __attribute__ ((__always_inline__)) +vtstq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a, + (int64x2_t) __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtstd_s64 (int64x1_t __a, int64x1_t __b) { - uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); - poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); + return (__a & __b) ? -1ll : 0ll; +} - return vbsl_p8 (__mask, __tbl, __r); +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtstd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (__a & __b) ? -1ll : 0ll; } -/* vtbx3 */ +/* vuqadd */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) +vuqadd_s8 (int8x8_t __a, uint8x8_t __b) { - uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), - vmov_n_u8 (24)); - int8x8_t __tbl = vtbl3_s8 (__tab, __idx); - - return vbsl_s8 (__mask, __tbl, __r); + return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuqadd_s16 (int16x4_t __a, uint16x4_t __b) { - uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); - uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); - - return vbsl_u8 (__mask, __tbl, __r); + return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b); } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuqadd_s32 (int32x2_t __a, uint32x2_t __b) { - uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); - poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); - - return vbsl_p8 (__mask, __tbl, __r); + return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b); } -/* vtrn */ - -__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) -vtrn_f32 (float32x2_t a, float32x2_t b) +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vuqadd_s64 (int64x1_t __a, uint64x1_t __b) { - return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; + return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); } -__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) -vtrn_p8 (poly8x8_t a, poly8x8_t b) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) { - return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; + return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b); } -__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) -vtrn_p16 (poly16x4_t a, poly16x4_t b) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) { - return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; + return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b); } -__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) -vtrn_s8 (int8x8_t a, int8x8_t b) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuqaddq_s32 (int32x4_t __a, 
uint32x4_t __b) { - return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; + return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b); } -__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) -vtrn_s16 (int16x4_t a, int16x4_t b) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) { - return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; + return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); } -__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) -vtrn_s32 (int32x2_t a, int32x2_t b) +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vuqaddb_s8 (int8x1_t __a, uint8x1_t __b) { - return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)}; + return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b); } -__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) -vtrn_u8 (uint8x8_t a, uint8x8_t b) +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vuqaddh_s16 (int16x1_t __a, uint16x1_t __b) { - return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; + return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b); } -__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) -vtrn_u16 (uint16x4_t a, uint16x4_t b) +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vuqadds_s32 (int32x1_t __a, uint32x1_t __b) { - return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; + return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b); } -__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) -vtrn_u32 (uint32x2_t a, uint32x2_t b) +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) { - return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; + return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); } -__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) -vtrnq_f32 (float32x4_t a, float32x4_t b) +#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \ + { \ + return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \ + v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \ + } + +#define __INTERLEAVE_LIST(op) \ + __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \ + __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \ + __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \ + __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \ + __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \ + __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \ + __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \ + __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \ + __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \ + __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \ + __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \ + __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \ + __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \ + __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \ + __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \ + __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \ + __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \ + __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q) + +/* vuzp */ + 
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp1_f32 (float32x2_t __a, float32x2_t __b) { - return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) -vtrnq_p8 (poly8x16_t a, poly8x16_t b) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) { - return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) -vtrnq_p16 (poly16x8_t a, poly16x8_t b) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) { - return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif } -__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) -vtrnq_s8 (int8x16_t a, int8x16_t b) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp1_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) -vtrnq_s16 (int16x8_t a, int16x8_t b) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp1_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif } -__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) -vtrnq_s32 (int32x4_t a, int32x4_t b) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp1_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) -vtrnq_u8 (uint8x16_t a, uint8x16_t b) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) -vtrnq_u16 (uint16x8_t a, uint16x8_t b) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 
(a, b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif } -__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) -vtrnq_u32 (uint32x4_t a, uint32x4_t b) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -/* vtst */ - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtst_s8 (int8x8_t __a, int8x8_t __b) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp1q_f32 (float32x4_t __a, float32x4_t __b) { - return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vtst_s16 (int16x4_t __a, int16x4_t __b) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vuzp1q_f64 (float64x2_t __a, float64x2_t __b) { - return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vtst_s32 (int32x2_t __a, int32x2_t __b) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) { - return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vtst_s64 (int64x1_t __a, int64x1_t __b) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) { - return (__a & __b) ? 
-1ll : 0ll; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtst_u8 (uint8x8_t __a, uint8x8_t __b) +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuzp1q_s8 (int8x16_t __a, int8x16_t __b) { - return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a, - (int8x8_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vtst_u16 (uint16x4_t __a, uint16x4_t __b) +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuzp1q_s16 (int16x8_t __a, int16x8_t __b) { - return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a, - (int16x4_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vtst_u32 (uint32x2_t __a, uint32x2_t __b) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuzp1q_s32 (int32x4_t __a, int32x4_t __b) { - return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a, - (int32x2_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vtst_u64 (uint64x1_t __a, uint64x1_t __b) +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuzp1q_s64 (int64x2_t __a, int64x2_t __b) { - return (__a & __b) ? 
-1ll : 0ll; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vtstq_s8 (int8x16_t __a, int8x16_t __b) +vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vtstq_s16 (int16x8_t __a, int16x8_t __b) +vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vtstq_s32 (int32x4_t __a, int32x4_t __b) +vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vtstq_s64 (int64x2_t __a, int64x2_t __b) +vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vtstq_u8 (uint8x16_t __a, uint8x16_t __b) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp2_f32 (float32x2_t __a, float32x2_t __b) { - return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a, - (int8x16_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vtstq_u16 (uint16x8_t __a, uint16x8_t __b) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) { - return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a, - (int16x8_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vtstq_u32 (uint32x4_t __a, uint32x4_t __b) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) { - return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a, - (int32x4_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vtstq_u64 (uint64x2_t __a, uint64x2_t __b) +__extension__ 
static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp2_s8 (int8x8_t __a, int8x8_t __b) { - return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a, - (int64x2_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vtstd_s64 (int64x1_t __a, int64x1_t __b) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp2_s16 (int16x4_t __a, int16x4_t __b) { - return (__a & __b) ? -1ll : 0ll; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vtstd_u64 (uint64x1_t __a, uint64x1_t __b) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp2_s32 (int32x2_t __a, int32x2_t __b) { - return (__a & __b) ? -1ll : 0ll; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -/* vuqadd */ +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vuqadd_s8 (int8x8_t __a, uint8x8_t __b) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) { - return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vuqadd_s16 (int16x4_t __a, uint16x4_t __b) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) { - return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vuqadd_s32 (int32x2_t __a, uint32x2_t __b) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp2q_f32 (float32x4_t __a, float32x4_t __b) { - return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vuqadd_s64 (int64x1_t __a, uint64x1_t __b) +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vuzp2q_f64 (float64x2_t __a, float64x2_t __b) { - return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + 
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) +vuzp2q_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) +vuzp2q_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) +vuzp2q_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) +vuzp2q_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vuqaddb_s8 (int8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b) { - return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vuqaddh_s16 (int16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) { - return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 
7, 9, 11, 13, 15}); +#endif } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vuqadds_s32 (int32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) { - return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) { - return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ - __extension__ static __inline rettype \ - __attribute__ ((__always_inline__)) \ - v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \ - { \ - return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \ - v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \ - } - -#define __INTERLEAVE_LIST(op) \ - __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \ - __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \ - __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \ - __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \ - __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \ - __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \ - __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \ - __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \ - __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \ - __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \ - __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \ - __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \ - __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \ - __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \ - __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \ - __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \ - __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \ - __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q) - -/* vuzp */ - __INTERLEAVE_LIST (uzp) /* vzip */ diff --git a/gcc/testsuite/ChangeLog.linaro b/gcc/testsuite/ChangeLog.linaro index bf5d39a..c0e26bd 100644 --- a/gcc/testsuite/ChangeLog.linaro +++ b/gcc/testsuite/ChangeLog.linaro @@ -1,3 +1,72 @@ +2014-07-16 Yvan Roux + + Backport from trunk r209940, r209943, r209947. + 2014-04-30 Alan Lawrence + + * gcc.target/arm/simd/vuzpqf32_1.c: New file. + * gcc.target/arm/simd/vuzpqp16_1.c: New file. + * gcc.target/arm/simd/vuzpqp8_1.c: New file. + * gcc.target/arm/simd/vuzpqs16_1.c: New file. + * gcc.target/arm/simd/vuzpqs32_1.c: New file. + * gcc.target/arm/simd/vuzpqs8_1.c: New file. + * gcc.target/arm/simd/vuzpqu16_1.c: New file. + * gcc.target/arm/simd/vuzpqu32_1.c: New file. + * gcc.target/arm/simd/vuzpqu8_1.c: New file. + * gcc.target/arm/simd/vuzpf32_1.c: New file. + * gcc.target/arm/simd/vuzpp16_1.c: New file. + * gcc.target/arm/simd/vuzpp8_1.c: New file. + * gcc.target/arm/simd/vuzps16_1.c: New file. + * gcc.target/arm/simd/vuzps32_1.c: New file. + * gcc.target/arm/simd/vuzps8_1.c: New file. + * gcc.target/arm/simd/vuzpu16_1.c: New file. + * gcc.target/arm/simd/vuzpu32_1.c: New file.
+ * gcc.target/arm/simd/vuzpu8_1.c: New file. + + 2014-04-30 Alan Lawrence + + * gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2. + * gcc.target/aarch64/vuzpu32_1.c: Likewise. + * gcc.target/aarch64/vuzpf32_1.c: Likewise. + + 2014-04-30 Alan Lawrence + + * gcc.target/aarch64/simd/vuzpf32_1.c: New file. + * gcc.target/aarch64/simd/vuzpf32.x: New file. + * gcc.target/aarch64/simd/vuzpp16_1.c: New file. + * gcc.target/aarch64/simd/vuzpp16.x: New file. + * gcc.target/aarch64/simd/vuzpp8_1.c: New file. + * gcc.target/aarch64/simd/vuzpp8.x: New file. + * gcc.target/aarch64/simd/vuzpqf32_1.c: New file. + * gcc.target/aarch64/simd/vuzpqf32.x: New file. + * gcc.target/aarch64/simd/vuzpqp16_1.c: New file. + * gcc.target/aarch64/simd/vuzpqp16.x: New file. + * gcc.target/aarch64/simd/vuzpqp8_1.c: New file. + * gcc.target/aarch64/simd/vuzpqp8.x: New file. + * gcc.target/aarch64/simd/vuzpqs16_1.c: New file. + * gcc.target/aarch64/simd/vuzpqs16.x: New file. + * gcc.target/aarch64/simd/vuzpqs32_1.c: New file. + * gcc.target/aarch64/simd/vuzpqs32.x: New file. + * gcc.target/aarch64/simd/vuzpqs8_1.c: New file. + * gcc.target/aarch64/simd/vuzpqs8.x: New file. + * gcc.target/aarch64/simd/vuzpqu16_1.c: New file. + * gcc.target/aarch64/simd/vuzpqu16.x: New file. + * gcc.target/aarch64/simd/vuzpqu32_1.c: New file. + * gcc.target/aarch64/simd/vuzpqu32.x: New file. + * gcc.target/aarch64/simd/vuzpqu8_1.c: New file. + * gcc.target/aarch64/simd/vuzpqu8.x: New file. + * gcc.target/aarch64/simd/vuzps16_1.c: New file. + * gcc.target/aarch64/simd/vuzps16.x: New file. + * gcc.target/aarch64/simd/vuzps32_1.c: New file. + * gcc.target/aarch64/simd/vuzps32.x: New file. + * gcc.target/aarch64/simd/vuzps8_1.c: New file. + * gcc.target/aarch64/simd/vuzps8.x: New file. + * gcc.target/aarch64/simd/vuzpu16_1.c: New file. + * gcc.target/aarch64/simd/vuzpu16.x: New file. + * gcc.target/aarch64/simd/vuzpu32_1.c: New file. + * gcc.target/aarch64/simd/vuzpu32.x: New file. + * gcc.target/aarch64/simd/vuzpu8_1.c: New file. + * gcc.target/aarch64/simd/vuzpu8.x: New file. + 2014-06-25 Yvan Roux GCC Linaro 4.9-2014.06-1 released. diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x new file mode 100644 index 0000000..86c3700 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +float32x2x2_t +test_vuzpf32 (float32x2_t _a, float32x2_t _b) +{ + return vuzp_f32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + float32_t first[] = {1, 2}; + float32_t second[] = {3, 4}; + float32x2x2_t result = test_vuzpf32 (vld1_f32 (first), vld1_f32 (second)); + float32_t exp1[] = {1, 3}; + float32_t exp2[] = {2, 4}; + float32x2_t expect1 = vld1_f32 (exp1); + float32x2_t expect2 = vld1_f32 (exp2); + + for (i = 0; i < 2; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c new file mode 100644 index 0000000..0daba1c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_f32' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpf32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x new file mode 100644 index 0000000..bc45efc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +poly16x4x2_t +test_vuzpp16 (poly16x4_t _a, poly16x4_t _b) +{ + return vuzp_p16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly16_t first[] = {1, 2, 3, 4}; + poly16_t second[] = {5, 6, 7, 8}; + poly16x4x2_t result = test_vuzpp16 (vld1_p16 (first), vld1_p16 (second)); + poly16_t exp1[] = {1, 3, 5, 7}; + poly16_t exp2[] = {2, 4, 6, 8}; + poly16x4_t expect1 = vld1_p16 (exp1); + poly16x4_t expect2 = vld1_p16 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c new file mode 100644 index 0000000..03b0722 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_p16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpp16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x new file mode 100644 index 0000000..b4ef51c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x @@ -0,0 +1,26 @@ +extern void abort (void); + +poly8x8x2_t +test_vuzpp8 (poly8x8_t _a, poly8x8_t _b) +{ + return vuzp_p8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + poly8x8x2_t result = test_vuzpp8 (vld1_p8 (first), vld1_p8 (second)); + poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + poly8x8_t expect1 = vld1_p8 (exp1); + poly8x8_t expect2 = vld1_p8 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c new file mode 100644 index 0000000..5186b1f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_p8' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpp8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x new file mode 100644 index 0000000..f1b48da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +float32x4x2_t +test_vuzpqf32 (float32x4_t _a, float32x4_t _b) +{ + return vuzpq_f32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + float32_t first[] = {1, 2, 3, 4}; + float32_t second[] = {5, 6, 7, 8}; + float32x4x2_t result = test_vuzpqf32 (vld1q_f32 (first), vld1q_f32 (second)); + float32_t exp1[] = {1, 3, 5, 7}; + float32_t exp2[] = {2, 4, 6, 8}; + float32x4_t expect1 = vld1q_f32 (exp1); + float32x4_t expect2 = vld1q_f32 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c new file mode 100644 index 0000000..1167f7b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_f32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqf32.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x new file mode 100644 index 0000000..d4e08f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +poly16x8x2_t +test_vuzpqp16 (poly16x8_t _a, poly16x8_t _b) +{ + return vuzpq_p16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + poly16x8x2_t result = test_vuzpqp16 (vld1q_p16 (first), vld1q_p16 (second)); + poly16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + poly16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + poly16x8_t expect1 = vld1q_p16 (exp1); + poly16x8_t expect2 = vld1q_p16 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c new file mode 100644 index 0000000..c664804 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_p16' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqp16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x new file mode 100644 index 0000000..31541de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +poly8x16x2_t +test_vuzpqp8 (poly8x16_t _a, poly8x16_t _b) +{ + return vuzpq_p8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + poly8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + poly8x16x2_t result = test_vuzpqp8 (vld1q_p8 (first), vld1q_p8 (second)); + poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; + poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; + poly8x16_t expect1 = vld1q_p8 (exp1); + poly8x16_t expect2 = vld1q_p8 (exp2); + + for (i = 0; i < 16; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c new file mode 100644 index 0000000..a9e6ce2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqp8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x new file mode 100644 index 0000000..439107b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int16x8x2_t +test_vuzpqs16 (int16x8_t _a, int16x8_t _b) +{ + return vuzpq_s16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + int16x8x2_t result = test_vuzpqs16 (vld1q_s16 (first), vld1q_s16 (second)); + int16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + int16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + int16x8_t expect1 = vld1q_s16 (exp1); + int16x8_t expect2 = vld1q_s16 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c new file mode 100644 index 0000000..af1e28b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_s16' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqs16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x new file mode 100644 index 0000000..84463f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int32x4x2_t +test_vuzpqs32 (int32x4_t _a, int32x4_t _b) +{ + return vuzpq_s32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int32_t first[] = {1, 2, 3, 4}; + int32_t second[] = {5, 6, 7, 8}; + int32x4x2_t result = test_vuzpqs32 (vld1q_s32 (first), vld1q_s32 (second)); + int32_t exp1[] = {1, 3, 5, 7}; + int32_t exp2[] = {2, 4, 6, 8}; + int32x4_t expect1 = vld1q_s32 (exp1); + int32x4_t expect2 = vld1q_s32 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c new file mode 100644 index 0000000..a4bf7ac --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_s32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqs32.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x new file mode 100644 index 0000000..c8b9167 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int8x16x2_t +test_vuzpqs8 (int8x16_t _a, int8x16_t _b) +{ + return vuzpq_s8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + int8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + int8x16x2_t result = test_vuzpqs8 (vld1q_s8 (first), vld1q_s8 (second)); + int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; + int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; + int8x16_t expect1 = vld1q_s8 (exp1); + int8x16_t expect2 = vld1q_s8 (exp2); + + for (i = 0; i < 16; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c new file mode 100644 index 0000000..234a329 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_s8' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqs8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x new file mode 100644 index 0000000..1757467 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint16x8x2_t +test_vuzpqu16 (uint16x8_t _a, uint16x8_t _b) +{ + return vuzpq_u16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + uint16x8x2_t result = test_vuzpqu16 (vld1q_u16 (first), vld1q_u16 (second)); + uint16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + uint16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + uint16x8_t expect1 = vld1q_u16 (exp1); + uint16x8_t expect2 = vld1q_u16 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c new file mode 100644 index 0000000..3f029ed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_u16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqu16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x new file mode 100644 index 0000000..9ff2369 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint32x4x2_t +test_vuzpqu32 (uint32x4_t _a, uint32x4_t _b) +{ + return vuzpq_u32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint32_t first[] = {1, 2, 3, 4}; + uint32_t second[] = {5, 6, 7, 8}; + uint32x4x2_t result = test_vuzpqu32 (vld1q_u32 (first), vld1q_u32 (second)); + uint32_t exp1[] = {1, 3, 5, 7}; + uint32_t exp2[] = {2, 4, 6, 8}; + uint32x4_t expect1 = vld1q_u32 (exp1); + uint32x4_t expect2 = vld1q_u32 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c new file mode 100644 index 0000000..16090ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_u32' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqu32.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x new file mode 100644 index 0000000..1f5288d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint8x16x2_t +test_vuzpqu8 (uint8x16_t _a, uint8x16_t _b) +{ + return vuzpq_u8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + uint8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + uint8x16x2_t result = test_vuzpqu8 (vld1q_u8 (first), vld1q_u8 (second)); + uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; + uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; + uint8x16_t expect1 = vld1q_u8 (exp1); + uint8x16_t expect2 = vld1q_u8 (exp2); + + for (i = 0; i < 16; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c new file mode 100644 index 0000000..6313e4c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_u8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqu8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x new file mode 100644 index 0000000..4775135 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int16x4x2_t +test_vuzps16 (int16x4_t _a, int16x4_t _b) +{ + return vuzp_s16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int16_t first[] = {1, 2, 3, 4}; + int16_t second[] = {5, 6, 7, 8}; + int16x4x2_t result = test_vuzps16 (vld1_s16 (first), vld1_s16 (second)); + int16_t exp1[] = {1, 3, 5, 7}; + int16_t exp2[] = {2, 4, 6, 8}; + int16x4_t expect1 = vld1_s16 (exp1); + int16x4_t expect2 = vld1_s16 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c new file mode 100644 index 0000000..f31bd31 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_s16' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzps16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x new file mode 100644 index 0000000..6f885ce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int32x2x2_t +test_vuzps32 (int32x2_t _a, int32x2_t _b) +{ + return vuzp_s32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int32_t first[] = {1, 2}; + int32_t second[] = {3, 4}; + int32x2x2_t result = test_vuzps32 (vld1_s32 (first), vld1_s32 (second)); + int32_t exp1[] = {1, 3}; + int32_t exp2[] = {2, 4}; + int32x2_t expect1 = vld1_s32 (exp1); + int32x2_t expect2 = vld1_s32 (exp2); + + for (i = 0; i < 2; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c new file mode 100644 index 0000000..af48d63 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_s32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzps32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x new file mode 100644 index 0000000..62ccad4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int8x8x2_t +test_vuzps8 (int8x8_t _a, int8x8_t _b) +{ + return vuzp_s8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + int8x8x2_t result = test_vuzps8 (vld1_s8 (first), vld1_s8 (second)); + int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + int8x8_t expect1 = vld1_s8 (exp1); + int8x8_t expect2 = vld1_s8 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c new file mode 100644 index 0000000..5962604 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_s8' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzps8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x new file mode 100644 index 0000000..a5983f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint16x4x2_t +test_vuzpu16 (uint16x4_t _a, uint16x4_t _b) +{ + return vuzp_u16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint16_t first[] = {1, 2, 3, 4}; + uint16_t second[] = {5, 6, 7, 8}; + uint16x4x2_t result = test_vuzpu16 (vld1_u16 (first), vld1_u16 (second)); + uint16_t exp1[] = {1, 3, 5, 7}; + uint16_t exp2[] = {2, 4, 6, 8}; + uint16x4_t expect1 = vld1_u16 (exp1); + uint16x4_t expect2 = vld1_u16 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c new file mode 100644 index 0000000..5025c5f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_u16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpu16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x new file mode 100644 index 0000000..6bf6731 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint32x2x2_t +test_vuzpu32 (uint32x2_t _a, uint32x2_t _b) +{ + return vuzp_u32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint32_t first[] = {1, 2}; + uint32_t second[] = {3, 4}; + uint32x2x2_t result = test_vuzpu32 (vld1_u32 (first), vld1_u32 (second)); + uint32_t exp1[] = {1, 3}; + uint32_t exp2[] = {2, 4}; + uint32x2_t expect1 = vld1_u32 (exp1); + uint32x2_t expect2 = vld1_u32 (exp2); + + for (i = 0; i < 2; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c new file mode 100644 index 0000000..05e1c95 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_u32' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpu32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x new file mode 100644 index 0000000..c3e67e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint8x8x2_t +test_vuzpu8 (uint8x8_t _a, uint8x8_t _b) +{ + return vuzp_u8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + uint8x8x2_t result = test_vuzpu8 (vld1_u8 (first), vld1_u8 (second)); + uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + uint8x8_t expect1 = vld1_u8 (exp1); + uint8x8_t expect2 = vld1_u8 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c new file mode 100644 index 0000000..57aa49c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_u8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpu8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c new file mode 100644 index 0000000..723c86a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpf32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c new file mode 100644 index 0000000..c7ad757 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpp16' ARM Neon intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpp16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c new file mode 100644 index 0000000..670b550 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpp8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c new file mode 100644 index 0000000..53147f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqf32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c new file mode 100644 index 0000000..feef15a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqp16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c new file mode 100644 index 0000000..db98f35 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqp8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c new file mode 100644 index 0000000..808d562 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQs16' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqs16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c new file mode 100644 index 0000000..7adf5f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQs32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqs32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c new file mode 100644 index 0000000..9d0256a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQs8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqs8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c new file mode 100644 index 0000000..23106ed --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqu16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c new file mode 100644 index 0000000..0002fdf --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQu32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqu32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c new file mode 100644 index 0000000..f8d19dc --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQu8' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqu8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c new file mode 100644 index 0000000..6e3f2eb --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzps16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzps16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c new file mode 100644 index 0000000..372c393 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzps32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzps32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c new file mode 100644 index 0000000..3338477 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzps8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzps8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c new file mode 100644 index 0000000..378b5a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpu16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c new file mode 100644 index 0000000..ebb0d6b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpu32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpu32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c b/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c new file mode 100644 index 0000000..82719a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpu8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ -- 2.7.4
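
The substance of the arm_neon.h hunk above is that each vuzp intrinsic is now an ordinary __builtin_shuffle with a constant index vector, chosen per endianness, instead of a temporary inline asm. A minimal standalone sketch of the same even/odd de-interleave, using GCC's generic vector extensions rather than arm_neon.h (the uzp1_s8/uzp2_s8 helper names and the little-endian index order are illustrative only, not part of the patch):

/* Models the even/odd de-interleave that vuzp1q_s8/vuzp2q_s8 compile
   to on little-endian.  Compiles with any recent GCC on any target;
   no NEON headers required.  */
typedef signed char v16qi __attribute__ ((vector_size (16)));
typedef unsigned char v16uqi __attribute__ ((vector_size (16)));

extern void abort (void);

static v16qi
uzp1_s8 (v16qi a, v16qi b)
{
  /* Indices 0..31 select from the 32-element concatenation {a, b};
     the even indices pick out uzp1.  */
  return __builtin_shuffle (a, b, (v16uqi)
    {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
}

static v16qi
uzp2_s8 (v16qi a, v16qi b)
{
  /* The odd indices pick out uzp2.  */
  return __builtin_shuffle (a, b, (v16uqi)
    {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
}

int
main (void)
{
  v16qi a = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  v16qi b = {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
  v16qi even = uzp1_s8 (a, b);
  v16qi odd = uzp2_s8 (a, b);

  if (even[0] != 1 || even[8] != 17 || odd[0] != 2 || odd[15] != 32)
    abort ();
  return 0;
}

Writing the permute as a constant-mask shuffle lets the compiler match it to a single uzp1/uzp2 instruction rather than opaque asm; for two-element vectors the same permute is equally a zip, which is why the vuzps32, vuzpu32 and vuzpf32 tests above now expect zip1/zip2 instead of uzp1/uzp2.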