return result;
}
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vtrn1_f32 (float32x2_t a, float32x2_t b)
-{
- float32x2_t result;
- __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vtrn1_p8 (poly8x8_t a, poly8x8_t b)
-{
- poly8x8_t result;
- __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vtrn1_p16 (poly16x4_t a, poly16x4_t b)
-{
- poly16x4_t result;
- __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vtrn1_s8 (int8x8_t a, int8x8_t b)
-{
- int8x8_t result;
- __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vtrn1_s16 (int16x4_t a, int16x4_t b)
-{
- int16x4_t result;
- __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vtrn1_s32 (int32x2_t a, int32x2_t b)
-{
- int32x2_t result;
- __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vtrn1_u8 (uint8x8_t a, uint8x8_t b)
-{
- uint8x8_t result;
- __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vtrn1_u16 (uint16x4_t a, uint16x4_t b)
-{
- uint16x4_t result;
- __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vtrn1_u32 (uint32x2_t a, uint32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vtrn1q_f32 (float32x4_t a, float32x4_t b)
-{
- float32x4_t result;
- __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vtrn1q_f64 (float64x2_t a, float64x2_t b)
-{
- float64x2_t result;
- __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
-{
- poly8x16_t result;
- __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
-{
- poly16x8_t result;
- __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vtrn1q_s8 (int8x16_t a, int8x16_t b)
-{
- int8x16_t result;
- __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vtrn1q_s16 (int16x8_t a, int16x8_t b)
-{
- int16x8_t result;
- __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vtrn1q_s32 (int32x4_t a, int32x4_t b)
-{
- int32x4_t result;
- __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vtrn1q_s64 (int64x2_t a, int64x2_t b)
-{
- int64x2_t result;
- __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
-{
- uint8x16_t result;
- __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
-{
- uint16x8_t result;
- __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vtrn2_f32 (float32x2_t a, float32x2_t b)
-{
- float32x2_t result;
- __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vtrn2_p8 (poly8x8_t a, poly8x8_t b)
-{
- poly8x8_t result;
- __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vtrn2_p16 (poly16x4_t a, poly16x4_t b)
-{
- poly16x4_t result;
- __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vtrn2_s8 (int8x8_t a, int8x8_t b)
-{
- int8x8_t result;
- __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vtrn2_s16 (int16x4_t a, int16x4_t b)
-{
- int16x4_t result;
- __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vtrn2_s32 (int32x2_t a, int32x2_t b)
-{
- int32x2_t result;
- __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vtrn2_u8 (uint8x8_t a, uint8x8_t b)
-{
- uint8x8_t result;
- __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vtrn2_u16 (uint16x4_t a, uint16x4_t b)
-{
- uint16x4_t result;
- __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vtrn2_u32 (uint32x2_t a, uint32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vtrn2q_f32 (float32x4_t a, float32x4_t b)
-{
- float32x4_t result;
- __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vtrn2q_f64 (float64x2_t a, float64x2_t b)
-{
- float64x2_t result;
- __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
-{
- poly8x16_t result;
- __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
-{
- poly16x8_t result;
- __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vtrn2q_s8 (int8x16_t a, int8x16_t b)
-{
- int8x16_t result;
- __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vtrn2q_s16 (int16x8_t a, int16x8_t b)
-{
- int16x8_t result;
- __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vtrn2q_s32 (int32x4_t a, int32x4_t b)
-{
- int32x4_t result;
- __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vtrn2q_s64 (int64x2_t a, int64x2_t b)
-{
- int64x2_t result;
- __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
-{
- uint8x16_t result;
- __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
-{
- uint16x8_t result;
- __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_p8 (poly8x8_t a, poly8x8_t b)
{
@@ ... @@
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
 return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
 (int8x8_t) __b, __c);
}
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
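+/* vsra_n: per-lane shift right and accumulate: result = __a + (__b >> __c),
+   with an arithmetic shift for signed types and a logical shift for
+   unsigned ones; e.g. vsra_n_u16 (acc, x, 3) adds x >> 3 into each lane
+   of acc.  */
+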
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
+ (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
+ (int32x2_t) __b, __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
+ (int64x1_t) __b, __c);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
+{
+ return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+{
+ return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+{
+ return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
+{
+ return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
+{
+ return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
+ (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+{
+ return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
+ (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+{
+ return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
+ (int32x4_t) __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
+{
+ return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
+ (int64x2_t) __b, __c);
+}
+
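+/* vsrad_n: the scalar form of vsra_n, operating on a single 64-bit
+   lane.  */
+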
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
+}
+
+/* vsri */
+
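+/* vsri_n: per-lane shift right and insert: each lane of __b is shifted
+   right by __c and inserted into the corresponding lane of __a, whose
+   most significant __c bits are left unchanged (sri instruction).  */
+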
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
+{
+ return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
+{
+ return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
+ (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
+ (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
+ (int32x2_t) __b, __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
+ (int64x1_t) __b, __c);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
+{
+ return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+{
+ return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+{
+ return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
+{
+ return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
+{
+ return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
+ (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+{
+ return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
+ (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+{
+ return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
+ (int32x4_t) __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
+{
+ return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
+ (int64x2_t) __b, __c);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
+}
+
+/* vst1 */
+
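+/* vst1: store a single vector to *a.  The 64x1 variants reduce to plain
+   scalar stores, so no builtin is needed for them.  */
+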
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f32 (float32_t *a, float32x2_t b)
+{
+ __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f64 (float64_t *a, float64x1_t b)
+{
+ *a = b;
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_p8 (poly8_t *a, poly8x8_t b)
+{
+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x8_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_p16 (poly16_t *a, poly16x4_t b)
+{
+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x4_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s8 (int8_t *a, int8x8_t b)
+{
+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s16 (int16_t *a, int16x4_t b)
+{
+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s32 (int32_t *a, int32x2_t b)
+{
+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s64 (int64_t *a, int64x1_t b)
+{
+ *a = b;
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u8 (uint8_t *a, uint8x8_t b)
{
- return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
- (int16x4_t) __b, __c);
+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x8_t) b);
}
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u16 (uint16_t *a, uint16x4_t b)
{
- return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
- (int32x2_t) __b, __c);
+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x4_t) b);
}
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u32 (uint32_t *a, uint32x2_t b)
{
- return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
- (int64x1_t) __b, __c);
+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
+ (int32x2_t) b);
}
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u64 (uint64_t *a, uint64x1_t b)
{
- return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
+ *a = b;
}
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+/* vst1q */
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f32 (float32_t *a, float32x4_t b)
{
- return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
+ __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
}
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f64 (float64_t *a, float64x2_t b)
{
- return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
+ __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
}
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_p8 (poly8_t *a, poly8x16_t b)
{
- return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x16_t) b);
}
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_p16 (poly16_t *a, poly16x8_t b)
{
- return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
- (int8x16_t) __b, __c);
+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x8_t) b);
}
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s8 (int8_t *a, int8x16_t b)
{
- return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
- (int16x8_t) __b, __c);
+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
}
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s16 (int16_t *a, int16x8_t b)
{
- return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
- (int32x4_t) __b, __c);
+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
}
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s32 (int32_t *a, int32x4_t b)
{
- return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
- (int64x2_t) __b, __c);
+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
}
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s64 (int64_t *a, int64x2_t b)
{
- return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
}
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u8 (uint8_t *a, uint8x16_t b)
{
- return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x16_t) b);
}
-/* vsri */
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u16 (uint16_t *a, uint16x8_t b)
{
- return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x8_t) b);
}
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u32 (uint32_t *a, uint32x4_t b)
{
- return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
+ (int32x4_t) b);
}
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u64 (uint64_t *a, uint64x2_t b)
{
- return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
+ (int64x2_t) b);
}
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+/* vstn */
+
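+/* vst2/vst3: interleaved structure stores.  The D-register variants first
+   widen each 64-bit value to a Q register with vcombine, zero-filling the
+   high half, because the OI/CI tuple builtins operate on Q-register
+   tuples; the narrow-mode st2/st3 builtins then store only the low
+   64 bits of each register.  */
+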
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_s64 (int64_t * __a, int64x1x2_t val)
{
- return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ int64x2x2_t temp;
+ temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
}
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_u64 (uint64_t * __a, uint64x1x2_t val)
{
- return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
- (int8x8_t) __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ uint64x2x2_t temp;
+ temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
}
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_f64 (float64_t * __a, float64x1x2_t val)
{
- return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
- (int16x4_t) __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ float64x2x2_t temp;
+ temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
+ __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
}
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_s8 (int8_t * __a, int8x8x2_t val)
{
- return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
- (int32x2_t) __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ int8x16x2_t temp;
+ temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
+ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_p8 (poly8_t * __a, poly8x8x2_t val)
{
- return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
- (int64x1_t) __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ poly8x16x2_t temp;
+ temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
+ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_s16 (int16_t * __a, int16x4x2_t val)
{
- return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ int16x8x2_t temp;
+ temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
+ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_p16 (poly16_t * __a, poly16x4x2_t val)
{
- return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ poly16x8x2_t temp;
+ temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
+ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_s32 (int32_t * __a, int32x2x2_t val)
{
- return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ int32x4x2_t temp;
+ temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
+ __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_u8 (uint8_t * __a, uint8x8x2_t val)
{
- return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ uint8x16x2_t temp;
+ temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
+ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_u16 (uint16_t * __a, uint16x4x2_t val)
{
- return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
- (int8x16_t) __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ uint16x8x2_t temp;
+ temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
+ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_u32 (uint32_t * __a, uint32x2x2_t val)
{
- return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
- (int16x8_t) __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ uint32x4x2_t temp;
+ temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
+ __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_f32 (float32_t * __a, float32x2x2_t val)
{
- return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
- (int32x4_t) __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ float32x4x2_t temp;
+ temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
+ __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
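+/* vst2q: Q-register forms.  The data is already full width, so the
+   register tuple is built directly from val, with no zero padding.  */
+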
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_s8 (int8_t * __a, int8x16x2_t val)
{
- return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
- (int64x2_t) __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
+ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
{
- return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
+ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_s16 (int16_t * __a, int16x8x2_t val)
{
- return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
+ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
-/* vst1 */
-
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_f32 (float32_t *a, float32x2_t b)
+vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
{
- __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
+ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_f64 (float64_t *a, float64x1_t b)
+vst2q_s32 (int32_t * __a, int32x4x2_t val)
{
- *a = b;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
+ __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_p8 (poly8_t *a, poly8x8_t b)
+vst2q_s64 (int64_t * __a, int64x2x2_t val)
{
- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
- (int8x8_t) b);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_p16 (poly16_t *a, poly16x4_t b)
+vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
{
- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
- (int16x4_t) b);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
+ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s8 (int8_t *a, int8x8_t b)
+vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
{
- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
+ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s16 (int16_t *a, int16x4_t b)
+vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
{
- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
+ __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s32 (int32_t *a, int32x2_t b)
+vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
{
- __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s64 (int64_t *a, int64x1_t b)
+vst2q_f32 (float32_t * __a, float32x4x2_t val)
{
- *a = b;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
+ __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u8 (uint8_t *a, uint8x8_t b)
+vst2q_f64 (float64_t * __a, float64x2x2_t val)
{
- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
- (int8x8_t) b);
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
+ __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u16 (uint16_t *a, uint16x4_t b)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_s64 (int64_t * __a, int64x1x3_t val)
{
- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
- (int16x4_t) b);
+ __builtin_aarch64_simd_ci __o;
+ int64x2x3_t temp;
+ temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
+ temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
+ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u32 (uint32_t *a, uint32x2_t b)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_u64 (uint64_t * __a, uint64x1x3_t val)
{
- __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
- (int32x2_t) b);
+ __builtin_aarch64_simd_ci __o;
+ uint64x2x3_t temp;
+ temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
+ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u64 (uint64_t *a, uint64x1_t b)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_f64 (float64_t * __a, float64x1x3_t val)
{
- *a = b;
+ __builtin_aarch64_simd_ci __o;
+ float64x2x3_t temp;
+ temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
+ __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_f32 (float32_t *a, float32x4_t b)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_s8 (int8_t * __a, int8x8x3_t val)
{
- __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
+ __builtin_aarch64_simd_ci __o;
+ int8x16x3_t temp;
+ temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
+ temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
+ __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_f64 (float64_t *a, float64x2_t b)
+vst3_p8 (poly8_t * __a, poly8x8x3_t val)
{
- __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
+ __builtin_aarch64_simd_ci __o;
+ poly8x16x3_t temp;
+ temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
+ __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
-/* vst1q */
-
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_p8 (poly8_t *a, poly8x16_t b)
+vst3_s16 (int16_t * __a, int16x4x3_t val)
{
- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
- (int8x16_t) b);
+ __builtin_aarch64_simd_ci __o;
+ int16x8x3_t temp;
+ temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
+ temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
+ __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_p16 (poly16_t *a, poly16x8_t b)
+vst3_p16 (poly16_t * __a, poly16x4x3_t val)
{
- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
- (int16x8_t) b);
+ __builtin_aarch64_simd_ci __o;
+ poly16x8x3_t temp;
+ temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
+ __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s8 (int8_t *a, int8x16_t b)
+vst3_s32 (int32_t * __a, int32x2x3_t val)
{
- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
+ __builtin_aarch64_simd_ci __o;
+ int32x4x3_t temp;
+ temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
+ temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
+ __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s16 (int16_t *a, int16x8_t b)
+vst3_u8 (uint8_t * __a, uint8x8x3_t val)
{
- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
+ __builtin_aarch64_simd_ci __o;
+ uint8x16x3_t temp;
+ temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
+ __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s32 (int32_t *a, int32x4_t b)
+vst3_u16 (uint16_t * __a, uint16x4x3_t val)
{
- __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
+ __builtin_aarch64_simd_ci __o;
+ uint16x8x3_t temp;
+ temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
+ __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s64 (int64_t *a, int64x2_t b)
+vst3_u32 (uint32_t * __a, uint32x2x3_t val)
{
- __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
+ __builtin_aarch64_simd_ci __o;
+ uint32x4x3_t temp;
+ temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
+ __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u8 (uint8_t *a, uint8x16_t b)
+vst3_f32 (float32_t * __a, float32x2x3_t val)
{
- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
- (int8x16_t) b);
+ __builtin_aarch64_simd_ci __o;
+ float32x4x3_t temp;
+ temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
+ __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u16 (uint16_t *a, uint16x8_t b)
+vst3q_s8 (int8_t * __a, int8x16x3_t val)
{
- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
- (int16x8_t) b);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
+ __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u32 (uint32_t *a, uint32x4_t b)
+vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
{
- __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
- (int32x4_t) b);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
+ __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u64 (uint64_t *a, uint64x2_t b)
-{
- __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
- (int64x2_t) b);
-}
-
-/* vstn */
-
-__extension__ static __inline void
-vst2_s64 (int64_t * __a, int64x1x2_t val)
-{
- __builtin_aarch64_simd_oi __o;
- int64x2x2_t temp;
- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
-}
-
-__extension__ static __inline void
-vst2_u64 (uint64_t * __a, uint64x1x2_t val)
+vst3q_s16 (int16_t * __a, int16x8x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- uint64x2x2_t temp;
- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
+ __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
-__extension__ static __inline void
-vst2_f64 (float64_t * __a, float64x1x2_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- float64x2x2_t temp;
- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
- __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
+ __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
-__extension__ static __inline void
-vst2_s8 (int8_t * __a, int8x8x2_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_s32 (int32_t * __a, int32x4x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- int8x16x2_t temp;
- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
+ __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2_p8 (poly8_t * __a, poly8x8x2_t val)
+vst3q_s64 (int64_t * __a, int64x2x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- poly8x16x2_t temp;
- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
+ __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2_s16 (int16_t * __a, int16x4x2_t val)
+vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- int16x8x2_t temp;
- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
+ __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2_p16 (poly16_t * __a, poly16x4x2_t val)
+vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- poly16x8x2_t temp;
- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
+ __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2_s32 (int32_t * __a, int32x2x2_t val)
+vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- int32x4x2_t temp;
- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
- __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
+ __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2_u8 (uint8_t * __a, uint8x8x2_t val)
+vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- uint8x16x2_t temp;
- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
+ __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2_u16 (uint16_t * __a, uint16x4x2_t val)
+vst3q_f32 (float32_t * __a, float32x4x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- uint16x8x2_t temp;
- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
+ __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2_u32 (uint32_t * __a, uint32x2x2_t val)
+vst3q_f64 (float64_t * __a, float64x2x3_t val)
{
- __builtin_aarch64_simd_oi __o;
- uint32x4x2_t temp;
- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
- __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
+ __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
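+/* The d-register forms below take 64-bit vectors, but the tuple
+   builtins operate on full q registers: each input is therefore
+   widened with vcombine (upper half zeroed) before being placed in
+   the tuple, and the store builtin writes only the low 64-bit half
+   of each register.  */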
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2_f32 (float32_t * __a, float32x2x2_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_s64 (int64_t * __a, int64x1x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- float32x4x2_t temp;
- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
- __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ int64x2x4_t temp;
+ temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
+ temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
+ temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
+ __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_s8 (int8_t * __a, int8x16x2_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_u64 (uint64_t * __a, uint64x1x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ uint64x2x4_t temp;
+ temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
+ __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_f64 (float64_t * __a, float64x1x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ float64x2x4_t temp;
+ temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
+ __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_s16 (int16_t * __a, int16x8x2_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_s8 (int8_t * __a, int8x8x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ int8x16x4_t temp;
+ temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
+ temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
+ temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
+ __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
+vst4_p8 (poly8_t * __a, poly8x8x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ poly8x16x4_t temp;
+ temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
+ __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_s32 (int32_t * __a, int32x4x2_t val)
+vst4_s16 (int16_t * __a, int16x4x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
- __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ int16x8x4_t temp;
+ temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
+ temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
+ temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
+ __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_s64 (int64_t * __a, int64x2x2_t val)
+vst4_p16 (poly16_t * __a, poly16x4x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ poly16x8x4_t temp;
+ temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
+ __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
+vst4_s32 (int32_t * __a, int32x2x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ int32x4x4_t temp;
+ temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
+ temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
+ temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
+ temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
+ __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
+vst4_u8 (uint8_t * __a, uint8x8x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ uint8x16x4_t temp;
+ temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
+ __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
+vst4_u16 (uint16_t * __a, uint16x4x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
- __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ uint16x8x4_t temp;
+ temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
+ __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
+vst4_u32 (uint32_t * __a, uint32x2x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ uint32x4x4_t temp;
+ temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
+ __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_f32 (float32_t * __a, float32x4x2_t val)
+vst4_f32 (float32_t * __a, float32x2x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
- __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ float32x4x4_t temp;
+ temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
+ __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst2q_f64 (float64_t * __a, float64x2x2_t val)
+vst4q_s8 (int8_t * __a, int8x16x4_t val)
{
- __builtin_aarch64_simd_oi __o;
- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
- __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
+ __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
-__extension__ static __inline void
-vst3_s64 (int64_t * __a, int64x1x3_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- int64x2x3_t temp;
- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
- __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
+ __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
-__extension__ static __inline void
-vst3_u64 (uint64_t * __a, uint64x1x3_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_s16 (int16_t * __a, int16x8x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- uint64x2x3_t temp;
- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
- __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
+ __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
-__extension__ static __inline void
-vst3_f64 (float64_t * __a, float64x1x3_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- float64x2x3_t temp;
- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
- __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
+ __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
-__extension__ static __inline void
-vst3_s8 (int8_t * __a, int8x8x3_t val)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_s32 (int32_t * __a, int32x4x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- int8x16x3_t temp;
- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
- __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
+ __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3_p8 (poly8_t * __a, poly8x8x3_t val)
+vst4q_s64 (int64_t * __a, int64x2x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- poly8x16x3_t temp;
- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
- __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
+ __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3_s16 (int16_t * __a, int16x4x3_t val)
+vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- int16x8x3_t temp;
- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
- __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
+ __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3_p16 (poly16_t * __a, poly16x4x3_t val)
+vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- poly16x8x3_t temp;
- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
- __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
+ __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3_s32 (int32_t * __a, int32x2x3_t val)
+vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- int32x4x3_t temp;
- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
- __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
+ __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3_u8 (uint8_t * __a, uint8x8x3_t val)
+vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- uint8x16x3_t temp;
- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
- __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
+ __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3_u16 (uint16_t * __a, uint16x4x3_t val)
+vst4q_f32 (float32_t * __a, float32x4x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- uint16x8x3_t temp;
- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
- __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
+ __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3_u32 (uint32_t * __a, uint32x2x3_t val)
+vst4q_f64 (float64_t * __a, float64x2x4_t val)
{
- __builtin_aarch64_simd_ci __o;
- uint32x4x3_t temp;
- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
- __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
+ __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3_f32 (float32_t * __a, float32x2x3_t val)
+/* vsub */
+
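+/* The scalar forms need no builtin; a plain C subtraction on the
+   64-bit values generates the right code.  */
+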
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vsubd_s64 (int64x1_t __a, int64x1_t __b)
{
- __builtin_aarch64_simd_ci __o;
- float32x4x3_t temp;
- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
- __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
+ return __a - __b;
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_s8 (int8_t * __a, int8x16x3_t val)
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
- __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ return __a - __b;
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
+/* vtbx1 */
+
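+/* vtbx1 must leave destination lanes whose index is out of range
+   (>= 8) unchanged, whereas the underlying table lookup (vtbl1)
+   writes zero to such lanes.  Emulate the TBX behaviour by building
+   a mask of the in-range indices with vclt and selecting between the
+   table result and the original destination with vbsl.  */
+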
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
- __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
+ vmov_n_u8 (8));
+ int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
+
+ return vbsl_s8 (__mask, __tbl, __r);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_s16 (int16_t * __a, int16x8x3_t val)
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
- __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
+ uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
+
+ return vbsl_u8 (__mask, __tbl, __r);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
- __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+ uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
+ poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
+
+ return vbsl_p8 (__mask, __tbl, __r);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_s32 (int32_t * __a, int32x4x3_t val)
+/* vtbx3 */
+
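+/* As for vtbx1, but the three-vector table holds 24 byte entries,
+   so indices below 24 are in range.  */
+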
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
- __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
+ uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
+ vmov_n_u8 (24));
+ int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
+
+ return vbsl_s8 (__mask, __tbl, __r);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_s64 (int64_t * __a, int64x2x3_t val)
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
- __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+ uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
+ uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
+
+ return vbsl_u8 (__mask, __tbl, __r);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
- __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+ uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
+ poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
+
+ return vbsl_p8 (__mask, __tbl, __r);
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
+/* vtrn */
+
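+/* vtrn1 interleaves the even-numbered lanes of the two inputs and
+   vtrn2 the odd-numbered ones.  __builtin_shuffle masks use GCC's
+   vector lane numbering, which on big-endian runs in the opposite
+   direction to the architectural lane order, hence the __AARCH64EB__
+   variant of each mask.  */
+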
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vtrn1_f32 (float32x2_t __a, float32x2_t __b)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
- __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
- __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
- __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtrn1_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_f32 (float32_t * __a, float32x4x3_t val)
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vtrn1_s16 (int16x4_t __a, int16x4_t __b)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
- __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst3q_f64 (float64_t * __a, float64x2x3_t val)
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vtrn1_s32 (int32x2_t __a, int32x2_t __b)
{
- __builtin_aarch64_simd_ci __o;
- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
- __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
}
-__extension__ static __inline void
-vst4_s64 (int64_t * __a, int64x1x4_t val)
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
{
- __builtin_aarch64_simd_xi __o;
- int64x2x4_t temp;
- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
- temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
- __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
}
-__extension__ static __inline void
-vst4_u64 (uint64_t * __a, uint64x1x4_t val)
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
{
- __builtin_aarch64_simd_xi __o;
- uint64x2x4_t temp;
- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
- temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
- __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
+#endif
}
-__extension__ static __inline void
-vst4_f64 (float64_t * __a, float64x1x4_t val)
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
{
- __builtin_aarch64_simd_xi __o;
- float64x2x4_t temp;
- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
- temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
- __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
}
-__extension__ static __inline void
-vst4_s8 (int8_t * __a, int8x8x4_t val)
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
{
- __builtin_aarch64_simd_xi __o;
- int8x16x4_t temp;
- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
- temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
- __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4_p8 (poly8_t * __a, poly8x8x4_t val)
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
{
- __builtin_aarch64_simd_xi __o;
- poly8x16x4_t temp;
- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
- temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
- __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4_s16 (int16_t * __a, int16x4x4_t val)
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
- __builtin_aarch64_simd_xi __o;
- int16x8x4_t temp;
- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
- temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
- __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4_p16 (poly16_t * __a, poly16x4x4_t val)
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
- __builtin_aarch64_simd_xi __o;
- poly16x8x4_t temp;
- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
- temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
- __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4_s32 (int32_t * __a, int32x2x4_t val)
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
{
- __builtin_aarch64_simd_xi __o;
- int32x4x4_t temp;
- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
- temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
- __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4_u8 (uint8_t * __a, uint8x8x4_t val)
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
{
- __builtin_aarch64_simd_xi __o;
- uint8x16x4_t temp;
- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
- temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
- __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4_u16 (uint16_t * __a, uint16x4x4_t val)
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
{
- __builtin_aarch64_simd_xi __o;
- uint16x8x4_t temp;
- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
- temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
- __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4_u32 (uint32_t * __a, uint32x2x4_t val)
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
{
- __builtin_aarch64_simd_xi __o;
- uint32x4x4_t temp;
- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
- temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
- __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4_f32 (float32_t * __a, float32x2x4_t val)
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
- __builtin_aarch64_simd_xi __o;
- float32x4x4_t temp;
- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
- temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
- __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_s8 (int8_t * __a, int8x16x4_t val)
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
- __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
- __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_s16 (int16_t * __a, int16x8x4_t val)
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
- __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
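+/* vtrn2 is the companion permute, interleaving the odd-numbered lanes:
+ vtrn2_f32 (a, b) is {a[1], b[1]}.  Used together, vtrn1 and vtrn2
+ transpose each 2x2 block of corresponding lane pairs.  */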
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vtrn2_f32 (float32x2_t __a, float32x2_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
- __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_s32 (int32_t * __a, int32x4x4_t val)
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
- __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_s64 (int64_t * __a, int64x2x4_t val)
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
- __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtrn2_s8 (int8x8_t __a, int8x8_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
- __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vtrn2_s16 (int16x4_t __a, int16x4_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
- __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vtrn2_s32 (int32x2_t __a, int32x2_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
- __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
- __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_f32 (float32_t * __a, float32x4x4_t val)
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
- __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
+#endif
}
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst4q_f64 (float64_t * __a, float64x2x4_t val)
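+/* vtrn2q_* are the full-width (128-bit) forms; the masks extend the
+ same odd-lane selection to 2, 4, 8 or 16 lanes.  */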
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
{
- __builtin_aarch64_simd_xi __o;
- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
- __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
}
-/* vsub */
-
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vsubd_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
{
- return __a - __b;
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
+#endif
}
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
{
- return __a - __b;
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
}
-/* vtbx1 */
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
- uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
- vmov_n_u8 (8));
- int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
-
- return vbsl_s8 (__mask, __tbl, __r);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
+#endif
}
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
- uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
- uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
-
- return vbsl_u8 (__mask, __tbl, __r);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
}
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
{
- uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
- poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
-
- return vbsl_p8 (__mask, __tbl, __r);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
+#endif
}
-/* vtbx3 */
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
{
- uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
- vmov_n_u8 (24));
- int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
+}

- return vbsl_s8 (__mask, __tbl, __r);
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
+#endif
}
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
{
- uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
- uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}

- return vbsl_u8 (__mask, __tbl, __r);
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
+#endif
}
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
- uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
- poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
+}

- return vbsl_p8 (__mask, __tbl, __r);
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
+#endif
}
-/* vtrn */
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
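+/* The vtrn_* intrinsics below return both transposed halves at once;
+ e.g. vtrn_f32 (a, b) is {{a[0], b[0]}, {a[1], b[1]}}, i.e.
+ {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}.  */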
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vtrn_f32 (float32x2_t a, float32x2_t b)