typedef __Poly8x16_t poly8x16_t;
typedef __Poly16x8_t poly16x8_t;
typedef __Poly64x2_t poly64x2_t;
+typedef __Poly64x1_t poly64x1_t;
typedef __Uint8x16_t uint8x16_t;
typedef __Uint16x8_t uint16x8_t;
typedef __Uint32x4_t uint32x4_t;
poly16x8_t val[2];
} poly16x8x2_t;
+typedef struct poly64x1x2_t
+{
+ poly64x1_t val[2];
+} poly64x1x2_t;
+
+typedef struct poly64x1x3_t
+{
+ poly64x1_t val[3];
+} poly64x1x3_t;
+
+typedef struct poly64x1x4_t
+{
+ poly64x1_t val[4];
+} poly64x1x4_t;
+
+typedef struct poly64x2x2_t
+{
+ poly64x2_t val[2];
+} poly64x2x2_t;
+
+typedef struct poly64x2x3_t
+{
+ poly64x2_t val[3];
+} poly64x2x3_t;
+
+typedef struct poly64x2x4_t
+{
+ poly64x2_t val[4];
+} poly64x2x4_t;
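+
+/* These D- and Q-register tuple types are the operand types of the
+   vld2/vld3/vld4 and vst2/vst3/vst4 poly64 intrinsics added below,
+   mirroring the existing poly16x8x2_t pattern.  For example (sketch,
+   with ptr a const poly64_t *):
+     poly64x1x2_t pair = vld2_p64 (ptr);
+   deinterleaves two one-element vectors from memory.  */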
+
typedef struct int8x8x3_t
{
int8x8_t val[3];
__aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
__aarch64_vdup_lane_any (p16, , __a, __b)
+#define __aarch64_vdup_lane_p64(__a, __b) \
+ __aarch64_vdup_lane_any (p64, , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
__aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
__aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
__aarch64_vdup_lane_any (p16, , __a, __b)
+#define __aarch64_vdup_laneq_p64(__a, __b) \
+ __aarch64_vdup_lane_any (p64, , __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
__aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
__aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
__aarch64_vdup_lane_any (p16, q, __a, __b)
+#define __aarch64_vdupq_lane_p64(__a, __b) \
+ __aarch64_vdup_lane_any (p64, q, __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
__aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
__aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
__aarch64_vdup_lane_any (p16, q, __a, __b)
+#define __aarch64_vdupq_laneq_p64(__a, __b) \
+ __aarch64_vdup_lane_any (p64, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
__aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
return (poly16x4_t) __a;
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcreate_p64 (uint64_t __a)
+{
+ return (poly64x1_t) __a;
+}
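+
+/* As with the other vcreate intrinsics, the 64-bit argument is taken
+   as a bit pattern, so e.g. (sketch)
+     poly64x1_t k = vcreate_p64 (0x87ULL);
+   yields a one-lane polynomial vector holding 0x87, a typical way to
+   seed a PMULL operand.  */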
+
/* vget_lane */
__extension__ extern __inline float16_t
return __aarch64_vget_lane_any (__a, __b);
}
+__extension__ extern __inline poly64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vget_lane_p64 (poly64x1_t __a, const int __b)
+{
+ return __aarch64_vget_lane_any (__a, __b);
+}
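+
+/* For this one-element form the only valid lane index is 0; the
+   two-element vgetq_lane_p64 below also accepts lane 1, e.g. (sketch)
+     poly64_t hi = vgetq_lane_p64 (v, 1);  */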
+
__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_s8 (int8x8_t __a, const int __b)
return __aarch64_vget_lane_any (__a, __b);
}
+__extension__ extern __inline poly64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vgetq_lane_p64 (poly64x2_t __a, const int __b)
+{
+ return __aarch64_vget_lane_any (__a, __b);
+}
+
__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_s8 (int8x16_t __a, const int __b)
return (poly8x8_t) __a;
}
+__extension__ extern __inline poly8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p8_p64 (poly64x1_t __a)
+{
+ return (poly8x8_t) __a;
+}
+
__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_f64 (float64x2_t __a)
return (poly8x16_t) __a;
}
+__extension__ extern __inline poly8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p8_p64 (poly64x2_t __a)
+{
+ return (poly8x16_t) __a;
+}
+
+__extension__ extern __inline poly8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p8_p128 (poly128_t __a)
+{
+  return (poly8x16_t) __a;
+}
+
__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_f16 (float16x4_t __a)
return (poly16x4_t) __a;
}
+__extension__ extern __inline poly16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p16_p64 (poly64x1_t __a)
+{
+ return (poly16x4_t) __a;
+}
+
__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_f64 (float64x2_t __a)
return (poly16x8_t) __a;
}
+__extension__ extern __inline poly16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p16_p64 (poly64x2_t __a)
+{
+ return (poly16x8_t) __a;
+}
+
+__extension__ extern __inline poly16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p16_p128 (poly128_t __a)
+{
+  return (poly16x8_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_f16 (float16x4_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_f64 (float64x1_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_s8 (int8x8_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_s16 (int16x4_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_s32 (int32x2_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_s64 (int64x1_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_f32 (float32x2_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_u8 (uint8x8_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_u16 (uint16x4_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_u32 (uint32x2_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_u64 (uint64x1_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_p8 (poly8x8_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_p16 (poly16x4_t __a)
+{
+  return (poly64x1_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_f64 (float64x2_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_s8 (int8x16_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_s16 (int16x8_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_s32 (int32x4_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_s64 (int64x2_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_f16 (float16x8_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_f32 (float32x4_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_p128 (poly128_t __a)
+{
+  return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_u8 (uint8x16_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_u16 (uint16x8_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_p16 (poly16x8_t __a)
+{
+  return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_u32 (uint32x4_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_u64 (uint64x2_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_p8 (poly8x16_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_p8 (poly8x16_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_p16 (poly16x8_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_f16 (float16x8_t __a)
+{
+ return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_f32 (float32x4_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_p64 (poly64x2_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_s64 (int64x2_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_u64 (uint64x2_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_s8 (int8x16_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_s16 (int16x8_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_s32 (int32x4_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_u8 (uint8x16_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_u16 (uint16x8_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_u32 (uint32x4_t __a)
+{
+  return (poly128_t) __a;
+}
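+
+/* These vreinterpret variants are bit-pattern-preserving casts: no
+   instruction is emitted, only the static type changes.  E.g. (sketch)
+   a 128-bit value viewed as a crypto operand:
+     poly128_t r = vreinterpretq_p128_u64 (u);
+   where u is a uint64x2_t.  */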
+
__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_f64 (float64x1_t __a)
return (float16x4_t) __a;
}
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_f16_p64 (poly64x1_t __a)
+{
+ return (float16x4_t) __a;
+}
+
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_f64 (float64x2_t __a)
}
__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_f16_p128 (poly128_t __a)
+{
+ return (float16x8_t) __a;
+}
+
+__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_p16 (poly16x8_t __a)
{
return (float16x8_t) __a;
}
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_f16_p64 (poly64x2_t __a)
+{
+ return (float16x8_t) __a;
+}
+
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_f16 (float16x4_t __a)
return (float32x2_t) __a;
}
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_f32_p64 (poly64x1_t __a)
+{
+ return (float32x2_t) __a;
+}
+
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_f16 (float16x8_t __a)
return (float32x4_t) __a;
}
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_f32_p64 (poly64x2_t __a)
+{
+ return (float32x4_t) __a;
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_f32_p128 (poly128_t __a)
+{
+  return (float32x4_t) __a;
+}
+
__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_f16 (float16x4_t __a)
__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_f64_p64 (poly64x1_t __a)
+{
+ return (float64x1_t) __a;
+}
+
+__extension__ extern __inline float64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_s8 (int8x8_t __a)
{
return (float64x1_t) __a;
__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_f64_p64 (poly64x2_t __a)
+{
+ return (float64x2_t) __a;
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_s8 (int8x16_t __a)
{
return (float64x2_t) __a;
return (int64x1_t) __a;
}
+__extension__ extern __inline int64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_s64_p64 (poly64x1_t __a)
+{
+ return (int64x1_t) __a;
+}
+
__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_f64 (float64x2_t __a)
return (int64x2_t) __a;
}
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s64_p64 (poly64x2_t __a)
+{
+ return (int64x2_t) __a;
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s64_p128 (poly128_t __a)
+{
+  return (int64x2_t) __a;
+}
+
__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_f16 (float16x4_t __a)
return (uint64x1_t) __a;
}
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_u64_p64 (poly64x1_t __a)
+{
+ return (uint64x1_t) __a;
+}
+
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_f64 (float64x2_t __a)
return (uint64x2_t) __a;
}
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u64_p64 (poly64x2_t __a)
+{
+ return (uint64x2_t) __a;
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u64_p128 (poly128_t __a)
+{
+  return (uint64x2_t) __a;
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_f16 (float16x4_t __a)
return (int8x8_t) __a;
}
+__extension__ extern __inline int8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_s8_p64 (poly64x1_t __a)
+{
+ return (int8x8_t) __a;
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_f64 (float64x2_t __a)
return (int8x16_t) __a;
}
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s8_p64 (poly64x2_t __a)
+{
+ return (int8x16_t) __a;
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s8_p128 (poly128_t __a)
+{
+  return (int8x16_t) __a;
+}
+
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_f16 (float16x4_t __a)
return (int16x4_t) __a;
}
+__extension__ extern __inline int16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_s16_p64 (poly64x1_t __a)
+{
+ return (int16x4_t) __a;
+}
+
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_f64 (float64x2_t __a)
return (int16x8_t) __a;
}
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s16_p64 (poly64x2_t __a)
+{
+ return (int16x8_t) __a;
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s16_p128 (poly128_t __a)
+{
+  return (int16x8_t) __a;
+}
+
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_f16 (float16x4_t __a)
return (int32x2_t) __a;
}
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_s32_p64 (poly64x1_t __a)
+{
+ return (int32x2_t) __a;
+}
+
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_f64 (float64x2_t __a)
return (int32x4_t) __a;
}
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s32_p64 (poly64x2_t __a)
+{
+ return (int32x4_t) __a;
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s32_p128 (poly128_t __a)
+{
+  return (int32x4_t) __a;
+}
+
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_f16 (float16x4_t __a)
return (uint8x8_t) __a;
}
+__extension__ extern __inline uint8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_u8_p64 (poly64x1_t __a)
+{
+ return (uint8x8_t) __a;
+}
+
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_f64 (float64x2_t __a)
return (uint8x16_t) __a;
}
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u8_p64 (poly64x2_t __a)
+{
+ return (uint8x16_t) __a;
+}
+
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u8_p128 (poly128_t __a)
+{
+  return (uint8x16_t) __a;
+}
+
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_f16 (float16x4_t __a)
return (uint16x4_t) __a;
}
+__extension__ extern __inline uint16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_u16_p64 (poly64x1_t __a)
+{
+ return (uint16x4_t) __a;
+}
+
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_f64 (float64x2_t __a)
return (uint16x8_t) __a;
}
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u16_p64 (poly64x2_t __a)
+{
+ return (uint16x8_t) __a;
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u16_p128 (poly128_t __a)
+{
+  return (uint16x8_t) __a;
+}
+
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_f16 (float16x4_t __a)
return (uint32x2_t) __a;
}
+__extension__ extern __inline uint32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_u32_p64 (poly64x1_t __a)
+{
+ return (uint32x2_t) __a;
+}
+
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_f64 (float64x2_t __a)
return (uint32x4_t) __a;
}
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u32_p64 (poly64x2_t __a)
+{
+ return (uint32x4_t) __a;
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u32_p128 (poly128_t __a)
+{
+  return (uint32x4_t) __a;
+}
+
/* vset_lane */
__extension__ extern __inline float16x4_t
return __aarch64_vset_lane_any (__elem, __vec, __index);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vset_lane_p64 (poly64_t __elem, poly64x1_t __vec, const int __index)
+{
+ return __aarch64_vset_lane_any (__elem, __vec, __index);
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index)
return __aarch64_vset_lane_any (__elem, __vec, __index);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsetq_lane_p64 (poly64_t __elem, poly64x2_t __vec, const int __index)
+{
+ return __aarch64_vset_lane_any (__elem, __vec, __index);
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index)
__GET_LOW (p16);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vget_low_p64 (poly64x2_t __a)
+{
+ __GET_LOW (p64);
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_s8 (int8x16_t __a)
__GET_HIGH (p16);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vget_high_p64 (poly64x2_t __a)
+{
+ __GET_HIGH (p64);
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_s8 (int8x16_t __a)
(int16x4_t) __b);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcombine_p64 (poly64x1_t __a, poly64x1_t __b)
+{
+ return (poly64x2_t) __builtin_aarch64_combinedi_ppp (__a[0], __b[0]);
+}
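+
+/* Sketch of the intended round trip with vget_low_p64 and
+   vget_high_p64 defined above: for any poly64x2_t v,
+     vcombine_p64 (vget_low_p64 (v), vget_high_p64 (v))
+   reassembles v unchanged.  */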
+
/* Start of temporary inline asm implementations. */
__extension__ extern __inline int8x8_t
result; \
})
+#define vsri_n_p64(a, b, c) \
+ __extension__ \
+ ({ \
+ poly64x1_t b_ = (b); \
+ poly64x1_t a_ = (a); \
+ poly64x1_t result; \
+ __asm__ ("sri %d0,%d2,%3" \
+ : "=w"(result) \
+ : "0"(a_), "w"(b_), "i"(c) \
+ : /* No clobbers. */); \
+ result; \
+ })
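+
+/* SRI keeps the top C bits of each destination element and fills the
+   remaining bits with the source shifted right by C, so e.g. (sketch)
+   vsri_n_p64 (a, b, 8) yields a's top 8 bits followed by b's top 56
+   bits in the single lane.  */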
+
#define vsriq_n_p8(a, b, c) \
__extension__ \
({ \
result; \
})
+#define vsriq_n_p64(a, b, c) \
+ __extension__ \
+ ({ \
+ poly64x2_t b_ = (b); \
+ poly64x2_t a_ = (a); \
+ poly64x2_t result; \
+ __asm__ ("sri %0.2d,%2.2d,%3" \
+ : "=w"(result) \
+ : "0"(a_), "w"(b_), "i"(c) \
+ : /* No clobbers. */); \
+ result; \
+ })
+
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtst_p8 (poly8x8_t a, poly8x8_t b)
int8x16_t)
__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16,
int16x8_t)
+__ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64,
+ poly64x2_t)
__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
+__ST2_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64)
__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
int8x16_t)
__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16,
int16x8_t)
+__ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64,
+ poly64x2_t)
__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
+__ST3_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64)
__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
int8x16_t)
__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16,
int16x8_t)
+__ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64,
+ poly64x2_t)
__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
+__ST4_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64)
__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
{
return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c)
+{
+ return (poly64x1_t)
+ {__builtin_aarch64_simd_bsldi_pupp (__a[0], __b[0], __c[0])};
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c)
+{
+ return __builtin_aarch64_simd_bslv2di_pupp (__a, __b, __c);
+}
+
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
return (uint8x8_t) (__a == __b);
}
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vceq_p64 (poly64x1_t __a, poly64x1_t __b)
+{
+ return (uint64x1_t) (__a == __b);
+}
+
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_s8 (int8x8_t __a, int8x8_t __b)
__a, __lane1);
}
-__extension__ extern __inline poly16x4_t
+__extension__ extern __inline poly16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopy_lane_p16 (poly16x4_t __a, const int __lane1,
+ poly16x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcopy_lane_p16 (poly16x4_t __a, const int __lane1,
- poly16x4_t __b, const int __lane2)
+vcopy_lane_p64 (poly64x1_t __a, const int __lane1,
+ poly64x1_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
__a, __lane1);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopy_laneq_p64 (poly64x1_t __a, const int __lane1,
+ poly64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcopy_laneq_s8 (int8x8_t __a, const int __lane1,
__a, __lane1);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopyq_lane_p64 (poly64x2_t __a, const int __lane1,
+ poly64x1_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcopyq_lane_s8 (int8x16_t __a, const int __lane1,
__a, __lane1);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopyq_laneq_p64 (poly64x2_t __a, const int __lane1,
+ poly64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcopyq_laneq_s8 (int8x16_t __a, const int __lane1,
return (poly16x4_t) {__a, __a, __a, __a};
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdup_n_p64 (poly64_t __a)
+{
+ return (poly64x1_t) {__a};
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_n_s8 (int8_t __a)
return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdupq_n_p64 (poly64_t __a)
+{
+ return (poly64x2_t) {__a, __a};
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_s8 (int32_t __a)
return __aarch64_vdup_lane_p16 (__a, __b);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdup_lane_p64 (poly64x1_t __a, const int __b)
+{
+ return __aarch64_vdup_lane_p64 (__a, __b);
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_s8 (int8x8_t __a, const int __b)
return __aarch64_vdup_laneq_p16 (__a, __b);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdup_laneq_p64 (poly64x2_t __a, const int __b)
+{
+ return __aarch64_vdup_laneq_p64 (__a, __b);
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_laneq_s8 (int8x16_t __a, const int __b)
return __aarch64_vdupq_lane_p16 (__a, __b);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdupq_lane_p64 (poly64x1_t __a, const int __b)
+{
+ return __aarch64_vdupq_lane_p64 (__a, __b);
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_lane_s8 (int8x8_t __a, const int __b)
return __aarch64_vdupq_laneq_p16 (__a, __b);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdupq_laneq_p64 (poly64x2_t __a, const int __b)
+{
+ return __aarch64_vdupq_laneq_p64 (__a, __b);
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_laneq_s8 (int8x16_t __a, const int __b)
#endif
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vext_p64 (poly64x1_t __a, poly64x1_t __b, __const int __c)
+{
+ __AARCH64_LANE_CHECK (__a, __c);
+  /* The only valid index is 0, which selects the whole vector, so
+     simply return the first operand.  */
+ return __a;
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
#endif
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vextq_p64 (poly64x2_t __a, poly64x2_t __b, __const int __c)
+{
+ __AARCH64_LANE_CHECK (__a, __c);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
+#endif
+}
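+
+/* For the two-lane form the only interesting immediate is 1: (sketch)
+     vextq_p64 (a, b, 1)
+   returns {a[1], b[0]}, while an immediate of 0 returns a unchanged.  */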
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
__builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_p64 (const poly64_t *a)
+{
+ return (poly64x1_t) {*a};
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_s8 (const int8_t *a)
__builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_p64 (const poly64_t *a)
+{
+ return (poly64x2_t)
+ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_s8 (const int8_t *a)
return vdup_n_p16 (*__a);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_dup_p64 (const poly64_t* __a)
+{
+ return vdup_n_p64 (*__a);
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_dup_s8 (const int8_t* __a)
return vdupq_n_p16 (*__a);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_dup_p64 (const poly64_t* __a)
+{
+  return vdupq_n_p64 (*__a);
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_dup_s8 (const int8_t* __a)
{
return __aarch64_vset_lane_any (*__src, __vec, __lane);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane)
+{
+ return __aarch64_vset_lane_any (*__src, __vec, __lane);
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane)
return __aarch64_vset_lane_any (*__src, __vec, __lane);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane)
+{
+ return __aarch64_vset_lane_any (*__src, __vec, __lane);
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane)
return ret;
}
+__extension__ extern __inline poly64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2_p64 (const poly64_t * __a)
+{
+ poly64x1x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1);
+ return ret;
+}
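+
+/* The pattern above is shared by all the new poly64 vldN forms: the
+   builtin loads into an opaque register tuple (__builtin_aarch64_simd_oi
+   here) and each D register is then extracted and cast to poly64x1_t.  */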
+
__extension__ extern __inline int16x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld2_s16 (const int16_t * __a)
return ret;
}
+__extension__ extern __inline poly64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2q_p64 (const poly64_t * __a)
+{
+ poly64x2x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1);
+ return ret;
+}
+
__extension__ extern __inline int32x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld2q_s32 (const int32_t * __a)
return ret;
}
+__extension__ extern __inline poly64x1x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3_p64 (const poly64_t * __a)
+{
+ poly64x1x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1);
+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2);
+ return ret;
+}
+
__extension__ extern __inline int8x16x3_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld3q_s8 (const int8_t * __a)
return ret;
}
+__extension__ extern __inline poly64x2x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3q_p64 (const poly64_t * __a)
+{
+ poly64x2x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1);
+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2);
+ return ret;
+}
+
__extension__ extern __inline int64x1x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld4_s64 (const int64_t * __a)
return ret;
}
+__extension__ extern __inline poly64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4_p64 (const poly64_t * __a)
+{
+ poly64x1x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1);
+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2);
+ ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3);
+ return ret;
+}
+
__extension__ extern __inline int8x16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld4q_s8 (const int8_t * __a)
return ret;
}
+__extension__ extern __inline poly64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4q_p64 (const poly64_t * __a)
+{
+ poly64x2x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1);
+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2);
+ ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3);
+ return ret;
+}
+
/* vldn_dup */
__extension__ extern __inline int8x8x2_t
return ret;
}
+__extension__ extern __inline poly64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2_dup_p64 (const poly64_t * __a)
+{
+ poly64x1x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1);
+ return ret;
+}
+
__extension__ extern __inline int64x1x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld2_dup_s64 (const int64_t * __a)
return ret;
}
+__extension__ extern __inline poly64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2q_dup_p64 (const poly64_t * __a)
+{
+ poly64x2x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1);
+ return ret;
+}
+
__extension__ extern __inline int64x1x3_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld3_dup_s64 (const int64_t * __a)
return ret;
}
+__extension__ extern __inline poly64x1x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3_dup_p64 (const poly64_t * __a)
+{
+ poly64x1x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1);
+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2);
+ return ret;
+}
+
__extension__ extern __inline int8x16x3_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld3q_dup_s8 (const int8_t * __a)
return ret;
}
+__extension__ extern __inline poly64x2x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3q_dup_p64 (const poly64_t * __a)
+{
+ poly64x2x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1);
+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2);
+ return ret;
+}
+
__extension__ extern __inline int64x1x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld4_dup_s64 (const int64_t * __a)
return ret;
}
+__extension__ extern __inline poly64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4_dup_p64 (const poly64_t * __a)
+{
+ poly64x1x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1);
+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2);
+ ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3);
+ return ret;
+}
+
__extension__ extern __inline int8x16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld4q_dup_s8 (const int8_t * __a)
return ret;
}
+__extension__ extern __inline poly64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4q_dup_p64 (const poly64_t * __a)
+{
+ poly64x2x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1);
+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2);
+ ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3);
+ return ret;
+}
+
/* vld2_lane */
#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \
int8x16_t)
__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi,
p16, int16x8_t)
+__LD2_LANE_FUNC (poly64x1x2_t, poly64x1_t, poly64x2x2_t, poly64_t, di,
+ v2di_ssps, di, p64, poly64x2_t)
__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD2_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
int8x16_t)
__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi,
p16, int16x8_t)
+__LD3_LANE_FUNC (poly64x1x3_t, poly64x1_t, poly64x2x3_t, poly64_t, di,
+ v2di_ssps, di, p64, poly64x2_t)
__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD3_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
int8x16_t)
__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi,
p16, int16x8_t)
+__LD4_LANE_FUNC (poly64x1x4_t, poly64x1_t, poly64x2x4_t, poly64_t, di,
+ v2di_ssps, di, p64, poly64x2_t)
__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD4_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
return vdup_n_p16 (__a);
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vmov_n_p64 (poly64_t __a)
+{
+ return vdup_n_p64 (__a);
+}
+
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_s8 (int8_t __a)
return vdupq_n_p16 (__a);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vmovq_n_p64 (poly64_t __a)
+{
+ return vdupq_n_p64 (__a);
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_s8 (int8_t __a)
return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
+{
+ return (poly64x1_t) {__builtin_aarch64_ssli_ndi_ppps (__a[0], __b[0], __c)};
+}
+
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
+{
+ return __builtin_aarch64_ssli_nv2di_ppps (__a, __b, __c);
+}
+
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_p64 (poly64_t *a, poly64x1_t b)
+{
+ *a = b[0];
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst1_s8 (int8_t *a, int8x8_t b)
{
__builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_p64 (poly64_t *a, poly64x2_t b)
+{
+ __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) a,
+ (poly64x2_t) b);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst1q_s8 (int8_t *a, int8x16_t b)
{
__builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_lane_p64 (poly64_t *__a, poly64x1_t __b, const int __lane)
+{
+ *__a = __aarch64_vget_lane_any (__b, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane)
{
*__a = __aarch64_vget_lane_any (__b, __lane);
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_lane_p64 (poly64_t *__a, poly64x2_t __b, const int __lane)
+{
+ *__a = __aarch64_vget_lane_any (__b, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane)
{
*__a = __aarch64_vget_lane_any (__b, __lane);
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_p64 (poly64_t * __a, poly64x1x2_t val)
+{
+ __builtin_aarch64_simd_oi __o;
+ poly64x2x2_t temp;
+ temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
+ (poly64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
+ (poly64x2_t) temp.val[1], 1);
+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
+}
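+
+/* The D-register vstN forms have no direct register-tuple builtin, so
+   each half is first widened to a Q register by combining it with a
+   zero from vcreate_p64; only the low halves reach memory.  */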
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst2q_s8 (int8_t * __a, int8x16x2_t val)
{
__builtin_aarch64_simd_oi __o;
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_p64 (poly64_t * __a, poly64x2x2_t val)
+{
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
+ (poly64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
+ (poly64x2_t) val.val[1], 1);
+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst3_s64 (int64_t * __a, int64x1x3_t val)
{
__builtin_aarch64_simd_ci __o;
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_p64 (poly64_t * __a, poly64x1x3_t val)
+{
+ __builtin_aarch64_simd_ci __o;
+ poly64x2x3_t temp;
+ temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) temp.val[2], 2);
+ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst3q_s8 (int8_t * __a, int8x16x3_t val)
{
__builtin_aarch64_simd_ci __o;
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_p64 (poly64_t * __a, poly64x2x3_t val)
+{
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) val.val[2], 2);
+ __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst4_s64 (int64_t * __a, int64x1x4_t val)
{
__builtin_aarch64_simd_xi __o;
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_p64 (poly64_t * __a, poly64x1x4_t val)
+{
+ __builtin_aarch64_simd_xi __o;
+ poly64x2x4_t temp;
+ temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_p64 (val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) temp.val[3], 3);
+ __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst4q_s8 (int8_t * __a, int8x16x4_t val)
{
__builtin_aarch64_simd_xi __o;
__builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_p64 (poly64_t * __a, poly64x2x4_t val)
+{
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) val.val[3], 3);
+ __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
/* vsub */
__extension__ extern __inline int64_t
/* This file contains tests for all the *p64 intrinsics, except for
vreinterpret which have their own testcase. */
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } } */
#include <arm_neon.h>
#include "arm-neon-ref.h"
VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
0xfffffffffffffff2 };
+/* Expected results: vmov_n. */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+
/* Expected results: vext. */
VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
/* Expected results: vget_low. */
VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+/* Expected results: vget_high. */
+VECT_VAR_DECL(vget_high_expected,poly,64,1) [] = { 0xfffffffffffffff1 };
+
/* Expected results: vld1. */
VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
0x3333333333333333 };
+/* Expected results: vldX_lane. */
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+
+/* Expected results: vget_lane. */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
int main (void)
{
int i;
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, "");
+ /* vget_high_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VGET_HIGH"
+
+#define TEST_VGET_HIGH(T1, T2, W, N, N2) \
+ VECT_VAR(vget_high_vector64, T1, W, N) = \
+ vget_high_##T2##W(VECT_VAR(vget_high_vector128, T1, W, N2)); \
+ vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_high_vector64, T1, W, N))
+
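(Stripped of the harness, the macro exercises a single intrinsic; an
illustrative sketch, not part of the patch, with the hypothetical name
upper_lane.)

#include <arm_neon.h>

/* vget_high_p64 extracts the upper 64-bit lane of a 128-bit vector.  */
poly64x1_t
upper_lane (poly64x2_t v)
{
  return vget_high_p64 (v);
}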
+ DECL_VARIABLE(vget_high_vector64, poly, 64, 1);
+ DECL_VARIABLE(vget_high_vector128, poly, 64, 2);
+
+ CLEAN(result, poly, 64, 1);
+
+ VLOAD(vget_high_vector128, buffer, q, poly, p, 64, 2);
+
+ TEST_VGET_HIGH(poly, p, 64, 1, 2);
+
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_high_expected, "");
+
/* vld1_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLD1/VLD1Q"
VECT_VAR(vst1_lane_vector, T1, W, N) = \
vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \
vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \
- VECT_VAR(vst1_lane_vector, T1, W, N), L)
+ VECT_VAR(vst1_lane_vector, T1, W, N), L);
DECL_VARIABLE(vst1_lane_vector, poly, 64, 1);
DECL_VARIABLE(vst1_lane_vector, poly, 64, 2);
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
+#ifdef __aarch64__
+
+ /* vmov_n_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VMOV/VMOVQ"
+
+#define TEST_VMOV(Q, T1, T2, W, N) \
+ VECT_VAR(vmov_n_vector, T1, W, N) = \
+ vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vmov_n_vector, T1, W, N))
+
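(The operation under test, shown directly; an illustrative sketch, not
part of the patch.  splat_p64/splatq_p64 are hypothetical names for
callers of the vmov_n_p64/vmovq_n_p64 intrinsics this test exercises.)

#include <arm_neon.h>

/* Broadcast a poly64_t scalar into every lane.  */
poly64x1_t splat_p64 (poly64_t x)  { return vmov_n_p64 (x); }
poly64x2_t splatq_p64 (poly64_t x) { return vmovq_n_p64 (x); }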
+ DECL_VARIABLE(vmov_n_vector, poly, 64, 1);
+ DECL_VARIABLE(vmov_n_vector, poly, 64, 2);
+
+ /* Try to read different places from the input buffer. */
+ for (i = 0; i < 3; i++) {
+ CLEAN(result, poly, 64, 1);
+ CLEAN(result, poly, 64, 2);
+
+ TEST_VMOV(, poly, p, 64, 1);
+ TEST_VMOV(q, poly, p, 64, 2);
+
+ switch (i) {
+ case 0:
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected0, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected0, "");
+ break;
+ case 1:
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected1, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected1, "");
+ break;
+ case 2:
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected2, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected2, "");
+ break;
+ default:
+ abort();
+ }
+ }
+
+ /* vget_lane_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
+ VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
+ if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
+ fprintf(stderr, \
+ "ERROR in %s (%s line %d in result '%s') at type %s " \
+ "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
+ TEST_MSG, __FILE__, __LINE__, \
+ STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
+ STR(VECT_NAME(T1, W, N)), \
+ VECT_VAR(vget_lane_vector, T1, W, N), \
+ VECT_VAR(vget_lane_expected, T1, W, N)); \
+ abort (); \
+ }
+
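(Again the bare operation; an illustrative sketch, not part of the
patch, with hypothetical names lane0/laneq0.)

#include <arm_neon.h>

/* Extract lane 0 as a poly64_t scalar.  */
poly64_t lane0 (poly64x1_t v)  { return vget_lane_p64 (v, 0); }
poly64_t laneq0 (poly64x2_t v) { return vgetq_lane_p64 (v, 0); }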
+ /* Initialize input values. */
+ DECL_VARIABLE(vector, poly, 64, 1);
+ DECL_VARIABLE(vector, poly, 64, 2);
+
+ VLOAD(vector, buffer, , poly, p, 64, 1);
+ VLOAD(vector, buffer, q, poly, p, 64, 2);
+
+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+ TEST_VGET_LANE( , poly, p, 64, 1, 0);
+ TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+ /* vldX_lane_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VLDX_LANE/VLDXQ_LANE"
+
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+
+ /* In this case, input variables are arrays of vectors. */
+#define DECL_VLD_STX_LANE(T1, W, N, X) \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
+ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+ /* We need to use a temporary result buffer (result_bis), because
+ the one used for other tests is not large enough. A subset of the
+ result data is moved from result_bis to result, and it is this
+ subset which is used to check the actual behavior. The next
+ macro makes it possible to move another chunk of data from
+ result_bis to result. */
+ /* We also use another extra input buffer (buffer_src), which we
+ fill with 0xAA, and which is used to load a vector from which we
+ read a given lane. */
+
+#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \
+ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
+ sizeof(VECT_VAR(buffer_src, T1, W, N))); \
+ \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
+ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
+ \
+ VECT_ARRAY_VAR(vector, T1, W, N, X) = \
+ /* Use dedicated init buffer, of size X. */ \
+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
+ L); \
+ vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR(vector, T1, W, N, X)); \
+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+ sizeof(VECT_VAR(result, T1, W, N)))
+
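(For the X=2, lane-0 instantiation, the load half of the macro boils
down to the following; an illustrative sketch, not part of the patch,
with the hypothetical name load_lane0.)

#include <arm_neon.h>

/* Replace lane 0 of each of the two one-element vectors in acc with
   lane_src[0] and lane_src[1] respectively.  */
poly64x1x2_t
load_lane0 (poly64_t *lane_src, poly64x1x2_t acc)
{
  return vld2_lane_p64 (lane_src, acc, 0);
}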
+ /* Overwrite "result" with the contents of "result_bis"[Y]. */
+#undef TEST_EXTRA_CHUNK
+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \
+ memcpy(VECT_VAR(result, T1, W, N), \
+ &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
+ sizeof(VECT_VAR(result, T1, W, N)));
+
+ /* Add some padding to try to catch out of bound accesses. */
+#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
+#define DUMMY_ARRAY(V, T, W, N, L) \
+ VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
+ ARRAY1(V##_pad,T,W,N)
+
+#define DECL_ALL_VLD_STX_LANE(X) \
+ DECL_VLD_STX_LANE(poly, 64, 1, X); \
+ DECL_VLD_STX_LANE(poly, 64, 2, X);
+
+#define TEST_ALL_VLDX_LANE(X) \
+ TEST_VLDX_LANE(, poly, p, 64, 1, X, 0); \
+ TEST_VLDX_LANE(q, poly, p, 64, 2, X, 0);
+
+#define TEST_ALL_EXTRA_CHUNKS(X,Y) \
+ TEST_EXTRA_CHUNK(poly, 64, 1, X, Y) \
+ TEST_EXTRA_CHUNK(poly, 64, 2, X, Y)
+
+#define CHECK_RESULTS_VLD_STX_LANE(test_name,EXPECTED,comment) \
+ CHECK(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);
+
+ /* Declare the temporary buffers / variables. */
+ DECL_ALL_VLD_STX_LANE(2);
+ DECL_ALL_VLD_STX_LANE(3);
+ DECL_ALL_VLD_STX_LANE(4);
+
+ DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+ DUMMY_ARRAY(buffer_src, poly, 64, 2, 4);
+
+ /* Check vld2_lane/vld2q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
+ TEST_ALL_VLDX_LANE(2);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_0, " chunk 0");
+
+ TEST_ALL_EXTRA_CHUNKS(2, 1);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_1, " chunk 1");
+
+ /* Check vld3_lane/vld3q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
+ TEST_ALL_VLDX_LANE(3);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_0, " chunk 0");
+
+ TEST_ALL_EXTRA_CHUNKS(3, 1);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_1, " chunk 1");
+
+ TEST_ALL_EXTRA_CHUNKS(3, 2);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_2, " chunk 2");
+
+ /* Check vld4_lane/vld4q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
+ TEST_ALL_VLDX_LANE(4);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_0, " chunk 0");
+
+ TEST_ALL_EXTRA_CHUNKS(4, 1);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_1, " chunk 1");
+
+ TEST_ALL_EXTRA_CHUNKS(4, 2);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_2, " chunk 2");
+
+ TEST_ALL_EXTRA_CHUNKS(4, 3);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_3, " chunk 3");
+
+ /* In this case, input variables are arrays of vectors. */
+#define DECL_VSTX_LANE(T1, W, N, X) \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
+ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+ /* We need to use a temporary result buffer (result_bis), because
+ the one used for other tests is not large enough. A subset of the
+ result data is moved from result_bis to result, and it is this
+ subset which is used to check the actual behavior. The next
+ macro makes it possible to move another chunk of data from
+ result_bis to result. */
+ /* We also use another extra input buffer (buffer_src), which we
+ fill with 0xAA, and which is used to load a vector from which we
+ read a given lane. */
+#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \
+ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
+ sizeof(VECT_VAR(buffer_src, T1, W, N))); \
+ memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \
+ sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \
+ \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
+ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
+ \
+ VECT_ARRAY_VAR(vector, T1, W, N, X) = \
+ /* Use dedicated init buffer, of size X. */ \
+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
+ L); \
+ vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR(vector, T1, W, N, X), \
+ L); \
+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+ sizeof(VECT_VAR(result, T1, W, N)));
+
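(The store counterpart for X=2, lane 0; an illustrative sketch, not
part of the patch, with the hypothetical name store_lane0.)

#include <arm_neon.h>

/* Write lane 0 of each vector in the pair to dst[0] and dst[1].  */
void
store_lane0 (poly64_t *dst, poly64x1x2_t v)
{
  vst2_lane_p64 (dst, v, 0);
}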
+#define TEST_ALL_VSTX_LANE(X) \
+ TEST_VSTX_LANE(, poly, p, 64, 1, X, 0); \
+ TEST_VSTX_LANE(q, poly, p, 64, 2, X, 0);
+
+ /* Check vst2_lane/vst2q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST2_LANE/VST2Q_LANE"
+ TEST_ALL_VSTX_LANE(2);
+
+#define CMT " (chunk 0)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_0, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(2, 1);
+#undef CMT
+#define CMT " chunk 1"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_1, CMT);
+
+ /* Check vst3_lane/vst3q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST3_LANE/VST3Q_LANE"
+ TEST_ALL_VSTX_LANE(3);
+
+#undef CMT
+#define CMT " (chunk 0)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_0, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(3, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_1, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(3, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_2, CMT);
+
+ /* Check vst4_lane/vst4q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST4_LANE/VST4Q_LANE"
+ TEST_ALL_VSTX_LANE(4);
+
+#undef CMT
+#define CMT " (chunk 0)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_0, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(4, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_1, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_2, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(4, 3);
+
+#undef CMT
+#define CMT " (chunk 3)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_3, CMT);
+
+#endif /* __aarch64__ */
+
return 0;
}