return __a + __b;
}
/* Removed by this change: __LANE0 (__t) expanded to (__t) - 1 when
   __AARCH64EB__ is non-zero and to 0 otherwise.  Every use of the macro
   visible in this excerpt is replaced by the literal 0.  */
-#if __AARCH64EB__
-#define __LANE0(__t) ((__t) - 1)
-#else
-#define __LANE0(__t) 0
-#endif
-
/* vaddv */
/* vaddv_s8: applies __builtin_aarch64_reduc_splus_v8qi to __a and returns
   lane 0 of the result through vget_lane_s8.  The lane index was
   __LANE0 (8) before this change and is the literal 0 after it.
   NOTE(review): the builtin name suggests a signed add reduction; its
   definition is not shown here -- confirm.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddv_s8 (int8x8_t __a)
{
- return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), __LANE0 (8));
+ return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
}
/* vaddv_s16: applies __builtin_aarch64_reduc_splus_v4hi to __a and returns
   lane 0 of the result through vget_lane_s16.  The lane index changes
   from __LANE0 (4) to the literal 0.
   NOTE(review): presumably a signed add reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddv_s16 (int16x4_t __a)
{
- return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), __LANE0 (4));
+ return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
}
/* vaddv_s32: applies __builtin_aarch64_reduc_splus_v2si to __a and returns
   lane 0 of the result through vget_lane_s32.  The lane index changes
   from __LANE0 (2) to the literal 0.
   NOTE(review): presumably a signed add reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddv_s32 (int32x2_t __a)
{
- return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), __LANE0 (2));
+ return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vaddv_u8 (uint8x8_t __a) -- confirm.
   Casts __a to int8x8_t, applies __builtin_aarch64_reduc_uplus_v8qi, casts
   the result to uint8x8_t and returns lane 0 through vget_lane_u8.  The
   lane index changes from __LANE0 (8) to the literal 0.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
{
return vget_lane_u8 ((uint8x8_t)
__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
- __LANE0 (8));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vaddv_u16 (uint16x4_t __a) -- confirm.
   Casts __a to int16x4_t, applies __builtin_aarch64_reduc_uplus_v4hi, casts
   the result to uint16x4_t and returns lane 0 through vget_lane_u16.  The
   lane index changes from __LANE0 (4) to the literal 0.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
{
return vget_lane_u16 ((uint16x4_t)
__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
- __LANE0 (4));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vaddv_u32 (uint32x2_t __a) -- confirm.
   Casts __a to int32x2_t, applies __builtin_aarch64_reduc_uplus_v2si, casts
   the result to uint32x2_t and returns lane 0 through vget_lane_u32.  The
   lane index changes from __LANE0 (2) to the literal 0.  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
{
return vget_lane_u32 ((uint32x2_t)
__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
- __LANE0 (2));
+ 0);
}
/* vaddvq_s8: applies __builtin_aarch64_reduc_splus_v16qi to __a and returns
   lane 0 of the result through vgetq_lane_s8.  The lane index changes
   from __LANE0 (16) to the literal 0.
   NOTE(review): presumably a signed add reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddvq_s8 (int8x16_t __a)
{
return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
- __LANE0 (16));
+ 0);
}
/* vaddvq_s16: applies __builtin_aarch64_reduc_splus_v8hi to __a and returns
   lane 0 of the result through vgetq_lane_s16.  The lane index changes
   from __LANE0 (8) to the literal 0.
   NOTE(review): presumably a signed add reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddvq_s16 (int16x8_t __a)
{
- return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), __LANE0 (8));
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
}
/* vaddvq_s32: applies __builtin_aarch64_reduc_splus_v4si to __a and returns
   lane 0 of the result through vgetq_lane_s32.  The lane index changes
   from __LANE0 (4) to the literal 0.
   NOTE(review): presumably a signed add reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddvq_s32 (int32x4_t __a)
{
- return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), __LANE0 (4));
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
}
/* vaddvq_s64: applies __builtin_aarch64_reduc_splus_v2di to __a and returns
   lane 0 of the result through vgetq_lane_s64.  The lane index changes
   from __LANE0 (2) to the literal 0.
   NOTE(review): presumably a signed add reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddvq_s64 (int64x2_t __a)
{
- return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), __LANE0 (2));
+ return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vaddvq_u8 (uint8x16_t __a) -- confirm.
   Casts __a to int8x16_t, applies __builtin_aarch64_reduc_uplus_v16qi, casts
   the result to uint8x16_t and returns lane 0 through vgetq_lane_u8.  The
   lane index changes from __LANE0 (16) to the literal 0.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u8 ((uint8x16_t)
__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
- __LANE0 (16));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vaddvq_u16 (uint16x8_t __a) -- confirm.
   Casts __a to int16x8_t, applies __builtin_aarch64_reduc_uplus_v8hi, casts
   the result to uint16x8_t and returns lane 0 through vgetq_lane_u16.  The
   lane index changes from __LANE0 (8) to the literal 0.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u16 ((uint16x8_t)
__builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
- __LANE0 (8));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vaddvq_u32 (uint32x4_t __a) -- confirm.
   Casts __a to int32x4_t, applies __builtin_aarch64_reduc_uplus_v4si, casts
   the result to uint32x4_t and returns lane 0 through vgetq_lane_u32.  The
   lane index changes from __LANE0 (4) to the literal 0.  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u32 ((uint32x4_t)
__builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
- __LANE0 (4));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vaddvq_u64 (uint64x2_t __a) -- confirm.
   Casts __a to int64x2_t, applies __builtin_aarch64_reduc_uplus_v2di, casts
   the result to uint64x2_t and returns lane 0 through vgetq_lane_u64.  The
   lane index changes from __LANE0 (2) to the literal 0.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u64 ((uint64x2_t)
__builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
- __LANE0 (2));
+ 0);
}
/* vaddv_f32: stores the result of __builtin_aarch64_reduc_splus_v2sf (__a)
   in __t and returns lane 0 of __t through vget_lane_f32.  The lane index
   changes from __LANE0 (2) to the literal 0.
   NOTE(review): presumably an add reduction, going by the builtin name
   -- confirm.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddv_f32 (float32x2_t __a)
{
float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
- return vget_lane_f32 (__t, __LANE0 (2));
+ return vget_lane_f32 (__t, 0);
}
/* vaddvq_f32: stores the result of __builtin_aarch64_reduc_splus_v4sf (__a)
   in __t and returns lane 0 of __t through vgetq_lane_f32.  The lane index
   changes from __LANE0 (4) to the literal 0.
   NOTE(review): presumably an add reduction, going by the builtin name
   -- confirm.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddvq_f32 (float32x4_t __a)
{
float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
- return vgetq_lane_f32 (__t, __LANE0 (4));
+ return vgetq_lane_f32 (__t, 0);
}
/* vaddvq_f64: stores the result of __builtin_aarch64_reduc_splus_v2df (__a)
   in __t and returns lane 0 of __t through vgetq_lane_f64.  The lane index
   changes from __LANE0 (2) to the literal 0.
   NOTE(review): presumably an add reduction, going by the builtin name
   -- confirm.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vaddvq_f64 (float64x2_t __a)
{
float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
- return vgetq_lane_f64 (__t, __LANE0 (2));
+ return vgetq_lane_f64 (__t, 0);
}
/* vbsl */
vmaxv_f32 (float32x2_t __a)
{
/* Applies __builtin_aarch64_reduc_smax_nan_v2sf to __a and returns lane 0
   of the result through vget_lane_f32.  The lane index changes from
   __LANE0 (2) to the literal 0.
   NOTE(review): the line carrying this function's return type is not
   present in this excerpt.  The "_nan" builtin name suggests a maximum
   reduction with specific NaN handling; not verifiable here -- confirm.  */
return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
- __LANE0 (2));
+ 0);
}
/* vmaxv_s8: applies __builtin_aarch64_reduc_smax_v8qi to __a and returns
   lane 0 of the result through vget_lane_s8.  The lane index changes
   from __LANE0 (8) to the literal 0.
   NOTE(review): presumably a signed maximum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxv_s8 (int8x8_t __a)
{
- return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), __LANE0 (8));
+ return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
}
/* vmaxv_s16: applies __builtin_aarch64_reduc_smax_v4hi to __a and returns
   lane 0 of the result through vget_lane_s16.  The lane index changes
   from __LANE0 (4) to the literal 0.
   NOTE(review): presumably a signed maximum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxv_s16 (int16x4_t __a)
{
- return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), __LANE0 (4));
+ return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
}
/* vmaxv_s32: applies __builtin_aarch64_reduc_smax_v2si to __a and returns
   lane 0 of the result through vget_lane_s32.  The lane index changes
   from __LANE0 (2) to the literal 0.
   NOTE(review): presumably a signed maximum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxv_s32 (int32x2_t __a)
{
- return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), __LANE0 (2));
+ return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vmaxv_u8 (uint8x8_t __a) -- confirm.
   Casts __a to int8x8_t, applies __builtin_aarch64_reduc_umax_v8qi, casts
   the result to uint8x8_t and returns lane 0 through vget_lane_u8.  The
   lane index changes from __LANE0 (8) to the literal 0.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
{
return vget_lane_u8 ((uint8x8_t)
__builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
- __LANE0 (8));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vmaxv_u16 (uint16x4_t __a) -- confirm.
   Casts __a to int16x4_t, applies __builtin_aarch64_reduc_umax_v4hi, casts
   the result to uint16x4_t and returns lane 0 through vget_lane_u16.  The
   lane index changes from __LANE0 (4) to the literal 0.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
{
return vget_lane_u16 ((uint16x4_t)
__builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
- __LANE0 (4));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vmaxv_u32 (uint32x2_t __a) -- confirm.
   Casts __a to int32x2_t, applies __builtin_aarch64_reduc_umax_v2si, casts
   the result to uint32x2_t and returns lane 0 through vget_lane_u32.  The
   lane index changes from __LANE0 (2) to the literal 0.  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
{
return vget_lane_u32 ((uint32x2_t)
__builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
- __LANE0 (2));
+ 0);
}
/* vmaxvq_f32: applies __builtin_aarch64_reduc_smax_nan_v4sf to __a and
   returns lane 0 of the result through vgetq_lane_f32.  The lane index
   changes from __LANE0 (4) to the literal 0.
   NOTE(review): the "_nan" builtin name suggests a maximum reduction with
   specific NaN handling; not verifiable here -- confirm.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxvq_f32 (float32x4_t __a)
{
return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
- __LANE0 (4));
+ 0);
}
/* vmaxvq_f64: applies __builtin_aarch64_reduc_smax_nan_v2df to __a and
   returns lane 0 of the result through vgetq_lane_f64.  The lane index
   changes from __LANE0 (2) to the literal 0.
   NOTE(review): the "_nan" builtin name suggests a maximum reduction with
   specific NaN handling; not verifiable here -- confirm.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxvq_f64 (float64x2_t __a)
{
return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
- __LANE0 (2));
+ 0);
}
/* vmaxvq_s8: applies __builtin_aarch64_reduc_smax_v16qi to __a and returns
   lane 0 of the result through vgetq_lane_s8.  The lane index changes
   from __LANE0 (16) to the literal 0.
   NOTE(review): presumably a signed maximum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxvq_s8 (int8x16_t __a)
{
- return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), __LANE0 (16));
+ return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
}
/* vmaxvq_s16: applies __builtin_aarch64_reduc_smax_v8hi to __a and returns
   lane 0 of the result through vgetq_lane_s16.  The lane index changes
   from __LANE0 (8) to the literal 0.
   NOTE(review): presumably a signed maximum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxvq_s16 (int16x8_t __a)
{
- return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), __LANE0 (8));
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
}
/* vmaxvq_s32: applies __builtin_aarch64_reduc_smax_v4si to __a and returns
   lane 0 of the result through vgetq_lane_s32.  The lane index changes
   from __LANE0 (4) to the literal 0.
   NOTE(review): presumably a signed maximum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxvq_s32 (int32x4_t __a)
{
- return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), __LANE0 (4));
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vmaxvq_u8 (uint8x16_t __a) -- confirm.
   Casts __a to int8x16_t, applies __builtin_aarch64_reduc_umax_v16qi, casts
   the result to uint8x16_t and returns lane 0 through vgetq_lane_u8.  The
   lane index changes from __LANE0 (16) to the literal 0.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u8 ((uint8x16_t)
__builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
- __LANE0 (16));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vmaxvq_u16 (uint16x8_t __a) -- confirm.
   Casts __a to int16x8_t, applies __builtin_aarch64_reduc_umax_v8hi, casts
   the result to uint16x8_t and returns lane 0 through vgetq_lane_u16.  The
   lane index changes from __LANE0 (8) to the literal 0.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u16 ((uint16x8_t)
__builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
- __LANE0 (8));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vmaxvq_u32 (uint32x4_t __a) -- confirm.
   Casts __a to int32x4_t, applies __builtin_aarch64_reduc_umax_v4si, casts
   the result to uint32x4_t and returns lane 0 through vgetq_lane_u32.  The
   lane index changes from __LANE0 (4) to the literal 0.  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u32 ((uint32x4_t)
__builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
- __LANE0 (4));
+ 0);
}
/* vmaxnmv */
vmaxnmv_f32 (float32x2_t __a)
{
/* Applies __builtin_aarch64_reduc_smax_v2sf to __a and returns lane 0 of
   the result through vget_lane_f32.  The lane index changes from
   __LANE0 (2) to the literal 0.
   NOTE(review): the line carrying this function's return type is not
   present in this excerpt.  Presumably a maximum reduction, going by the
   builtin name -- confirm.  */
return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
- __LANE0 (2));
+ 0);
}
/* vmaxnmvq_f32: applies __builtin_aarch64_reduc_smax_v4sf to __a and returns
   lane 0 of the result through vgetq_lane_f32.  The lane index changes
   from __LANE0 (4) to the literal 0.
   NOTE(review): presumably a maximum reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmvq_f32 (float32x4_t __a)
{
- return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), __LANE0 (4));
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
}
/* vmaxnmvq_f64: applies __builtin_aarch64_reduc_smax_v2df to __a and returns
   lane 0 of the result through vgetq_lane_f64.  The lane index changes
   from __LANE0 (2) to the literal 0.
   NOTE(review): presumably a maximum reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxnmvq_f64 (float64x2_t __a)
{
- return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), __LANE0 (2));
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
}
/* vmin */
vminv_f32 (float32x2_t __a)
{
/* Applies __builtin_aarch64_reduc_smin_nan_v2sf to __a and returns lane 0
   of the result through vget_lane_f32.  The lane index changes from
   __LANE0 (2) to the literal 0.
   NOTE(review): the line carrying this function's return type is not
   present in this excerpt.  The "_nan" builtin name suggests a minimum
   reduction with specific NaN handling; not verifiable here -- confirm.  */
return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
- __LANE0 (2));
+ 0);
}
/* vminv_s8: applies __builtin_aarch64_reduc_smin_v8qi to __a and returns
   lane 0 of the result through vget_lane_s8.  The lane index changes
   from __LANE0 (8) to the literal 0.
   NOTE(review): presumably a signed minimum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminv_s8 (int8x8_t __a)
{
return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
- __LANE0 (8));
+ 0);
}
/* vminv_s16: applies __builtin_aarch64_reduc_smin_v4hi to __a and returns
   lane 0 of the result through vget_lane_s16.  The lane index changes
   from __LANE0 (4) to the literal 0.
   NOTE(review): presumably a signed minimum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminv_s16 (int16x4_t __a)
{
- return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), __LANE0 (4));
+ return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
}
/* vminv_s32: applies __builtin_aarch64_reduc_smin_v2si to __a and returns
   lane 0 of the result through vget_lane_s32.  The lane index changes
   from __LANE0 (2) to the literal 0.
   NOTE(review): presumably a signed minimum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminv_s32 (int32x2_t __a)
{
- return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), __LANE0 (2));
+ return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vminv_u8 (uint8x8_t __a) -- confirm.
   Casts __a to int8x8_t, applies __builtin_aarch64_reduc_umin_v8qi, casts
   the result to uint8x8_t and returns lane 0 through vget_lane_u8.  The
   lane index changes from __LANE0 (8) to the literal 0.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
{
return vget_lane_u8 ((uint8x8_t)
__builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
- __LANE0 (8));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vminv_u16 (uint16x4_t __a) -- confirm.
   Casts __a to int16x4_t, applies __builtin_aarch64_reduc_umin_v4hi, casts
   the result to uint16x4_t and returns lane 0 through vget_lane_u16.  The
   lane index changes from __LANE0 (4) to the literal 0.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
{
return vget_lane_u16 ((uint16x4_t)
__builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
- __LANE0 (4));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vminv_u32 (uint32x2_t __a) -- confirm.
   Casts __a to int32x2_t, applies __builtin_aarch64_reduc_umin_v2si, casts
   the result to uint32x2_t and returns lane 0 through vget_lane_u32.  The
   lane index changes from __LANE0 (2) to the literal 0.  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
{
return vget_lane_u32 ((uint32x2_t)
__builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
- __LANE0 (2));
+ 0);
}
/* vminvq_f32: applies __builtin_aarch64_reduc_smin_nan_v4sf to __a and
   returns lane 0 of the result through vgetq_lane_f32.  The lane index
   changes from __LANE0 (4) to the literal 0.
   NOTE(review): the "_nan" builtin name suggests a minimum reduction with
   specific NaN handling; not verifiable here -- confirm.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminvq_f32 (float32x4_t __a)
{
return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
- __LANE0 (4));
+ 0);
}
/* vminvq_f64: applies __builtin_aarch64_reduc_smin_nan_v2df to __a and
   returns lane 0 of the result through vgetq_lane_f64.  The lane index
   changes from __LANE0 (2) to the literal 0.
   NOTE(review): the "_nan" builtin name suggests a minimum reduction with
   specific NaN handling; not verifiable here -- confirm.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminvq_f64 (float64x2_t __a)
{
return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
- __LANE0 (2));
+ 0);
}
/* vminvq_s8: applies __builtin_aarch64_reduc_smin_v16qi to __a and returns
   lane 0 of the result through vgetq_lane_s8.  The lane index changes
   from __LANE0 (16) to the literal 0.
   NOTE(review): presumably a signed minimum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminvq_s8 (int8x16_t __a)
{
- return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), __LANE0 (16));
+ return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
}
/* vminvq_s16: applies __builtin_aarch64_reduc_smin_v8hi to __a and returns
   lane 0 of the result through vgetq_lane_s16.  The lane index changes
   from __LANE0 (8) to the literal 0.
   NOTE(review): presumably a signed minimum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminvq_s16 (int16x8_t __a)
{
- return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), __LANE0 (8));
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
}
/* vminvq_s32: applies __builtin_aarch64_reduc_smin_v4si to __a and returns
   lane 0 of the result through vgetq_lane_s32.  The lane index changes
   from __LANE0 (4) to the literal 0.
   NOTE(review): presumably a signed minimum reduction, going by the
   builtin name -- confirm.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminvq_s32 (int32x4_t __a)
{
- return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), __LANE0 (4));
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vminvq_u8 (uint8x16_t __a) -- confirm.
   Casts __a to int8x16_t, applies __builtin_aarch64_reduc_umin_v16qi, casts
   the result to uint8x16_t and returns lane 0 through vgetq_lane_u8.  The
   lane index changes from __LANE0 (16) to the literal 0.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u8 ((uint8x16_t)
__builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
- __LANE0 (16));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vminvq_u16 (uint16x8_t __a) -- confirm.
   Casts __a to int16x8_t, applies __builtin_aarch64_reduc_umin_v8hi, casts
   the result to uint16x8_t and returns lane 0 through vgetq_lane_u16.  The
   lane index changes from __LANE0 (8) to the literal 0.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u16 ((uint16x8_t)
__builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
- __LANE0 (8));
+ 0);
}
/* NOTE(review): the line naming this function and its parameter is not
   present in this excerpt; presumably vminvq_u32 (uint32x4_t __a) -- confirm.
   Casts __a to int32x4_t, applies __builtin_aarch64_reduc_umin_v4si, casts
   the result to uint32x4_t and returns lane 0 through vgetq_lane_u32.  The
   lane index changes from __LANE0 (4) to the literal 0.  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
{
return vgetq_lane_u32 ((uint32x4_t)
__builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
- __LANE0 (4));
+ 0);
}
/* vminnmv */
/* vminnmv_f32: applies __builtin_aarch64_reduc_smin_v2sf to __a and returns
   lane 0 of the result through vget_lane_f32.  The lane index changes
   from __LANE0 (2) to the literal 0.
   NOTE(review): presumably a minimum reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmv_f32 (float32x2_t __a)
{
- return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), __LANE0 (2));
+ return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
}
/* vminnmvq_f32: applies __builtin_aarch64_reduc_smin_v4sf to __a and returns
   lane 0 of the result through vgetq_lane_f32.  The lane index changes
   from __LANE0 (4) to the literal 0.
   NOTE(review): presumably a minimum reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmvq_f32 (float32x4_t __a)
{
- return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), __LANE0 (4));
+ return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
}
/* vminnmvq_f64: applies __builtin_aarch64_reduc_smin_v2df to __a and returns
   lane 0 of the result through vgetq_lane_f64.  The lane index changes
   from __LANE0 (2) to the literal 0.
   NOTE(review): presumably a minimum reduction, going by the builtin
   name -- confirm.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminnmvq_f64 (float64x2_t __a)
{
- return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), __LANE0 (2));
+ return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
}
/* vmla */
/* End of optimal implementations in approved order. */
/* The #undef of __LANE0 is removed along with the macro's definition
   earlier in this change.  */
-#undef __LANE0
-
#undef __aarch64_vget_lane_any
#undef __aarch64_vget_lane_f32
#undef __aarch64_vget_lane_f64