/// destination operand x with the specified bitfields replaced by the lower
/// bits of source operand y. The upper 64 bits of the return value are
/// undefined.
-
#define _mm_inserti_si64(x, y, len, idx) \
((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
(__v2di)(__m128i)(y), \
/// destination operand __x with the specified bitfields replaced by the
/// lower bits of source operand __y. The upper 64 bits of the return value
/// are undefined.
-
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x, __m128i __y)
{
/// \returns A 256-bit floating-point vector of [8 x float] containing the
/// concatenated result.
static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_set_m128 (__m128 __hi, __m128 __lo) {
+_mm256_set_m128 (__m128 __hi, __m128 __lo)
+{
return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
}
/// \returns A 256-bit floating-point vector of [4 x double] containing the
/// concatenated result.
static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_set_m128d (__m128d __hi, __m128d __lo) {
+_mm256_set_m128d (__m128d __hi, __m128d __lo)
+{
return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
}
/// result.
/// \returns A 256-bit integer vector containing the concatenated result.
static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_set_m128i (__m128i __hi, __m128i __lo) {
+_mm256_set_m128i (__m128i __hi, __m128i __lo)
+{
return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
}
/// \returns A 256-bit floating-point vector of [8 x float] containing the
/// concatenated result.
static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_setr_m128 (__m128 __lo, __m128 __hi) {
+_mm256_setr_m128 (__m128 __lo, __m128 __hi)
+{
return _mm256_set_m128(__hi, __lo);
}
/// \returns A 256-bit floating-point vector of [4 x double] containing the
/// concatenated result.
static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_setr_m128d (__m128d __lo, __m128d __hi) {
+_mm256_setr_m128d (__m128d __lo, __m128d __hi)
+{
return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
}
/// result.
/// \returns A 256-bit integer vector containing the concatenated result.
static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_setr_m128i (__m128i __lo, __m128i __hi) {
+_mm256_setr_m128i (__m128i __lo, __m128i __hi)
+{
return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
}
return (__m128d)__a;
}
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
/// \brief Indicates that a spin loop is being executed for the purposes of
/// optimizing power consumption during the loop.
///
///
/// This intrinsic corresponds to the \c PAUSE instruction.
///
-#if defined(__cplusplus)
-extern "C"
-#endif
void _mm_pause(void);
+#if defined(__cplusplus)
+} // extern "C"
+#endif
#undef __DEFAULT_FN_ATTRS
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
__builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
}
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
/// \brief Forces strong memory ordering (serialization) between store
/// instructions preceding this instruction and store instructions following
/// this instruction, ensuring the system completes all previous stores
///
/// This intrinsic corresponds to the \c SFENCE instruction.
///
+void _mm_sfence(void);
+
#if defined(__cplusplus)
-extern "C"
+} // extern "C"
#endif
-void _mm_sfence(void);
/// \brief Extracts 16-bit element from a 64-bit vector of [4 x i16] and
/// returns it, as specified by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// void _mm_extract_pi(__m64 a, int n);
+/// \endcode
+///
/// This intrinsic corresponds to the \c VPEXTRW / PEXTRW instruction.
///
/// \param __a
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// void _mm_insert_pi(__m64 a, int d, int n);
+/// \endcode
+///
/// This intrinsic corresponds to the \c VPINSRW / PINSRW instruction.
///
/// \param __a
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the \c PSHUFW instruction.
-///
/// \code
/// __m64 _mm_shuffle_pi16(__m64 a, const int n);
/// \endcode
///
+/// This intrinsic corresponds to the \c PSHUFW instruction.
+///
/// \param a
/// A 64-bit integer vector containing the values to be shuffled.
/// \param n