BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "")
BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "")
BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "")
-BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dc", "")
-BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fc", "")
-BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8ic", "")
+BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "") /* vperm2f128 family: last operand is encoded "Ic"; NOTE(review): 'I' presumably requires an integer constant expression -- confirm against the type-code legend */
+BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "") /* same "Ic" last operand as the pd256 entry */
+BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "") /* same "Ic" last operand as the pd256 entry */
BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dc", "")
BUILTIN(__builtin_ia32_vpermilps, "V4fV4fc", "")
BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dc", "")
return (__m256)__builtin_ia32_vpermilps256((__v8sf)a, c);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
-_mm256_permute2f128_pd(__m256d a, __m256d b, const int c)
-{
- return (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)a, (__v4df)b, c);
-}
+#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ /* macro form: M is substituted into the builtin call exactly as written */ \
+ __m256d __V1 = (V1); /* V1 is evaluated exactly once, into a local */ \
+ __m256d __V2 = (V2); /* V2 is evaluated exactly once, into a local */ \
+ (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, M); }) /* result of the statement expression; NOTE(review): M presumably must be a compile-time constant -- confirm */
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
-_mm256_permute2f128_ps(__m256 a, __m256 b, const int c)
-{
- return (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)a, (__v8sf)b, c);
-}
+#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ /* macro form: M is substituted into the builtin call exactly as written */ \
+ __m256 __V1 = (V1); /* V1 is evaluated exactly once, into a local */ \
+ __m256 __V2 = (V2); /* V2 is evaluated exactly once, into a local */ \
+ (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, M); }) /* result of the statement expression; NOTE(review): M presumably must be a compile-time constant -- confirm */
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
-_mm256_permute2f128_si256(__m256i a, __m256i b, const int c)
-{
- return (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)a, (__v8si)b, c);
-}
+#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ /* macro form: M is substituted into the builtin call exactly as written */ \
+ __m256i __V1 = (V1); /* V1 is evaluated exactly once, into a local */ \
+ __m256i __V2 = (V2); /* V2 is evaluated exactly once, into a local */ \
+ (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, M); }) /* result of the statement expression; NOTE(review): M presumably must be a compile-time constant -- confirm */
/* Vector Blend */
#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \