Previously we only checked the sse feature, which meant that if you passed -mno-mmx, the builtins/intrinsics weren't disabled in the frontend and would instead fail during backend instruction selection (isel).
llvm-svn: 333980
TARGET_BUILTIN(__builtin_ia32_vec_ext_v2si, "iV2ii", "nc", "mmx")
// MMX2 (MMX+SSE) intrinsics
-TARGET_BUILTIN(__builtin_ia32_cvtpi2ps, "V4fV4fV2i", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_pavgb, "V8cV8cV8c", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_pavgw, "V4sV4sV4s", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw, "V4sV4sV4s", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_pmaxub, "V8cV8cV8c", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_pminsw, "V4sV4sV4s", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_pminub, "V8cV8cV8c", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "nc", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvtpi2ps, "V4fV4fV2i", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pavgb, "V8cV8cV8c", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pavgw, "V4sV4sV4s", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pmaxsw, "V4sV4sV4s", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pmaxub, "V8cV8cV8c", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pminsw, "V4sV4sV4s", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pminub, "V8cV8cV8c", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "nc", "mmx,sse")
// MMX+SSE2
-TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_paddq, "V1LLiV1LLiV1LLi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1LLiV2iV2i", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psubq, "V1LLiV1LLiV1LLi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "nc", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "nc", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "nc", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_paddq, "V1LLiV1LLiV1LLi", "nc", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1LLiV2iV2i", "nc", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_psubq, "V1LLiV1LLiV1LLi", "nc", "mmx,sse2")
// MMX+SSSE3
-TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsd, "V2iV2i", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsw, "V4sV4s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_palignr, "V8cV8cV8cIc", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddd, "V2iV2iV2i", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddsw, "V4sV4sV4s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddw, "V4sV4sV4s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubd, "V2iV2iV2i", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubsw, "V4sV4sV4s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubw, "V4sV4sV4s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw, "V8cV8cV8c", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw, "V4sV4sV4s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pshufb, "V8cV8cV8c", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignw, "V4sV4sV4s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignb, "V8cV8cV8c", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignd, "V2iV2iV2i", "nc", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsd, "V2iV2i", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsw, "V4sV4s", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_palignr, "V8cV8cV8cIc", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phaddd, "V2iV2iV2i", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phaddsw, "V4sV4sV4s", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phaddw, "V4sV4sV4s", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phsubd, "V2iV2iV2i", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phsubsw, "V4sV4sV4s", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phsubw, "V4sV4sV4s", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pmaddubsw, "V8cV8cV8c", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pmulhrsw, "V4sV4sV4s", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pshufb, "V8cV8cV8c", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_psignw, "V4sV4sV4s", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_psignb, "V8cV8cV8c", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_psignd, "V2iV2iV2i", "nc", "mmx,ssse3")
// SSE intrinsics.
TARGET_BUILTIN(__builtin_ia32_comieq, "iV4fV4f", "nc", "sse")
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
+#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2")))
/// Adds lower double-precision values in both operands and returns the
/// sum in the lower 64 bits of the result. The upper 64 bits of the result
/// \param __a
/// A 128-bit vector of [2 x double].
/// \returns A 64-bit vector of [2 x i32] containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_cvtpd_pi32(__m128d __a)
{
return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
/// \param __a
/// A 128-bit vector of [2 x double].
/// \returns A 64-bit vector of [2 x i32] containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_cvttpd_pi32(__m128d __a)
{
return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
/// \param __a
/// A 64-bit vector of [2 x i32].
/// \returns A 128-bit vector of [2 x double] containing the converted values.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX
_mm_cvtpi32_pd(__m64 __a)
{
return __builtin_ia32_cvtpi2pd((__v2si)__a);
/// \param __b
/// A 64-bit integer.
/// \returns A 64-bit integer containing the sum of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_add_si64(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
/// \param __b
/// A 64-bit integer containing one of the source operands.
/// \returns A 64-bit integer vector containing the product of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_mul_su32(__m64 __a, __m64 __b)
{
return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
/// A 64-bit integer vector containing the subtrahend.
/// \returns A 64-bit integer vector containing the difference of the values in
/// the operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sub_si64(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
} // extern "C"
#endif
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_MMX
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
+#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3")))
/// Computes the absolute value of each of the packed 8-bit signed
/// integers in the source operand and stores the 8-bit unsigned integer
/// A 64-bit vector of [8 x i8].
/// \returns A 64-bit integer vector containing the absolute values of the
/// elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi8(__m64 __a)
{
return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
/// A 64-bit vector of [4 x i16].
/// \returns A 64-bit integer vector containing the absolute values of the
/// elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi16(__m64 __a)
{
return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
/// A 64-bit vector of [2 x i32].
/// \returns A 64-bit integer vector containing the absolute values of the
/// elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi32(__m64 __a)
{
return (__m64)__builtin_ia32_pabsd((__v2si)__a);
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadd_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
/// destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadd_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// sums of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadds_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsub_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
/// the destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsub_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// differences of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsubs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_maddubs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
/// A 64-bit vector of [4 x i16] containing one of the source operands.
/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
/// products of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
/// destination. \n
/// Bits [3:0] select the source byte to be copied.
/// \returns A 64-bit integer vector containing the copied or cleared values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_shuffle_pi8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
/// A 64-bit integer vector containing control bytes corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
/// A 64-bit integer vector containing control words corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
/// A 64-bit integer vector containing two control doublewords corresponding
/// to positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
}
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_MMX
#endif /* __TMMINTRIN_H */
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse")))
+#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse")))
/// Adds the 32-bit float values in the low-order bits of the operands.
///
/// \param __a
/// A 128-bit vector of [4 x float].
/// \returns A 64-bit integer vector containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_cvtps_pi32(__m128 __a)
{
return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);
/// \param __a
/// A 128-bit vector of [4 x float].
/// \returns A 64-bit integer vector containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_cvt_ps2pi(__m128 __a)
{
return _mm_cvtps_pi32(__a);
/// \param __a
/// A 128-bit vector of [4 x float].
/// \returns A 64-bit integer vector containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_cvttps_pi32(__m128 __a)
{
return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);
/// \param __a
/// A 128-bit vector of [4 x float].
/// \returns A 64-bit integer vector containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_cvtt_ps2pi(__m128 __a)
{
return _mm_cvttps_pi32(__a);
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
/// converted value of the second operand. The upper 64 bits are copied from
/// the upper 64 bits of the first operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
_mm_cvtpi32_ps(__m128 __a, __m64 __b)
{
return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
/// converted value from the second operand. The upper 64 bits are copied
/// from the upper 64 bits of the first operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
_mm_cvt_pi2ps(__m128 __a, __m64 __b)
{
return _mm_cvtpi32_ps(__a, __b);
/// A pointer to an aligned memory location used to store the register value.
/// \param __a
/// A 64-bit integer containing the value to be stored.
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS_MMX
_mm_stream_pi(__m64 *__p, __m64 __a)
{
__builtin_ia32_movntq(__p, __a);
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_max_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_max_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_min_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_min_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
/// A 64-bit integer vector containing the values with bits to be extracted.
/// \returns The most significant bit from each 8-bit element in \a __a,
/// written to bits [7:0].
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_MMX
_mm_movemask_pi8(__m64 __a)
{
return __builtin_ia32_pmovmskb((__v8qi)__a);
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the products of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_mulhi_pu16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
/// A pointer to a 64-bit memory location that will receive the conditionally
/// copied integer values. The address of the memory location does not have
/// to be aligned.
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS_MMX
_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
{
__builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_avg_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_avg_pu16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
/// \returns A 64-bit integer vector whose lower 16 bits contain the sums of the
/// sets of absolute differences between both operands. The upper bits are
/// cleared.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sad_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
/// from the corresponding elements in this operand.
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
/// values from the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
_mm_cvtpi16_ps(__m64 __a)
{
__m64 __b, __c;
/// destination are copied from the corresponding elements in this operand.
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
/// values from the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
_mm_cvtpu16_ps(__m64 __a)
{
__m64 __b, __c;
/// from the corresponding lower 4 elements in this operand.
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
/// values from the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
_mm_cvtpi8_ps(__m64 __a)
{
__m64 __b;
/// operand.
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
/// values from the source operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
_mm_cvtpu8_ps(__m64 __a)
{
__m64 __b;
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
/// copied and converted values from the first operand. The upper 64 bits
/// contain the copied and converted values from the second operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
{
__m128 __c;
/// A 128-bit floating-point vector of [4 x float].
/// \returns A 64-bit integer vector of [4 x i16] containing the converted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_cvtps_pi16(__m128 __a)
{
__m64 __b, __c;
/// 128-bit floating-point vector of [4 x float].
/// \returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the
/// converted values and the upper 32 bits are set to zero.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_cvtps_pi8(__m128 __a)
{
__m64 __b, __c;
#define _m_ _mm_
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_MMX
/* Ugly hack for backwards-compatibility (compatible with gcc) */
#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)