/// A 128-bit vector of [2 x double] containing one of the operands. The
/// square root is calculated using the lower 64 bits of this operand.
/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
-/// square root of the lower 64 bits of operand __b, and whose upper 64 bits
-/// are copied from the upper 64 bits of operand __a.
+/// square root of the lower 64 bits of operand \a __b, and whose upper 64
+/// bits are copied from the upper 64 bits of operand \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_sqrt_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpeq_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmplt_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmple_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpgt_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpge_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpord_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpunord_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpneq_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnlt_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnle_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpngt_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
-/// results. The upper 64 bits are copied from the upper 64 bits of __a.
+/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnge_sd(__m128d __a, __m128d __b)
{
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comieq_sd(__m128d __a, __m128d __b)
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comilt_sd(__m128d __a, __m128d __b)
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comile_sd(__m128d __a, __m128d __b)
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comigt_sd(__m128d __a, __m128d __b)
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comige_sd(__m128d __a, __m128d __b)
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comineq_sd(__m128d __a, __m128d __b)
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 1 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 1 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 1 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __b.
+/// compared to the lower double-precision value of \a __b.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
-/// compared to the lower double-precision value of __a.
+/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result. If either of the two
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
/// \param __a
/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.
/// \returns A double-precision floating-point value copied from the lower 64
-/// bits of __a.
+/// bits of \a __a.
static __inline__ double __DEFAULT_FN_ATTRS
_mm_cvtsd_f64(__m128d __a)
{
/// A 128-bit integer vector containing the source operand.
/// \param imm
/// An immediate value specifying the number of bytes to left-shift
-/// operand a.
+/// operand \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
#define _mm_slli_si128(a, imm) __extension__ ({ \
(__m128i)__builtin_shufflevector( \
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// An integer value specifying the number of bits to left-shift each value
-/// in operand __a.
+/// in operand \a __a.
/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_slli_epi16(__m128i __a, int __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
-/// to left-shift each value in operand __a.
+/// to left-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sll_epi16(__m128i __a, __m128i __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// An integer value specifying the number of bits to left-shift each value
-/// in operand __a.
+/// in operand \a __a.
/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_slli_epi32(__m128i __a, int __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
-/// to left-shift each value in operand __a.
+/// to left-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sll_epi32(__m128i __a, __m128i __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// An integer value specifying the number of bits to left-shift each value
-/// in operand __a.
+/// in operand \a __a.
/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_slli_epi64(__m128i __a, int __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
-/// to left-shift each value in operand __a.
+/// to left-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sll_epi64(__m128i __a, __m128i __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// An integer value specifying the number of bits to right-shift each value
-/// in operand __a.
+/// in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srai_epi16(__m128i __a, int __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
-/// to right-shift each value in operand __a.
+/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sra_epi16(__m128i __a, __m128i __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// An integer value specifying the number of bits to right-shift each value
-/// in operand __a.
+/// in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srai_epi32(__m128i __a, int __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
-/// to right-shift each value in operand __a.
+/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sra_epi32(__m128i __a, __m128i __count)
/// A 128-bit integer vector containing the source operand.
/// \param imm
/// An immediate value specifying the number of bytes to right-shift operand
-/// a.
+/// \a a.
/// \returns A 128-bit integer vector containing the right-shifted value.
#define _mm_srli_si128(a, imm) __extension__ ({ \
(__m128i)__builtin_shufflevector( \
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// An integer value specifying the number of bits to right-shift each value
-/// in operand __a.
+/// in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srli_epi16(__m128i __a, int __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
-/// to right-shift each value in operand __a.
+/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srl_epi16(__m128i __a, __m128i __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// An integer value specifying the number of bits to right-shift each value
-/// in operand __a.
+/// in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srli_epi32(__m128i __a, int __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
-/// to right-shift each value in operand __a.
+/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srl_epi32(__m128i __a, __m128i __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// An integer value specifying the number of bits to right-shift each value
-/// in operand __a.
+/// in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srli_epi64(__m128i __a, int __count)
/// A 128-bit integer vector containing the source operand.
/// \param __count
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
-/// to right-shift each value in operand __a.
+/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srl_epi64(__m128i __a, __m128i __count)
extern "C" {
#endif
-/// \brief The cache line containing __p is flushed and invalidated from all
+/// \brief The cache line containing \a __p is flushed and invalidated from all
/// caches in the coherency domain.
///
/// \headerfile <x86intrin.h>
/// \param __a
/// A 128-bit integer vector.
/// \param __imm
-/// An immediate value. Bits [3:0] selects values from __a to be assigned to
-/// bits[15:0] of the result.
-/// 000: assign values from bits [15:0] of __a.
-/// 001: assign values from bits [31:16] of __a.
-/// 010: assign values from bits [47:32] of __a.
-/// 011: assign values from bits [63:48] of __a.
-/// 100: assign values from bits [79:64] of __a.
-/// 101: assign values from bits [95:80] of __a.
-/// 110: assign values from bits [111:96] of __a.
-/// 111: assign values from bits [127:112] of __a.
+/// An immediate value. Bits [2:0] select values from \a __a to be assigned
+/// to bits [15:0] of the result.
+/// 000: assign values from bits [15:0] of \a __a.
+/// 001: assign values from bits [31:16] of \a __a.
+/// 010: assign values from bits [47:32] of \a __a.
+/// 011: assign values from bits [63:48] of \a __a.
+/// 100: assign values from bits [79:64] of \a __a.
+/// 101: assign values from bits [95:80] of \a __a.
+/// 110: assign values from bits [111:96] of \a __a.
+/// 111: assign values from bits [127:112] of \a __a.
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
static __inline__ int __DEFAULT_FN_ATTRS
/// \param __a
/// A 128-bit integer vector of [8 x i16]. This vector is copied to the
/// result and then one of the eight elements in the result is replaced by
-/// the lower 16 bits of __b.
+/// the lower 16 bits of \a __b.
/// \param __b
/// An integer. The lower 16 bits of this parameter are written to the
-/// result beginning at an offset specified by __imm.
+/// result beginning at an offset specified by \a __imm.
/// \param __imm
/// An immediate value specifying the bit offset in the result at which the
-/// lower 16 bits of__b are written.
+/// lower 16 bits of \a __b are written.
/// \returns A 128-bit integer vector containing the constructed values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_epi16(__m128i __a, int __b, int __imm)
///
/// \param __a
/// A 128-bit integer vector containing the values with bits to be extracted.
-/// \returns The most significant bits from each 8-bit element in __a, written
-/// to bits [15:0]. The other bits are assigned zeros.
+/// \returns The most significant bits from each 8-bit element in \a __a,
+/// written to bits [15:0]. The other bits are assigned zeros.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_movemask_epi8(__m128i __a)
{
/// \param __a
/// A 128-bit vector of [2 x double] containing the values with sign bits to
/// be extracted.
-/// \returns The sign bits from each of the double-precision elements in __a,
+/// \returns The sign bits from each of the double-precision elements in \a __a,
/// written to bits [1:0]. The remaining bits are assigned values of zero.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_movemask_pd(__m128d __a)
/// elements to copy from a and b:
/// Bit[0] = 0: lower element of a copied to lower element of result.
/// Bit[0] = 1: upper element of a copied to lower element of result.
-/// Bit[1] = 0: lower element of b copied to upper element of result.
-/// Bit[1] = 1: upper element of b copied to upper element of result.
+/// Bit[1] = 0: lower element of \a b copied to upper element of result.
+/// Bit[1] = 1: upper element of \a b copied to upper element of result.
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
(__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \