#ifndef _WMMINTRIN_PCLMUL_H
#define _WMMINTRIN_PCLMUL_H
/// \brief Multiplies two 64-bit integer values, which are selected from source
///    operands using the immediate-value operand. The multiplication is a
///    carry-less multiplication, and the 128-bit integer product is stored in
///    the destination.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);
/// \endcode
///
/// This intrinsic corresponds to the \c VPCLMULQDQ instruction.
///
/// \param __X
///    A 128-bit vector of [2 x i64] containing one of the source operands.
/// \param __Y
///    A 128-bit vector of [2 x i64] containing one of the source operands.
/// \param __I
///    An immediate value specifying which 64-bit values to select from the
///    operands. Bit 0 is used to select a value from operand \a __X, and bit 4
///    is used to select a value from operand \a __Y: \n
///    Bit[0]=0 indicates that bits[63:0] of operand \a __X are used. \n
///    Bit[0]=1 indicates that bits[127:64] of operand \a __X are used. \n
///    Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used. \n
///    Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
/// \returns The 128-bit integer vector containing the result of the carry-less
///    multiplication of the selected 64-bit values.
// Each argument is parenthesized before being cast, so arbitrary expressions
// may be passed: __X and __Y are converted to __v2di (via __m128i) and __I is
// converted to char before the builtin is invoked.
#define _mm_clmulepi64_si128(__X, __Y, __I) \
  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
                                        (__v2di)(__m128i)(__Y), (char)(__I)))