From df08b3493869540bad5d4b040dae814e078b411d Mon Sep 17 00:00:00 2001 From: Warren Ristow Date: Tue, 26 Apr 2022 14:33:14 -0700 Subject: [PATCH] [NFC] Cleanup miscellaneous header items - Explain the use of the _MM_SHUFFLE and _MM_SHUFFLE2 macros - Update some doxygen parameter descriptions to match the implementations - Add "see also" doxygen tags to some intrinsics - Minor clang-format changes Reviewers: RKSimon Differential Revision: https://reviews.llvm.org/D124469 --- clang/lib/Headers/__wmmintrin_pclmul.h | 20 ++++---- clang/lib/Headers/avxintrin.h | 93 +++++++++++++++++++++++----------- clang/lib/Headers/bmiintrin.h | 4 ++ clang/lib/Headers/emmintrin.h | 56 +++++++++++++------- clang/lib/Headers/smmintrin.h | 4 +- clang/lib/Headers/xmmintrin.h | 12 +++-- 6 files changed, 127 insertions(+), 62 deletions(-) diff --git a/clang/lib/Headers/__wmmintrin_pclmul.h b/clang/lib/Headers/__wmmintrin_pclmul.h index fef4b93..c9a6d50 100644 --- a/clang/lib/Headers/__wmmintrin_pclmul.h +++ b/clang/lib/Headers/__wmmintrin_pclmul.h @@ -22,23 +22,23 @@ /// \headerfile /// /// \code -/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I); +/// __m128i _mm_clmulepi64_si128(__m128i X, __m128i Y, const int I); /// \endcode /// /// This intrinsic corresponds to the VPCLMULQDQ instruction. /// -/// \param __X +/// \param X /// A 128-bit vector of [2 x i64] containing one of the source operands. -/// \param __Y +/// \param Y /// A 128-bit vector of [2 x i64] containing one of the source operands. -/// \param __I +/// \param I /// An immediate value specifying which 64-bit values to select from the -/// operands. Bit 0 is used to select a value from operand \a __X, and bit -/// 4 is used to select a value from operand \a __Y: \n -/// Bit[0]=0 indicates that bits[63:0] of operand \a __X are used. \n -/// Bit[0]=1 indicates that bits[127:64] of operand \a __X are used. \n -/// Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used. 
\n -/// Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used. +/// operands. Bit 0 is used to select a value from operand \a X, and bit +/// 4 is used to select a value from operand \a Y: \n +/// Bit[0]=0 indicates that bits[63:0] of operand \a X are used. \n +/// Bit[0]=1 indicates that bits[127:64] of operand \a X are used. \n +/// Bit[4]=0 indicates that bits[63:0] of operand \a Y are used. \n +/// Bit[4]=1 indicates that bits[127:64] of operand \a Y are used. /// \returns The 128-bit integer vector containing the result of the carry-less /// multiplication of the selected 64-bit values. #define _mm_clmulepi64_si128(X, Y, I) \ diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index df2d1a2..a8f953c 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -1504,7 +1504,10 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 00: Bits [31:0] and [159:128] are copied from the selected operand. \n /// 01: Bits [63:32] and [191:160] are copied from the selected operand. \n /// 10: Bits [95:64] and [223:192] are copied from the selected operand. \n -/// 11: Bits [127:96] and [255:224] are copied from the selected operand. +/// 11: Bits [127:96] and [255:224] are copied from the selected operand. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 256-bit vector of [8 x float] containing the shuffled values. #define _mm256_shuffle_ps(a, b, mask) \ ((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \ @@ -1953,12 +1956,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// /// \headerfile /// +/// \code +/// int _mm256_extract_epi32(__m256i X, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A 256-bit vector of [8 x i32]. 
-/// \param __imm +/// \param N /// An immediate integer operand with bits [2:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 32 bits of extended @@ -1971,12 +1978,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// /// \headerfile /// +/// \code +/// int _mm256_extract_epi16(__m256i X, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A 256-bit integer vector of [16 x i16]. -/// \param __imm +/// \param N /// An immediate integer operand with bits [3:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 16 bits of zero extended @@ -1990,12 +2001,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// /// \headerfile /// +/// \code +/// int _mm256_extract_epi8(__m256i X, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A 256-bit integer vector of [32 x i8]. -/// \param __imm +/// \param N /// An immediate integer operand with bits [4:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 8 bits of zero extended @@ -2010,12 +2025,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// /// \headerfile /// +/// \code +/// long long _mm256_extract_epi64(__m256i X, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A 256-bit integer vector of [4 x i64]. -/// \param __imm +/// \param N /// An immediate integer operand with bits [1:0] determining which vector /// element is extracted and returned. 
/// \returns A 64-bit integer containing the extracted 64 bits of extended @@ -2030,18 +2049,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// /// \headerfile /// +/// \code +/// __m256i _mm256_insert_epi32(__m256i X, int I, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A vector of [8 x i32] to be used by the insert operation. -/// \param __b +/// \param I /// An integer value. The replacement value for the insert operation. -/// \param __imm +/// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. -/// \returns A copy of vector \a __a, after replacing its element indexed by -/// \a __imm with \a __b. +/// \returns A copy of vector \a X, after replacing its element indexed by +/// \a N with \a I. #define _mm256_insert_epi32(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \ (int)(I), (int)(N))) @@ -2053,18 +2076,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// /// \headerfile /// +/// \code +/// __m256i _mm256_insert_epi16(__m256i X, int I, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A vector of [16 x i16] to be used by the insert operation. -/// \param __b +/// \param I /// An i16 integer value. The replacement value for the insert operation. -/// \param __imm +/// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. -/// \returns A copy of vector \a __a, after replacing its element indexed by -/// \a __imm with \a __b. +/// \returns A copy of vector \a X, after replacing its element indexed by +/// \a N with \a I. 
#define _mm256_insert_epi16(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \ (int)(I), (int)(N))) @@ -2075,18 +2102,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// /// \headerfile /// +/// \code +/// __m256i _mm256_insert_epi8(__m256i X, int I, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A vector of [32 x i8] to be used by the insert operation. -/// \param __b +/// \param I /// An i8 integer value. The replacement value for the insert operation. -/// \param __imm +/// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. -/// \returns A copy of vector \a __a, after replacing its element indexed by -/// \a __imm with \a __b. +/// \returns A copy of vector \a X, after replacing its element indexed by +/// \a N with \a I. #define _mm256_insert_epi8(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \ (int)(I), (int)(N))) @@ -2098,18 +2129,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// /// \headerfile /// +/// \code +/// __m256i _mm256_insert_epi64(__m256i X, long long I, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A vector of [4 x i64] to be used by the insert operation. -/// \param __b +/// \param I /// A 64-bit integer value. The replacement value for the insert operation. -/// \param __imm +/// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. -/// \returns A copy of vector \a __a, after replacing its element indexed by -/// \a __imm with \a __b. +/// \returns A copy of vector \a X, after replacing its element indexed by +/// \a N with \a I.
#define _mm256_insert_epi64(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \ (long long)(I), (int)(N))) diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h index f583c21..0db8ddf 100644 --- a/clang/lib/Headers/bmiintrin.h +++ b/clang/lib/Headers/bmiintrin.h @@ -47,6 +47,7 @@ __tzcnt_u16(unsigned short __X) /// An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of trailing zero /// bits in the operand. +/// \see _mm_tzcnt_32 static __inline__ unsigned int __RELAXED_FN_ATTRS __tzcnt_u32(unsigned int __X) { @@ -63,6 +64,7 @@ __tzcnt_u32(unsigned int __X) /// An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An 32-bit integer containing the number of trailing zero bits in /// the operand. +/// \see __tzcnt_u32 static __inline__ int __RELAXED_FN_ATTRS _mm_tzcnt_32(unsigned int __X) { @@ -83,6 +85,7 @@ _mm_tzcnt_32(unsigned int __X) /// An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of trailing zero /// bits in the operand. +/// \see _mm_tzcnt_64 static __inline__ unsigned long long __RELAXED_FN_ATTRS __tzcnt_u64(unsigned long long __X) { @@ -99,6 +102,7 @@ __tzcnt_u64(unsigned long long __X) /// An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An 64-bit integer containing the number of trailing zero bits in /// the operand. 
+/// \see __tzcnt_u64 static __inline__ long long __RELAXED_FN_ATTRS _mm_tzcnt_64(unsigned long long __X) { diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 2078c7f..c1e2915 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4126,21 +4126,25 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, /// /// \headerfile /// +/// \code +/// int _mm_extract_epi16(__m128i a, const int imm); +/// \endcode +/// /// This intrinsic corresponds to the VPEXTRW / PEXTRW instruction. /// -/// \param __a +/// \param a /// A 128-bit integer vector. -/// \param __imm -/// An immediate value. Bits [2:0] selects values from \a __a to be assigned +/// \param imm +/// An immediate value. Bits [2:0] selects values from \a a to be assigned /// to bits[15:0] of the result. \n -/// 000: assign values from bits [15:0] of \a __a. \n -/// 001: assign values from bits [31:16] of \a __a. \n -/// 010: assign values from bits [47:32] of \a __a. \n -/// 011: assign values from bits [63:48] of \a __a. \n -/// 100: assign values from bits [79:64] of \a __a. \n -/// 101: assign values from bits [95:80] of \a __a. \n -/// 110: assign values from bits [111:96] of \a __a. \n -/// 111: assign values from bits [127:112] of \a __a. +/// 000: assign values from bits [15:0] of \a a. \n +/// 001: assign values from bits [31:16] of \a a. \n +/// 010: assign values from bits [47:32] of \a a. \n +/// 011: assign values from bits [63:48] of \a a. \n +/// 100: assign values from bits [79:64] of \a a. \n +/// 101: assign values from bits [95:80] of \a a. \n +/// 110: assign values from bits [111:96] of \a a. \n +/// 111: assign values from bits [127:112] of \a a. /// \returns An integer, whose lower 16 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi16(a, imm) \ @@ -4154,18 +4158,22 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, /// /// \headerfile /// +/// \code +/// __m128i _mm_insert_epi16(__m128i a, int b, const int imm); +/// \endcode +/// /// This intrinsic corresponds to the VPINSRW / PINSRW instruction. /// -/// \param __a +/// \param a /// A 128-bit integer vector of [8 x i16]. This vector is copied to the /// result and then one of the eight elements in the result is replaced by -/// the lower 16 bits of \a __b. -/// \param __b +/// the lower 16 bits of \a b. +/// \param b /// An integer. The lower 16 bits of this parameter are written to the -/// result beginning at an offset specified by \a __imm. -/// \param __imm +/// result beginning at an offset specified by \a imm. +/// \param imm /// An immediate value specifying the bit offset in the result at which the -/// lower 16 bits of \a __b are written. +/// lower 16 bits of \a b are written. /// \returns A 128-bit integer vector containing the constructed values. #define _mm_insert_epi16(a, b, imm) \ ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ @@ -4213,7 +4221,10 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { /// 00: assign values from bits [31:0] of \a a. \n /// 01: assign values from bits [63:32] of \a a. \n /// 10: assign values from bits [95:64] of \a a. \n -/// 11: assign values from bits [127:96] of \a a. +/// 11: assign values from bits [127:96] of \a a. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shuffle_epi32(a, imm) \ ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))) @@ -4244,6 +4255,9 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { /// 01: assign values from bits [31:16] of \a a.
\n /// 10: assign values from bits [47:32] of \a a. \n /// 11: assign values from bits [63:48] of \a a. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shufflelo_epi16(a, imm) \ ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))) @@ -4274,6 +4288,9 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { /// 01: assign values from bits [95:80] of \a a. \n /// 10: assign values from bits [111:96] of \a a. \n /// 11: assign values from bits [127:112] of \a a. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shufflehi_epi16(a, imm) \ ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))) @@ -4617,6 +4634,9 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { /// Bit[0] = 1: upper element of \a a copied to lower element of result. \n /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro. +/// _MM_SHUFFLE2(b1, b0) can create a 2-bit mask of the form +/// [b1, b0]. /// \returns A 128-bit vector of [2 x double] containing the shuffled values. 
#define _mm_shuffle_pd(a, b, i) \ ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 52b2f6f..46fb7bc 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -1213,8 +1213,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1, /// This intrinsic corresponds to the VPMOVSXBW / PMOVSXBW instruction. /// /// \param __V -/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign- -/// extended to 16-bit values. +/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are +/// sign-extended to 16-bit values. /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 1612d3d..4aa70d6 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2086,7 +2086,7 @@ _mm_storer_ps(float *__p, __m128 __a) /// \headerfile /// /// \code -/// void _mm_prefetch(const void * a, const int sel); +/// void _mm_prefetch(const void *a, const int sel); /// \endcode /// /// This intrinsic corresponds to the PREFETCHNTA instruction. @@ -2360,7 +2360,10 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b) /// 00: assigned from bits [15:0] of \a a. \n /// 01: assigned from bits [31:16] of \a a. \n /// 10: assigned from bits [47:32] of \a a. \n -/// 11: assigned from bits [63:48] of \a a. +/// 11: assigned from bits [63:48] of \a a. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 64-bit integer vector containing the shuffled values. 
#define _mm_shuffle_pi16(a, n) \ ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) @@ -2602,7 +2605,10 @@ void _mm_setcsr(unsigned int __i); /// 00: Bits [31:0] copied from the specified operand. \n /// 01: Bits [63:32] copied from the specified operand. \n /// 10: Bits [95:64] copied from the specified operand. \n -/// 11: Bits [127:96] copied from the specified operand. +/// 11: Bits [127:96] copied from the specified operand. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) \ ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ -- 2.7.4