From 69dc9c59e911dcf1e5212a15c54bc8f20943190b Mon Sep 17 00:00:00 2001 From: dianhong xu Date: Tue, 22 Jun 2021 20:33:24 +0800 Subject: [PATCH] AVX512FP16: Add complex conjugation intrinsic instructions. gcc/ChangeLog: * config/i386/avx512fp16intrin.h: Add new intrinsics. (_mm512_conj_pch): New intrinsic. (_mm512_mask_conj_pch): Ditto. (_mm512_maskz_conj_pch): Ditto. * config/i386/avx512fp16vlintrin.h: Add new intrinsics. (_mm256_conj_pch): New intrinsic. (_mm256_mask_conj_pch): Ditto. (_mm256_maskz_conj_pch): Ditto. (_mm_conj_pch): Ditto. (_mm_mask_conj_pch): Ditto. (_mm_maskz_conj_pch): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-conjugation-1.c: New test. * gcc.target/i386/avx512fp16vl-conjugation-1.c: New test. --- gcc/config/i386/avx512fp16intrin.h | 27 +++++++++++ gcc/config/i386/avx512fp16vlintrin.h | 53 +++++++++++++++++++++ .../gcc.target/i386/avx512fp16-conjugation-1.c | 34 +++++++++++++ .../gcc.target/i386/avx512fp16vl-conjugation-1.c | 55 ++++++++++++++++++++++ 4 files changed, 169 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-conjugation-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-conjugation-1.c diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index b1913b9..c0fb9ff 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -721,6 +721,33 @@ _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C, (A), (D))) #endif /* __OPTIMIZE__ */ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_conj_pch (__m512h __A) +{ /* Flip bit 31 of each 32-bit pair: the sign of the imaginary half.  */ + return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31)); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A) +{ /* Per 32-bit pair: conjugate of __A if its __U bit is set, else __W.  */ + return (__m512h) + __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A), + (__v16sf) __W, 
(__mmask16) __U); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_conj_pch (__mmask16 __U, __m512h __A) +{ /* Per 32-bit pair: conjugate of __A if its __U bit is set, else zero.  */ + return (__m512h) + __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A), + (__v16sf) _mm512_setzero_ps (), + (__mmask16) __U); +} + /* Intrinsics of v[add,sub,mul,div]sh. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index 6ce83df..36b3fe2 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -151,6 +151,59 @@ _mm256_zextph128_ph256 (__m128h __A) (__m128) __A, 0); } +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_conj_pch (__m256h __A) +{ /* Flip bit 31 of each 32-bit pair: the sign of the imaginary half.  */ + return (__m256h) _mm256_xor_epi32 ((__m256i) __A, _mm256_set1_epi32 (1<<31)); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_conj_pch (__m256h __W, __mmask8 __U, __m256h __A) +{ /* Per 32-bit pair: conjugate of __A if its __U bit is set, else __W.  */ + return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf) + _mm256_conj_pch (__A), + (__v8sf) __W, + (__mmask8) __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_conj_pch (__mmask8 __U, __m256h __A) +{ /* Per 32-bit pair: conjugate of __A if its __U bit is set, else zero.  */ + return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf) + _mm256_conj_pch (__A), + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_conj_pch (__m128h __A) +{ /* Flip bit 31 of each 32-bit pair: the sign of the imaginary half.  */ + return (__m128h) _mm_xor_epi32 ((__m128i) __A, _mm_set1_epi32 (1<<31)); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_conj_pch (__m128h __W, __mmask8 __U, __m128h __A) +{ /* Per 32-bit pair: conjugate of __A if its __U bit is set, else __W.  */ + return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A), + (__v4sf) __W, + (__mmask8) __U); +} 
+ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_conj_pch (__mmask8 __U, __m128h __A) +{ /* Per 32-bit pair: conjugate of __A if its __U bit is set, else zero.  */ + return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A), + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U); +} + /* Intrinsics v[add,sub,mul,div]ph. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-conjugation-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-conjugation-1.c new file mode 100644 index 0000000..662b23c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-conjugation-1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ +/* Check the code generated for _mm512_conj_pch and its masked forms.  */ +#include <immintrin.h> +__m512h +__attribute__ ((noinline, noclone)) +test_mm512_conj_pch (__m512h __A) +{ + return _mm512_conj_pch (__A); +} + +/* { dg-final { scan-assembler-times "vpxord\[^\n\]*%zmm\[0-9\]+" 3 } } */ + +__m512h +__attribute__ ((noinline, noclone)) +test_mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A) +{ + return _mm512_mask_conj_pch (__W, __U, __A); +} + +/* { dg-final { scan-assembler-times "vpxord\[^\n\]*%zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "kmovw\[^\n\]*%k\[1-7\]" 2 } } */ +/* { dg-final { scan-assembler-times "vmovaps\[^\n\]" 2 } } */ + +__m512h +__attribute__ ((noinline, noclone)) +test_mm512_maskz_conj_pch (__mmask16 __U, __m512h __A) +{ + return _mm512_maskz_conj_pch (__U, __A); +} + +/* { dg-final { scan-assembler-times "vpxord\[^\n\]*%zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "kmovw\[^\n\]*%k\[1-7\]" 2 } } */ +/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-conjugation-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-conjugation-1.c new file mode 100644 index 0000000..d8fdab7 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx512fp16vl-conjugation-1.c @@ -0,0 +1,55 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ +/* Check the code generated for the 128-bit and 256-bit conj_pch intrinsics.  */ +#include <immintrin.h> +__m256h +__attribute__ ((noinline, noclone)) +test_mm256_conj_pch (__m256h __A) +{ + return _mm256_conj_pch (__A); +} + +/* { dg-final { scan-assembler-times "vpxor\[^\n\]*%ymm\[0-9\]+" 3 } } */ + +__m128h +__attribute__ ((noinline, noclone)) +test_mm_conj_pch (__m128h __A) +{ + return _mm_conj_pch (__A); +} + +/* { dg-final { scan-assembler-times "vpxor\[^\n\]*%xmm\[0-9\]+" 3 } } */ + +__m256h +__attribute__ ((noinline, noclone)) +test_mm256_mask_conj_pch (__m256h __W, __mmask8 __U, __m256h __A) +{ + return _mm256_mask_conj_pch (__W, __U, __A); +} + +/* { dg-final { scan-assembler-times "vmovaps\[^\n\]*%ymm\[0-9\]+" 2 } } */ + +__m128h +__attribute__ ((noinline, noclone)) +test_mm_mask_conj_pch (__m128h __W, __mmask8 __U, __m128h __A) +{ + return _mm_mask_conj_pch (__W, __U, __A); +} + +/* { dg-final { scan-assembler-times "vmovaps\[^\n\]*%xmm\[0-9\]+" 2 } } */ + +__m256h +__attribute__ ((noinline, noclone)) +test_mm256_maskz_conj_pch (__mmask8 __U, __m256h __A) +{ + return _mm256_maskz_conj_pch (__U, __A); +} +/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +__m128h +__attribute__ ((noinline, noclone)) +test_mm_maskz_conj_pch (__mmask8 __U, __m128h __A) { + return _mm_maskz_conj_pch (__U, __A); +} + +/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -- 2.7.4