From d7a33a4cb02589a3f336ae831e67c8981ab24b4f Mon Sep 17 00:00:00 2001 From: Julia Koval Date: Thu, 11 May 2017 20:01:12 +0200 Subject: [PATCH] * config/i386/avx512fintrin.h (_mm_mask_rsqrt14_sd) (_mm_maskz_rsqrt14_sd, _mm_mask_rsqrt14_ss, _mm_maskz_rsqrt14_ss): New intrinsics. * config/i386/i386-builtin.def (__builtin_ia32_rsqrt14sd_mask) (__builtin_ia32_rsqrt14ss_mask): New builtins. * config/i386/sse.md (rsqrt14__mask): New pattern. testsuite/ChangeLog: * gcc.target/i386/avx512f-vrsqrt14sd-1.c: Test new intrinsics. * gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto. * gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto. * gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto. From-SVN: r247920 --- gcc/config/i386/avx512fintrin.h | 40 ++++++++++++++++++++++ gcc/config/i386/i386-builtin.def | 2 ++ gcc/config/i386/sse.md | 17 +++++++++ gcc/testsuite/ChangeLog | 7 ++++ .../gcc.target/i386/avx512f-vrsqrt14sd-1.c | 6 +++- .../gcc.target/i386/avx512f-vrsqrt14sd-2.c | 17 ++++++++- .../gcc.target/i386/avx512f-vrsqrt14ss-1.c | 7 +++- .../gcc.target/i386/avx512f-vrsqrt14ss-2.c | 17 ++++++++- 8 files changed, 109 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 7ac7cb6..ce139d1 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -1843,6 +1843,26 @@ _mm_rsqrt14_sd (__m128d __A, __m128d __B) (__v2df) __A); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, + (__v2df) __A, + (__v2df) __W, + (__mmask8) __U); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, + (__v2df) __A, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt14_ss (__m128 __A, __m128 __B) @@ -1851,6 +1871,26 @@ _mm_rsqrt14_ss (__m128 __A, __m128 __B) (__v4sf) __A); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) __W, + (__mmask8) __U); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U); +} + #ifdef __OPTIMIZE__ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 80ee7e10..1e29198 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1377,7 +1377,9 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14s BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14_v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14_v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a118fce..220f69f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1847,6 +1847,23 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_insn "rsqrt14__mask" + [(set (match_operand:VF_128 0 "register_operand" "=v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (unspec:VF_128 + [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] + UNSPEC_RSQRT14) + (match_operand:VF_128 3 "vector_move_operand" "0C") + (match_operand: 4 "register_operand" "Yk")) + (match_operand:VF_128 2 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512F" + "vrsqrt14\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + (define_insn "sse_vmrsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_merge:V4SF diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2bb56df..eed6482 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2017-05-11 Julia Koval + + * gcc.target/i386/avx512f-vrsqrt14sd-1.c: Test new intrinsics. + * gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto. + * gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto. + * gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto. + 2017-05-11 Paolo Carlini PR c++/70538 diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c index de37e61..8f23692 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c @@ -1,14 +1,18 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128d x1, x2; +volatile __m128d x1, x2, x3; volatile __mmask8 m; void extern avx512f_test (void) { x1 = _mm_rsqrt14_sd (x1, x2); + x1 = _mm_mask_rsqrt14_sd (x3, m, x1, x2); + x1 = _mm_maskz_rsqrt14_sd (m, x1, x2); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c index dc96158..d106682 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c @@ -4,6 +4,7 @@ #include #include "avx512f-check.h" +#include "avx512f-helper.h" static void compute_vrsqrt14sd (double *s1, double *s2, double *r) @@ -15,7 +16,8 @@ compute_vrsqrt14sd (double *s1, double *s2, double *r) static void avx512f_test (void) { - union128d s1, s2, res1; + union128d s1, s2, res1, res2, res3; + __mmask8 m = 0; double res_ref[2]; s1.x = _mm_set_pd (-3.0, 111.111); @@ -27,4 +29,17 @@ avx512f_test (void) if (check_fp_union128d (res1, res_ref)) abort (); + + res2.x = _mm_set_pd (-4.0, DEFAULT_VALUE); + res2.x = _mm_mask_rsqrt14_sd(res2.x, m, s1.x, s2.x); + + MASK_MERGE (d) (res_ref, m, 1); + if (checkVd (res2.a, res_ref, 2)) + abort(); + + res3.x = _mm_maskz_rsqrt14_sd(m, s1.x, s2.x); + + MASK_ZERO (d) (res_ref, m, 1); + if (checkVd (res3.a, res_ref, 2)) + abort(); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c index ba08114..098df0d 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c @@ -1,13 +1,18 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128 x1, x2; +volatile __m128 x1, x2, x3; +volatile __mmask8 m; void extern avx512f_test (void) { x1 = _mm_rsqrt14_ss (x1, x2); + x1 = _mm_mask_rsqrt14_ss (x3, m, x1, x2); + x1 = _mm_maskz_rsqrt14_ss (m, x1, x2); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c index 10d8664..739a852 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c @@ -4,6 +4,7 @@ #include #include "avx512f-check.h" +#include "avx512f-helper.h" static void compute_vrsqrt14ss (float *s1, float *s2, float *r) @@ -17,7 +18,8 @@ compute_vrsqrt14ss (float *s1, float *s2, float *r) static void avx512f_test (void) { - union128 s1, s2, res1; + union128 s1, s2, res1, res2, res3; + __mmask8 m = 0; float res_ref[4]; s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46); @@ -29,4 +31,17 @@ avx512f_test (void) if (check_fp_union128 (res1, res_ref)) abort (); + + res2.x = _mm_set_ps (5.0, 6.0, 7.0, DEFAULT_VALUE); + res2.x = _mm_mask_rsqrt14_ss(res2.x, m, s1.x, s2.x); + + MASK_MERGE () (res_ref, m, 1); + if (checkVf (res2.a, res_ref, 4)) + abort(); + + res3.x = _mm_maskz_rsqrt14_ss(m, s1.x, s2.x); + + MASK_ZERO () (res_ref, m, 1); + if (checkVf (res3.a, res_ref, 4)) + abort(); } -- 2.7.4