From b10bc0d6f9d5e281465c6f8d4197f3158e46e093 Mon Sep 17 00:00:00 2001 From: Olga Makhotina Date: Mon, 12 Feb 2018 05:44:29 +0000 Subject: [PATCH] Add missing intrinsics for _mm_mask[z]_sqrt_round_[sd,ss] MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit gcc/ * config/i386/avx512fintrin.h (_mm_mask_sqrt_round_sd) (_mm_maskz_sqrt_round_sd, _mm_mask_sqrt_round_ss) (_mm_maskz_sqrt_round_ss): New intrinsics. (__builtin_ia32_sqrtsd_round, __builtin_ia32_sqrtss_round): Remove. (__builtin_ia32_sqrtsd_mask_round) (__builtin_ia32_sqrtss_mask_round): New builtins. * config/i386/i386-builtin.def (__builtin_ia32_sqrtsd_round) (__builtin_ia32_sqrtss_round): Remove. (__builtin_ia32_sqrtsd_mask_round) (__builtin_ia32_sqrtss_mask_round): New builtins. * config/i386/sse.md (vmsqrt2): Renamed to ... (vmsqrt2): ... this. ((match_operand:VF_128 1 "vector_operand" "xBm,")): Changed to ... ((match_operand:VF_128 1 "vector_operand" "xBm,")): ... this. (vsqrt\t{%1, %2, %0| %0, %2, %1}): Changed to ... (vsqrt\t{%1, %2, %0|%0, %2, %1}): ... this. ((set_attr "prefix" "")): Changed to ... ((set_attr "prefix" "")): ... this. gcc/testsuite/ * gcc.target/i386/avx512f-vsqrtsd-1.c (_mm_mask_sqrt_round_sd) (_mm_maskz_sqrt_round_sd): Test new intrinsics. * gcc.target/i386/avx512f-vsqrtsd-2.c (_mm_sqrt_round_sd) (_mm_mask_sqrt_round_sd, _mm_maskz_sqrt_round_sd): Test new intrinsics. * gcc.target/i386/avx512f-vsqrtss-1.c (_mm_mask_sqrt_round_ss) (_mm_maskz_sqrt_round_ss): Test new intrinsics. * gcc.target/i386/avx512f-vsqrtss-2.c (_mm_sqrt_round_ss) (_mm_mask_sqrt_round_ss,      _mm_maskz_sqrt_round_ss): Test new intrinsics. * gcc.target/i386/avx-1.c (__builtin_ia32_sqrtsd_round) (__builtin_ia32_sqrtss_round): Remove builtins. (__builtin_ia32_sqrtsd_mask_round) (__builtin_ia32_sqrtss_mask_round): Test new builtins. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. 
From-SVN: r257576 --- gcc/config/i386/avx512fintrin.h | 84 ++++++++++++++++++++--- gcc/config/i386/i386-builtin.def | 4 +- gcc/config/i386/sse.md | 8 +-- gcc/testsuite/ChangeLog | 18 +++++ gcc/testsuite/gcc.target/i386/avx-1.c | 4 +- gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c | 5 ++ gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-2.c | 62 +++++++++++++++++ gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c | 6 ++ gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-2.c | 63 +++++++++++++++++ gcc/testsuite/gcc.target/i386/sse-13.c | 4 +- gcc/testsuite/gcc.target/i386/sse-23.c | 4 +- 11 files changed, 240 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-2.c diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 71e36a5..ffbb1d9 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -1955,18 +1955,66 @@ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R) { - return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B, - (__v2df) __A, - __R); + return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, + (__v2df) __A, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, + (__v2df) __A, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R) +{ + return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, + (__v2df) __A, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, 
__R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R) { - return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B, - (__v4sf) __A, - __R); + return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); } #else #define _mm512_sqrt_round_pd(A, C) \ @@ -1987,11 +2035,27 @@ _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R) #define _mm512_maskz_sqrt_round_ps(U, A, C) \ (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) -#define _mm_sqrt_round_sd(A, B, C) \ - (__m128d)__builtin_ia32_sqrtsd_round(A, B, C) +#define _mm_sqrt_round_sd(A, B, C) \ + (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \ + (__v2df) _mm_setzero_pd (), -1, C) + +#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C) + +#define _mm_maskz_sqrt_round_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \ + (__v2df) _mm_setzero_pd (), U, C) + +#define _mm_sqrt_round_ss(A, B, C) \ + (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \ + (__v4sf) _mm_setzero_ps (), -1, C) + +#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C) -#define _mm_sqrt_round_ss(A, B, 
C) \ - (__m128)__builtin_ia32_sqrtss_round(A, B, C) +#define _mm_maskz_sqrt_round_ss(U, A, B, C) \ + (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \ + (__v4sf) _mm_setzero_ps (), U, C) #endif extern __inline __m512i diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 2caac88..5061042 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2722,8 +2722,8 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) -BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_mask_round, "__builtin_ia32_sqrtss_mask_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", 
IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ef0d015..47687a6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1881,21 +1881,21 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) -(define_insn "_vmsqrt2" +(define_insn "_vmsqrt2" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (sqrt:VF_128 - (match_operand:VF_128 1 "vector_operand" "xBm,")) + (match_operand:VF_128 1 "vector_operand" "xBm,")) (match_operand:VF_128 2 "register_operand" "0,v") (const_int 1)))] "TARGET_SSE" "@ sqrt\t{%1, %0|%0, %1} - vsqrt\t{%1, %2, %0|%0, %2, %1}" + vsqrt\t{%1, %2, %0|%0, %2, %1}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sse") (set_attr "atom_sse_attr" "sqrt") - (set_attr "prefix" "") + (set_attr "prefix" "") (set_attr "btver2_sse_attr" "sqrt") (set_attr "mode" "")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 27c25a5..9c53f99 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,21 @@ +2018-02-12 Olga Makhotina + + * gcc.target/i386/avx512f-vsqrtsd-1.c (_mm_mask_sqrt_round_sd) + (_mm_maskz_sqrt_round_sd): Test new intrinsics. + * gcc.target/i386/avx512f-vsqrtsd-2.c (_mm_sqrt_round_sd) + (_mm_mask_sqrt_round_sd, _mm_maskz_sqrt_round_sd): Test new intrinsics. + * gcc.target/i386/avx512f-vsqrtss-1.c (_mm_mask_sqrt_round_ss) + (_mm_maskz_sqrt_round_ss): Test new intrinsics. + * gcc.target/i386/avx512f-vsqrtss-2.c (_mm_sqrt_round_ss) + (_mm_mask_sqrt_round_ss,      _mm_maskz_sqrt_round_ss): Test new + intrinsics. + * gcc.target/i386/avx-1.c (__builtin_ia32_sqrtsd_round) + (__builtin_ia32_sqrtss_round): Remove builtins. + (__builtin_ia32_sqrtsd_mask_round) + (__builtin_ia32_sqrtss_mask_round): Test new builtins. 
+ * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + 2018-02-11 Francois-Xavier Coudert PR fortran/35299 diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 2cd3fd1..acfc85b 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -305,8 +305,8 @@ #define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E) #define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 8) #define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 8) -#define __builtin_ia32_sqrtss_round(A, B, C) __builtin_ia32_sqrtss_round(A, B, 8) -#define __builtin_ia32_sqrtsd_round(A, B, C) __builtin_ia32_sqrtsd_round(A, B, 8) +#define __builtin_ia32_sqrtss_mask_round(A, B, C, D, E) __builtin_ia32_sqrtss_mask_round(A, B, C, D, 8) +#define __builtin_ia32_sqrtsd_mask_round(A, B, C, D, E) __builtin_ia32_sqrtsd_mask_round(A, B, C, D, 8) #define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 8) #define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 8) #define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 8) diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c index c0559c0..a7d7af9 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c @@ -1,13 +1,18 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vsqrtsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtsd\[ 
\\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> volatile __m128d x1, x2; +volatile __mmask8 m; void extern avx512f_test (void) { x1 = _mm_sqrt_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + x1 = _mm_mask_sqrt_round_sd (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + x1 = _mm_maskz_sqrt_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-2.c new file mode 100644 index 0000000..49ca7ee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-2.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-require-effective-target avx512f } */ + +#include <math.h> +#include "avx512f-check.h" + +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +static void +compute_sqrtsd (double *s1, double *s2, double *r) +{ + r[0] = sqrt(s2[0]); + r[1] = s1[1]; +} + +void static +avx512f_test (void) +{ + union128d res1, res2, res3; + union128d s1, s2; + double res_ref[SIZE]; + MASK_TYPE mask = MASK_VALUE; + int i; + + for (i = 0; i < SIZE; i++) + { + s1.a[i] = 11.5 * (i + 1); + s2.a[i] = 10.5 * (i + 1); + res_ref[i] = 9.5 * (i + 1); + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + } + + res1.x = _mm_sqrt_round_sd (s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + res2.x = _mm_mask_sqrt_round_sd (s1.x, mask, s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + res3.x = _mm_maskz_sqrt_round_sd (mask, s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + + compute_sqrtsd (s1.a, s2.a, res_ref); + + if (check_union128d (res1, res_ref)) + abort (); + + MASK_MERGE (d) (res_ref, mask, 1); + + if (check_union128d (res2, res_ref)) + abort (); + + MASK_ZERO (d) (res_ref, mask, 1); + + if (check_union128d (res3, res_ref)) + abort (); +} + + + 
+ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c index e43b4a1..103ff30 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c @@ -1,13 +1,19 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + #include <immintrin.h> volatile __m128 x1, x2; +volatile __mmask8 m; void extern avx512f_test (void) { x1 = _mm_sqrt_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + x1 = _mm_mask_sqrt_round_ss (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + x1 = _mm_maskz_sqrt_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-2.c new file mode 100644 index 0000000..90f88be --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-2.c @@ -0,0 +1,63 @@ +/* { dg-do run } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-require-effective-target avx512f } */ + +#include <math.h> +#include "avx512f-check.h" + +#define SIZE (128 / 32) +#include "avx512f-mask-type.h" + +static void +compute_sqrtss (float *s1, float *s2, float *r) +{ + r[0] = sqrt(s2[0]); + int i; + for (i = 1; i < SIZE; i++) + { + r[i] = s1[i]; + } +} + +static void +avx512f_test (void) +{ + union128 res1, res2, res3; + union128 s1, s2; + float res_ref[SIZE]; + MASK_TYPE mask = MASK_VALUE; + int i; + + for (i = 0; i < SIZE; i++) + { + s1.a[i] = 11.5 * (i + 1); + s2.a[i] = 10.5 
* (i + 1); + res_ref[i] = 9.5 * (i + 1); + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + } + + res1.x = _mm_sqrt_round_ss (s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + res2.x = _mm_mask_sqrt_round_ss (s1.x, mask, s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + res3.x = _mm_maskz_sqrt_round_ss (mask, s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + + compute_sqrtss (s1.a, s2.a, res_ref); + + if (check_union128 (res1, res_ref)) + abort (); + + MASK_MERGE () (res_ref, mask, 1); + + if (check_union128 (res2, res_ref)) + abort (); + + MASK_ZERO () (res_ref, mask, 1); + + if (check_union128 (res3, res_ref)) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 63c38a5..7b03199 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -322,8 +322,8 @@ #define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E) #define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 8) #define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 8) -#define __builtin_ia32_sqrtss_round(A, B, C) __builtin_ia32_sqrtss_round(A, B, 8) -#define __builtin_ia32_sqrtsd_round(A, B, C) __builtin_ia32_sqrtsd_round(A, B, 8) +#define __builtin_ia32_sqrtss_mask_round(A, B, C, D, E) __builtin_ia32_sqrtss_mask_round(A, B, C, D, 8) +#define __builtin_ia32_sqrtsd_mask_round(A, B, C, D, E) __builtin_ia32_sqrtsd_mask_round(A, B, C, D, 8) #define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 8) #define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 8) #define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index d2fe796..0b18eec 
100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -323,8 +323,8 @@ #define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E) #define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 8) #define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 8) -#define __builtin_ia32_sqrtss_round(A, B, C) __builtin_ia32_sqrtss_round(A, B, 8) -#define __builtin_ia32_sqrtsd_round(A, B, C) __builtin_ia32_sqrtsd_round(A, B, 8) +#define __builtin_ia32_sqrtss_mask_round(A, B, C, D, E) __builtin_ia32_sqrtss_mask_round(A, B, C, D, 8) +#define __builtin_ia32_sqrtsd_mask_round(A, B, C, D, E) __builtin_ia32_sqrtsd_mask_round(A, B, C, D, 8) #define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 8) #define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 8) #define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 8) -- 2.7.4