Add missing mask[z]_range[_round]_s[d,s] intrinsics
author Olga Makhotina <olga.makhotina@intel.com>
Mon, 5 Feb 2018 07:08:24 +0000 (07:08 +0000)
committer Kirill Yukhin <kyukhin@gcc.gnu.org>
Mon, 5 Feb 2018 07:08:24 +0000 (07:08 +0000)
gcc/
* config/i386/avx512dqintrin.h (_mm_mask_range_sd, _mm_maskz_range_sd,
_mm_mask_range_round_sd, _mm_maskz_range_round_sd, _mm_mask_range_ss,
_mm_maskz_range_ss, _mm_mask_range_round_ss,
_mm_maskz_range_round_ss): New intrinsics.
(__builtin_ia32_rangesd128_round)
(__builtin_ia32_rangess128_round): Remove.
(__builtin_ia32_rangesd128_mask_round,
__builtin_ia32_rangess128_mask_round): New builtins.
* config/i386/i386-builtin.def (__builtin_ia32_rangesd128_round,
__builtin_ia32_rangess128_round): Remove.
(__builtin_ia32_rangesd128_mask_round,
__builtin_ia32_rangess128_mask_round): New builtins.
* config/i386/sse.md (ranges<mode><round_saeonly_name>): Renamed to ...
(ranges<mode><mask_scalar_name><round_saeonly_scalar_name>): ... this.
((match_operand:VF_128 2 "<round_saeonly_nimm_predicate>"
"<round_saeonly_constraint>")): Changed to ...
((match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>"
"<round_saeonly_scalar_constraint>")): ... this.
("vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|
%0, %1, %2<round_saeonly_op4>, %3}"): Changed to ...
("vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2,
%1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1,
%2<round_saeonly_scalar_mask_op4>, %3}"): ... this.
gcc/testsuite
* gcc.target/i386/avx512dq-vrangesd-1.c (_mm_mask_range_sd,
_mm_maskz_range_sd, _mm_mask_range_round_sd,
_mm_maskz_range_round_sd): Test new intrinsics.
* gcc.target/i386/avx512dq-vrangesd-2.c (_mm_range_sd,
_mm_mask_range_sd, _mm_maskz_range_sd, _mm_range_round_sd,
_mm_mask_range_round_sd, _mm_maskz_range_round_sd): Test new intrinsics.
* gcc.target/i386/avx512dq-vrangess-1.c (_mm_mask_range_ss,
_mm_maskz_range_ss, _mm_mask_range_round_ss,
_mm_maskz_range_round_ss): Test new intrinsics.
* gcc.target/i386/avx512dq-vrangess-2.c (_mm_range_ss,
_mm_mask_range_ss, _mm_maskz_range_ss, _mm_range_round_ss,
_mm_mask_range_round_ss, _mm_maskz_range_round_ss): Test new intrinsics.
* gcc.target/i386/avx-1.c (__builtin_ia32_rangesd128_round,
__builtin_ia32_rangess128_round): Remove builtins.
(__builtin_ia32_rangesd128_mask_round,
__builtin_ia32_rangess128_mask_round): Test new builtins.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.

From-SVN: r257382

gcc/ChangeLog
gcc/config/i386/avx512dqintrin.h
gcc/config/i386/i386-builtin.def
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx-1.c
gcc/testsuite/gcc.target/i386/avx512dq-vrangesd-1.c
gcc/testsuite/gcc.target/i386/avx512dq-vrangess-1.c
gcc/testsuite/gcc.target/i386/sse-13.c
gcc/testsuite/gcc.target/i386/sse-23.c

index ae226a5..a57b92d 100644 (file)
@@ -1,3 +1,29 @@
+2018-02-05  Olga Makhotina  <olga.makhotina@intel.com>
+
+       * config/i386/avx512dqintrin.h (_mm_mask_range_sd, _mm_maskz_range_sd,
+       _mm_mask_range_round_sd, _mm_maskz_range_round_sd, _mm_mask_range_ss,
+       _mm_maskz_range_ss, _mm_mask_range_round_ss,
+       _mm_maskz_range_round_ss): New intrinsics.
+       (__builtin_ia32_rangesd128_round)
+       (__builtin_ia32_rangess128_round): Remove.
+       (__builtin_ia32_rangesd128_mask_round,
+       __builtin_ia32_rangess128_mask_round): New builtins.
+       * config/i386/i386-builtin.def (__builtin_ia32_rangesd128_round,
+       __builtin_ia32_rangess128_round): Remove.
+       (__builtin_ia32_rangesd128_mask_round,
+       __builtin_ia32_rangess128_mask_round): New builtins.
+       * config/i386/sse.md (ranges<mode><round_saeonly_name>): Renamed to ...
+       (ranges<mode><mask_scalar_name><round_saeonly_scalar_name>): ... this.
+       ((match_operand:VF_128 2 "<round_saeonly_nimm_predicate>"
+       "<round_saeonly_constraint>")): Changed to ...
+       ((match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>"
+       "<round_saeonly_scalar_constraint>")): ... this.
+       ("vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|
+       %0, %1, %2<round_saeonly_op4>, %3}"): Changed to ...
+       ("vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2,
+       %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1,
+       %2<round_saeonly_scalar_mask_op4>, %3}"): ... this.
+
 2018-02-02  Andrew Jenner  <andrew@codesourcery.com>
 
        * config/powerpcspe/powerpcspe.opt: Add Undocumented to irrelevant
index fe26af8..1348259 100644 (file)
@@ -1223,18 +1223,70 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_range_sd (__m128d __A, __m128d __B, int __C)
 {
-  return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
+  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
                                                   (__v2df) __B, __C,
+                                                  (__v2df)
+                                                  _mm_setzero_pd (),
+                                                  (__mmask8) -1,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C)
+{
+  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
+                                                  (__v2df) __B, __C,
+                                                  (__v2df) __W,
+                                                  (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
+{
+  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
+                                                  (__v2df) __B, __C,
+                                                  (__v2df)
+                                                  _mm_setzero_pd (),
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_range_ss (__m128 __A, __m128 __B, int __C)
 {
-  return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
+  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
                                                  (__v4sf) __B, __C,
+                                                 (__v4sf)
+                                                 _mm_setzero_ps (),
+                                                 (__mmask8) -1,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C)
+{
+  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
+                                                 (__v4sf) __B, __C,
+                                                 (__v4sf) __W,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
+{
+  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
+                                                 (__v4sf) __B, __C,
+                                                 (__v4sf)
+                                                 _mm_setzero_ps (),
+                                                 (__mmask8) __U,
                                                  _MM_FROUND_CUR_DIRECTION);
 }
 
@@ -1242,18 +1294,68 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
 {
-  return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
+  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
                                                   (__v2df) __B, __C,
-                                                  __R);
+                                                  (__v2df)
+                                                  _mm_setzero_pd (),
+                                                  (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+                        int __C, const int __R)
+{
+  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
+                                                  (__v2df) __B, __C,
+                                                  (__v2df) __W,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C,
+                         const int __R)
+{
+  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
+                                                  (__v2df) __B, __C,
+                                                  (__v2df)
+                                                  _mm_setzero_pd (),
+                                                  (__mmask8) __U, __R);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
 {
-  return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
+  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
                                                  (__v4sf) __B, __C,
-                                                 __R);
+                                                 (__v4sf)
+                                                 _mm_setzero_ps (),
+                                                 (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+                        int __C, const int __R)
+{
+  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
+                                                 (__v4sf) __B, __C,
+                                                 (__v4sf) __W,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C,
+                         const int __R)
+{
+  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
+                                                 (__v4sf) __B, __C,
+                                                 (__v4sf)
+                                                 _mm_setzero_ps (),
+                                                 (__mmask8) __U, __R);
 }
 
 extern __inline __mmask8
@@ -2148,23 +2250,65 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 #define _kshiftri_mask8(X, Y)                                          \
   ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y)))
 
-#define _mm_range_sd(A, B, C)                                          \
-  ((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A),    \
-    (__v2df)(__m128d)(B), (int)(C),                                    \
-    _MM_FROUND_CUR_DIRECTION))
+#define _mm_range_sd(A, B, C)                                           \
+  ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (),         \
+    (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_range_sd(W, U, A, B, C)                                \
+  ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W),               \
+    (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_range_sd(U, A, B, C)                                  \
+  ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (),         \
+    (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_range_ss(A, B, C)                                          \
-  ((__m128) __builtin_ia32_rangess128_round ((__v4sf)(__m128)(A),      \
-    (__v4sf)(__m128)(B), (int)(C),                                     \
-    _MM_FROUND_CUR_DIRECTION))
+  ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),         \
+    (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
 
-#define _mm_range_round_sd(A, B, C, R)                                 \
-  ((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A),    \
-    (__v2df)(__m128d)(B), (int)(C), (R)))
+#define _mm_mask_range_ss(W, U, A, B, C)                               \
+  ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W),                        \
+    (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_range_ss(U, A, B, C)                                 \
+  ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),         \
+    (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_range_round_sd(A, B, C, R)                                  \
+  ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (),                 \
+    (__mmask8) -1, (R)))
+
+#define _mm_mask_range_round_sd(W, U, A, B, C, R)                       \
+  ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W),               \
+    (__mmask8)(U), (R)))
+
+#define _mm_maskz_range_round_sd(U, A, B, C, R)                                 \
+  ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (),                 \
+    (__mmask8)(U), (R)))
 
 #define _mm_range_round_ss(A, B, C, R)                                 \
-  ((__m128) __builtin_ia32_rangess128_round ((__v4sf)(__m128)(A),      \
-    (__v4sf)(__m128)(B), (int)(C), (R)))
+  ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),         \
+    (__mmask8) -1, (R)))
+
+#define _mm_mask_range_round_ss(W, U, A, B, C, R)                      \
+  ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W),                        \
+    (__mmask8)(U), (R)))
+
+#define _mm_maskz_range_round_ss(U, A, B, C, R)                                \
+  ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),         \
+    (__mmask8)(U), (R)))
 
 #define _mm512_cvtt_roundpd_epi64(A, B)                    \
   ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)            \
index 0b83472..2caac88 100644 (file)
@@ -2784,8 +2784,8 @@ BDESC (OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__built
 BDESC (OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
 
 /* AVX512DQ.  */
-BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_mask_round, "__builtin_ia32_rangesd128_mask_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_mask_round, "__builtin_ia32_rangess128_mask_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
 BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
 BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
index 4bb06e2..ef0d015 100644 (file)
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512dq_ranges<mode><round_saeonly_name>"
+(define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
-            (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+            (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
             (match_operand:SI 3 "const_0_to_15_operand")]
            UNSPEC_RANGE)
          (match_dup 1)
          (const_int 1)))]
   "TARGET_AVX512DQ"
-  "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
+  "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}"
   [(set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
index 1c20a6a..0582572 100644 (file)
@@ -1,3 +1,24 @@
+2018-02-05  Olga Makhotina  <olga.makhotina@intel.com>
+
+       * gcc.target/i386/avx512dq-vrangesd-1.c (_mm_mask_range_sd,
+       _mm_maskz_range_sd, _mm_mask_range_round_sd,
+       _mm_maskz_range_round_sd): Test new intrinsics.
+       * gcc.target/i386/avx512dq-vrangesd-2.c (_mm_range_sd,
+       _mm_mask_range_sd, _mm_maskz_range_sd, _mm_range_round_sd,
+       _mm_mask_range_round_sd, _mm_maskz_range_round_sd): Test new intrinsics.
+       * gcc.target/i386/avx512dq-vrangess-1.c (_mm_mask_range_ss,
+       _mm_maskz_range_ss, _mm_mask_range_round_ss,
+       _mm_maskz_range_round_ss): Test new intrinsics.
+       * gcc.target/i386/avx512dq-vrangess-2.c (_mm_range_ss,
+       _mm_mask_range_ss, _mm_maskz_range_ss, _mm_range_round_ss,
+       _mm_mask_range_round_ss, _mm_maskz_range_round_ss): Test new intrinsics.
+       * gcc.target/i386/avx-1.c (__builtin_ia32_rangesd128_round,
+       __builtin_ia32_rangess128_round): Remove builtins.
+       (__builtin_ia32_rangesd128_mask_round,
+       __builtin_ia32_rangess128_mask_round): Test new builtins.
+       * gcc.target/i386/sse-13.c: Ditto.
+       * gcc.target/i386/sse-23.c: Ditto.
+
 2018-02-04  Jan Hubicka  <hubicka@ucw.cz>
 
        PR middle-end/79966
index db77244..2cd3fd1 100644 (file)
 #define __builtin_ia32_reducesd_mask(A, B, F, W, U) __builtin_ia32_reducesd_mask(A, B, 1, W, U)
 #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
 #define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D)
-#define __builtin_ia32_rangess128_round(A, B, I, F) __builtin_ia32_rangess128_round(A, B, 1, 8)
-#define __builtin_ia32_rangesd128_round(A, B, I, F) __builtin_ia32_rangesd128_round(A, B, 1, 8)
+#define __builtin_ia32_rangess128_mask_round(A, B, I, D, E, F) \
+    __builtin_ia32_rangess128_mask_round(A, B, 1, D, E, 8)
+#define __builtin_ia32_rangesd128_mask_round(A, B, I, D, E, F) \
+    __builtin_ia32_rangesd128_mask_round(A, B, 1, D, E, 8)
 #define __builtin_ia32_rangeps512_mask(A, B, I, D, E, F) __builtin_ia32_rangeps512_mask(A, B, 1, D, E, 8)
 #define __builtin_ia32_rangepd512_mask(A, B, I, D, E, F) __builtin_ia32_rangepd512_mask(A, B, 1, D, E, 8)
 #define __builtin_ia32_inserti64x2_512_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_512_mask(A, B, 1, D, E)
index 4f7d635..aa2124e 100644 (file)
@@ -2,6 +2,11 @@
 /* { dg-options "-mavx512dq -O2" } */
 /* { dg-final { scan-assembler-times "vrangesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vrangesd\[ \\t\]+\[^\$\n\]*\\$\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrangesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrangesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrangesd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrangesd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
 
 #include <immintrin.h>
 
@@ -12,5 +17,10 @@ void extern
 avx512dq_test (void)
 {
   x1 = _mm_range_sd (x1, x2, 3);
+  x1 = _mm_mask_range_sd (x1, m, x1, x2, 3);
+  x1 = _mm_maskz_range_sd (m, x1, x2, 3);
+
   x1 = _mm_range_round_sd (x1, x2, 3, _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_range_round_sd (x1, m, x1, x2, 3, _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_range_round_sd (m, x1, x2, 3, _MM_FROUND_NO_EXC);
 }
index b0ed86d..3b401df 100644 (file)
@@ -2,6 +2,10 @@
 /* { dg-options "-mavx512dq -O2" } */
 /* { dg-final { scan-assembler-times "vrangess\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vrangess\[ \\t\]+\[^\$\n\]*\\$\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrangess\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrangess\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrangess\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrangess\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
@@ -12,5 +16,10 @@ void extern
 avx512dq_test (void)
 {
   x1 = _mm_range_ss (x1, x2, 1);
+  x1 = _mm_mask_range_ss (x1, m, x1, x2, 1);
+  x1 = _mm_maskz_range_ss (m, x1, x2, 1);
+
   x1 = _mm_range_round_ss (x1, x2, 1, _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_range_round_ss (x1, m, x1, x2, 1, _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_range_round_ss (m, x1, x2, 1, _MM_FROUND_NO_EXC);
 }
index cc9d00a..63c38a5 100644 (file)
 #define __builtin_ia32_reducesd_mask(A, B, F, W, U) __builtin_ia32_reducesd_mask(A, B, 1, W, U)
 #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
 #define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D)
-#define __builtin_ia32_rangess128_round(A, B, I, F) __builtin_ia32_rangess128_round(A, B, 1, 8)
-#define __builtin_ia32_rangesd128_round(A, B, I, F) __builtin_ia32_rangesd128_round(A, B, 1, 8)
+#define __builtin_ia32_rangess128_mask_round(A, B, I, D, E, F) \
+    __builtin_ia32_rangess128_mask_round(A, B, 1, D, E, 8)
+#define __builtin_ia32_rangesd128_mask_round(A, B, I, D, E, F) \
+    __builtin_ia32_rangesd128_mask_round(A, B, 1, D, E, 8)
 #define __builtin_ia32_rangeps512_mask(A, B, I, D, E, F) __builtin_ia32_rangeps512_mask(A, B, 1, D, E, 8)
 #define __builtin_ia32_rangepd512_mask(A, B, I, D, E, F) __builtin_ia32_rangepd512_mask(A, B, 1, D, E, 8)
 #define __builtin_ia32_inserti64x2_512_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_512_mask(A, B, 1, D, E)
index 00d30ba..d2fe796 100644 (file)
 #define __builtin_ia32_reducesd_mask(A, B, F, W, U) __builtin_ia32_reducesd_mask(A, B, 1, W, U)
 #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
 #define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D)
-#define __builtin_ia32_rangess128_round(A, B, I, F) __builtin_ia32_rangess128_round(A, B, 1, 8)
-#define __builtin_ia32_rangesd128_round(A, B, I, F) __builtin_ia32_rangesd128_round(A, B, 1, 8)
+#define __builtin_ia32_rangess128_mask_round(A, B, I, D, E, F) \
+    __builtin_ia32_rangess128_mask_round(A, B, 1, D, E, 8)
+#define __builtin_ia32_rangesd128_mask_round(A, B, I, D, E, F) \
+    __builtin_ia32_rangesd128_mask_round(A, B, 1, D, E, 8)
 #define __builtin_ia32_rangeps512_mask(A, B, I, D, E, F) __builtin_ia32_rangeps512_mask(A, B, 1, D, E, 8)
 #define __builtin_ia32_rangepd512_mask(A, B, I, D, E, F) __builtin_ia32_rangepd512_mask(A, B, 1, D, E, 8)
 #define __builtin_ia32_inserti64x2_512_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_512_mask(A, B, 1, D, E)