+2017-10-26 Olga Makhotina <olga.makhotina@intel.com>
+
+ * config/i386/avx512fintrin.h (_mm512_cmpeq_pd_mask,
+ _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask,
+ _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask,
+ _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask,
+ _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask,
+ _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask,
+ _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask,
+ _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask,
+ _mm512_mask_cmpunord_pd_mask, _mm512_cmpeq_ps_mask,
+ _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask,
+ _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask,
+ _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask,
+ _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask,
+ _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask,
+ _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask,
+ _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask,
+ _mm512_mask_cmpunord_ps_mask): New intrinsics.
+
2017-10-26 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/aix.h (TARGET_IEEEQUAD_DEFAULT): Set long double
+/* Named comparison wrappers around the generic
+   __builtin_ia32_cmppd512_mask builtin: each fixes one _CMP_*
+   predicate and uses _MM_FROUND_CUR_DIRECTION.  The unmasked forms
+   pass an all-ones write mask; the _mask_ forms pass __U, so result
+   bits whose positions are clear in __U come out zero (standard
+   AVX-512 masked-compare convention -- see the Intel intrinsics
+   guide for _mm512_mask_cmp_pd_mask).  */
+
+/* Equal, ordered, quiet (_CMP_EQ_OQ).  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_EQ_OQ,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_EQ_OQ,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Less-than, ordered, signaling (_CMP_LT_OS).  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_LT_OS,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_LT_OS,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Less-than-or-equal, ordered, signaling (_CMP_LE_OS).  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_LE_OS,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_LE_OS,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unordered, quiet: true when either element is NaN (_CMP_UNORD_Q).  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_UNORD_Q,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_UNORD_Q,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Not-equal, unordered, quiet (_CMP_NEQ_UQ).  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NEQ_UQ,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NEQ_UQ,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Not-less-than, unordered, signaling (_CMP_NLT_US).  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NLT_US,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NLT_US,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Not-less-than-or-equal, unordered, signaling (_CMP_NLE_US).  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NLE_US,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NLE_US,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Ordered, quiet: true when neither element is NaN (_CMP_ORD_Q).  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_ORD_Q,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_ORD_Q,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Single-precision counterparts of the named compare wrappers above
+   in spirit: each fixes one _CMP_* predicate for the generic
+   __builtin_ia32_cmpps512_mask builtin, using
+   _MM_FROUND_CUR_DIRECTION.  Unmasked forms pass an all-ones write
+   mask; _mask_ forms pass __U, so result bits whose positions are
+   clear in __U come out zero (standard AVX-512 masked-compare
+   convention -- see the Intel intrinsics guide for
+   _mm512_mask_cmp_ps_mask).  */
+
+/* Equal, ordered, quiet (_CMP_EQ_OQ).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_EQ_OQ,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_EQ_OQ,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Less-than, ordered, signaling (_CMP_LT_OS).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_LT_OS,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_LT_OS,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Less-than-or-equal, ordered, signaling (_CMP_LE_OS).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_LE_OS,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_LE_OS,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unordered, quiet: true when either element is NaN (_CMP_UNORD_Q).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_UNORD_Q,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_UNORD_Q,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Not-equal, unordered, quiet (_CMP_NEQ_UQ).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NEQ_UQ,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NEQ_UQ,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Not-less-than, unordered, signaling (_CMP_NLT_US).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NLT_US,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NLT_US,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Not-less-than-or-equal, unordered, signaling (_CMP_NLE_US).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NLE_US,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NLE_US,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Ordered, quiet: true when neither element is NaN (_CMP_ORD_Q).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_ORD_Q,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_ORD_Q,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
{
return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+2017-10-26 Olga Makhotina <olga.makhotina@intel.com>
+
+ * gcc.target/i386/avx512f-vcmpps-1.c (_mm512_cmpeq_ps_mask,
+ _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask,
+ _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask,
+ _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask,
+ _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask,
+ _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask,
+ _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask,
+ _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask,
+ _mm512_mask_cmpunord_ps_mask): Test new intrinsics.
+ * gcc.target/i386/avx512f-vcmpps-2.c (_mm512_cmpeq_ps_mask,
+ _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask,
+ _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask,
+ _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask,
+ _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask,
+ _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask,
+ _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask,
+ _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask,
+ _mm512_mask_cmpunord_ps_mask): Test new intrinsics.
+ * gcc.target/i386/avx512f-vcmppd-1.c (_mm512_cmpeq_pd_mask,
+ _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask,
+ _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask,
+ _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask,
+ _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask,
+ _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask,
+ _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask,
+ _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask,
+ _mm512_mask_cmpunord_pd_mask): Test new intrinsics.
+ * gcc.target/i386/avx512f-vcmppd-2.c (_mm512_cmpeq_pd_mask,
+ _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask,
+ _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask,
+ _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask,
+ _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask,
+ _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask,
+ _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask,
+ _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask,
+ _mm512_mask_cmpunord_pd_mask): Test new intrinsics.
+
2017-10-26 Wilco Dijkstra <wdijkstr@arm.com>
* gcc.target/aarch64/ldp_stp_unaligned_2.c: New file.
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512f" } */
-/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */
+/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */
/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
m = _mm512_mask_cmp_pd_mask (m, x, x, _CMP_FALSE_OQ);
m = _mm512_cmp_round_pd_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
m = _mm512_mask_cmp_round_pd_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+
+ m = _mm512_cmpeq_pd_mask (x, x);
+ m = _mm512_mask_cmpeq_pd_mask (m, x, x);
+
+ m = _mm512_cmplt_pd_mask (x, x);
+ m = _mm512_mask_cmplt_pd_mask (m, x, x);
+
+ m = _mm512_cmple_pd_mask (x, x);
+ m = _mm512_mask_cmple_pd_mask (m, x, x);
+
+ m = _mm512_cmpunord_pd_mask (x, x);
+ m = _mm512_mask_cmpunord_pd_mask (m, x, x);
+
+ m = _mm512_cmpneq_pd_mask (x, x);
+ m = _mm512_mask_cmpneq_pd_mask (m, x, x);
+
+ m = _mm512_cmpnlt_pd_mask (x, x);
+ m = _mm512_mask_cmpnlt_pd_mask (m, x, x);
+
+ m = _mm512_cmpnle_pd_mask (x, x);
+ m = _mm512_mask_cmpnle_pd_mask (m, x, x);
+
+ m = _mm512_cmpord_pd_mask (x, x);
+ m = _mm512_mask_cmpord_pd_mask (m, x, x);
}
+
#define SIZE (AVX512F_LEN / 64)
#include "avx512f-mask-type.h"
+#undef SUF
+#undef SSIZE
+#undef GEN_CMP
+#undef CHECK_CMP
+
#if AVX512F_LEN == 512
-#define CMP(imm, rel) \
- dst_ref = 0; \
- for (i = 0; i < 8; i++) \
- { \
- dst_ref = (((int) rel) << i) | dst_ref; \
- } \
- source1.x = _mm512_loadu_pd(s1); \
- source2.x = _mm512_loadu_pd(s2); \
- dst1 = _mm512_cmp_pd_mask(source1.x, source2.x, imm);\
- dst2 = _mm512_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
- if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm512##fun
+#define SSIZE 8
+
+#define GEN_CMP(type) \
+ { \
+ dst3 = _mm512_cmp##type##_pd_mask(source1.x, source2.x);\
+ dst4 = _mm512_mask_cmp##type##_pd_mask(mask, source1.x, source2.x);\
+ if (dst3 != dst1) abort(); \
+ if (dst4 != dst2) abort(); \
+ }
+
+#define CHECK_CMP(imm) \
+ if (imm == _CMP_EQ_OQ) GEN_CMP(eq) \
+ if (imm == _CMP_LT_OS) GEN_CMP(lt) \
+ if (imm == _CMP_LE_OS) GEN_CMP(le) \
+ if (imm == _CMP_UNORD_Q) GEN_CMP(unord) \
+ if (imm == _CMP_NEQ_UQ) GEN_CMP(neq) \
+ if (imm == _CMP_NLT_US) GEN_CMP(nlt) \
+ if (imm == _CMP_NLE_US) GEN_CMP(nle) \
+ if (imm == _CMP_ORD_Q) GEN_CMP(ord)
+
#endif
#if AVX512F_LEN == 256
-#undef CMP
-#define CMP(imm, rel) \
- dst_ref = 0; \
- for (i = 0; i < 4; i++) \
- { \
- dst_ref = (((int) rel) << i) | dst_ref; \
- } \
- source1.x = _mm256_loadu_pd(s1); \
- source2.x = _mm256_loadu_pd(s2); \
- dst1 = _mm256_cmp_pd_mask(source1.x, source2.x, imm);\
- dst2 = _mm256_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
- if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm256##fun
+#define SSIZE 4
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
#endif
#if AVX512F_LEN == 128
+#define SUF(fun) _mm##fun
+#define SSIZE 2
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
+#endif
+
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 2; i++) \
+ for (i = 0; i < SSIZE; i++) \
{ \
dst_ref = (((int) rel) << i) | dst_ref; \
} \
- source1.x = _mm_loadu_pd(s1); \
- source2.x = _mm_loadu_pd(s2); \
- dst1 = _mm_cmp_pd_mask(source1.x, source2.x, imm);\
- dst2 = _mm_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
+ source1.x = SUF(_loadu_pd)(s1); \
+ source2.x = SUF(_loadu_pd)(s2); \
+ dst1 = SUF(_cmp_pd_mask)(source1.x, source2.x, imm);\
+ dst2 = SUF(_mask_cmp_pd_mask)(mask, source1.x, source2.x, imm);\
if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
-#endif
+ if ((dst_ref & mask) != dst2) abort(); \
+ CHECK_CMP(imm)
void
TEST ()
{
UNION_TYPE (AVX512F_LEN, d) source1, source2;
- MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE dst1, dst2, dst3, dst4, dst_ref;
MASK_TYPE mask = MASK_VALUE;
int i;
double s1[8]={2134.3343, 6678.346, 453.345635, 54646.464,
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512f" } */
-/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */
+/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */
/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
m = _mm512_mask_cmp_ps_mask (m, x, x, _CMP_FALSE_OQ);
m = _mm512_cmp_round_ps_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
m = _mm512_mask_cmp_round_ps_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+
+ m = _mm512_cmpeq_ps_mask (x, x);
+ m = _mm512_mask_cmpeq_ps_mask (m, x, x);
+
+ m = _mm512_cmplt_ps_mask (x, x);
+ m = _mm512_mask_cmplt_ps_mask (m, x, x);
+
+ m = _mm512_cmple_ps_mask (x, x);
+ m = _mm512_mask_cmple_ps_mask (m, x, x);
+
+ m = _mm512_cmpunord_ps_mask (x, x);
+ m = _mm512_mask_cmpunord_ps_mask (m, x, x);
+
+ m = _mm512_cmpneq_ps_mask (x, x);
+ m = _mm512_mask_cmpneq_ps_mask (m, x, x);
+
+ m = _mm512_cmpnlt_ps_mask (x, x);
+ m = _mm512_mask_cmpnlt_ps_mask (m, x, x);
+
+ m = _mm512_cmpnle_ps_mask (x, x);
+ m = _mm512_mask_cmpnle_ps_mask (m, x, x);
+
+ m = _mm512_cmpord_ps_mask (x, x);
+ m = _mm512_mask_cmpord_ps_mask (m, x, x);
}
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
+#undef SUF
+#undef SSIZE
+#undef GEN_CMP
+#undef CHECK_CMP
+
#if AVX512F_LEN == 512
-#undef CMP
-#define CMP(imm, rel) \
- dst_ref = 0; \
- for (i = 0; i < 16; i++) \
- { \
- dst_ref = (((int) rel) << i) | dst_ref; \
- } \
- source1.x = _mm512_loadu_ps(s1); \
- source2.x = _mm512_loadu_ps(s2); \
- dst1 = _mm512_cmp_ps_mask(source1.x, source2.x, imm);\
- dst2 = _mm512_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
- if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm512##fun
+#define SSIZE 16
+
+#define GEN_CMP(type) \
+ { \
+ dst3 = _mm512_cmp##type##_ps_mask(source1.x, source2.x);\
+ dst4 = _mm512_mask_cmp##type##_ps_mask(mask, source1.x, source2.x);\
+ if (dst3 != dst1) abort(); \
+ if (dst4 != dst2) abort(); \
+ }
+
+#define CHECK_CMP(imm) \
+ if (imm == _CMP_EQ_OQ) GEN_CMP(eq) \
+ if (imm == _CMP_LT_OS) GEN_CMP(lt) \
+ if (imm == _CMP_LE_OS) GEN_CMP(le) \
+ if (imm == _CMP_UNORD_Q) GEN_CMP(unord) \
+ if (imm == _CMP_NEQ_UQ) GEN_CMP(neq) \
+ if (imm == _CMP_NLT_US) GEN_CMP(nlt) \
+ if (imm == _CMP_NLE_US) GEN_CMP(nle) \
+ if (imm == _CMP_ORD_Q) GEN_CMP(ord)
+
#endif
#if AVX512F_LEN == 256
-#undef CMP
-#define CMP(imm, rel) \
- dst_ref = 0; \
- for (i = 0; i < 8; i++) \
- { \
- dst_ref = (((int) rel) << i) | dst_ref; \
- } \
- source1.x = _mm256_loadu_ps(s1); \
- source2.x = _mm256_loadu_ps(s2); \
- dst1 = _mm256_cmp_ps_mask(source1.x, source2.x, imm);\
- dst2 = _mm256_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
- if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm256##fun
+#define SSIZE 8
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
#endif
#if AVX512F_LEN == 128
+#define SUF(fun) _mm##fun
+#define SSIZE 4
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
+#endif
+
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 4; i++) \
+ for (i = 0; i < SSIZE; i++) \
{ \
dst_ref = (((int) rel) << i) | dst_ref; \
} \
- source1.x = _mm_loadu_ps(s1); \
- source2.x = _mm_loadu_ps(s2); \
- dst1 = _mm_cmp_ps_mask(source1.x, source2.x, imm);\
- dst2 = _mm_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
+ source1.x = SUF(_loadu_ps)(s1); \
+ source2.x = SUF(_loadu_ps)(s2); \
+ dst1 = SUF(_cmp_ps_mask)(source1.x, source2.x, imm);\
+ dst2 = SUF(_mask_cmp_ps_mask)(mask, source1.x, source2.x, imm);\
if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
-#endif
+ if ((dst_ref & mask) != dst2) abort(); \
+ CHECK_CMP(imm)
void
TEST ()
{
UNION_TYPE (AVX512F_LEN,) source1, source2;
- MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE dst1, dst2, dst3, dst4, dst_ref;
MASK_TYPE mask = MASK_VALUE;
int i;
float s1[16] = {2134.3343, 6678.346, 453.345635, 54646.464,