From f77f3adebde0547ed734a260f29e5afc85dcbe49 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 2 Mar 2020 16:35:58 +0800 Subject: [PATCH] AVX512FP16: Add testcase for vaddph/vsubph/vmulph/vdivph. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-helper.h: New header file for FP16 runtime test. * gcc.target/i386/avx512fp16-vaddph-1a.c: New test. * gcc.target/i386/avx512fp16-vaddph-1b.c: Ditto. * gcc.target/i386/avx512fp16-vdivph-1a.c: Ditto. * gcc.target/i386/avx512fp16-vdivph-1b.c: Ditto. * gcc.target/i386/avx512fp16-vmulph-1a.c: Ditto. * gcc.target/i386/avx512fp16-vmulph-1b.c: Ditto. * gcc.target/i386/avx512fp16-vsubph-1a.c: Ditto. * gcc.target/i386/avx512fp16-vsubph-1b.c: Ditto. * gcc.target/i386/avx512fp16vl-vaddph-1a.c: Ditto. * gcc.target/i386/avx512fp16vl-vaddph-1b.c: Ditto. * gcc.target/i386/avx512fp16vl-vdivph-1a.c: Ditto. * gcc.target/i386/avx512fp16vl-vdivph-1b.c: Ditto. * gcc.target/i386/avx512fp16vl-vmulph-1a.c: Ditto. * gcc.target/i386/avx512fp16vl-vmulph-1b.c: Ditto. * gcc.target/i386/avx512fp16vl-vsubph-1a.c: Ditto. * gcc.target/i386/avx512fp16vl-vsubph-1b.c: Ditto. --- gcc/testsuite/gcc.target/i386/avx512fp16-helper.h | 207 +++++++++++++++++++++ .../gcc.target/i386/avx512fp16-vaddph-1a.c | 26 +++ .../gcc.target/i386/avx512fp16-vaddph-1b.c | 92 +++++++++ .../gcc.target/i386/avx512fp16-vdivph-1a.c | 26 +++ .../gcc.target/i386/avx512fp16-vdivph-1b.c | 97 ++++++++++ .../gcc.target/i386/avx512fp16-vmulph-1a.c | 26 +++ .../gcc.target/i386/avx512fp16-vmulph-1b.c | 92 +++++++++ .../gcc.target/i386/avx512fp16-vsubph-1a.c | 26 +++ .../gcc.target/i386/avx512fp16-vsubph-1b.c | 93 +++++++++ .../gcc.target/i386/avx512fp16vl-vaddph-1a.c | 29 +++ .../gcc.target/i386/avx512fp16vl-vaddph-1b.c | 16 ++ .../gcc.target/i386/avx512fp16vl-vdivph-1a.c | 29 +++ .../gcc.target/i386/avx512fp16vl-vdivph-1b.c | 16 ++ .../gcc.target/i386/avx512fp16vl-vmulph-1a.c | 29 +++ .../gcc.target/i386/avx512fp16vl-vmulph-1b.c | 16 ++ .../gcc.target/i386/avx512fp16vl-vsubph-1a.c | 29 +++ .../gcc.target/i386/avx512fp16vl-vsubph-1b.c | 16 ++ 17 files changed, 865 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-helper.h create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vaddph-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vaddph-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vdivph-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vdivph-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vmulph-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vmulph-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vsubph-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vsubph-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-vaddph-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-vaddph-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-vdivph-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-vdivph-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-vmulph-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-vmulph-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-vsubph-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-vsubph-1b.c diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-helper.h b/gcc/testsuite/gcc.target/i386/avx512fp16-helper.h new file mode 100644 index 0000000..9fde88a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-helper.h @@ -0,0 +1,207 @@ +/* This file is used for emulation of avx512fp16 runtime tests. To + verify the correctness of _Float16 type calculation, the idea is + convert _Float16 to float and do emulation using float instructions. + _Float16 type should not be emulate or check by itself. */ + +#include "avx512f-helper.h" +#ifndef AVX512FP16_HELPER_INCLUDED +#define AVX512FP16_HELPER_INCLUDED + +#ifdef DEBUG +#include +#endif +#include +#include +#include + +/* Useful macros. */ +#define NOINLINE __attribute__((noinline,noclone)) +#define _ROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) +#define AVX512F_MAX_ELEM 512 / 32 + +/* Structure for _Float16 emulation */ +typedef union +{ + __m512 zmm; + __m512h zmmh; + __m256 ymm[2]; + __m256h ymmh[2]; + __m256i ymmi[2]; + __m128h xmmh[4]; + unsigned short u16[32]; + unsigned int u32[16]; + float f32[16]; + _Float16 f16[32]; +} V512; + +/* Global variables. */ +V512 src1, src2, src3; +int n_errs = 0; + +/* Helper function for packing/unpacking ph operands. */ +void NOINLINE +unpack_ph_2twops(V512 src, V512 *op1, V512 *op2) +{ + V512 v1; + + op1->zmm = _mm512_cvtph_ps(src.ymmi[0]); + v1.ymm[0] = _mm512_extractf32x8_ps(src.zmm, 1); + op2->zmm = _mm512_cvtph_ps(v1.ymmi[0]); +} + +V512 NOINLINE +pack_twops_2ph(V512 op1, V512 op2) +{ + V512 v1, v2, v3; + + v1.ymmi[0] = _mm512_cvtps_ph(op1.zmm, _MM_FROUND_TO_NEAREST_INT); + v2.ymmi[0] = _mm512_cvtps_ph(op2.zmm, _MM_FROUND_TO_NEAREST_INT); + + v3.zmm = _mm512_insertf32x8(v1.zmm, v2.ymm[0], 1); + + return v3; +} + +/* Helper function used for result debugging */ +#ifdef DEBUG +void NOINLINE +display_ps(const void *p, const char *banner, int n_elems) +{ + int i; + V512 *v = (V512*)p; + + if (banner) { + printf("%s", banner); + } + + for (i = 15; i >= n_elems; i--) { + printf(" --------"); + if (i == 8) { + printf("\n"); + if (banner) { + printf("%*s", (int)strlen(banner), ""); + } + } + } + + for (; i >= 0; i--) { + printf(" %x", v->u32[i]); + if (i == 8) { + printf("\n"); + if (banner) { + printf("%*s", (int)strlen(banner), ""); + } + } + } + printf("\n"); +} +#endif + +/* Functions/macros used for init/result checking. + Only check components within AVX512F_LEN. */ +#define TO_STRING(x) #x +#define STRINGIFY(x) TO_STRING(x) +#define NAME_OF(NAME) STRINGIFY(INTRINSIC (NAME)) + +#define CHECK_RESULT(res, exp, size, intrin) \ + check_results ((void*)res, (void*)exp, size,\ + NAME_OF(intrin)) + +/* To evaluate whether result match _Float16 precision, + only the last bit of real/emulate result could be + different. */ +void NOINLINE +check_results(void *got, void *exp, int n_elems, char *banner) +{ + int i; + V512 *v1 = (V512*)got; + V512 *v2 = (V512*)exp; + + for (i = 0; i < n_elems; i++) { + if (v1->u16[i] != v2->u16[i] && + ((v1->u16[i] > (v2->u16[i] + 1)) || + (v1->u16[i] < (v2->u16[i] - 1)))) { + +#ifdef DEBUG + printf("ERROR: %s failed at %d'th element: %x(%f) != %x(%f)\n", + banner ? banner : "", i, + v1->u16[i], *(float *)(&v1->u16[i]), + v2->u16[i], *(float *)(&v2->u16[i])); + display_ps(got, "got:", n_elems); + display_ps(exp, "exp:", n_elems); +#endif + n_errs++; + break; + } + } +} + +/* Functions for src/dest initialization */ +void NOINLINE +init_src() +{ + V512 v1, v2, v3, v4; + int i; + + for (i = 0; i < AVX512F_MAX_ELEM; i++) { + v1.f32[i] = -i + 1; + v2.f32[i] = i * 0.5f; + v3.f32[i] = i * 2.5f; + v4.f32[i] = i - 0.5f; + + src3.u32[i] = (i + 1) * 10; + } + + src1 = pack_twops_2ph(v1, v2); + src2 = pack_twops_2ph(v3, v4); +} + +void NOINLINE +init_dest(V512 * res, V512 * exp) +{ + int i; + V512 v1; + + for (i = 0; i < AVX512F_MAX_ELEM; i++) { + v1.f32[i] = 12 + 0.5f * i; + } + *res = *exp = pack_twops_2ph(v1, v1); +} + +#define EMULATE(NAME) EVAL(emulate_, NAME, AVX512F_LEN) + +#endif /* AVX512FP16_HELPER_INCLUDED */ + +/* Macros for AVX512VL Testing. Include V512 component usage + and mask type for emulation. */ + +#if AVX512F_LEN == 256 +#undef HF +#undef SF +#undef NET_MASK +#undef MASK_VALUE +#undef ZMASK_VALUE +#define NET_MASK 0xffff +#define MASK_VALUE 0xcccc +#define ZMASK_VALUE 0xfcc1 +#define HF(x) x.ymmh[0] +#define SF(x) x.ymm[0] +#elif AVX512F_LEN == 128 +#undef HF +#undef SF +#undef NET_MASK +#undef MASK_VALUE +#undef ZMASK_VALUE +#define NET_MASK 0xff +#define MASK_VALUE 0xcc +#define ZMASK_VALUE 0xc1 +#define HF(x) x.xmmh[0] +#define SF(x) x.xmm[0] +#else +#define NET_MASK 0xffffffff +#define MASK_VALUE 0xcccccccc +#define ZMASK_VALUE 0xfcc1fcc1 +#define HF(x) x.zmmh +#define SF(x) x.zmm +#endif + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vaddph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vaddph-1a.c new file mode 100644 index 0000000..0590c34 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vaddph-1a.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m512h res, res1, res2; +volatile __m512h x1, x2; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_add_ph (x1, x2); + res1 = _mm512_mask_add_ph (res1, m32, x1, x2); + res2 = _mm512_maskz_add_ph (m32, x1, x2); + + res = _mm512_add_round_ph (x1, x2, 8); + res1 = _mm512_mask_add_round_ph (res1, m32, x1, x2, 8); + res2 = _mm512_maskz_add_round_ph (m32, x1, x2, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vaddph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vaddph-1b.c new file mode 100644 index 0000000..1c412b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vaddph-1b.c @@ -0,0 +1,92 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(add_ph) (V512 * dest, V512 op1, V512 op2, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(op2, &v3, &v4); + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = v1.f32[i] + v3.f32[i]; + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = v2.f32[i] + v4.f32[i]; + } + + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(add_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_add_ph) (HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _add_ph); + + init_dest(&res, &exp); + EMULATE(add_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_add_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_add_ph); + + EMULATE(add_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_add_ph) (ZMASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_add_ph); + +#if AVX512F_LEN == 512 + EMULATE(add_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_add_round_ph) (HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _add_round_ph); + + init_dest(&res, &exp); + EMULATE(add_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_add_round_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_add_round_ph); + + EMULATE(add_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_add_round_ph) (ZMASK_VALUE, HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_add_round_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vdivph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vdivph-1a.c new file mode 100644 index 0000000..63f111f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vdivph-1a.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m512h res, res1, res2; +volatile __m512h x1, x2; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_div_ph (x1, x2); + res1 = _mm512_mask_div_ph (res1, m32, x1, x2); + res2 = _mm512_maskz_div_ph (m32, x1, x2); + + res = _mm512_div_round_ph (x1, x2, 8); + res1 = _mm512_mask_div_round_ph (res1, m32, x1, x2, 8); + res2 = _mm512_maskz_div_round_ph (m32, x1, x2, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vdivph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vdivph-1b.c new file mode 100644 index 0000000..c8b3821 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vdivph-1b.c @@ -0,0 +1,97 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(div_ph) (V512 * dest, V512 op1, V512 op2, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(op2, &v3, &v4); + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = v1.f32[i] / v3.f32[i]; + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = v2.f32[i] / v4.f32[i]; + } + + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(div_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_div_ph) (HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _div_ph); + + init_dest(&res, &exp); + EMULATE(div_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_div_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_div_ph); + + EMULATE(div_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_div_ph) (ZMASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_div_ph); + +#if AVX512F_LEN == 512 +#if AVX512F_LEN == 512 + EMULATE(div_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_div_round_ph) (HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _div_ph); + + init_dest(&res, &exp); + EMULATE(div_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_div_round_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_div_ph); + + EMULATE(div_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_div_round_ph) (ZMASK_VALUE, HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_div_ph); +#endif +#endif + + if (n_errs != 0) { + abort (); + } +} + + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmulph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmulph-1a.c new file mode 100644 index 0000000..1088e25 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmulph-1a.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m512h res, res1, res2; +volatile __m512h x1, x2; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_mul_ph (x1, x2); + res1 = _mm512_mask_mul_ph (res1, m32, x1, x2); + res2 = _mm512_maskz_mul_ph (m32, x1, x2); + + res = _mm512_mul_round_ph (x1, x2, 8); + res1 = _mm512_mask_mul_round_ph (res1, m32, x1, x2, 8); + res2 = _mm512_maskz_mul_round_ph (m32, x1, x2, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmulph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmulph-1b.c new file mode 100644 index 0000000..0d67e87 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmulph-1b.c @@ -0,0 +1,92 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(mul_ph) (V512 * dest, V512 op1, V512 op2, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(op2, &v3, &v4); + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = v1.f32[i] * v3.f32[i]; + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = v2.f32[i] * v4.f32[i]; + } + + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(mul_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_mul_ph) (HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mul_ph); + + init_dest(&res, &exp); + EMULATE(mul_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_mul_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_mul_ph); + + EMULATE(mul_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_mul_ph) (ZMASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_mul_ph); + +#if AVX512F_LEN == 512 + EMULATE(mul_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_mul_round_ph) (HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mul_ph); + + init_dest(&res, &exp); + EMULATE(mul_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_mul_round_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_mul_ph); + + EMULATE(mul_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_mul_round_ph) (ZMASK_VALUE, HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_mul_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vsubph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vsubph-1a.c new file mode 100644 index 0000000..bb5eda6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vsubph-1a.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m512h res, res1, res2; +volatile __m512h x1, x2; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_sub_ph (x1, x2); + res1 = _mm512_mask_sub_ph (res1, m32, x1, x2); + res2 = _mm512_maskz_sub_ph (m32, x1, x2); + + res = _mm512_sub_round_ph (x1, x2, 8); + res1 = _mm512_mask_sub_round_ph (res1, m32, x1, x2, 8); + res2 = _mm512_maskz_sub_round_ph (m32, x1, x2, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vsubph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vsubph-1b.c new file mode 100644 index 0000000..bd31d98 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vsubph-1b.c @@ -0,0 +1,93 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(sub_ph) (V512 * dest, V512 op1, V512 op2, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(op2, &v3, &v4); + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = v1.f32[i] - v3.f32[i]; + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = v2.f32[i] - v4.f32[i]; + } + + } + *dest = pack_twops_2ph(v5, v6); +} + + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(sub_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_sub_ph) (HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _sub_ph); + + init_dest(&res, &exp); + EMULATE(sub_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_sub_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_sub_ph); + + EMULATE(sub_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_sub_ph) (ZMASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_sub_ph); + +#if AVX512F_LEN == 512 + EMULATE(sub_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_sub_round_ph) (HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _sub_ph); + + init_dest(&res, &exp); + EMULATE(sub_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_sub_round_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_sub_ph); + + EMULATE(sub_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_sub_round_ph) (ZMASK_VALUE, HF(src1), HF(src2), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_sub_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vaddph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vaddph-1a.c new file mode 100644 index 0000000..354d897 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vaddph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m256h res1; +volatile __m128h res2; +volatile __m256h x1,x2; +volatile __m128h x3, x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_add_ph (x1, x2); + res1 = _mm256_mask_add_ph (res1, m16, x1, x2); + res1 = _mm256_maskz_add_ph (m16, x1, x2); + + res2 = _mm_add_ph (x3, x4); + res2 = _mm_mask_add_ph (res2, m8, x3, x4); + res2 = _mm_maskz_add_ph (m8, x3, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vaddph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vaddph-1b.c new file mode 100644 index 0000000..fcf6a90 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vaddph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vaddph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vaddph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vdivph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vdivph-1a.c new file mode 100644 index 0000000..038d9e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vdivph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m256h res1; +volatile __m128h res2; +volatile __m256h x1,x2; +volatile __m128h x3, x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_div_ph (x1, x2); + res1 = _mm256_mask_div_ph (res1, m16, x1, x2); + res1 = _mm256_maskz_div_ph (m16, x1, x2); + + res2 = _mm_div_ph (x3, x4); + res2 = _mm_mask_div_ph (res2, m8, x3, x4); + res2 = _mm_maskz_div_ph (m8, x3, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vdivph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vdivph-1b.c new file mode 100644 index 0000000..48965c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vdivph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vdivph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vdivph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vmulph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vmulph-1a.c new file mode 100644 index 0000000..26663c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vmulph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m256h res1; +volatile __m128h res2; +volatile __m256h x1,x2; +volatile __m128h x3, x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_mul_ph (x1, x2); + res1 = _mm256_mask_mul_ph (res1, m16, x1, x2); + res1 = _mm256_maskz_mul_ph (m16, x1, x2); + + res2 = _mm_mul_ph (x3, x4); + res2 = _mm_mask_mul_ph (res2, m8, x3, x4); + res2 = _mm_maskz_mul_ph (m8, x3, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vmulph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vmulph-1b.c new file mode 100644 index 0000000..2b3ba050 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vmulph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vmulph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vmulph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsubph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsubph-1a.c new file mode 100644 index 0000000..10e5cbf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsubph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m256h res1; +volatile __m128h res2; +volatile __m256h x1,x2; +volatile __m128h x3, x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_sub_ph (x1, x2); + res1 = _mm256_mask_sub_ph (res1, m16, x1, x2); + res1 = _mm256_maskz_sub_ph (m16, x1, x2); + + res2 = _mm_sub_ph (x3, x4); + res2 = _mm_mask_sub_ph (res2, m8, x3, x4); + res2 = _mm_maskz_sub_ph (m8, x3, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsubph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsubph-1b.c new file mode 100644 index 0000000..fa16218 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsubph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vsubph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vsubph-1b.c" + -- 2.7.4