case E_V2SFmode:
case E_V2SImode:
case E_V4HImode:
+ case E_V4HFmode:
+ case E_V2HFmode:
case E_V8QImode:
classes[0] = X86_64_SSE_CLASS;
return 1;
case E_V8QImode:
case E_V4HImode:
+ case E_V4HFmode:
case E_V2SImode:
case E_V2SFmode:
case E_V1TImode:
case E_V8QImode:
case E_V4HImode:
+ case E_V4HFmode:
case E_V2SImode:
case E_V2SFmode:
case E_V1TImode:
|| (MODE) == TImode)
+/* Nonzero if MODE is one of the HF vector modes tested below: the 16-,
+   32- and 64-byte V8HF/V16HF/V32HF modes, plus the 4-byte V2HFmode.  */
#define VALID_AVX512FP16_REG_MODE(MODE) \
- ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
+ ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode \
+ || (MODE) == V2HFmode)
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
|| (MODE) == V2DImode || (MODE) == DFmode || (MODE) == HFmode)
+/* VALID_SSE2_REG_MODE extended with the HF vector modes V8HFmode,
+   V4HFmode and V2HFmode.  */
#define VALID_SSE2_REG_VHF_MODE(MODE) \
- (VALID_SSE2_REG_MODE (MODE) || (MODE) == V8HFmode)
+ (VALID_SSE2_REG_MODE (MODE) || (MODE) == V8HFmode \
+ || (MODE) == V4HFmode || (MODE) == V2HFmode)
#define VALID_SSE_REG_MODE(MODE) \
((MODE) == V1TImode || (MODE) == TImode \
#define VALID_MMX_REG_MODE_3DNOW(MODE) \
((MODE) == V2SFmode || (MODE) == SFmode)
+/* Nonzero if MODE is accepted by this MMX register-mode check.
+   V4HFmode is included so that 8-byte _Float16 vectors match the
+   ia32 psABI.  */
#define VALID_MMX_REG_MODE(MODE) \
((MODE) == V1DImode || (MODE) == DImode \
|| (MODE) == V2SImode || (MODE) == SImode \
- || (MODE) == V4HImode || (MODE) == V8QImode)
+ || (MODE) == V4HImode || (MODE) == V8QImode \
+ || (MODE) == V4HFmode)
#define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
|| (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \
|| (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \
|| (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \
- || (MODE) == V16SFmode || VALID_AVX512FP16_REG_MODE (MODE))
+ || (MODE) == V16SFmode || (MODE) == V32HFmode || (MODE) == V16HFmode \
+ || (MODE) == V8HFmode)
#define X87_FLOAT_MODE_P(MODE) \
(TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))
;; Main data type used by the insn
(define_attr "mode"
"unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
- V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF"
+ V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF"
(const_string "unknown"))
;; The CPU unit operations uses.
(V1TI "16") (V2TI "32") (V4TI "64")
(V2DF "16") (V4DF "32") (V8DF "64")
(V4SF "16") (V8SF "32") (V16SF "64")
- (V8HF "16") (V16HF "32") (V32HF "64")])
+ (V8HF "16") (V16HF "32") (V32HF "64")
+ (V4HF "8") (V2HF "4")])
;; Double word integer modes as mode attribute.
(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
;; All 8-byte vector modes handled by MMX
-(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
+(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF])
(define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
;; Mix-n-match
(define_mode_iterator MMXMODE24 [V4HI V2SI])
(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
-;; All 4-byte integer vector modes
-(define_mode_iterator V_32 [V4QI V2HI V1SI])
+;; All 4-byte integer/float16 vector modes
+(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF])
;; 4-byte integer vector modes
(define_mode_iterator VI_32 [V4QI V2HI])
;; V2S* modes
(define_mode_iterator V2FI [V2SF V2SI])
+;; 8-byte (V4HF) and 4-byte (V2HF) float16 vector modes
+(define_mode_iterator VHF_32_64 [V4HF V2HF])
+
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr mmxvecsize
[(V8QI "b") (V4QI "b") (V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
(eq_attr "alternative" "11,12")
(cond [(match_test "<MODE>mode == V2SFmode")
(const_string "V4SF")
+ (match_test "<MODE>mode == V4HFmode")
+ (const_string "V4SF")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
(const_string "TI"))
(and (eq_attr "alternative" "13")
- (ior (and (match_test "<MODE>mode == V2SFmode")
- (not (match_test "TARGET_MMX_WITH_SSE")))
- (not (match_test "TARGET_SSE2"))))
+ (ior (ior (and (match_test "<MODE>mode == V2SFmode")
+ (not (match_test "TARGET_MMX_WITH_SSE")))
+ (not (match_test "TARGET_SSE2")))
+ (match_test "<MODE>mode == V4HFmode")))
(const_string "V2SF")
(and (eq_attr "alternative" "14")
- (ior (match_test "<MODE>mode == V2SFmode")
- (not (match_test "TARGET_SSE2"))))
+ (ior (ior (match_test "<MODE>mode == V2SFmode")
+ (not (match_test "TARGET_SSE2")))
+ (match_test "<MODE>mode == V4HFmode")))
(const_string "V2SF")
]
(const_string "DI")))
(const_string "*")))
(set (attr "mode")
(cond [(eq_attr "alternative" "2,3")
- (cond [(match_test "TARGET_AVX")
+ (cond [(match_test "<MODE>mode == V2HFmode")
+ (const_string "V4SF")
+ (match_test "TARGET_AVX")
(const_string "TI")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
]
(const_string "TI"))
+ (and (eq_attr "alternative" "4,5")
+ (match_test "<MODE>mode == V2HFmode"))
+ (const_string "SF")
]
(const_string "SI")))
(set (attr "preferred_for_speed")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
+;; Parallel half-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Arithmetic over the plusminusmultdiv codes for the V4HF and V2HF modes
+;; of VHF_32_64.  Each pattern is emitted as the matching v<insn>ph
+;; instruction on "v" registers and is enabled only when both
+;; TARGET_AVX512FP16 and TARGET_AVX512VL hold.  The "type" attribute is
+;; ssemul for MULT, ssediv for DIV and sseadd otherwise.
+;; NOTE(review): the "mode" attribute is V8HF, which suggests the
+;; instruction operates on a full 128-bit register; confirm that lanes
+;; beyond the 4- or 8-byte payload cannot raise spurious FP exceptions
+;; (for example in the DIV case).
+(define_insn "<insn><mode>3"
+ [(set (match_operand:VHF_32_64 0 "register_operand" "=v")
+ (plusminusmultdiv:VHF_32_64
+ (match_operand:VHF_32_64 1 "register_operand" "<comm>v")
+ (match_operand:VHF_32_64 2 "register_operand" "v")))]
+ "TARGET_AVX512FP16 && TARGET_AVX512VL"
+ "v<insn>ph\t{%2, %1, %0|%0, %1, %2}"
+ [(set (attr "type")
+ (cond [(match_test "<CODE> == MULT")
+ (const_string "ssemul")
+ (match_test "<CODE> == DIV")
+ (const_string "ssediv")]
+ (const_string "sseadd")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V8HF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vaddph" 2 } } */
+/* { dg-final { scan-assembler-times "vsubph" 2 } } */
+/* { dg-final { scan-assembler-times "vmulph" 2 } } */
+/* { dg-final { scan-assembler-times "vdivph" 2 } } */
+
+#define DO_PRAGMA(X) _Pragma(#X)
+
+/* Define vecop_v<size>hf<name> (dst, src1, src2), which stores
+   src1[i] op src2[i] into dst[i] for every i below SIZE.  The loop
+   carries a "GCC unroll SIZE" pragma and the function is built with
+   optimize("tree-slp-vectorize").  The scan-assembler-times directives
+   above expect each of vaddph/vsubph/vmulph/vdivph exactly twice,
+   matching the two instantiations (sizes 4 and 2) of each operator
+   below.  */
+#define VEC_OP_VV(size, op, name) \
+void \
+__attribute__ ((noinline, noclone, optimize("tree-slp-vectorize"))) \
+vecop_v##size##hf##name (_Float16 * restrict dst, \
+ _Float16 * restrict src1, _Float16 * restrict src2) \
+{ \
+ int i; \
+ DO_PRAGMA (GCC unroll size) \
+ for (i = 0; i < size; i++) \
+ dst[i] = src1[i] op src2[i]; \
+}
+
+VEC_OP_VV(4, +, add)
+VEC_OP_VV(2, +, add)
+VEC_OP_VV(4, -, sub)
+VEC_OP_VV(2, -, sub)
+VEC_OP_VV(4, *, mul)
+VEC_OP_VV(2, *, mul)
+VEC_OP_VV(4, /, div)
+VEC_OP_VV(2, /, div)
--- /dev/null
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+static void vec_op_test (void);
+#define DO_TEST vec_op_test
+#define AVX512FP16
+#define AVX512VL
+#include "avx512f-check.h"
+#include "avx512fp16-64-32-vecop-1.c"
+
+_Float16 a[4], b[4], fexp[4], fref[4];
+
+/* Reference kernel generator: defines scalar_vecop_v<size>hf<name>
+   (dst, src1, src2), which writes src1[k] op src2[k] to dst[k] for
+   every k below SIZE.  */
+#define EMULATE_VEC_OP_VV(size, op, name) \
+void \
+__attribute__ ((noinline, noclone)) \
+scalar_vecop_v##size##hf##name (_Float16 * restrict dst, \
+				_Float16 * restrict src1, \
+				_Float16 * restrict src2) \
+{ \
+  for (int k = 0; k < size; k++) \
+    dst[k] = src1[k] op src2[k]; \
+}
+
+EMULATE_VEC_OP_VV (4, +, add)
+EMULATE_VEC_OP_VV (2, +, add)
+EMULATE_VEC_OP_VV (4, -, sub)
+EMULATE_VEC_OP_VV (2, -, sub)
+EMULATE_VEC_OP_VV (4, *, mul)
+EMULATE_VEC_OP_VV (2, *, mul)
+EMULATE_VEC_OP_VV (4, /, div)
+EMULATE_VEC_OP_VV (2, /, div)
+
+/* Reset the global buffers: load the operands A and B and give FEXP and
+   FREF identical contents.  */
+void init()
+{
+  for (int k = 0; k < 4; k++)
+    {
+      fref[k] = 2.75 * k;
+      fexp[k] = fref[k];
+      b[k] = k * 1.5;
+      a[k] = k + 0.5;
+    }
+}
+
+/* Compare the first SIZE 16-bit words at A and B.  Returns 1 when they
+   are all bitwise equal and 0 at the first mismatch.  */
+int check_cond(void *a, void *b, int size)
+{
+  unsigned short *lhs = (unsigned short *)a;
+  unsigned short *rhs = (unsigned short *)b;
+  int idx = 0;
+
+  while (idx < size)
+    {
+      if (lhs[idx] != rhs[idx])
+	return 0;
+      idx++;
+    }
+  return 1;
+}
+
+/* Run the scalar reference kernel and the vectorized kernel on the same
+   operands A and B, then require bitwise-identical results.  Both
+   kernels take (dst, src1, src2), so the result buffers FEXP and FREF
+   must be passed first.  Passing them last would leave them at the
+   identical values written by init () and the comparison could never
+   fail.  */
+#define TEST_VEC_OP_VV(size, name) \
+{ \
+  init (); \
+  scalar_vecop_v##size##hf##name (fexp, a, b); \
+  vecop_v##size##hf##name (fref, a, b); \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort(); \
+}
+
+/* Test driver, registered as DO_TEST above (presumably invoked by the
+   avx512f-check.h harness -- confirm there).  Expands TEST_VEC_OP_VV for
+   add, sub, mul and div at sizes 4 and 2.  */
+static void vec_op_test()
+{
+ TEST_VEC_OP_VV (4, add)
+ TEST_VEC_OP_VV (2, add)
+ TEST_VEC_OP_VV (4, sub)
+ TEST_VEC_OP_VV (2, sub)
+ TEST_VEC_OP_VV (4, mul)
+ TEST_VEC_OP_VV (2, mul)
+ TEST_VEC_OP_VV (4, div)
+ TEST_VEC_OP_VV (2, div)
+}
/* { dg-final { scan-assembler-times "vcvtqq2phz\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtuqq2phz\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvtqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvtuqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvtqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvtuqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtdq2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtudq2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtdq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtudq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvtdq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvtudq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtdq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
EXTENDHFVV(4, sf)
/* { dg-final { scan-assembler-times "vcvtpd2phz\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvtpd2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvtpd2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtpd2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtps2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtps2phxy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvtps2phxx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtps2phxx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+typedef _Float16 v4hf __attribute__ ((vector_size (8)));
+typedef _Float16 v2hf __attribute__ ((vector_size (4)));
+
+/* Return the single V4HF argument unchanged.  */
+v4hf
+v4hf_abi_1 (v4hf a)
+{
+ return a;
+}
+
+/* Return the third V4HF argument.  The dg-final directives that follow
+   expect this to compile to a move from %mm2 to %mm0 on ia32 and from
+   %xmm2 to %xmm0 elsewhere.  */
+v4hf
+v4hf_abi_3 (v4hf a, v4hf b, v4hf c)
+{
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "movq\[\\t \]*%mm2, %mm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovaps\[\\t \]*%xmm2, %xmm0" 1 { target { ! ia32 } } } } */
+
+/* Return the fourth V4HF argument.  The dg-final directives that follow
+   expect a load from 4(%esp) into %mm0 on ia32 and a move from %xmm3 to
+   %xmm0 elsewhere.  */
+v4hf
+v4hf_abi_4 (v4hf a, v4hf b, v4hf c, v4hf d)
+{
+ return d;
+}
+
+/* { dg-final { scan-assembler-times "movq\[\\t \]*4\\(%esp\\), %mm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovaps\[\\t \]*%xmm3, %xmm0" 1 { target { ! ia32 } } } } */
+
+/* Return the second V2HF argument.  The dg-final directives that follow
+   expect a load from 8(%esp) into %eax on ia32 and a move from %xmm1 to
+   %xmm0 elsewhere.  */
+v2hf
+v2hf_test (v2hf a, v2hf b, v2hf c, v2hf d)
+{
+ return b;
+}
+
+/* { dg-final { scan-assembler-times "movl\[\\t \]*8\\(%esp\\), %eax" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovaps\[\\t \]*%xmm1, %xmm0" 1 { target { ! ia32 } } } } */