From a68412117fa47786bd82ab79b009ec7933aef476 Mon Sep 17 00:00:00 2001 From: "Guo, Xuepeng" Date: Mon, 24 Dec 2018 19:39:26 -0800 Subject: [PATCH] AVX512FP16: Initial support for AVX512FP16 feature and scalar _Float16 instructions. gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_available_features): Detect FEATURE_AVX512FP16. * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512FP16_SET, OPTION_MASK_ISA_AVX512FP16_UNSET, OPTION_MASK_ISA2_AVX512FP16_SET, OPTION_MASK_ISA2_AVX512FP16_UNSET): New. (OPTION_MASK_ISA2_AVX512BW_UNSET, OPTION_MASK_ISA2_AVX512BF16_UNSET): Add AVX512FP16. (ix86_handle_option): Handle -mavx512fp16. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_AVX512FP16. * common/config/i386/i386-isas.h: Add entry for AVX512FP16. * config.gcc: Add avx512fp16intrin.h. * config/i386/avx512fp16intrin.h: New intrinsic header. * config/i386/cpuid.h: Add bit_AVX512FP16. * config/i386/i386-builtin-types.def: (FLOAT16): New primitive type. * config/i386/i386-builtins.c: Support _Float16 type for i386 backend. (ix86_register_float16_builtin_type): New function. (ix86_float16_type_node): New. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AVX512FP16__. * config/i386/i386-expand.c (ix86_expand_branch): Support HFmode. (ix86_prepare_fp_compare_args): Adjust TARGET_SSE_MATH && SSE_FLOAT_MODE_P to SSE_FLOAT_MODE_SSEMATH_OR_HF_P. (ix86_expand_fp_movcc): Ditto. * config/i386/i386-isa.def: Add PTA define for AVX512FP16. * config/i386/i386-options.c (isa2_opts): Add -mavx512fp16. (ix86_valid_target_attribute_inner_p): Add avx512fp16 attribute. * config/i386/i386.c (ix86_get_ssemov): Use vmovdqu16/vmovw/vmovsh for HFmode/HImode scalar or vector. (ix86_get_excess_precision): Use FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 when TARGET_AVX512FP16 existed. (sse_store_index): Use SFmode cost for HFmode cost. (inline_memory_move_cost): Add HFmode, and perfer SSE cost over GPR cost for HFmode. (ix86_hard_regno_mode_ok): Allow HImode in sse register. (ix86_mangle_type): Add manlging for _Float16 type. (inline_secondary_memory_needed): No memory is needed for 16bit movement between gpr and sse reg under TARGET_AVX512FP16. (ix86_multiplication_cost): Adjust TARGET_SSE_MATH && SSE_FLOAT_MODE_P to SSE_FLOAT_MODE_SSEMATH_OR_HF_P. (ix86_division_cost): Ditto. (ix86_rtx_costs): Ditto. (ix86_add_stmt_cost): Ditto. (ix86_optab_supported_p): Ditto. * config/i386/i386.h (VALID_AVX512F_SCALAR_MODE): Add HFmode. (SSE_FLOAT_MODE_SSEMATH_OR_HF_P): Add HFmode. (PTA_SAPPHIRERAPIDS): Add PTA_AVX512FP16. * config/i386/i386.md (mode): Add HFmode. (MODE_SIZE): Add HFmode. (isa): Add avx512fp16. (enabled): Handle avx512fp16. (ssemodesuffix): Add sh suffix for HFmode. (comm): Add mult, div. (plusminusmultdiv): New code iterator. (insn): Add mult, div. (*movhf_internal): Adjust for avx512fp16 instruction. (*movhi_internal): Ditto. (*cmpihf): New define_insn for HFmode. (*ieee_shf3): Likewise. (extendhf2): Likewise. (trunchf2): Likewise. (floathf2): Likewise. (*hf): Likewise. (cbranchhf4): New expander. (movhfcc): Likewise. (hf3): Likewise. (mulhf3): Likewise. (divhf3): Likewise. * config/i386/i386.opt: Add mavx512fp16. * config/i386/immintrin.h: Include avx512fp16intrin.h. * doc/invoke.texi: Add mavx512fp16. * doc/extend.texi: Add avx512fp16 Usage Notes. gcc/testsuite/ChangeLog: * gcc.target/i386/avx-1.c: Add -mavx512fp16 in dg-options. * gcc.target/i386/avx-2.c: Ditto. * gcc.target/i386/avx512-check.h: Check cpuid for AVX512FP16. * gcc.target/i386/funcspec-56.inc: Add new target attribute check. * gcc.target/i386/sse-13.c: Add -mavx512fp16. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * lib/target-supports.exp: (check_effective_target_avx512fp16): New. * g++.target/i386/float16-1.C: New test. * g++.target/i386/float16-2.C: Ditto. * g++.target/i386/float16-3.C: Ditto. * gcc.target/i386/avx512fp16-12a.c: Ditto. * gcc.target/i386/avx512fp16-12b.c: Ditto. * gcc.target/i386/float16-3a.c: Ditto. * gcc.target/i386/float16-3b.c: Ditto. * gcc.target/i386/float16-4a.c: Ditto. * gcc.target/i386/float16-4b.c: Ditto. * gcc.target/i386/pr54855-12.c: Ditto. * g++.dg/other/i386-2.C: Ditto. * g++.dg/other/i386-3.C: Ditto. Co-Authored-By: H.J. Lu Co-Authored-By: Liu Hongtao Co-Authored-By: Wang Hongyu Co-Authored-By: Xu Dianhong --- gcc/common/config/i386/cpuinfo.h | 2 + gcc/common/config/i386/i386-common.c | 26 +++- gcc/common/config/i386/i386-cpuinfo.h | 1 + gcc/common/config/i386/i386-isas.h | 1 + gcc/config.gcc | 2 +- gcc/config/i386/avx512fp16intrin.h | 53 ++++++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/i386-builtin-types.def | 1 + gcc/config/i386/i386-builtins.c | 23 ++++ gcc/config/i386/i386-c.c | 2 + gcc/config/i386/i386-expand.c | 5 +- gcc/config/i386/i386-isa.def | 1 + gcc/config/i386/i386-options.c | 4 +- gcc/config/i386/i386.c | 136 +++++++++++++------ gcc/config/i386/i386.h | 11 +- gcc/config/i386/i386.md | 172 ++++++++++++++++++++++--- gcc/config/i386/i386.opt | 4 + gcc/config/i386/immintrin.h | 4 + gcc/doc/extend.texi | 8 ++ gcc/doc/invoke.texi | 10 +- gcc/testsuite/g++.dg/other/i386-2.C | 2 +- gcc/testsuite/g++.dg/other/i386-3.C | 2 +- gcc/testsuite/g++.target/i386/float16-1.C | 8 ++ gcc/testsuite/g++.target/i386/float16-2.C | 14 ++ gcc/testsuite/g++.target/i386/float16-3.C | 10 ++ gcc/testsuite/gcc.target/i386/avx-1.c | 2 +- gcc/testsuite/gcc.target/i386/avx-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512-check.h | 3 + gcc/testsuite/gcc.target/i386/avx512fp16-12a.c | 21 +++ gcc/testsuite/gcc.target/i386/avx512fp16-12b.c | 27 ++++ gcc/testsuite/gcc.target/i386/float16-3a.c | 10 ++ gcc/testsuite/gcc.target/i386/float16-3b.c | 10 ++ gcc/testsuite/gcc.target/i386/float16-4a.c | 10 ++ gcc/testsuite/gcc.target/i386/float16-4b.c | 10 ++ gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + gcc/testsuite/gcc.target/i386/pr54855-12.c | 14 ++ gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- gcc/testsuite/lib/target-supports.exp | 13 +- 41 files changed, 561 insertions(+), 76 deletions(-) create mode 100644 gcc/config/i386/avx512fp16intrin.h create mode 100644 gcc/testsuite/g++.target/i386/float16-1.C create mode 100644 gcc/testsuite/g++.target/i386/float16-2.C create mode 100644 gcc/testsuite/g++.target/i386/float16-3.C create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-12a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-12b.c create mode 100644 gcc/testsuite/gcc.target/i386/float16-3a.c create mode 100644 gcc/testsuite/gcc.target/i386/float16-3b.c create mode 100644 gcc/testsuite/gcc.target/i386/float16-4a.c create mode 100644 gcc/testsuite/gcc.target/i386/float16-4b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-12.c diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index 458f41d..1835ac6 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -731,6 +731,8 @@ get_available_features (struct __processor_model *cpu_model, set_feature (FEATURE_AVX5124FMAPS); if (edx & bit_AVX512VP2INTERSECT) set_feature (FEATURE_AVX512VP2INTERSECT); + if (edx & bit_AVX512FP16) + set_feature (FEATURE_AVX512FP16); } __cpuid_count (7, 1, eax, ebx, ecx, edx); diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 76ab1a1..00c65ba 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -82,6 +82,8 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AVX5124VNNIW_SET OPTION_MASK_ISA2_AVX5124VNNIW #define OPTION_MASK_ISA_AVX512VBMI2_SET \ (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512F_SET) +#define OPTION_MASK_ISA_AVX512FP16_SET OPTION_MASK_ISA_AVX512BW_SET +#define OPTION_MASK_ISA2_AVX512FP16_SET OPTION_MASK_ISA2_AVX512FP16 #define OPTION_MASK_ISA_AVX512VNNI_SET \ (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA2_AVXVNNI_SET OPTION_MASK_ISA2_AVXVNNI @@ -231,6 +233,8 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AVX5124FMAPS_UNSET OPTION_MASK_ISA2_AVX5124FMAPS #define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW #define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2 +#define OPTION_MASK_ISA_AVX512FP16_UNSET OPTION_MASK_ISA_AVX512BW_UNSET +#define OPTION_MASK_ISA2_AVX512FP16_UNSET OPTION_MASK_ISA2_AVX512FP16 #define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI #define OPTION_MASK_ISA2_AVXVNNI_UNSET OPTION_MASK_ISA2_AVXVNNI #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ @@ -313,7 +317,8 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA2_AVX512BF16_UNSET \ | OPTION_MASK_ISA2_AVX5124FMAPS_UNSET \ | OPTION_MASK_ISA2_AVX5124VNNIW_UNSET \ - | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET) + | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET \ + | OPTION_MASK_ISA2_AVX512FP16_UNSET) #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \ (OPTION_MASK_ISA2_AVX512F_UNSET) #define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET @@ -326,7 +331,9 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA2_SSE3_UNSET | OPTION_MASK_ISA2_KL_UNSET) #define OPTION_MASK_ISA2_SSE_UNSET OPTION_MASK_ISA2_SSE2_UNSET -#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA2_AVX512BF16_UNSET +#define OPTION_MASK_ISA2_AVX512BW_UNSET \ + (OPTION_MASK_ISA2_AVX512BF16_UNSET \ + | OPTION_MASK_ISA2_AVX512FP16_UNSET) /* Set 1 << value as value of -malign-FLAG option. */ @@ -853,6 +860,21 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mavx512fp16: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX512FP16_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512FP16_SET; + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512FP16_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512FP16_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512FP16_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512FP16_UNSET; + } + return true; + case OPT_mavx512vnni: if (value) { diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index e68dd65..4e0659f 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -228,6 +228,7 @@ enum processor_features FEATURE_AESKLE, FEATURE_WIDEKL, FEATURE_AVXVNNI, + FEATURE_AVX512FP16, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index 898c18f..a678366 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -169,4 +169,5 @@ ISA_NAMES_TABLE_START ISA_NAMES_TABLE_ENTRY("aeskle", FEATURE_AESKLE, P_NONE, NULL) ISA_NAMES_TABLE_ENTRY("widekl", FEATURE_WIDEKL, P_NONE, "-mwidekl") ISA_NAMES_TABLE_ENTRY("avxvnni", FEATURE_AVXVNNI, P_NONE, "-mavxvnni") + ISA_NAMES_TABLE_ENTRY("avx512fp16", FEATURE_AVX512FP16, P_NONE, "-mavx512fp16") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index e3e9d8f..710f9ce 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -416,7 +416,7 @@ i[34567]86-*-* | x86_64-*-*) tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h amxbf16intrin.h x86gprintrin.h uintrintrin.h hresetintrin.h keylockerintrin.h avxvnniintrin.h - mwaitintrin.h" + mwaitintrin.h avx512fp16intrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h new file mode 100644 index 0000000..38d6316 --- /dev/null +++ b/gcc/config/i386/avx512fp16intrin.h @@ -0,0 +1,53 @@ +/* Copyright (C) 2019 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512FP16INTRIN_H_INCLUDED +#define __AVX512FP16INTRIN_H_INCLUDED + +#ifndef __AVX512FP16__ +#pragma GCC push_options +#pragma GCC target("avx512fp16") +#define __DISABLE_AVX512FP16__ +#endif /* __AVX512FP16__ */ + +/* Internal data types for implementing the intrinsics. */ +typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16))); +typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32))); +typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64))); + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. */ +typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__)); +typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__)); + +#ifdef __DISABLE_AVX512FP16__ +#undef __DISABLE_AVX512FP16__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512FP16__ */ + +#endif /* __AVX512FP16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index aebc17c..82b8050 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -126,6 +126,7 @@ #define bit_AVX5124VNNIW (1 << 2) #define bit_AVX5124FMAPS (1 << 3) #define bit_AVX512VP2INTERSECT (1 << 8) +#define bit_AVX512FP16 (1 << 23) #define bit_IBT (1 << 20) #define bit_UINTR (1 << 5) #define bit_PCONFIG (1 << 18) diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 3ca313c..1768b88 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -68,6 +68,7 @@ DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node) DEF_PRIMITIVE_TYPE (UINT16, short_unsigned_type_node) DEF_PRIMITIVE_TYPE (INT64, long_long_integer_type_node) DEF_PRIMITIVE_TYPE (UINT64, long_long_unsigned_type_node) +DEF_PRIMITIVE_TYPE (FLOAT16, ix86_float16_type_node) DEF_PRIMITIVE_TYPE (FLOAT, float_type_node) DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node) DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node) diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c index 204e290..1799701 100644 --- a/gcc/config/i386/i386-builtins.c +++ b/gcc/config/i386/i386-builtins.c @@ -125,6 +125,7 @@ BDESC_VERIFYS (IX86_BUILTIN_MAX, /* Table for the ix86 builtin non-function types. */ static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; +tree ix86_float16_type_node = NULL_TREE; /* Retrieve an element from the above table, building some of the types lazily. */ @@ -1344,6 +1345,26 @@ ix86_init_builtins_va_builtins_abi (void) } static void +ix86_register_float16_builtin_type (void) +{ + /* Provide the _Float16 type and float16_type_node if needed so that + it can be used in AVX512FP16 intrinsics and builtins. */ + if (!float16_type_node) + { + ix86_float16_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (ix86_float16_type_node) = 16; + SET_TYPE_MODE (ix86_float16_type_node, HFmode); + layout_type (ix86_float16_type_node); + } + else + ix86_float16_type_node = float16_type_node; + + if (!maybe_get_identifier ("_Float16") && TARGET_SSE2) + lang_hooks.types.register_builtin_type (ix86_float16_type_node, + "_Float16"); +} + +static void ix86_init_builtin_types (void) { tree float80_type_node, const_string_type_node; @@ -1371,6 +1392,8 @@ ix86_init_builtin_types (void) it. */ lang_hooks.types.register_builtin_type (float128_type_node, "__float128"); + ix86_register_float16_builtin_type (); + const_string_type_node = build_pointer_type (build_qualified_type (char_type_node, TYPE_QUAL_CONST)); diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 5ed0de0..cc64f85 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -598,6 +598,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__PTWRITE__"); if (isa_flag2 & OPTION_MASK_ISA2_AVX512BF16) def_or_undef (parse_in, "__AVX512BF16__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVX512FP16) + def_or_undef (parse_in, "__AVX512FP16__"); if (TARGET_MMX_WITH_SSE) def_or_undef (parse_in, "__MMX_WITH_SSE__"); if (isa_flag2 & OPTION_MASK_ISA2_ENQCMD) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 3f90f67..fb3873b 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -2351,6 +2351,7 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) switch (mode) { + case E_HFmode: case E_SFmode: case E_DFmode: case E_XFmode: @@ -2664,7 +2665,7 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) bool unordered_compare = ix86_unordered_fp_compare (code); rtx op0 = *pop0, op1 = *pop1; machine_mode op_mode = GET_MODE (op0); - bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); + bool is_sse = SSE_FLOAT_MODE_SSEMATH_OR_HF_P (op_mode); /* All of the unordered compare instructions only work on registers. The same is true of the fcomi compare instructions. The XFmode @@ -4149,7 +4150,7 @@ ix86_expand_fp_movcc (rtx operands[]) rtx op0 = XEXP (operands[1], 0); rtx op1 = XEXP (operands[1], 1); - if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) { machine_mode cmode; diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index a0d46cb..83d9302 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -108,3 +108,4 @@ DEF_PTA(HRESET) DEF_PTA(KL) DEF_PTA(WIDEKL) DEF_PTA(AVXVNNI) +DEF_PTA(AVX512FP16) diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index fee5a48..2cb87ce 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -223,7 +223,8 @@ static struct ix86_target_opts isa2_opts[] = { "-mhreset", OPTION_MASK_ISA2_HRESET }, { "-mkl", OPTION_MASK_ISA2_KL }, { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }, - { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI } + { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }, + { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 } }; static struct ix86_target_opts isa_opts[] = { @@ -1049,6 +1050,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16), IX86_ATTR_ISA ("hreset", OPT_mhreset), IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni), + IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 210fc42..b2a58b0 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -5508,6 +5508,14 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands) case MODE_SI: return "%vmovd\t{%1, %0|%0, %1}"; + case MODE_HI: + if (GENERAL_REG_P (operands[0])) + return "vmovw\t{%1, %k0|%k0, %1}"; + else if (GENERAL_REG_P (operands[1])) + return "vmovw\t{%k1, %0|%0, %k1}"; + else + return "vmovw\t{%1, %0|%0, %1}"; + case MODE_DF: if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) return "vmovsd\t{%d1, %0|%0, %d1}"; @@ -5520,6 +5528,12 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands) else return "%vmovss\t{%1, %0|%0, %1}"; + case MODE_HF: + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsh\t{%d1, %0|%0, %d1}"; + else + return "vmovsh\t{%1, %0|%0, %1}"; + case MODE_V1DF: gcc_assert (!TARGET_AVX); return "movlpd\t{%1, %0|%0, %1}"; @@ -13999,7 +14013,7 @@ output_387_binary_op (rtx_insn *insn, rtx *operands) if (is_sse) { - p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd"; + p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd"; strcat (buf, p); if (TARGET_AVX) @@ -19311,10 +19325,19 @@ inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, if (!TARGET_SSE2) return true; + if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))) + return true; + + int msize = GET_MODE_SIZE (mode); + /* Between SSE and general, we have moves no larger than word size. */ - if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)) - || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode) - || GET_MODE_SIZE (mode) > UNITS_PER_WORD) + if (msize > UNITS_PER_WORD) + return true; + + /* In addition to SImode moves, AVX512FP16 also enables HImode moves. */ + int minsize = GET_MODE_SIZE (TARGET_AVX512FP16 ? HImode : SImode); + + if (msize < minsize) return true; /* If the target says that inter-unit moves are more expensive @@ -19408,21 +19431,27 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to, static inline int sse_store_index (machine_mode mode) { - switch (GET_MODE_SIZE (mode)) - { - case 4: - return 0; - case 8: - return 1; - case 16: - return 2; - case 32: - return 3; - case 64: - return 4; - default: - return -1; - } + /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store + costs to processor_costs, which requires changes to all entries in + processor cost table. */ + if (mode == E_HFmode) + mode = E_SFmode; + + switch (GET_MODE_SIZE (mode)) + { + case 4: + return 0; + case 8: + return 1; + case 16: + return 2; + case 32: + return 3; + case 64: + return 4; + default: + return -1; + } } /* Return the cost of moving data of mode M between a @@ -19444,11 +19473,13 @@ static inline int inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) { int cost; + if (FLOAT_CLASS_P (regclass)) { int index; switch (mode) { + case E_HFmode: case E_SFmode: index = 0; break; @@ -19549,11 +19580,32 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) } break; case 2: - if (in == 2) - return MAX (ix86_cost->hard_register.int_load[1], - ix86_cost->hard_register.int_store[1]); - return in ? ix86_cost->hard_register.int_load[1] - : ix86_cost->hard_register.int_store[1]; + { + int cost; + if (in == 2) + cost = MAX (ix86_cost->hard_register.int_load[1], + ix86_cost->hard_register.int_store[1]); + else + cost = in ? ix86_cost->hard_register.int_load[1] + : ix86_cost->hard_register.int_store[1]; + + if (mode == E_HFmode) + { + /* Prefer SSE over GPR for HFmode. */ + int sse_cost; + int index = sse_store_index (mode); + if (in == 2) + sse_cost = MAX (ix86_cost->hard_register.sse_load[index], + ix86_cost->hard_register.sse_store[index]); + else + sse_cost = (in + ? ix86_cost->hard_register.sse_load [index] + : ix86_cost->hard_register.sse_store [index]); + if (sse_cost >= cost) + cost = sse_cost + 1; + } + return cost; + } default: if (in == 2) cost = MAX (ix86_cost->hard_register.int_load[2], @@ -19727,6 +19779,8 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) - XI mode - any of 512-bit wide vector mode - any scalar mode. */ + /* For AVX512FP16, vmovw supports movement of HImode + between gpr and sse registser. */ if (TARGET_AVX512F && (mode == XImode || VALID_AVX512F_REG_MODE (mode) @@ -20048,7 +20102,7 @@ ix86_multiplication_cost (const struct processor_costs *cost, if (VECTOR_MODE_P (mode)) inner_mode = GET_MODE_INNER (mode); - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) return inner_mode == DFmode ? cost->mulsd : cost->mulss; else if (X87_FLOAT_MODE_P (mode)) return cost->fmul; @@ -20100,7 +20154,7 @@ ix86_division_cost (const struct processor_costs *cost, if (VECTOR_MODE_P (mode)) inner_mode = GET_MODE_INNER (mode); - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) return inner_mode == DFmode ? cost->divsd : cost->divss; else if (X87_FLOAT_MODE_P (mode)) return cost->fdiv; @@ -20518,7 +20572,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return true; } - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) { *total = cost->addss; return false; @@ -20557,7 +20611,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* FALLTHRU */ case NEG: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) { *total = cost->sse_op; return false; @@ -20639,14 +20693,14 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case FLOAT_EXTEND: - if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) *total = 0; else *total = ix86_vec_cost (mode, cost->addss); return false; case FLOAT_TRUNCATE: - if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) *total = cost->fadd; else *total = ix86_vec_cost (mode, cost->addss); @@ -20656,7 +20710,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* SSE requires memory load for the constant operand. It may make sense to account for this. Of course the constant operand may or may not be reused. */ - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) *total = cost->sse_op; else if (X87_FLOAT_MODE_P (mode)) *total = cost->fabs; @@ -20665,7 +20719,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case SQRT: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; else if (X87_FLOAT_MODE_P (mode)) *total = cost->fsqrt; @@ -22154,6 +22208,10 @@ ix86_mangle_type (const_tree type) switch (TYPE_MODE (type)) { + case E_HFmode: + /* _Float16 is "DF16_". + Align with clang's decision in https://reviews.llvm.org/D33719. */ + return "DF16_"; case E_TFmode: /* __float128 is "g". */ return "g"; @@ -22777,7 +22835,7 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count, case MINUS_EXPR: if (kind == scalar_stmt) { - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) stmt_cost = ix86_cost->addss; else if (X87_FLOAT_MODE_P (mode)) stmt_cost = ix86_cost->fadd; @@ -22803,7 +22861,7 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count, break; case NEGATE_EXPR: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) stmt_cost = ix86_cost->sse_op; else if (X87_FLOAT_MODE_P (mode)) stmt_cost = ix86_cost->fchs; @@ -22859,7 +22917,7 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count, case BIT_XOR_EXPR: case BIT_AND_EXPR: case BIT_NOT_EXPR: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) stmt_cost = ix86_cost->sse_op; else if (VECTOR_MODE_P (mode)) stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); @@ -23574,14 +23632,18 @@ ix86_get_excess_precision (enum excess_precision_type type) /* The fastest type to promote to will always be the native type, whether that occurs with implicit excess precision or otherwise. */ - return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT; + return TARGET_AVX512FP16 + ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 + : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT; case EXCESS_PRECISION_TYPE_STANDARD: case EXCESS_PRECISION_TYPE_IMPLICIT: /* Otherwise, the excess precision we want when we are in a standards compliant mode, and the implicit precision we provide would be identical were it not for the unpredictable cases. */ - if (!TARGET_80387) + if (TARGET_AVX512FP16 && TARGET_SSE_MATH) + return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16; + else if (!TARGET_80387) return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT; else if (!TARGET_MIX_SSE_I387) { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f671dae..2ac8f3e 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1007,7 +1007,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_AVX512F_SCALAR_MODE(MODE) \ ((MODE) == DImode || (MODE) == DFmode || (MODE) == SImode \ - || (MODE) == SFmode) + || (MODE) == SFmode \ + || (TARGET_AVX512FP16 && ((MODE) == HImode || (MODE) == HFmode))) #define VALID_AVX512F_REG_MODE(MODE) \ ((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \ @@ -1046,7 +1047,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_FP_MODE_P(MODE) \ ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \ - || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) \ + || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) #define VALID_INT_MODE_P(MODE) \ ((MODE) == QImode || (MODE) == HImode \ @@ -1079,6 +1080,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define SSE_FLOAT_MODE_P(MODE) \ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode)) +#define SSE_FLOAT_MODE_SSEMATH_OR_HF_P(MODE) \ + ((SSE_FLOAT_MODE_P (MODE) && TARGET_SSE_MATH) \ + || (TARGET_AVX512FP16 && (MODE) == HFmode)) + #define FMA4_VEC_FLOAT_MODE_P(MODE) \ (TARGET_FMA4 && ((MODE) == V4SFmode || (MODE) == V2DFmode \ || (MODE) == V8SFmode || (MODE) == V4DFmode)) @@ -2295,7 +2300,7 @@ constexpr wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE - | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI; + | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16; constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1; constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 18b91c7..dcbbf2b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -499,7 +499,7 @@ ;; Main data type used by the insn (define_attr "mode" - "unknown,none,QI,HI,SI,DI,TI,OI,XI,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF, + "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF, V2DF,V2SF,V1DF,V8DF" (const_string "unknown")) @@ -835,8 +835,7 @@ sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, avx512bw,noavx512bw,avx512dq,noavx512dq, - avx512vl,noavx512vl, - avxvnni,avx512vnnivl" + avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16" (const_string "base")) ;; Define instruction set of MMX instructions @@ -888,6 +887,8 @@ (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI") (eq_attr "isa" "avx512vnnivl") (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL") + (eq_attr "isa" "avx512fp16") + (symbol_ref "TARGET_AVX512FP16") (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") @@ -909,6 +910,7 @@ (set_attr "type" "multi")]) (define_code_iterator plusminus [plus minus]) +(define_code_iterator plusminusmultdiv [plus minus mult div]) (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus]) @@ -924,7 +926,8 @@ ;; Mark commutative operators as such in constraints. (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%") - (minus "") (ss_minus "") (us_minus "")]) + (minus "") (ss_minus "") (us_minus "") + (mult "%") (div "")]) ;; Mapping of max and min (define_code_iterator maxmin [smax smin umax umin]) @@ -1024,7 +1027,8 @@ (minus "sub") (ss_minus "sssub") (us_minus "ussub") (sign_extend "extend") (zero_extend "zero_extend") (ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr") - (rotate "rotl") (rotatert "rotr")]) + (rotate "rotl") (rotatert "rotr") + (mult "mul") (div "div")]) ;; All integer modes. (define_mode_iterator SWI1248x [QI HI SI DI]) @@ -1092,8 +1096,9 @@ ;; compile time constant, it is faster to use than ;; GET_MODE_SIZE (mode). For XFmode which depends on ;; command line options just use GET_MODE_SIZE macro. -(define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8") (TI "16") - (SF "4") (DF "8") (XF "GET_MODE_SIZE (XFmode)") +(define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8") + (TI "16") (HF "2") (SF "4") (DF "8") + (XF "GET_MODE_SIZE (XFmode)") (V16QI "16") (V32QI "32") (V64QI "64") (V8HI "16") (V16HI "32") (V32HI "64") (V4SI "16") (V8SI "32") (V16SI "64") @@ -1226,8 +1231,8 @@ ;; All x87 floating point modes (define_mode_iterator X87MODEF [SF DF XF]) -;; All x87 floating point modes plus HF -(define_mode_iterator X87MODEFH [SF DF XF HF]) +;; All x87 floating point modes plus HFmode +(define_mode_iterator X87MODEFH [HF SF DF XF]) ;; All SSE floating point modes (define_mode_iterator SSEMODEF [SF DF TF]) @@ -1235,7 +1240,7 @@ ;; SSE instruction suffix for various modes (define_mode_attr ssemodesuffix - [(SF "ss") (DF "sd") + [(HF "sh") (SF "ss") (DF "sd") (V16SF "ps") (V8DF "pd") (V8SF "ps") (V4DF "pd") (V4SF "ps") (V2DF "pd") @@ -1500,6 +1505,23 @@ DONE; }) +(define_expand "cbranchhf4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:HF 1 "cmp_fp_expander_operand") + (match_operand:HF 2 "cmp_fp_expander_operand"))) + (set (pc) (if_then_else + (match_operator 0 "ix86_fp_comparison_operator" + [(reg:CC FLAGS_REG) + (const_int 0)]) + (label_ref (match_operand 3)) + (pc)))] + "TARGET_AVX512FP16" +{ + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); + DONE; +}) + (define_expand "cbranch4" [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand") @@ -1709,6 +1731,17 @@ (eq_attr "alternative" "0") (symbol_ref "true") (symbol_ref "false"))))]) + +(define_insn "*cmpihf" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:HF 0 "register_operand" "v") + (match_operand:HF 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512FP16" + "vcomish\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) ;; Push/pop instructions. @@ -2440,8 +2473,8 @@ (symbol_ref "true")))]) (define_insn "*movhi_internal" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k") - (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,*r,*km,*k,*k,CBC"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k,?r,?v,*v,*v,*m") + (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && ix86_hardreg_mov_ok (operands[0], operands[1])" @@ -2467,6 +2500,9 @@ gcc_unreachable (); } + case TYPE_SSEMOV: + return ix86_output_ssemov (insn, operands); + case TYPE_MSKLOG: if (operands[1] == const0_rtx) return "kxorw\t%0, %0, %0"; @@ -2481,8 +2517,15 @@ return "mov{w}\t{%1, %0|%0, %1}"; } } - [(set (attr "type") - (cond [(eq_attr "alternative" "4,5,6,7") + [(set (attr "isa") + (cond [(eq_attr "alternative" "9,10,11,12,13") + (const_string "avx512fp16") + ] + (const_string "*"))) + (set (attr "type") + (cond [(eq_attr "alternative" "9,10,11,12,13") + (const_string "ssemov") + (eq_attr "alternative" "4,5,6,7") (const_string "mskmov") (eq_attr "alternative" "8") (const_string "msklog") @@ -2507,6 +2550,8 @@ (set (attr "mode") (cond [(eq_attr "type" "imovx") (const_string "SI") + (eq_attr "alternative" "11") + (const_string "HF") (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand")) (const_string "SI") @@ -3731,7 +3776,10 @@ (eq_attr "alternative" "2") (const_string "sselog1") (eq_attr "alternative" "4,5,6,7") - (const_string "sselog") + (if_then_else + (match_test ("TARGET_AVX512FP16")) + (const_string "ssemov") + (const_string "sselog")) ] (const_string "ssemov"))) (set (attr "memory") @@ -3754,9 +3802,15 @@ (eq_attr "alternative" "2") (const_string "V4SF") (eq_attr "alternative" "4,5,6,7") - (const_string "TI") + (if_then_else + (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "TI")) (eq_attr "alternative" "3") - (const_string "SF") + (if_then_else + (match_test "TARGET_AVX512FP16") + (const_string "HF") + (const_string "SF")) ] (const_string "*")))]) @@ -4497,6 +4551,17 @@ emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); }) +(define_insn "extendhf2" + [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") + (float_extend:MODEF + (match_operand:HF 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512FP16" + "vcvtsh2\t{%1, %0, %0|%0, %0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + + (define_expand "extendxf2" [(set (match_operand:XF 0 "nonimmediate_operand") (float_extend:XF (match_operand:MODEF 1 "general_operand")))] @@ -4674,6 +4739,18 @@ (symbol_ref "flag_unsafe_math_optimizations") ] (symbol_ref "true")))]) + +;; Conversion from {SF,DF}mode to HFmode. + +(define_insn "trunchf2" + [(set (match_operand:HF 0 "register_operand" "=v") + (float_truncate:HF + (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512FP16" + "vcvt2sh\t{%1, %d0|%d0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) ;; Signed conversion to DImode. @@ -5050,6 +5127,16 @@ (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")] (symbol_ref "true")))]) +(define_insn "floathf2" + [(set (match_operand:HF 0 "register_operand" "=v") + (any_float:HF + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_AVX512FP16" + "vcvtsi2sh\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) + (define_insn "*floatdi2_i387" [(set (match_operand:MODEF 0 "register_operand" "=f") (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))] @@ -7653,6 +7740,13 @@ (match_operand:XF 2 "register_operand")))] "TARGET_80387") +(define_expand "hf3" + [(set (match_operand:HF 0 "register_operand") + (plusminus:HF + (match_operand:HF 1 "register_operand") + (match_operand:HF 2 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + (define_expand "3" [(set (match_operand:MODEF 0 "register_operand") (plusminus:MODEF @@ -8230,6 +8324,12 @@ (match_operand:XF 2 "register_operand")))] "TARGET_80387") +(define_expand "mulhf3" + [(set (match_operand:HF 0 "register_operand") + (mult:HF (match_operand:HF 1 "register_operand") + (match_operand:HF 2 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + (define_expand "mul3" [(set (match_operand:MODEF 0 "register_operand") (mult:MODEF (match_operand:MODEF 1 "register_operand") @@ -8247,6 +8347,12 @@ (match_operand:XF 2 "register_operand")))] "TARGET_80387") +(define_expand "divhf3" + [(set (match_operand:HF 0 "register_operand") + (div:HF (match_operand:HF 1 "register_operand") + (match_operand:HF 2 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + (define_expand "div3" [(set (match_operand:MODEF 0 "register_operand") (div:MODEF (match_operand:MODEF 1 "register_operand") @@ -16667,6 +16773,17 @@ (symbol_ref "true") (symbol_ref "false"))))]) +(define_insn "*hf" + [(set (match_operand:HF 0 "register_operand" "=v") + (plusminusmultdiv:HF + (match_operand:HF 1 "nonimmediate_operand" "v") + (match_operand:HF 2 "nonimmediate_operand" "vm")))] + "TARGET_AVX512FP16 + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "vsh\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "mode" "HF")]) + (define_insn "*rcpsf2_sse" [(set (match_operand:SF 0 "register_operand" "=x,x,x") (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")] @@ -19619,6 +19736,15 @@ operands[9] = replace_rtx (operands[6], operands[0], operands[1], true); }) +(define_expand "movhfcc" + [(set (match_operand:HF 0 "register_operand") + (if_then_else:HF + (match_operand 1 "comparison_operator") + (match_operand:HF 2 "register_operand") + (match_operand:HF 3 "register_operand")))] + "TARGET_AVX512FP16" + "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") + (define_expand "movcc" [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF @@ -19785,6 +19911,18 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. +(define_insn "*ieee_shf3" + [(set (match_operand:HF 0 "register_operand" "=v") + (unspec:HF + [(match_operand:HF 1 "register_operand" "v") + (match_operand:HF 2 "nonimmediate_operand" "vm")] + IEEE_MAXMIN))] + "TARGET_AVX512FP16" + "vsh\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "type" "sseadd") + (set_attr "mode" "HF")]) + (define_insn "*ieee_s3" [(set (match_operand:MODEF 0 "register_operand" "=x,v") (unspec:MODEF diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 7b8547b..ad36697 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1166,3 +1166,7 @@ Emit GNU_PROPERTY_X86_ISA_1_NEEDED GNU property. mmwait Target Mask(ISA2_MWAIT) Var(ix86_isa_flags2) Save Support MWAIT and MONITOR built-in functions and code generation. + +mavx512fp16 +Target Mask(ISA2_AVX512FP16) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512FP16 built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index f129de4..2421a78 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -94,6 +94,10 @@ #include +#ifdef __SSE2__ +#include +#endif + #include #include diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index f08238b..52bc4e5 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -1163,6 +1163,14 @@ as 32-bit precision. This may lead to inconsistent behavior between software emulation and AVX512-FP16 instructions. Using @option{-fexcess-precision=16} will force round back after each operation. +Using @option{-mavx512fp16} will generate AVX512-FP16 instructions instead of +software emulation. The default behavior of @code{FLT_EVAL_METHOD} is to round +after each operation. The same is true with @option{-fexcess-precision=standard} +and @option{-mfpmath=sse}. If there is no @option{-mfpmath=sse}, +@option{-fexcess-precision=standard} alone does the same thing as before, +It is useful for code that does not have @code{_Float16} and runs on the x87 +FPU. + @node Decimal Float @section Decimal Floating Types @cindex decimal floating types diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index e39dde0..d4b3a66 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1395,6 +1395,7 @@ See RS/6000 and PowerPC Options. -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol +-mavx512fp16 @gol -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mkl -mwidekl @gol @@ -31289,6 +31290,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mavx512bf16 @opindex mavx512bf16 @need 200 +@itemx -mavx512fp16 +@opindex mavx512fp16 +@need 200 @itemx -mgfni @opindex mgfni @need 200 @@ -31367,9 +31371,9 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP, XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, -UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI or CLDEMOTE -extended instruction sets. Each has a corresponding @option{-mno-} option to -disable use of these instructions. +UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16 +or CLDEMOTE extended instruction sets. Each has a corresponding +@option{-mno-} option to disable use of these instructions. These extensions are also available as built-in functions: see @ref{x86 Built-in Functions}, for details of the functions enabled and diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index 62b2132..fba3d1a 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 843aa2b..5cc0fa8 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.target/i386/float16-1.C b/gcc/testsuite/g++.target/i386/float16-1.C new file mode 100644 index 0000000..95d1ac2 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/float16-1.C @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-sse2" } */ + +_Float16/* { dg-error "does not name a type" } */ +foo (_Float16 x) +{ + return x; +} diff --git a/gcc/testsuite/g++.target/i386/float16-2.C b/gcc/testsuite/g++.target/i386/float16-2.C new file mode 100644 index 0000000..99eb797 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/float16-2.C @@ -0,0 +1,14 @@ +/* { dg-do assemble { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +union flt +{ + _Float16 flt; + short s; +}; + +_Float16 +foo (union flt x) +{ + return x.flt; +} diff --git a/gcc/testsuite/g++.target/i386/float16-3.C b/gcc/testsuite/g++.target/i386/float16-3.C new file mode 100644 index 0000000..9408785 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/float16-3.C @@ -0,0 +1,10 @@ +/* { dg-do assemble { target avx512fp16 } } */ +/* { dg-options "-O0 -mavx512fp16" } */ + +template void a(char *) {} +char b, d; +void c() +{ + a(&d); + a<_Float16>(&b); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 6178e38..f367607 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx-2.c b/gcc/testsuite/gcc.target/i386/avx-2.c index 986fbd8..1751c52 100644 --- a/gcc/testsuite/gcc.target/i386/avx-2.c +++ b/gcc/testsuite/gcc.target/i386/avx-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h b/gcc/testsuite/gcc.target/i386/avx512-check.h index 0a377db..0ad9064 100644 --- a/gcc/testsuite/gcc.target/i386/avx512-check.h +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h @@ -87,6 +87,9 @@ main () #ifdef AVX512VNNI && (ecx & bit_AVX512VNNI) #endif +#ifdef AVX512FP16 + && (edx & bit_AVX512FP16) +#endif #ifdef VAES && (ecx & bit_VAES) #endif diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-12a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-12a.c new file mode 100644 index 0000000..8888755 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-12a.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +_Float16 +__attribute__ ((noinline, noclone)) +do_max (_Float16 __A, _Float16 __B) +{ + return __A > __B ? __A : __B; +} + +_Float16 +__attribute__ ((noinline, noclone)) +do_min (_Float16 __A, _Float16 __B) +{ + return __A < __B ? __A : __B; +} + +/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */ +/* { dg-final { scan-assembler-times "vminsh\[ \\t\]" 1 } } */ +/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-12b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-12b.c new file mode 100644 index 0000000..c9e23bf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-12b.c @@ -0,0 +1,27 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +#include + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" +#include "avx512fp16-12a.c" + +static void +do_test (void) +{ + _Float16 x = 0.1f; + _Float16 y = -3.2f; + _Float16 z; + + z = do_max (x, y); + if (z != x) + abort (); + + z = do_min (x, y); + if (z != y) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/float16-3a.c b/gcc/testsuite/gcc.target/i386/float16-3a.c new file mode 100644 index 0000000..3846c8e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/float16-3a.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +_Float16 +foo (int x) +{ + return x; +} + +/* { dg-final { scan-assembler-times "vcvtsi2shl\[ \t\]+\[^\n\r]*%xmm0" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/float16-3b.c b/gcc/testsuite/gcc.target/i386/float16-3b.c new file mode 100644 index 0000000..247dd6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/float16-3b.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +_Float16 +foo (unsigned int x) +{ + return x; +} + +/* { dg-final { scan-assembler-times "vcvtusi2shl\[ \t\]+\[^\n\r]*%xmm0" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/float16-4a.c b/gcc/testsuite/gcc.target/i386/float16-4a.c new file mode 100644 index 0000000..6310825 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/float16-4a.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +_Float16 +foo (long long x) +{ + return x; +} + +/* { dg-final { scan-assembler-times "vcvtsi2shq\[ \t\]+\[^\n\r]*%xmm0" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/float16-4b.c b/gcc/testsuite/gcc.target/i386/float16-4b.c new file mode 100644 index 0000000..828d853 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/float16-4b.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +_Float16 +foo (unsigned long long x) +{ + return x; +} + +/* { dg-final { scan-assembler-times "vcvtusi2shq\[ \t\]+\[^\n\r]*%xmm0" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 79265c7..8499fdf 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -79,6 +79,7 @@ extern void test_hreset (void) __attribute__((__target__("hreset"))); extern void test_keylocker (void) __attribute__((__target__("kl"))); extern void test_widekl (void) __attribute__((__target__("widekl"))); extern void test_avxvnni (void) __attribute__((__target__("avxvnni"))); +extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); @@ -159,6 +160,7 @@ extern void test_no_hreset (void) __attribute__((__target__("no-hreset"))); extern void test_no_keylocker (void) __attribute__((__target__("no-kl"))); extern void test_no_widekl (void) __attribute__((__target__("no-widekl"))); extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni"))); +extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/pr54855-12.c b/gcc/testsuite/gcc.target/i386/pr54855-12.c new file mode 100644 index 0000000..2f8af39 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-12.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ +/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */ +/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */ +/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */ + +#include + +_Float16 +foo (_Float16 x, _Float16 y) +{ + x = x > y ? x : y; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 7029771..f5f5c11 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 4ce0fff..747d504 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 6e8b6f3..3341196 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -103,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") #endif /* Following intrinsics require immediate arguments. They @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") #endif #include test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 7faa053..86590ca 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -708,6 +708,6 @@ #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") #include diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index ad8f011..82dc131 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3164,7 +3164,7 @@ proc check_effective_target_has_q_floating_suffix { } { proc check_effective_target_float16 {} { return [check_no_compiler_messages_nocache float16 object { - _Float16 x; + _Float16 foo (_Float16 x) { return x; } } [add_options_for_float16 ""]] } @@ -8877,6 +8877,17 @@ proc check_prefer_avx128 { } { } +# Return 1 if avx512fp16 instructions can be compiled. + +proc check_effective_target_avx512fp16 { } { + return [check_no_compiler_messages avx512fp16 object { + void foo (void) + { + asm volatile ("vmovw %edi, %xmm0"); + } + } "-O2 -mavx512fp16" ] +} + # Return 1 if avx512f instructions can be compiled. proc check_effective_target_avx512f { } { -- 2.7.4