From 348188bf59ad01b6575165ef52e72dd58d331735 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 15 Nov 2017 19:30:58 +0000 Subject: [PATCH] i386: Add X86_TUNE_EMIT_VZEROUPPER Add X86_TUNE_EMIT_VZEROUPPER to indicate if vzeroupper instruction should be inserted before a transfer of control flow out of the function. It is turned on by default unless we are tuning for KNL. Users can always use -mzeroupper or -mno-zeroupper to override X86_TUNE_EMIT_VZEROUPPER. gcc/ PR target/82990 * config/i386/i386.c (pass_insert_vzeroupper::gate): Remove TARGET_AVX512ER check. (ix86_option_override_internal): Set MASK_VZEROUPPER if neither -mzeroupper nor -mno-zeroupper is used and TARGET_EMIT_VZEROUPPER is set. * config/i386/i386.h (TARGET_EMIT_VZEROUPPER): New. * config/i386/x86-tune.def: Add X86_TUNE_EMIT_VZEROUPPER. gcc/testsuite/ PR target/82990 * gcc.target/i386/pr82942-2.c: Add -mtune=knl. * gcc.target/i386/pr82990-1.c: New test. * gcc.target/i386/pr82990-2.c: Likewise. * gcc.target/i386/pr82990-3.c: Likewise. * gcc.target/i386/pr82990-4.c: Likewise. * gcc.target/i386/pr82990-5.c: Likewise. * gcc.target/i386/pr82990-6.c: Likewise. * gcc.target/i386/pr82990-7.c: Likewise. From-SVN: r254783 --- gcc/ChangeLog | 11 +++++++++++ gcc/config/i386/i386.c | 5 +++-- gcc/config/i386/i386.h | 2 ++ gcc/config/i386/x86-tune.def | 4 ++++ gcc/testsuite/ChangeLog | 12 ++++++++++++ gcc/testsuite/gcc.target/i386/pr82942-2.c | 2 +- gcc/testsuite/gcc.target/i386/pr82990-1.c | 14 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr82990-2.c | 6 ++++++ gcc/testsuite/gcc.target/i386/pr82990-3.c | 6 ++++++ gcc/testsuite/gcc.target/i386/pr82990-4.c | 6 ++++++ gcc/testsuite/gcc.target/i386/pr82990-5.c | 14 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr82990-6.c | 6 ++++++ gcc/testsuite/gcc.target/i386/pr82990-7.c | 6 ++++++ 13 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-6.c create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-7.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7b271b5..eb4954e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2017-11-15 H.J. Lu + + PR target/82990 + * config/i386/i386.c (pass_insert_vzeroupper::gate): Remove + TARGET_AVX512ER check. + (ix86_option_override_internal): Set MASK_VZEROUPPER if + neither -mzeroupper nor -mno-zeroupper is used and + TARGET_EMIT_VZEROUPPER is set. + * config/i386/i386.h (TARGET_EMIT_VZEROUPPER): New. + * config/i386/x86-tune.def: Add X86_TUNE_EMIT_VZEROUPPER. + 2017-11-15 Will Schmidt * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c5e84a0..c6ca071 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2497,7 +2497,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return TARGET_AVX && !TARGET_AVX512ER + return TARGET_AVX && TARGET_VZEROUPPER && flag_expensive_optimizations && !optimize_size; } @@ -4666,7 +4666,8 @@ ix86_option_override_internal (bool main_args_p, if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES) sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH"); - if (!(opts_set->x_target_flags & MASK_VZEROUPPER)) + if (!(opts_set->x_target_flags & MASK_VZEROUPPER) + && TARGET_EMIT_VZEROUPPER) opts->x_target_flags |= MASK_VZEROUPPER; if (!(opts_set->x_target_flags & MASK_STV)) opts->x_target_flags |= MASK_STV; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index e3e55da..a45e2df 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -517,6 +517,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] #define TARGET_ONE_IF_CONV_INSN \ ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN] +#define TARGET_EMIT_VZEROUPPER \ + ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 99282c8..19fd2b5 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -543,3 +543,7 @@ DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", ~0U) arithmetic to 32bit via PROMOTE_MODE macro. This code generation scheme is usually used for RISC targets. */ DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0U) + +/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion + before a transfer of control flow out of the function. */ +DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3d18653..06d87b9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,15 @@ +2017-11-15 H.J. Lu + + PR target/82990 + * gcc.target/i386/pr82942-2.c: Add -mtune=knl. + * gcc.target/i386/pr82990-1.c: New test. + * gcc.target/i386/pr82990-2.c: Likewise. + * gcc.target/i386/pr82990-3.c: Likewise. + * gcc.target/i386/pr82990-4.c: Likewise. + * gcc.target/i386/pr82990-5.c: Likewise. + * gcc.target/i386/pr82990-6.c: Likewise. + * gcc.target/i386/pr82990-7.c: Likewise. + 2017-11-15 Will Schmidt * gcc.target/powerpc/builtins-3-p9.c: Add -O1, update diff --git a/gcc/testsuite/gcc.target/i386/pr82942-2.c b/gcc/testsuite/gcc.target/i386/pr82942-2.c index cb0e337..ddb4e68 100644 --- a/gcc/testsuite/gcc.target/i386/pr82942-2.c +++ b/gcc/testsuite/gcc.target/i386/pr82942-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -mavx512er -O2" } */ +/* { dg-options "-mavx512f -mavx512er -mtune=knl -O2" } */ #include "pr82941-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c new file mode 100644 index 0000000..ff1d6d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=knl -mvzeroupper" } */ + +#include + +extern __m512d y, z; + +void +pr82941 () +{ + z = y; +} + +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-2.c b/gcc/testsuite/gcc.target/i386/pr82990-2.c new file mode 100644 index 0000000..0d3cb23 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-2.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-vzeroupper" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c new file mode 100644 index 0000000..201fa98 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mavx512er -mvzeroupper -O2" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-4.c b/gcc/testsuite/gcc.target/i386/pr82990-4.c new file mode 100644 index 0000000..09f161c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-4.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512er -mno-vzeroupper -O2" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c new file mode 100644 index 0000000..9932bdc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mtune=generic" } */ + +#include + +extern __m512d y, z; + +void +pr82941 () +{ + z = y; +} + +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-6.c b/gcc/testsuite/gcc.target/i386/pr82990-6.c new file mode 100644 index 0000000..063a61c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-6.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -mtune=knl" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-7.c b/gcc/testsuite/gcc.target/i386/pr82990-7.c new file mode 100644 index 0000000..dedde8b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-7.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -mtune=generic -mtune-ctrl=^emit_vzeroupper" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */ -- 2.7.4