From ccd025e14c48690c37d0321be833a426df31dc16 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 14 Nov 2012 17:47:29 +0100 Subject: [PATCH] re PR target/47440 (Use LCM for vzeroupper insertion) PR target/47440 * config/i386/i386.c (gate_insert_vzeroupper): New function. (rest_of_handle_insert_vzeroupper): Ditto. (struct rtl_opt_pass pass_insert_vzeroupper): New. (ix86_option_override): Register vzeroupper insertion pass here. (ix86_check_avx256_register): Handle SUBREGs properly. (ix86_init_machine_status): Remove optimize_mode_switching[AVX_U128] initialization. From-SVN: r193503 --- gcc/ChangeLog | 16 ++++++++++-- gcc/config/i386/i386.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 77 insertions(+), 6 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ebebb1d..9d31d63 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2012-11-14 Uros Bizjak + Vladimir Yakovlev + + PR target/47440 + * config/i386/i386.c (gate_insert_vzeroupper): New function. + (rest_of_handle_insert_vzeroupper): Ditto. + (struct rtl_opt_pass pass_insert_vzeroupper): New. + (ix86_option_override): Register vzeroupper insertion pass here. + (ix86_check_avx256_register): Handle SUBREGs properly. + (ix86_init_machine_status): Remove optimize_mode_switching[AVX_U128] + initialization. + 2012-11-14 David Edelsohn * configure.ac (HAVE_LD_LARGE_TOC): Add AIX test. @@ -34,8 +46,8 @@ * cgraph.c (insert_new_cgraph_node_version): Use cgraph_get_node instead of cgraph_get_create_node. - * config/i386/i386.c (ix86_get_function_versions_dispatcher): Move ifunc - not supported code to the end. + * config/i386/i386.c (ix86_get_function_versions_dispatcher): Move + ifunc not supported code to the end. 2012-11-13 Martin Jambor diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index eee2b93..c295849 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2301,6 +2301,52 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = "btver2" }; +static bool +gate_insert_vzeroupper (void) +{ + return TARGET_VZEROUPPER; +} + +static unsigned int +rest_of_handle_insert_vzeroupper (void) +{ + int i; + + /* vzeroupper instructions are inserted immediately after reload to + account for possible spills from 256bit registers. The pass + reuses mode switching infrastructure by re-running mode insertion + pass, so disable entities that have already been processed. */ + for (i = 0; i < MAX_386_ENTITIES; i++) + ix86_optimize_mode_switching[i] = 0; + + ix86_optimize_mode_switching[AVX_U128] = 1; + + /* Call optimize_mode_switching. */ + pass_mode_switching.pass.execute (); + return 0; +} + +struct rtl_opt_pass pass_insert_vzeroupper = +{ + { + RTL_PASS, + "vzeroupper", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + gate_insert_vzeroupper, /* gate */ + rest_of_handle_insert_vzeroupper, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish | TODO_verify_rtl_sharing | + 0, /* todo_flags_finish */ + } +}; + /* Return true if a red-zone is in use. */ static inline bool @@ -3705,7 +3751,16 @@ ix86_option_override_internal (bool main_args_p) static void ix86_option_override (void) { + static struct register_pass_info insert_vzeroupper_info + = { &pass_insert_vzeroupper.pass, "reload", + 1, PASS_POS_INSERT_AFTER + }; + ix86_option_override_internal (true); + + + /* This needs to be done at start up. It's convenient to do it here. */ + register_pass (&insert_vzeroupper_info); } /* Update register usage after having seen the compiler flags. */ @@ -14988,10 +15043,15 @@ output_387_binary_op (rtx insn, rtx *operands) /* Check if a 256bit AVX register is referenced inside of EXP. */ static int -ix86_check_avx256_register (rtx *exp, void *data ATTRIBUTE_UNUSED) +ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED) { - if (REG_P (*exp) - && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (*exp))) + rtx exp = *pexp; + + if (GET_CODE (exp) == SUBREG) + exp = SUBREG_REG (exp); + + if (REG_P (exp) + && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))) return 1; return 0; @@ -23449,7 +23509,6 @@ ix86_init_machine_status (void) f = ggc_alloc_cleared_machine_function (); f->use_fast_prologue_epilogue_nregs = -1; f->call_abi = ix86_abi; - f->optimize_mode_switching[AVX_U128] = TARGET_VZEROUPPER; return f; } -- 2.7.4