From f3bef205c2b530bfec897c08bf462ff66110bb35 Mon Sep 17 00:00:00 2001 From: kyukhin Date: Thu, 4 Oct 2012 08:13:18 +0000 Subject: [PATCH] * config/i386/i386.c (ix86_dep_by_shift_count_body) : Add check on reload_completed since it can be invoked before register allocation phase in pre-reload schedule. (ia32_multipass_dfa_lookahead) : Do not use dfa_lookahead for pre-reload schedule to save compile time. (ix86_sched_reorder) : Do not perform ready list reordering for pre-reload schedule to save compile time. (insn_is_function_arg) : New function. Returns true if lhs of insn is HW function argument register. (add_parameter_dependencies) : New function. Add output dependencies for chain of function adjacent arguments only if there is a move to likely spilled HW registers. Return first argument if at least one dependence was added or NULL otherwise. (avoid_func_arg_motion) : New function. Add output or anti dependency from insn to first_arg to restrict code motion. (add_dependee_for_func_arg) : New function. Avoid cross block motion of function argument through adding dependency from the first non-jump insn in bb. (ix86_dependencies_evaluation_hook) : New function. Hook for pre-reload schedule: avoid motion of function arguments passed in likely spilled HW registers. (ix86_adjust_priority) : New function. Hook for pre-reload schedule: set priority of moves from likely spilled HW registers to maximum to schedule them as soon as possible. (ix86_sched_init_global): Do not perform multipass scheduling for pre-reload schedule to save compile time. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@192065 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 29 ++++++ gcc/config/i386/i386.c | 265 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 268 insertions(+), 26 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8dc8174..e700643 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,32 @@ +2012-10-04 Yuri Rumyantsev + + * config/i386/i386.c (ix86_dep_by_shift_count_body) : Add + check on reload_completed since it can be invoked before + register allocation phase in pre-reload schedule. + (ia32_multipass_dfa_lookahead) : Do not use dfa_lookahead for pre-reload + schedule to save compile time. + (ix86_sched_reorder) : Do not perform ready list reordering for pre-reload + schedule to save compile time. + (insn_is_function_arg) : New function. Returns true if lhs of insn is + HW function argument register. + (add_parameter_dependencies) : New function. Add output dependencies + for chain of function adjacent arguments only if there is a move to + likely spilled HW registers. Return first argument if at least one + dependence was added or NULL otherwise. + (avoid_func_arg_motion) : New function. Add output or anti dependency + from insn to first_arg to restrict code motion. + (add_dependee_for_func_arg) : New function. Avoid cross block motion of + function argument through adding dependency from the first non-jump + insn in bb. + (ix86_dependencies_evaluation_hook) : New function. Hook for pre-reload schedule: + avoid motion of function arguments passed in likely spilled + HW registers. + (ix86_adjust_priority) : New function. Hook for pre-reload schedule: set priority + of moves from likely spilled HW registers to maximum to schedule them + as soon as possible. + (ix86_sched_init_global): Do not perform multipass scheduling for pre-reload + schedule to save compile time. + 2012-10-04 Uros Bizjak * configure.ac (noexception_flags): Add -fasynchronous-unwind-tables. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index de9c687..c10e49458 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17520,9 +17520,16 @@ ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) rtx shift_count = XEXP (shift_rtx, 1); /* Return true if shift count is dest of SET_BODY. */ - if (REG_P (shift_count) - && true_regnum (set_dest) == true_regnum (shift_count)) - return true; + if (REG_P (shift_count)) + { + /* Add check since it can be invoked before register + allocation in pre-reload schedule. */ + if (reload_completed + && true_regnum (set_dest) == true_regnum (shift_count)) + return true; + else if (REGNO(set_dest) == REGNO(shift_count)) + return true; + } } return false; @@ -24278,7 +24285,10 @@ ia32_multipass_dfa_lookahead (void) /* Generally, we want haifa-sched:max_issue() to look ahead as far as many instructions can be executed on a cycle, i.e., issue_rate. I wonder why tuning for many CPUs does not do this. */ - return ix86_issue_rate (); + if (reload_completed) + return ix86_issue_rate (); + /* Don't use lookahead for pre-reload schedule to save compile time. */ + return 0; default: return 0; @@ -24311,6 +24321,9 @@ ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, /* Do reodering for Atom only. */ if (ix86_tune != PROCESSOR_ATOM) return issue_rate; + /* Do not perform ready list reordering for pre-reload schedule pass. */ + if (!reload_completed) + return issue_rate; /* Nothing to do if ready list contains only 1 instruction. */ if (n_ready <= 1) return issue_rate; @@ -24393,7 +24406,198 @@ ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, return issue_rate; } - +static bool +ix86_class_likely_spilled_p (reg_class_t); + +/* Returns true if lhs of insn is HW function argument register and set up + is_spilled to true if it is likely spilled HW register. 
*/ +static bool +insn_is_function_arg (rtx insn, bool* is_spilled) +{ + rtx dst; + + if (!NONDEBUG_INSN_P (insn)) + return false; + insn = PATTERN (insn); + if (GET_CODE (insn) == PARALLEL) + insn = XVECEXP (insn, 0, 0); + if (GET_CODE (insn) != SET) + return false; + dst = SET_DEST (insn); + if (REG_P (dst) && HARD_REGISTER_P (dst) + && ix86_function_arg_regno_p (REGNO (dst))) + { + /* Is it likely spilled HW register? */ + if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) + && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) + *is_spilled = true; + return true; + } + return false; +} + +/* Add output dependencies for chain of function adjacent arguments only if + there is a move to likely spilled HW register. Return first argument + if at least one dependence was added or NULL otherwise. */ +static rtx +add_parameter_dependencies (rtx call, rtx head) +{ + rtx insn; + rtx last = call; + rtx first_arg = NULL; + bool is_spilled = false; + + /* Find nearest to call argument passing instruction. */ + while (true) + { + last = PREV_INSN (last); + if (last == head) + return NULL; + if (!NONDEBUG_INSN_P (last)) + continue; + if (insn_is_function_arg (last, &is_spilled)) + break; + return NULL; + } + + first_arg = last; + while (true) + { + insn = PREV_INSN (last); + if (!INSN_P (insn)) + break; + if (insn == head) + break; + if (!NONDEBUG_INSN_P (insn)) + { + last = insn; + continue; + } + if (insn_is_function_arg (insn, &is_spilled)) + { + /* Add output dependence between two function arguments if chain + of output arguments contains likely spilled HW registers. */ + if (is_spilled) + add_dependence (last, insn, REG_DEP_OUTPUT); + first_arg = last = insn; + } + else + break; + } + if (!is_spilled) + return NULL; + return first_arg; +} + +/* Add output or anti dependency from insn to first_arg to restrict its code + motion. 
*/ +static void +avoid_func_arg_motion (rtx first_arg, rtx insn) +{ + rtx set; + rtx tmp; + + set = single_set (insn); + if (!set) + return; + tmp = SET_DEST (set); + if (REG_P (tmp)) + { + /* Add output dependency to the first function argument. */ + add_dependence (first_arg, insn, REG_DEP_OUTPUT); + return; + } + /* Add anti dependency. */ + add_dependence (first_arg, insn, REG_DEP_ANTI); +} + +/* Avoid cross block motion of function argument through adding dependency + from the first non-jump instruction in bb. */ +static void +add_dependee_for_func_arg (rtx arg, basic_block bb) +{ + rtx insn = BB_END (bb); + + while (insn) + { + if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) + { + rtx set = single_set (insn); + if (set) + { + avoid_func_arg_motion (arg, insn); + return; + } + } + if (insn == BB_HEAD (bb)) + return; + insn = PREV_INSN (insn); + } +} + +/* Hook for pre-reload schedule - avoid motion of function arguments + passed in likely spilled HW registers. */ +static void +ix86_dependencies_evaluation_hook (rtx head, rtx tail) +{ + rtx insn; + rtx first_arg = NULL; + if (reload_completed) + return; + for (insn = tail; insn != head; insn = PREV_INSN (insn)) + if (INSN_P (insn) && CALL_P (insn)) + { + first_arg = add_parameter_dependencies (insn, head); + if (first_arg) + { + /* Check if first argument has dependee out of its home block. */ + sd_iterator_def sd_it1; + dep_t dep1; + FOR_EACH_DEP (first_arg, SD_LIST_BACK, sd_it1, dep1) + { + rtx dee; + dee = DEP_PRO (dep1); + if (!NONDEBUG_INSN_P (dee)) + continue; + if (BLOCK_FOR_INSN (dee) != BLOCK_FOR_INSN (first_arg)) + /* Must add dependee for first argument in dee's block. */ + add_dependee_for_func_arg (first_arg, BLOCK_FOR_INSN (dee)); + } + insn = first_arg; + } + } + else if (first_arg) + avoid_func_arg_motion (first_arg, insn); +} + +/* Hook for pre-reload schedule - set priority of moves from likely spilled + HW registers to maximum, to schedule them as soon as possible. 
These are + moves from function argument registers at the top of the function entry + and moves from function return value registers after call. */ +static int +ix86_adjust_priority (rtx insn, int priority) +{ + rtx set; + + if (reload_completed) + return priority; + + if (!NONDEBUG_INSN_P (insn)) + return priority; + + set = single_set (insn); + if (set) + { + rtx tmp = SET_SRC (set); + if (REG_P (tmp) + && HARD_REGISTER_P (tmp) + && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) + && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) + return current_sched_info->sched_max_insns_priority; + } + + return priority; +} /* Model decoder of Core 2/i7. Below hooks for multipass scheduling (see haifa-sched.c:max_issue) @@ -24606,27 +24810,32 @@ ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED, case PROCESSOR_CORE2_64: case PROCESSOR_COREI7_32: case PROCESSOR_COREI7_64: - targetm.sched.dfa_post_advance_cycle - = core2i7_dfa_post_advance_cycle; - targetm.sched.first_cycle_multipass_init - = core2i7_first_cycle_multipass_init; - targetm.sched.first_cycle_multipass_begin - = core2i7_first_cycle_multipass_begin; - targetm.sched.first_cycle_multipass_issue - = core2i7_first_cycle_multipass_issue; - targetm.sched.first_cycle_multipass_backtrack - = core2i7_first_cycle_multipass_backtrack; - targetm.sched.first_cycle_multipass_end - = core2i7_first_cycle_multipass_end; - targetm.sched.first_cycle_multipass_fini - = core2i7_first_cycle_multipass_fini; - - /* Set decoder parameters. */ - core2i7_secondary_decoder_max_insn_size = 8; - core2i7_ifetch_block_size = 16; - core2i7_ifetch_block_max_insns = 6; - break; - + /* Do not perform multipass scheduling for pre-reload schedule + to save compile time. 
*/ + if (reload_completed) + { + targetm.sched.dfa_post_advance_cycle + = core2i7_dfa_post_advance_cycle; + targetm.sched.first_cycle_multipass_init + = core2i7_first_cycle_multipass_init; + targetm.sched.first_cycle_multipass_begin + = core2i7_first_cycle_multipass_begin; + targetm.sched.first_cycle_multipass_issue + = core2i7_first_cycle_multipass_issue; + targetm.sched.first_cycle_multipass_backtrack + = core2i7_first_cycle_multipass_backtrack; + targetm.sched.first_cycle_multipass_end + = core2i7_first_cycle_multipass_end; + targetm.sched.first_cycle_multipass_fini + = core2i7_first_cycle_multipass_fini; + + /* Set decoder parameters. */ + core2i7_secondary_decoder_max_insn_size = 8; + core2i7_ifetch_block_size = 16; + core2i7_ifetch_block_max_insns = 6; + break; + } + /* ... Fall through ... */ default: targetm.sched.dfa_post_advance_cycle = NULL; targetm.sched.first_cycle_multipass_init = NULL; @@ -39687,6 +39896,10 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width #undef TARGET_SCHED_REORDER #define TARGET_SCHED_REORDER ix86_sched_reorder +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority +#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK +#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ix86_dependencies_evaluation_hook /* The size of the dispatch window is the total number of bytes of object code allowed in a window. */ -- 2.7.4