From: Bernd Schmidt Date: Fri, 4 Jun 2010 12:44:01 +0000 (+0000) Subject: re PR rtl-optimization/39871 (Code size increase on ARM due to poor register allocation) X-Git-Tag: upstream/12.2.0~92625 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8ff49c29fd93d56df978cfe1e4e41812251d7773;p=platform%2Fupstream%2Fgcc.git re PR rtl-optimization/39871 (Code size increase on ARM due to poor register allocation) PR rtl-optimization/39871 PR rtl-optimization/40615 PR rtl-optimization/42500 PR rtl-optimization/42502 * ira.c (init_reg_equiv_memory_loc): New function. (ira): Call it twice. * reload.h (calculate_elim_costs_all_insns): Declare. * ira-costs.c: Include "reload.h". (regno_equiv_gains): New static variable. (init_costs): Allocate it. (finish_costs): Free it. (ira_costs): Call calculate_elim_costs_all_insns. (find_costs_and_classes): Take estimated elimination costs into account. (ira_adjust_equiv_reg_cost): New function. * ira.h (ira_adjust_equiv_reg_cost): Declare it. * reload1.c (init_eliminable_invariants, free_reg_equiv, elimination_costs_in_insn, note_reg_elim_costly): New static functions. (elim_bb): New static variable. (reload): Move code out of here into init_eliminable_invariants and free_reg_equiv. Call them. (calculate_elim_costs_all_insns): New function. (eliminate_regs_1): Declare. Add extra arg FOR_COSTS; all callers changed. If FOR_COSTS is true, don't call alter_reg, but call note_reg_elim_costly if we turned a valid memory address into an invalid one. * Makefile.in (ira-costs.o): Depend on reload.h. testsuite/ PR rtl-optimization/39871 PR rtl-optimization/40615 PR rtl-optimization/42500 PR rtl-optimization/42502 * gcc.target/arm/eliminate.c: New test. 
From-SVN: r160260 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3dae473..fa0304f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,34 @@ +2010-06-04 Bernd Schmidt + + PR rtl-optimization/39871 + PR rtl-optimization/40615 + PR rtl-optimization/42500 + PR rtl-optimization/42502 + * ira.c (init_reg_equiv_memory_loc: New function. + (ira): Call it twice. + * reload.h (calculate_elim_costs_all_insns): Declare. + * ira-costs.c: Include "reload.h". + (regno_equiv_gains): New static variable. + (init_costs): Allocate it. + (finish_costs): Free it. + (ira_costs): Call calculate_elim_costs_all_insns. + (find_costs_and_classes): Take estimated elimination costs + into account. + (ira_adjust_equiv_reg_cost): New function. + * ira.h (ira_adjust_equiv_reg_cost): Declare it. + * reload1.c (init_eliminable_invariants, free_reg_equiv, + elimination_costs_in_insn, note_reg_elim_costly): New static + functions. + (elim_bb): New static variable. + (reload): Move code out of here into init_eliminable_invariants and + free_reg_equiv. Call them. + (calculate_elim_costs_all_insns): New function. + (eliminate_regs_1): Declare. Add extra arg FOR_COSTS; + all callers changed. If FOR_COSTS is true, don't call alter_reg, + but call note_reg_elim_costly if we turned a valid memory address + into an invalid one. + * Makefile.in (ira-costs.o): Depend on reload.h. 
+ 2010-06-04 Julian Brown * config/arm/thumb2.md (*thumb2_movdf_soft_insn): Fix alternatives diff --git a/gcc/Makefile.in b/gcc/Makefile.in index e98adec..b469850 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3276,7 +3276,7 @@ ira-build.o: ira-build.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ira-costs.o: ira-costs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ hard-reg-set.h $(RTL_H) $(EXPR_H) $(TM_P_H) $(FLAGS_H) $(BASIC_BLOCK_H) \ $(REGS_H) addresses.h insn-config.h $(RECOG_H) $(TOPLEV_H) $(TARGET_H) \ - $(PARAMS_H) $(IRA_INT_H) + $(PARAMS_H) $(IRA_INT_H) reload.h ira-conflicts.o: ira-conflicts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TARGET_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) \ insn-config.h $(RECOG_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) $(PARAMS_H) \ diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c index 491b86b..39955b6 100644 --- a/gcc/ira-costs.c +++ b/gcc/ira-costs.c @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see #include "addresses.h" #include "insn-config.h" #include "recog.h" +#include "reload.h" #include "toplev.h" #include "target.h" #include "params.h" @@ -123,6 +124,10 @@ static enum reg_class *pref_buffer; /* Record cover register class of each allocno with the same regno. */ static enum reg_class *regno_cover_class; +/* Record cost gains for not allocating a register with an invariant + equivalence. */ +static int *regno_equiv_gains; + /* Execution frequency of the current insn. */ static int frequency; @@ -1263,6 +1268,7 @@ find_costs_and_classes (FILE *dump_file) #ifdef FORBIDDEN_INC_DEC_CLASSES int inc_dec_p = false; #endif + int equiv_savings = regno_equiv_gains[i]; if (! 
allocno_p) { @@ -1311,6 +1317,15 @@ find_costs_and_classes (FILE *dump_file) #endif } } + if (equiv_savings < 0) + temp_costs->mem_cost = -equiv_savings; + else if (equiv_savings > 0) + { + temp_costs->mem_cost = 0; + for (k = 0; k < cost_classes_num; k++) + temp_costs->cost[k] += equiv_savings; + } + best_cost = (1 << (HOST_BITS_PER_INT - 2)) - 1; best = ALL_REGS; alt_class = NO_REGS; @@ -1680,6 +1695,8 @@ init_costs (void) regno_cover_class = (enum reg_class *) ira_allocate (sizeof (enum reg_class) * max_reg_num ()); + regno_equiv_gains = (int *) ira_allocate (sizeof (int) * max_reg_num ()); + memset (regno_equiv_gains, 0, sizeof (int) * max_reg_num ()); } /* Common finalization function for ira_costs and @@ -1687,6 +1704,7 @@ init_costs (void) static void finish_costs (void) { + ira_free (regno_equiv_gains); ira_free (regno_cover_class); ira_free (pref_buffer); ira_free (costs); @@ -1702,6 +1720,7 @@ ira_costs (void) init_costs (); total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size * ira_allocnos_num); + calculate_elim_costs_all_insns (); find_costs_and_classes (ira_dump_file); setup_allocno_cover_class_and_costs (); finish_costs (); @@ -1802,3 +1821,16 @@ ira_tune_allocno_costs_and_cover_classes (void) } } } + +/* Add COST to the estimated gain for eliminating REGNO with its + equivalence. If COST is zero, record that no such elimination is + possible. */ + +void +ira_adjust_equiv_reg_cost (unsigned regno, int cost) +{ + if (cost == 0) + regno_equiv_gains[regno] = 0; + else + regno_equiv_gains[regno] += cost; +} diff --git a/gcc/ira.c b/gcc/ira.c index 84d7bc1..4fac13c 100644 --- a/gcc/ira.c +++ b/gcc/ira.c @@ -3149,8 +3149,19 @@ build_insn_chain (void) if (dump_file) print_insn_chains (dump_file); } - +/* Allocate memory for reg_equiv_memory_loc. */ +static void +init_reg_equiv_memory_loc (void) +{ + max_regno = max_reg_num (); + + /* And the reg_equiv_memory_loc array. 
*/ + VEC_safe_grow (rtx, gc, reg_equiv_memory_loc_vec, max_regno); + memset (VEC_address (rtx, reg_equiv_memory_loc_vec), 0, + sizeof (rtx) * max_regno); + reg_equiv_memory_loc = VEC_address (rtx, reg_equiv_memory_loc_vec); +} /* All natural loops. */ struct loops ira_loops; @@ -3255,6 +3266,8 @@ ira (FILE *f) record_loop_exits (); current_loops = &ira_loops; + init_reg_equiv_memory_loc (); + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL) fprintf (ira_dump_file, "Building IRA IR\n"); loops_p = ira_build (optimize @@ -3315,13 +3328,8 @@ ira (FILE *f) #endif delete_trivially_dead_insns (get_insns (), max_reg_num ()); - max_regno = max_reg_num (); - /* And the reg_equiv_memory_loc array. */ - VEC_safe_grow (rtx, gc, reg_equiv_memory_loc_vec, max_regno); - memset (VEC_address (rtx, reg_equiv_memory_loc_vec), 0, - sizeof (rtx) * max_regno); - reg_equiv_memory_loc = VEC_address (rtx, reg_equiv_memory_loc_vec); + init_reg_equiv_memory_loc (); if (max_regno != max_regno_before_ira) { diff --git a/gcc/ira.h b/gcc/ira.h index 8c9734b..97bfa87 100644 --- a/gcc/ira.h +++ b/gcc/ira.h @@ -88,3 +88,4 @@ extern void ira_mark_new_stack_slot (rtx, int, unsigned int); extern bool ira_better_spill_reload_regno_p (int *, int *, rtx, rtx, rtx); extern bool ira_bad_reload_regno (int, rtx, rtx); +extern void ira_adjust_equiv_reg_cost (unsigned, int); diff --git a/gcc/reload.h b/gcc/reload.h index a285670..8168b51 100644 --- a/gcc/reload.h +++ b/gcc/reload.h @@ -349,6 +349,10 @@ extern void mark_home_live (int); extern rtx eliminate_regs (rtx, enum machine_mode, rtx); extern bool elimination_target_reg_p (rtx); +/* Called from the register allocator to estimate costs of eliminating + invariant registers. */ +extern void calculate_elim_costs_all_insns (void); + /* Deallocate the reload register used by reload number R. 
*/ extern void deallocate_reload_reg (int r); diff --git a/gcc/reload1.c b/gcc/reload1.c index f385fbb..030d007 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -412,6 +412,7 @@ static void alter_reg (int, int, bool); static void set_label_offsets (rtx, rtx, int); static void check_eliminable_occurrences (rtx); static void elimination_effects (rtx, enum machine_mode); +static rtx eliminate_regs_1 (rtx, enum machine_mode, rtx, bool, bool); static int eliminate_regs_in_insn (rtx, int); static void update_eliminable_offsets (void); static void mark_not_eliminable (rtx, const_rtx, void *); @@ -419,8 +420,11 @@ static void set_initial_elim_offsets (void); static bool verify_initial_elim_offsets (void); static void set_initial_label_offsets (void); static void set_offsets_for_label (rtx); +static void init_eliminable_invariants (rtx, bool); static void init_elim_table (void); +static void free_reg_equiv (void); static void update_eliminables (HARD_REG_SET *); +static void elimination_costs_in_insn (rtx); static void spill_hard_reg (unsigned int, int); static int finish_spills (int); static void scan_paradoxical_subregs (rtx); @@ -697,6 +701,9 @@ has_nonexceptional_receiver (void) /* Global variables used by reload and its subroutines. */ +/* The current basic block while in calculate_elim_costs_all_insns. */ +static basic_block elim_bb; + /* Set during calculate_needs if an insn needs register elimination. */ static int something_needs_elimination; /* Set during calculate_needs if an insn needs an operand changed. */ @@ -777,22 +784,6 @@ reload (rtx first, int global) if (! call_used_regs[i] && ! fixed_regs[i] && ! LOCAL_REGNO (i)) df_set_regs_ever_live (i, true); - /* Find all the pseudo registers that didn't get hard regs - but do have known equivalent constants or memory slots. - These include parameters (known equivalent to parameter slots) - and cse'd or loop-moved constant memory addresses. 
- - Record constant equivalents in reg_equiv_constant - so they will be substituted by find_reloads. - Record memory equivalents in reg_mem_equiv so they can - be substituted eventually by altering the REG-rtx's. */ - - reg_equiv_constant = XCNEWVEC (rtx, max_regno); - reg_equiv_invariant = XCNEWVEC (rtx, max_regno); - reg_equiv_mem = XCNEWVEC (rtx, max_regno); - reg_equiv_alt_mem_list = XCNEWVEC (rtx, max_regno); - reg_equiv_address = XCNEWVEC (rtx, max_regno); - reg_max_ref_width = XCNEWVEC (unsigned int, max_regno); reg_old_renumber = XCNEWVEC (short, max_regno); memcpy (reg_old_renumber, reg_renumber, max_regno * sizeof (short)); pseudo_forbidden_regs = XNEWVEC (HARD_REG_SET, max_regno); @@ -800,115 +791,9 @@ reload (rtx first, int global) CLEAR_HARD_REG_SET (bad_spill_regs_global); - /* Look for REG_EQUIV notes; record what each pseudo is equivalent - to. Also find all paradoxical subregs and find largest such for - each pseudo. */ - - num_eliminable_invariants = 0; - for (insn = first; insn; insn = NEXT_INSN (insn)) - { - rtx set = single_set (insn); - - /* We may introduce USEs that we want to remove at the end, so - we'll mark them with QImode. Make sure there are no - previously-marked insns left by say regmove. */ - if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE - && GET_MODE (insn) != VOIDmode) - PUT_MODE (insn, VOIDmode); - - if (NONDEBUG_INSN_P (insn)) - scan_paradoxical_subregs (PATTERN (insn)); - - if (set != 0 && REG_P (SET_DEST (set))) - { - rtx note = find_reg_note (insn, REG_EQUIV, NULL_RTX); - rtx x; - - if (! note) - continue; - - i = REGNO (SET_DEST (set)); - x = XEXP (note, 0); - - if (i <= LAST_VIRTUAL_REGISTER) - continue; - - if (! function_invariant_p (x) - || ! flag_pic - /* A function invariant is often CONSTANT_P but may - include a register. We promise to only pass - CONSTANT_P objects to LEGITIMATE_PIC_OPERAND_P. 
*/ - || (CONSTANT_P (x) - && LEGITIMATE_PIC_OPERAND_P (x))) - { - /* It can happen that a REG_EQUIV note contains a MEM - that is not a legitimate memory operand. As later - stages of reload assume that all addresses found - in the reg_equiv_* arrays were originally legitimate, - we ignore such REG_EQUIV notes. */ - if (memory_operand (x, VOIDmode)) - { - /* Always unshare the equivalence, so we can - substitute into this insn without touching the - equivalence. */ - reg_equiv_memory_loc[i] = copy_rtx (x); - } - else if (function_invariant_p (x)) - { - if (GET_CODE (x) == PLUS) - { - /* This is PLUS of frame pointer and a constant, - and might be shared. Unshare it. */ - reg_equiv_invariant[i] = copy_rtx (x); - num_eliminable_invariants++; - } - else if (x == frame_pointer_rtx || x == arg_pointer_rtx) - { - reg_equiv_invariant[i] = x; - num_eliminable_invariants++; - } - else if (LEGITIMATE_CONSTANT_P (x)) - reg_equiv_constant[i] = x; - else - { - reg_equiv_memory_loc[i] - = force_const_mem (GET_MODE (SET_DEST (set)), x); - if (! reg_equiv_memory_loc[i]) - reg_equiv_init[i] = NULL_RTX; - } - } - else - { - reg_equiv_init[i] = NULL_RTX; - continue; - } - } - else - reg_equiv_init[i] = NULL_RTX; - } - } - - if (dump_file) - for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) - if (reg_equiv_init[i]) - { - fprintf (dump_file, "init_insns for %u: ", i); - print_inline_rtx (dump_file, reg_equiv_init[i], 20); - fprintf (dump_file, "\n"); - } - + init_eliminable_invariants (first, true); init_elim_table (); - first_label_num = get_first_label_num (); - num_labels = max_label_num () - first_label_num; - - /* Allocate the tables used to store offset information at labels. */ - /* We used to use alloca here, but the size of what it would try to - allocate would occasionally cause it to exceed the stack limit and - cause a core dump. 
*/ - offsets_known_at = XNEWVEC (char, num_labels); - offsets_at = (HOST_WIDE_INT (*)[NUM_ELIMINABLE_REGS]) xmalloc (num_labels * NUM_ELIMINABLE_REGS * sizeof (HOST_WIDE_INT)); - /* Alter each pseudo-reg rtx to contain its hard reg number. Assign stack slots to the pseudos that lack hard regs or equivalents. Do not touch virtual registers. */ @@ -1413,31 +1298,11 @@ reload (rtx first, int global) } } - /* Indicate that we no longer have known memory locations or constants. */ - if (reg_equiv_constant) - free (reg_equiv_constant); - if (reg_equiv_invariant) - free (reg_equiv_invariant); - reg_equiv_constant = 0; - reg_equiv_invariant = 0; - VEC_free (rtx, gc, reg_equiv_memory_loc_vec); - reg_equiv_memory_loc = 0; - free (temp_pseudo_reg_arr); - if (offsets_known_at) - free (offsets_known_at); - if (offsets_at) - free (offsets_at); - - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (reg_equiv_alt_mem_list[i]) - free_EXPR_LIST_list (&reg_equiv_alt_mem_list[i]); - free (reg_equiv_alt_mem_list); - - free (reg_equiv_mem); + /* Indicate that we no longer have known memory locations or constants. */ + free_reg_equiv (); reg_equiv_init = 0; - free (reg_equiv_address); free (reg_max_ref_width); free (reg_old_renumber); free (pseudo_previous_regs); @@ -1730,6 +1595,100 @@ calculate_needs_all_insns (int global) *pprev_reload = 0; } +/* This function is called from the register allocator to set up estimates + for the cost of eliminating pseudos which have REG_EQUIV equivalences to + an invariant. The structure is similar to calculate_needs_all_insns. 
*/ + +void +calculate_elim_costs_all_insns (void) +{ + int *reg_equiv_init_cost; + basic_block bb; + int i; + + reg_equiv_init_cost = XCNEWVEC (int, max_regno); + init_elim_table (); + init_eliminable_invariants (get_insns (), false); + + set_initial_elim_offsets (); + set_initial_label_offsets (); + + FOR_EACH_BB (bb) + { + rtx insn; + elim_bb = bb; + + FOR_BB_INSNS (bb, insn) + { + /* If this is a label, a JUMP_INSN, or has REG_NOTES (which might + include REG_LABEL_OPERAND and REG_LABEL_TARGET), we need to see + what effects this has on the known offsets at labels. */ + + if (LABEL_P (insn) || JUMP_P (insn) + || (INSN_P (insn) && REG_NOTES (insn) != 0)) + set_label_offsets (insn, insn, 0); + + if (INSN_P (insn)) + { + rtx set = single_set (insn); + + /* Skip insns that only set an equivalence. */ + if (set && REG_P (SET_DEST (set)) + && reg_renumber[REGNO (SET_DEST (set))] < 0 + && (reg_equiv_constant[REGNO (SET_DEST (set))] + || (reg_equiv_invariant[REGNO (SET_DEST (set))]))) + { + unsigned regno = REGNO (SET_DEST (set)); + rtx init = reg_equiv_init[regno]; + if (init) + { + rtx t = eliminate_regs_1 (SET_SRC (set), VOIDmode, insn, + false, true); + int cost = rtx_cost (t, SET, + optimize_bb_for_speed_p (bb)); + int freq = REG_FREQ_FROM_BB (bb); + + reg_equiv_init_cost[regno] = cost * freq; + continue; + } + } + /* If needed, eliminate any eliminable registers. 
*/ + if (num_eliminable || num_eliminable_invariants) + elimination_costs_in_insn (insn); + + if (num_eliminable) + update_eliminable_offsets (); + } + } + } + for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) + { + if (reg_equiv_invariant[i]) + { + if (reg_equiv_init[i]) + { + int cost = reg_equiv_init_cost[i]; + if (dump_file) + fprintf (dump_file, + "Reg %d has equivalence, initial gains %d\n", i, cost); + if (cost != 0) + ira_adjust_equiv_reg_cost (i, cost); + } + else + { + if (dump_file) + fprintf (dump_file, + "Reg %d had equivalence, but can't be eliminated\n", + i); + ira_adjust_equiv_reg_cost (i, 0); + } + } + } + + free_reg_equiv (); + free (reg_equiv_init_cost); +} + /* Comparison function for qsort to decide which of two reloads should be handled first. *P1 and *P2 are the reload numbers. */ @@ -2518,6 +2477,36 @@ set_label_offsets (rtx x, rtx insn, int initial_p) } } +/* Called through for_each_rtx, this function examines every reg that occurs + in PX and adjusts the costs for its elimination which are gathered by IRA. + DATA is the insn in which PX occurs. We do not recurse into MEM + expressions. */ + +static int +note_reg_elim_costly (rtx *px, void *data) +{ + rtx insn = (rtx)data; + rtx x = *px; + + if (MEM_P (x)) + return -1; + + if (REG_P (x) + && REGNO (x) >= FIRST_PSEUDO_REGISTER + && reg_equiv_init[REGNO (x)] + && reg_equiv_invariant[REGNO (x)]) + { + rtx t = reg_equiv_invariant[REGNO (x)]; + rtx new_rtx = eliminate_regs_1 (t, Pmode, insn, true, true); + int cost = rtx_cost (new_rtx, SET, optimize_bb_for_speed_p (elim_bb)); + int freq = REG_FREQ_FROM_BB (elim_bb); + + if (cost != 0) + ira_adjust_equiv_reg_cost (REGNO (x), -cost * freq); + } + return 0; +} + /* Scan X and replace any eliminable registers (such as fp) with a replacement (such as sp), plus an offset. @@ -2537,6 +2526,9 @@ set_label_offsets (rtx x, rtx insn, int initial_p) This means, do not set ref_outside_mem even if the reference is outside of MEMs. 
+ If FOR_COSTS is true, we are being called before reload in order to + estimate the costs of keeping registers with an equivalence unallocated. + REG_EQUIV_MEM and REG_EQUIV_ADDRESS contain address that have had replacements done assuming all offsets are at their initial values. If they are not, or if REG_EQUIV_ADDRESS is nonzero for a pseudo we @@ -2545,7 +2537,7 @@ set_label_offsets (rtx x, rtx insn, int initial_p) static rtx eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, - bool may_use_invariant) + bool may_use_invariant, bool for_costs) { enum rtx_code code = GET_CODE (x); struct elim_table *ep; @@ -2593,11 +2585,12 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, { if (may_use_invariant || (insn && DEBUG_INSN_P (insn))) return eliminate_regs_1 (copy_rtx (reg_equiv_invariant[regno]), - mem_mode, insn, true); + mem_mode, insn, true, for_costs); /* There exists at least one use of REGNO that cannot be eliminated. Prevent the defining insn from being deleted. */ reg_equiv_init[regno] = NULL_RTX; - alter_reg (regno, -1, true); + if (!for_costs) + alter_reg (regno, -1, true); } return x; @@ -2658,8 +2651,10 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, operand of a load-address insn. */ { - rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true); - rtx new1 = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true); + rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true, + for_costs); + rtx new1 = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true, + for_costs); if (reg_renumber && (new0 != XEXP (x, 0) || new1 != XEXP (x, 1))) { @@ -2733,9 +2728,11 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, case GE: case GT: case GEU: case GTU: case LE: case LT: case LEU: case LTU: { - rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false); + rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false, + for_costs); rtx new1 = XEXP (x, 1) - ? 
eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, false) : 0; + ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, false, + for_costs) : 0; if (new0 != XEXP (x, 0) || new1 != XEXP (x, 1)) return gen_rtx_fmt_ee (code, GET_MODE (x), new0, new1); @@ -2746,7 +2743,8 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, /* If we have something in XEXP (x, 0), the usual case, eliminate it. */ if (XEXP (x, 0)) { - new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true); + new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true, + for_costs); if (new_rtx != XEXP (x, 0)) { /* If this is a REG_DEAD note, it is not valid anymore. @@ -2754,7 +2752,8 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, REG_DEAD note for the stack or frame pointer. */ if (REG_NOTE_KIND (x) == REG_DEAD) return (XEXP (x, 1) - ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true) + ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true, + for_costs) : NULL_RTX); x = alloc_reg_note (REG_NOTE_KIND (x), new_rtx, XEXP (x, 1)); @@ -2769,7 +2768,8 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, strictly needed, but it simplifies the code. 
*/ if (XEXP (x, 1)) { - new_rtx = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true); + new_rtx = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true, + for_costs); if (new_rtx != XEXP (x, 1)) return gen_rtx_fmt_ee (GET_CODE (x), GET_MODE (x), XEXP (x, 0), new_rtx); @@ -2795,7 +2795,7 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, && XEXP (XEXP (x, 1), 0) == XEXP (x, 0)) { rtx new_rtx = eliminate_regs_1 (XEXP (XEXP (x, 1), 1), mem_mode, - insn, true); + insn, true, for_costs); if (new_rtx != XEXP (XEXP (x, 1), 1)) return gen_rtx_fmt_ee (code, GET_MODE (x), XEXP (x, 0), @@ -2818,7 +2818,8 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, case POPCOUNT: case PARITY: case BSWAP: - new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false); + new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false, + for_costs); if (new_rtx != XEXP (x, 0)) return gen_rtx_fmt_e (code, GET_MODE (x), new_rtx); return x; @@ -2839,7 +2840,8 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, new_rtx = SUBREG_REG (x); } else - new_rtx = eliminate_regs_1 (SUBREG_REG (x), mem_mode, insn, false); + new_rtx = eliminate_regs_1 (SUBREG_REG (x), mem_mode, insn, false, + for_costs); if (new_rtx != SUBREG_REG (x)) { @@ -2873,14 +2875,20 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, /* Our only special processing is to pass the mode of the MEM to our recursive call and copy the flags. While we are here, handle this case more efficiently. 
*/ - return - replace_equiv_address_nv (x, - eliminate_regs_1 (XEXP (x, 0), GET_MODE (x), - insn, true)); + + new_rtx = eliminate_regs_1 (XEXP (x, 0), GET_MODE (x), insn, true, + for_costs); + if (for_costs + && memory_address_p (GET_MODE (x), XEXP (x, 0)) + && !memory_address_p (GET_MODE (x), new_rtx)) + for_each_rtx (&XEXP (x, 0), note_reg_elim_costly, insn); + + return replace_equiv_address_nv (x, new_rtx); case USE: /* Handle insn_list USE that a call to a pure function may generate. */ - new_rtx = eliminate_regs_1 (XEXP (x, 0), VOIDmode, insn, false); + new_rtx = eliminate_regs_1 (XEXP (x, 0), VOIDmode, insn, false, + for_costs); if (new_rtx != XEXP (x, 0)) return gen_rtx_USE (GET_MODE (x), new_rtx); return x; @@ -2904,7 +2912,8 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, { if (*fmt == 'e') { - new_rtx = eliminate_regs_1 (XEXP (x, i), mem_mode, insn, false); + new_rtx = eliminate_regs_1 (XEXP (x, i), mem_mode, insn, false, + for_costs); if (new_rtx != XEXP (x, i) && ! copied) { x = shallow_copy_rtx (x); @@ -2917,7 +2926,8 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, int copied_vec = 0; for (j = 0; j < XVECLEN (x, i); j++) { - new_rtx = eliminate_regs_1 (XVECEXP (x, i, j), mem_mode, insn, false); + new_rtx = eliminate_regs_1 (XVECEXP (x, i, j), mem_mode, insn, false, + for_costs); if (new_rtx != XVECEXP (x, i, j) && ! copied_vec) { rtvec new_v = gen_rtvec_v (XVECLEN (x, i), @@ -2941,7 +2951,7 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, rtx eliminate_regs (rtx x, enum machine_mode mem_mode, rtx insn) { - return eliminate_regs_1 (x, mem_mode, insn, false); + return eliminate_regs_1 (x, mem_mode, insn, false, false); } /* Scan rtx X for modifications of elimination target registers. Update @@ -3459,7 +3469,8 @@ eliminate_regs_in_insn (rtx insn, int replace) /* Companion to the above plus substitution, we can allow invariants as the source of a plain move. 
*/ is_set_src = false; - if (old_set && recog_data.operand_loc[i] == &SET_SRC (old_set)) + if (old_set + && recog_data.operand_loc[i] == &SET_SRC (old_set)) is_set_src = true; in_plus = false; if (plus_src @@ -3470,7 +3481,7 @@ eliminate_regs_in_insn (rtx insn, int replace) substed_operand[i] = eliminate_regs_1 (recog_data.operand[i], VOIDmode, replace ? insn : NULL_RTX, - is_set_src || in_plus); + is_set_src || in_plus, false); if (substed_operand[i] != orig_operand[i]) val = 1; /* Terminate the search in check_eliminable_occurrences at @@ -3601,11 +3612,167 @@ eliminate_regs_in_insn (rtx insn, int replace) the pre-passes. */ if (val && REG_NOTES (insn) != 0) REG_NOTES (insn) - = eliminate_regs_1 (REG_NOTES (insn), VOIDmode, REG_NOTES (insn), true); + = eliminate_regs_1 (REG_NOTES (insn), VOIDmode, REG_NOTES (insn), true, + false); return val; } +/* Like eliminate_regs_in_insn, but only estimate costs for the use of the + register allocator. INSN is the instruction we need to examine, we perform + eliminations in its operands and record cases where eliminating a reg with + an invariant equivalence would add extra cost. */ + +static void +elimination_costs_in_insn (rtx insn) +{ + int icode = recog_memoized (insn); + rtx old_body = PATTERN (insn); + int insn_is_asm = asm_noperands (old_body) >= 0; + rtx old_set = single_set (insn); + int i; + rtx orig_operand[MAX_RECOG_OPERANDS]; + rtx orig_dup[MAX_RECOG_OPERANDS]; + struct elim_table *ep; + rtx plus_src, plus_cst_src; + bool sets_reg_p; + + if (! insn_is_asm && icode < 0) + { + gcc_assert (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + || GET_CODE (PATTERN (insn)) == ASM_INPUT + || DEBUG_INSN_P (insn)); + return; + } + + if (old_set != 0 && REG_P (SET_DEST (old_set)) + && REGNO (SET_DEST (old_set)) < FIRST_PSEUDO_REGISTER) + { + /* Check for setting an eliminable register. 
+ for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++) + if (ep->from_rtx == SET_DEST (old_set) && ep->can_eliminate) + return; + } + + /* We allow one special case which happens to work on all machines we + currently support: a single set with the source or a REG_EQUAL + note being a PLUS of an eliminable register and a constant. */ + plus_src = plus_cst_src = 0; + sets_reg_p = false; + if (old_set && REG_P (SET_DEST (old_set))) + { + sets_reg_p = true; + if (GET_CODE (SET_SRC (old_set)) == PLUS) + plus_src = SET_SRC (old_set); + /* First see if the source is of the form (plus (...) CST). */ + if (plus_src + && CONST_INT_P (XEXP (plus_src, 1))) + plus_cst_src = plus_src; + else if (REG_P (SET_SRC (old_set)) + || plus_src) + { + /* Otherwise, see if we have a REG_EQUAL note of the form + (plus (...) CST). */ + rtx links; + for (links = REG_NOTES (insn); links; links = XEXP (links, 1)) + { + if ((REG_NOTE_KIND (links) == REG_EQUAL + || REG_NOTE_KIND (links) == REG_EQUIV) + && GET_CODE (XEXP (links, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (links, 0), 1))) + { + plus_cst_src = XEXP (links, 0); + break; + } + } + } + } + + /* Determine the effects of this insn on elimination offsets. */ + elimination_effects (old_body, VOIDmode); + + /* Eliminate all eliminable registers occurring in operands that + can be handled by reload. */ + extract_insn (insn); + for (i = 0; i < recog_data.n_dups; i++) + orig_dup[i] = *recog_data.dup_loc[i]; + + for (i = 0; i < recog_data.n_operands; i++) + { + orig_operand[i] = recog_data.operand[i]; + + /* For an asm statement, every operand is eliminable. */ + if (insn_is_asm || insn_data[icode].operand[i].eliminable) + { + bool is_set_src, in_plus; + + /* Check for setting a register that we know about. */ + if (recog_data.operand_type[i] != OP_IN + && REG_P (orig_operand[i])) + { + /* If we are assigning to a register that can be eliminated, it + must be as part of a PARALLEL, since the code above handles + single SETs. 
We must indicate that we can no longer + eliminate this reg. */ + for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; + ep++) + if (ep->from_rtx == orig_operand[i]) + ep->can_eliminate = 0; + } + + /* Companion to the above plus substitution, we can allow + invariants as the source of a plain move. */ + is_set_src = false; + if (old_set && recog_data.operand_loc[i] == &SET_SRC (old_set)) + is_set_src = true; + if (is_set_src && !sets_reg_p) + note_reg_elim_costly (&SET_SRC (old_set), insn); + in_plus = false; + if (plus_src && sets_reg_p + && (recog_data.operand_loc[i] == &XEXP (plus_src, 0) + || recog_data.operand_loc[i] == &XEXP (plus_src, 1))) + in_plus = true; + + eliminate_regs_1 (recog_data.operand[i], VOIDmode, + NULL_RTX, + is_set_src || in_plus, true); + /* Terminate the search in check_eliminable_occurrences at + this point. */ + *recog_data.operand_loc[i] = 0; + } + } + + for (i = 0; i < recog_data.n_dups; i++) + *recog_data.dup_loc[i] + = *recog_data.operand_loc[(int) recog_data.dup_num[i]]; + + /* If any eliminable remain, they aren't eliminable anymore. */ + check_eliminable_occurrences (old_body); + + /* Restore the old body. */ + for (i = 0; i < recog_data.n_operands; i++) + *recog_data.operand_loc[i] = orig_operand[i]; + for (i = 0; i < recog_data.n_dups; i++) + *recog_data.dup_loc[i] = orig_dup[i]; + + /* Update all elimination pairs to reflect the status after the current + insn. The changes we make were determined by the earlier call to + elimination_effects. */ + + for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++) + { + if (ep->previous_offset != ep->offset && ep->ref_outside_mem) + ep->can_eliminate = 0; + + ep->ref_outside_mem = 0; + } + + return; +} + /* Loop through all elimination pairs. Recalculate the number not at initial offset. 
@@ -3915,6 +4082,172 @@ init_elim_table (void)
       ep->to_rtx = gen_rtx_REG (Pmode, ep->to);
     }
 }
+
+/* Find all the pseudo registers that didn't get hard regs
+   but do have known equivalent constants or memory slots.
+   These include parameters (known equivalent to parameter slots)
+   and cse'd or loop-moved constant memory addresses.
+
+   Record constant equivalents in reg_equiv_constant
+   so they will be substituted by find_reloads.
+   Record memory equivalents in reg_mem_equiv so they can
+   be substituted eventually by altering the REG-rtx's. */
+
+static void
+init_eliminable_invariants (rtx first, bool do_subregs)
+{
+  int i;
+  rtx insn;
+
+  reg_equiv_constant = XCNEWVEC (rtx, max_regno);
+  reg_equiv_invariant = XCNEWVEC (rtx, max_regno);
+  reg_equiv_mem = XCNEWVEC (rtx, max_regno);
+  reg_equiv_alt_mem_list = XCNEWVEC (rtx, max_regno);
+  reg_equiv_address = XCNEWVEC (rtx, max_regno);
+  if (do_subregs)
+    reg_max_ref_width = XCNEWVEC (unsigned int, max_regno);
+  else
+    reg_max_ref_width = NULL;
+
+  num_eliminable_invariants = 0;
+
+  first_label_num = get_first_label_num ();
+  num_labels = max_label_num () - first_label_num;
+
+  /* Allocate the tables used to store offset information at labels. */
+  offsets_known_at = XNEWVEC (char, num_labels);
+  offsets_at = (HOST_WIDE_INT (*)[NUM_ELIMINABLE_REGS]) xmalloc (num_labels * NUM_ELIMINABLE_REGS * sizeof (HOST_WIDE_INT));
+
+/* Look for REG_EQUIV notes; record what each pseudo is equivalent
+   to. If DO_SUBREGS is true, also find all paradoxical subregs and
+   find largest such for each pseudo. FIRST is the head of the insn
+   list. */
+
+  for (insn = first; insn; insn = NEXT_INSN (insn))
+    {
+      rtx set = single_set (insn);
+
+      /* We may introduce USEs that we want to remove at the end, so
+         we'll mark them with QImode. Make sure there are no
+         previously-marked insns left by say regmove. */
+      if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
+          && GET_MODE (insn) != VOIDmode)
+        PUT_MODE (insn, VOIDmode);
+
+      if (do_subregs && NONDEBUG_INSN_P (insn))
+        scan_paradoxical_subregs (PATTERN (insn));
+
+      if (set != 0 && REG_P (SET_DEST (set)))
+        {
+          rtx note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
+          rtx x;
+
+          if (! note)
+            continue;
+
+          i = REGNO (SET_DEST (set));
+          x = XEXP (note, 0);
+
+          if (i <= LAST_VIRTUAL_REGISTER)
+            continue;
+
+          if (! function_invariant_p (x)
+              || ! flag_pic
+              /* A function invariant is often CONSTANT_P but may
+                 include a register. We promise to only pass
+                 CONSTANT_P objects to LEGITIMATE_PIC_OPERAND_P. */
+              || (CONSTANT_P (x)
+                  && LEGITIMATE_PIC_OPERAND_P (x)))
+            {
+              /* It can happen that a REG_EQUIV note contains a MEM
+                 that is not a legitimate memory operand. As later
+                 stages of reload assume that all addresses found
+                 in the reg_equiv_* arrays were originally legitimate,
+                 we ignore such REG_EQUIV notes. */
+              if (memory_operand (x, VOIDmode))
+                {
+                  /* Always unshare the equivalence, so we can
+                     substitute into this insn without touching the
+                     equivalence. */
+                  reg_equiv_memory_loc[i] = copy_rtx (x);
+                }
+              else if (function_invariant_p (x))
+                {
+                  if (GET_CODE (x) == PLUS)
+                    {
+                      /* This is PLUS of frame pointer and a constant,
+                         and might be shared. Unshare it. */
+                      reg_equiv_invariant[i] = copy_rtx (x);
+                      num_eliminable_invariants++;
+                    }
+                  else if (x == frame_pointer_rtx || x == arg_pointer_rtx)
+                    {
+                      reg_equiv_invariant[i] = x;
+                      num_eliminable_invariants++;
+                    }
+                  else if (LEGITIMATE_CONSTANT_P (x))
+                    reg_equiv_constant[i] = x;
+                  else
+                    {
+                      reg_equiv_memory_loc[i]
+                        = force_const_mem (GET_MODE (SET_DEST (set)), x);
+                      if (! reg_equiv_memory_loc[i])
+                        reg_equiv_init[i] = NULL_RTX;
+                    }
+                }
+              else
+                {
+                  reg_equiv_init[i] = NULL_RTX;
+                  continue;
+                }
+            }
+          else
+            reg_equiv_init[i] = NULL_RTX;
+        }
+    }
+
+  if (dump_file)
+    for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
+      if (reg_equiv_init[i])
+        {
+          fprintf (dump_file, "init_insns for %u: ", i);
+          print_inline_rtx (dump_file, reg_equiv_init[i], 20);
+          fprintf (dump_file, "\n");
+        }
+}
+
+/* Indicate that we no longer have known memory locations or constants.
+   Free all data involved in tracking these. */
+
+static void
+free_reg_equiv (void)
+{
+  int i;
+
+  if (reg_equiv_constant)
+    free (reg_equiv_constant);
+  if (reg_equiv_invariant)
+    free (reg_equiv_invariant);
+  reg_equiv_constant = 0;
+  reg_equiv_invariant = 0;
+  VEC_free (rtx, gc, reg_equiv_memory_loc_vec);
+  reg_equiv_memory_loc = 0;
+
+  if (offsets_known_at)
+    free (offsets_known_at);
+  if (offsets_at)
+    free (offsets_at);
+  offsets_at = 0;
+  offsets_known_at = 0;
+
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    if (reg_equiv_alt_mem_list[i])
+      free_EXPR_LIST_list (&reg_equiv_alt_mem_list[i]);
+  free (reg_equiv_alt_mem_list);
+
+  free (reg_equiv_mem);
+  free (reg_equiv_address);
+}
 
 /* Kick all pseudos out of hard register REGNO.
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 99b1152..1f3ae68 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2010-06-04 Bernd Schmidt
+
+        PR rtl-optimization/39871
+        PR rtl-optimization/40615
+        PR rtl-optimization/42500
+        PR rtl-optimization/42502
+        * gcc.target/arm/eliminate.c: New test.
+
 2010-06-03 Paolo Carlini
 
         PR libstdc++/44410
diff --git a/gcc/testsuite/gcc.target/arm/eliminate.c b/gcc/testsuite/gcc.target/arm/eliminate.c
new file mode 100644
index 0000000..f254dd8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/eliminate.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct X
+{
+  int c;
+};
+
+extern void bar(struct X *);
+
+void foo ()
+{
+  struct X x;
+  bar (&x);
+  bar (&x);
+  bar (&x);
+}
+
+/* { dg-final { scan-assembler-times "r0,\[\\t \]*sp" 3 } } */