From 497ba60f76c13da878a9932dc38eda8e7333c0b6 Mon Sep 17 00:00:00 2001 From: vmakarov Date: Thu, 13 Nov 2014 03:02:49 +0000 Subject: [PATCH] 2014-11-12 Vladimir Makarov * common.opt (flra-remat): New. * opts.c (default_options_table): Add entry for flra_remat. * timevar_def (TV_LRA_REMAT): New. * doc/invoke.texi (-flra-remat): Add description of the new option. * doc/passes.texi (-flra-remat): Remove lra-equivs.c and lra-saves.c. Add lra-remat.c. * Makefile.in (OBJS): Add lra-remat.o. * lra-remat.c: New file. * lra.c: Add info about the rematerialization pass in the top comment. (collect_non_operand_hard_regs, add_regs_to_insn_regno_info): Process unallocatable regs too. (lra_constraint_new_insn_uid_start): Remove. (lra): Add code for calling rematerialization sub-pass. * lra-int.h (lra_constraint_new_insn_uid_start): Remove. (lra_constrain_insn, lra_remat): New prototypes. (lra_eliminate_regs_1): Add parameter. * lra-lives.c (make_hard_regno_born, make_hard_regno_dead): Process unallocatable hard regs too. (process_bb_lives): Ditto. * lra-spills.c (remove_pseudos): Add argument to lra_eliminate_regs_1 call. * lra-eliminations.c (lra_eliminate_regs_1): Add parameter. Use it for sp offset calculation. (lra_eliminate_regs): Add argument for lra_eliminate_regs_1 call. (eliminate_regs_in_insn): Add parameter. Use it for sp offset calculation. (process_insn_for_elimination): Add argument for eliminate_regs_in_insn call. * lra-constraints.c (get_equiv_with_elimination): Add argument for lra_eliminate_regs_1 call. (process_addr_reg): Add parameter. Use it. (process_address_1): Ditto. Add argument for process_addr_reg call. (process_address): Ditto. (curr_insn_transform): Add parameter. Use it. Add argument for process_address calls. (lra_constrain_insn): New function. (lra_constraints): Add argument for curr_insn_transform call. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217458 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 43 ++ gcc/Makefile.in | 1 + gcc/common.opt | 4 + gcc/doc/invoke.texi | 11 +- gcc/doc/passes.texi | 8 +- gcc/lra-constraints.c | 188 +++++--- gcc/lra-eliminations.c | 97 ++-- gcc/lra-int.h | 10 +- gcc/lra-lives.c | 7 +- gcc/lra-remat.c | 1212 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/lra-spills.c | 2 +- gcc/lra.c | 107 +++-- gcc/opts.c | 1 + gcc/timevar.def | 1 + 14 files changed, 1531 insertions(+), 161 deletions(-) create mode 100644 gcc/lra-remat.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5b96094..b089cb8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,46 @@ +2014-11-12 Vladimir Makarov + + * common.opt (flra-remat): New. + * opts.c (default_options_table): Add entry for flra_remat. + * timevar_def (TV_LRA_REMAT): New. + * doc/invoke.texi (-flra-remat): Add description of the new + option. + * doc/passes.texi (-flra-remat): Remove lra-equivs.c and + lra-saves.c. Add lra-remat.c. + * Makefile.in (OBJS): Add lra-remat.o. + * lra-remat.c: New file. + * lra.c: Add info about the rematerialization pass in the top + comment. + (collect_non_operand_hard_regs, add_regs_to_insn_regno_info): + Process unallocatable regs too. + (lra_constraint_new_insn_uid_start): Remove. + (lra): Add code for calling rematerialization sub-pass. + * lra-int.h (lra_constraint_new_insn_uid_start): Remove. + (lra_constrain_insn, lra_remat): New prototypes. + (lra_eliminate_regs_1): Add parameter. + * lra-lives.c (make_hard_regno_born, make_hard_regno_dead): + Process unallocatable hard regs too. + (process_bb_lives): Ditto. + * lra-spills.c (remove_pseudos): Add argument to + lra_eliminate_regs_1 call. + * lra-eliminations.c (lra_eliminate_regs_1): Add parameter. Use it + for sp offset calculation. + (lra_eliminate_regs): Add argument for lra_eliminate_regs_1 call. + (eliminate_regs_in_insn): Add parameter. Use it for sp offset + calculation. + (process_insn_for_elimination): Add argument for + eliminate_regs_in_insn call. + * lra-constraints.c (get_equiv_with_elimination): Add argument + for lra_eliminate_regs_1 call. + (process_addr_reg): Add parameter. Use it. + (process_address_1): Ditto. Add argument for process_addr_reg + call. + (process_address): Ditto. + (curr_insn_transform): Add parameter. Use it. Add argument for + process_address calls. + (lra_constrain_insn): New function. + (lra_constraints): Add argument for curr_insn_transform call. + 2014-11-13 Manuel López-Ibáñez * opts-global.c (postpone_unknown_option_warning): Fix spelling. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 3d671c2..88c3f89 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1304,6 +1304,7 @@ OBJS = \ lra-constraints.o \ lra-eliminations.o \ lra-lives.o \ + lra-remat.o \ lra-spills.o \ lto-cgraph.o \ lto-streamer.o \ diff --git a/gcc/common.opt b/gcc/common.opt index 784a4b0..1a58cfb 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1551,6 +1551,10 @@ floop-optimize Common Ignore Does nothing. Preserved for backward compatibility. +flra-remat +Common Report Var(flag_lra_remat) Optimization +Do CFG-sensitive rematerialization in LRA + flto Common Enable link-time optimization. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 294e17d..12dbb27 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -392,7 +392,7 @@ Objective-C and Objective-C++ Dialects}. -fisolate-erroneous-paths-dereference -fisolate-erroneous-paths-attribute @gol -fivopts -fkeep-inline-functions -fkeep-static-consts -flive-range-shrinkage @gol -floop-block -floop-interchange -floop-strip-mine -floop-nest-optimize @gol --floop-parallelize-all -flto -flto-compression-level @gol +-floop-parallelize-all -flra-remat -flto -flto-compression-level @gol -flto-partition=@var{alg} -flto-report -flto-report-wpa -fmerge-all-constants @gol -fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol -fmove-loop-invariants -fno-branch-count-reg @gol @@ -7183,6 +7183,7 @@ also turns on the following optimization flags: -fipa-sra @gol -fipa-icf @gol -fisolate-erroneous-paths-dereference @gol +-flra-remat @gol -foptimize-sibling-calls @gol -foptimize-strlen @gol -fpartial-inlining @gol @@ -7811,6 +7812,14 @@ Control the verbosity of the dump file for the integrated register allocator. The default value is 5. If the value @var{n} is greater or equal to 10, the dump output is sent to stderr using the same format as @var{n} minus 10. +@item -flra-remat +@opindex fcaller-saves +Enable CFG-sensitive rematerialization in LRA. Instead of loading +values of spilled pseudos, LRA tries to rematerialize (recalculate) +values if it is profitable. + +Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}. + @item -fdelayed-branch @opindex fdelayed-branch If supported for the target machine, attempt to reorder instructions diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index 0ab8f21..090eb0a 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -911,10 +911,10 @@ Source files are @file{reload.c} and @file{reload1.c}, plus the header This pass is a modern replacement of the reload pass. Source files are @file{lra.c}, @file{lra-assign.c}, @file{lra-coalesce.c}, @file{lra-constraints.c}, @file{lra-eliminations.c}, -@file{lra-equivs.c}, @file{lra-lives.c}, @file{lra-saves.c}, -@file{lra-spills.c}, the header @file{lra-int.h} used for -communication between them, and the header @file{lra.h} used for -communication between LRA and the rest of compiler. +@file{lra-lives.c}, @file{lra-remat.c}, @file{lra-spills.c}, the +header @file{lra-int.h} used for communication between them, and the +header @file{lra.h} used for communication between LRA and the rest of +compiler. Unlike the reload pass, intermediate LRA decisions are reflected in RTL as much as possible. This reduces the number of target-dependent diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index 7353354..a67bf8a 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -506,7 +506,8 @@ get_equiv_with_elimination (rtx x, rtx_insn *insn) if (x == res || CONSTANT_P (res)) return res; - return lra_eliminate_regs_1 (insn, res, GET_MODE (res), false, false, true); + return lra_eliminate_regs_1 (insn, res, GET_MODE (res), + 0, false, false, true); } /* Set up curr_operand_mode. */ @@ -1243,12 +1244,16 @@ static bool no_input_reloads_p, no_output_reloads_p; insn. */ static int curr_swapped; -/* Arrange for address element *LOC to be a register of class CL. - Add any input reloads to list BEFORE. AFTER is nonnull if *LOC is an - automodified value; handle that case by adding the required output - reloads to list AFTER. Return true if the RTL was changed. */ +/* if CHECK_ONLY_P is false, arrange for address element *LOC to be a + register of class CL. Add any input reloads to list BEFORE. AFTER + is nonnull if *LOC is an automodified value; handle that case by + adding the required output reloads to list AFTER. Return true if + the RTL was changed. + + if CHECK_ONLY_P is true, check that the *LOC is a correct address + register. Return false if the address register is correct. */ static bool -process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after, +process_addr_reg (rtx *loc, bool check_only_p, rtx_insn **before, rtx_insn **after, enum reg_class cl) { int regno; @@ -1265,6 +1270,8 @@ process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after, mode = GET_MODE (reg); if (! REG_P (reg)) { + if (check_only_p) + return true; /* Always reload memory in an address even if the target supports such addresses. */ new_reg = lra_create_new_reg_with_unique_value (mode, reg, cl, "address"); @@ -1274,7 +1281,8 @@ process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after, { regno = REGNO (reg); rclass = get_reg_class (regno); - if ((*loc = get_equiv_with_elimination (reg, curr_insn)) != reg) + if (! check_only_p + && (*loc = get_equiv_with_elimination (reg, curr_insn)) != reg) { if (lra_dump_file != NULL) { @@ -1288,6 +1296,8 @@ process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after, } if (*loc != reg || ! in_class_p (reg, cl, &new_class)) { + if (check_only_p) + return true; reg = *loc; if (get_reload_reg (after == NULL ? OP_IN : OP_INOUT, mode, reg, cl, subreg_p, "address", &new_reg)) @@ -1295,6 +1305,8 @@ process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after, } else if (new_class != NO_REGS && rclass != new_class) { + if (check_only_p) + return true; lra_change_class (regno, new_class, " Change to", true); return false; } @@ -2740,8 +2752,9 @@ equiv_address_substitution (struct address_info *ad) return change_p; } -/* Major function to make reloads for an address in operand NOP. - The supported cases are: +/* Major function to make reloads for an address in operand NOP or + check its correctness (If CHECK_ONLY_P is true). The supported + cases are: 1) an address that existed before LRA started, at which point it must have been valid. These addresses are subject to elimination @@ -2761,18 +2774,19 @@ equiv_address_substitution (struct address_info *ad) address. Return true for any RTL change. The function is a helper function which does not produce all - transformations which can be necessary. It does just basic steps. - To do all necessary transformations use function - process_address. */ + transformations (when CHECK_ONLY_P is false) which can be + necessary. It does just basic steps. To do all necessary + transformations use function process_address. */ static bool -process_address_1 (int nop, rtx_insn **before, rtx_insn **after) +process_address_1 (int nop, bool check_only_p, + rtx_insn **before, rtx_insn **after) { struct address_info ad; rtx new_reg; rtx op = *curr_id->operand_loc[nop]; const char *constraint = curr_static_id->operand[nop].constraint; enum constraint_num cn = lookup_constraint (constraint); - bool change_p; + bool change_p = false; if (insn_extra_address_constraint (cn)) decompose_lea_address (&ad, curr_id->operand_loc[nop]); @@ -2783,10 +2797,11 @@ process_address_1 (int nop, rtx_insn **before, rtx_insn **after) decompose_mem_address (&ad, SUBREG_REG (op)); else return false; - change_p = equiv_address_substitution (&ad); + if (! check_only_p) + change_p = equiv_address_substitution (&ad); if (ad.base_term != NULL && (process_addr_reg - (ad.base_term, before, + (ad.base_term, check_only_p, before, (ad.autoinc_p && !(REG_P (*ad.base_term) && find_regno_note (curr_insn, REG_DEAD, @@ -2800,7 +2815,8 @@ process_address_1 (int nop, rtx_insn **before, rtx_insn **after) *ad.base_term2 = *ad.base_term; } if (ad.index_term != NULL - && process_addr_reg (ad.index_term, before, NULL, INDEX_REG_CLASS)) + && process_addr_reg (ad.index_term, check_only_p, + before, NULL, INDEX_REG_CLASS)) change_p = true; /* Target hooks sometimes don't treat extra-constraint addresses as @@ -2809,6 +2825,9 @@ process_address_1 (int nop, rtx_insn **before, rtx_insn **after) && satisfies_address_constraint_p (&ad, cn)) return change_p; + if (check_only_p) + return change_p; + /* There are three cases where the shape of *AD.INNER may now be invalid: 1) the original address was valid, but either elimination or @@ -2977,15 +2996,24 @@ process_address_1 (int nop, rtx_insn **before, rtx_insn **after) return true; } -/* Do address reloads until it is necessary. Use process_address_1 as - a helper function. Return true for any RTL changes. */ +/* If CHECK_ONLY_P is false, do address reloads until it is necessary. + Use process_address_1 as a helper function. Return true for any + RTL changes. + + If CHECK_ONLY_P is true, just check address correctness. Return + false if the address correct. */ static bool -process_address (int nop, rtx_insn **before, rtx_insn **after) +process_address (int nop, bool check_only_p, + rtx_insn **before, rtx_insn **after) { bool res = false; - while (process_address_1 (nop, before, after)) - res = true; + while (process_address_1 (nop, check_only_p, before, after)) + { + if (check_only_p) + return true; + res = true; + } return res; } @@ -3157,9 +3185,15 @@ swap_operands (int nop) model can be changed in future. Make commutative operand exchange if it is chosen. - Return true if some RTL changes happened during function call. */ + if CHECK_ONLY_P is false, do RTL changes to satisfy the + constraints. Return true if any change happened during function + call. + + If CHECK_ONLY_P is true then don't do any transformation. Just + check that the insn satisfies all constraints. If the insn does + not satisfy any constraint, return true. */ static bool -curr_insn_transform (void) +curr_insn_transform (bool check_only_p) { int i, j, k; int n_operands; @@ -3226,50 +3260,53 @@ curr_insn_transform (void) curr_swapped = false; goal_alt_swapped = false; - /* Make equivalence substitution and memory subreg elimination - before address processing because an address legitimacy can - depend on memory mode. */ - for (i = 0; i < n_operands; i++) - { - rtx op = *curr_id->operand_loc[i]; - rtx subst, old = op; - bool op_change_p = false; - - if (GET_CODE (old) == SUBREG) - old = SUBREG_REG (old); - subst = get_equiv_with_elimination (old, curr_insn); - if (subst != old) - { - subst = copy_rtx (subst); - lra_assert (REG_P (old)); - if (GET_CODE (op) == SUBREG) - SUBREG_REG (op) = subst; - else - *curr_id->operand_loc[i] = subst; - if (lra_dump_file != NULL) - { - fprintf (lra_dump_file, - "Changing pseudo %d in operand %i of insn %u on equiv ", - REGNO (old), i, INSN_UID (curr_insn)); - dump_value_slim (lra_dump_file, subst, 1); + if (! check_only_p) + /* Make equivalence substitution and memory subreg elimination + before address processing because an address legitimacy can + depend on memory mode. */ + for (i = 0; i < n_operands; i++) + { + rtx op = *curr_id->operand_loc[i]; + rtx subst, old = op; + bool op_change_p = false; + + if (GET_CODE (old) == SUBREG) + old = SUBREG_REG (old); + subst = get_equiv_with_elimination (old, curr_insn); + if (subst != old) + { + subst = copy_rtx (subst); + lra_assert (REG_P (old)); + if (GET_CODE (op) == SUBREG) + SUBREG_REG (op) = subst; + else + *curr_id->operand_loc[i] = subst; + if (lra_dump_file != NULL) + { + fprintf (lra_dump_file, + "Changing pseudo %d in operand %i of insn %u on equiv ", + REGNO (old), i, INSN_UID (curr_insn)); + dump_value_slim (lra_dump_file, subst, 1); fprintf (lra_dump_file, "\n"); - } - op_change_p = change_p = true; - } - if (simplify_operand_subreg (i, GET_MODE (old)) || op_change_p) - { - change_p = true; - lra_update_dup (curr_id, i); - } - } + } + op_change_p = change_p = true; + } + if (simplify_operand_subreg (i, GET_MODE (old)) || op_change_p) + { + change_p = true; + lra_update_dup (curr_id, i); + } + } /* Reload address registers and displacements. We do it before finding an alternative because of memory constraints. */ before = after = NULL; for (i = 0; i < n_operands; i++) if (! curr_static_id->operand[i].is_operator - && process_address (i, &before, &after)) + && process_address (i, check_only_p, &before, &after)) { + if (check_only_p) + return true; change_p = true; lra_update_dup (curr_id, i); } @@ -3279,13 +3316,13 @@ curr_insn_transform (void) we chose previously may no longer be valid. */ lra_set_used_insn_alternative (curr_insn, -1); - if (curr_insn_set != NULL_RTX + if (! check_only_p && curr_insn_set != NULL_RTX && check_and_process_move (&change_p, &sec_mem_p)) return change_p; try_swapped: - reused_alternative_num = curr_id->used_insn_alternative; + reused_alternative_num = check_only_p ? -1 : curr_id->used_insn_alternative; if (lra_dump_file != NULL && reused_alternative_num >= 0) fprintf (lra_dump_file, "Reusing alternative %d for insn #%u\n", reused_alternative_num, INSN_UID (curr_insn)); @@ -3293,6 +3330,9 @@ curr_insn_transform (void) if (process_alt_operands (reused_alternative_num)) alt_p = true; + if (check_only_p) + return ! alt_p || best_losers != 0; + /* If insn is commutative (it's safe to exchange a certain pair of operands) then we need to try each alternative twice, the second time matching those two operands as if we had exchanged them. To @@ -3522,7 +3562,7 @@ curr_insn_transform (void) *curr_id->operand_loc[i] = tem; lra_update_dup (curr_id, i); - process_address (i, &before, &after); + process_address (i, false, &before, &after); /* If the alternative accepts constant pool refs directly there will be no reload needed at all. */ @@ -3746,6 +3786,26 @@ curr_insn_transform (void) return change_p; } +/* Return true if INSN satisfies all constraints. In other words, no + reload insns are needed. */ +bool +lra_constrain_insn (rtx_insn *insn) +{ + int saved_new_regno_start = new_regno_start; + int saved_new_insn_uid_start = new_insn_uid_start; + bool change_p; + + curr_insn = insn; + curr_id = lra_get_insn_recog_data (curr_insn); + curr_static_id = curr_id->insn_static_data; + new_insn_uid_start = get_max_uid (); + new_regno_start = max_reg_num (); + change_p = curr_insn_transform (true); + new_regno_start = saved_new_regno_start; + new_insn_uid_start = saved_new_insn_uid_start; + return ! change_p; +} + /* Return true if X is in LIST. */ static bool in_list_p (rtx x, rtx list) @@ -4238,7 +4298,7 @@ lra_constraints (bool first_p) curr_static_id = curr_id->insn_static_data; init_curr_insn_input_reloads (); init_curr_operand_mode (); - if (curr_insn_transform ()) + if (curr_insn_transform (false)) changed_p = true; /* Check non-transformed insns too for equiv change as USE or CLOBBER don't need reloads but can contain pseudos diff --git a/gcc/lra-eliminations.c b/gcc/lra-eliminations.c index e742eba..8ab0222 100644 --- a/gcc/lra-eliminations.c +++ b/gcc/lra-eliminations.c @@ -298,7 +298,8 @@ get_elimination (rtx reg) a change in the offset between the eliminable register and its substitution if UPDATE_P, or the full offset if FULL_P, or otherwise zero. If FULL_P, we also use the SP offsets for - elimination to SP. + elimination to SP. If UPDATE_P, use UPDATE_SP_OFFSET for updating + offsets of register elimnable to SP. MEM_MODE is the mode of an enclosing MEM. We need this to know how much to adjust a register for, e.g., PRE_DEC. Also, if we are @@ -311,7 +312,8 @@ get_elimination (rtx reg) sp offset. */ rtx lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, - bool subst_p, bool update_p, bool full_p) + bool subst_p, bool update_p, + HOST_WIDE_INT update_sp_offset, bool full_p) { enum rtx_code code = GET_CODE (x); struct lra_elim_table *ep; @@ -346,7 +348,10 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, rtx to = subst_p ? ep->to_rtx : ep->from_rtx; if (update_p) - return plus_constant (Pmode, to, ep->offset - ep->previous_offset); + return plus_constant (Pmode, to, + ep->offset - ep->previous_offset + + (ep->to_rtx == stack_pointer_rtx + ? update_sp_offset : 0)); else if (full_p) return plus_constant (Pmode, to, ep->offset @@ -373,7 +378,10 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, return gen_rtx_PLUS (Pmode, to, XEXP (x, 1)); offset = (update_p - ? ep->offset - ep->previous_offset : ep->offset); + ? ep->offset - ep->previous_offset + + (ep->to_rtx == stack_pointer_rtx + ? update_sp_offset : 0) + : ep->offset); if (full_p && insn != NULL_RTX && ep->to_rtx == stack_pointer_rtx) offset -= lra_get_insn_recog_data (insn)->sp_offset; if (CONST_INT_P (XEXP (x, 1)) @@ -402,9 +410,11 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, { rtx new0 = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode, - subst_p, update_p, full_p); + subst_p, update_p, + update_sp_offset, full_p); rtx new1 = lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode, - subst_p, update_p, full_p); + subst_p, update_p, + update_sp_offset, full_p); if (new0 != XEXP (x, 0) || new1 != XEXP (x, 1)) return form_sum (new0, new1); @@ -423,11 +433,12 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, rtx to = subst_p ? ep->to_rtx : ep->from_rtx; if (update_p) - return - plus_constant (Pmode, - gen_rtx_MULT (Pmode, to, XEXP (x, 1)), - (ep->offset - ep->previous_offset) - * INTVAL (XEXP (x, 1))); + return plus_constant (Pmode, + gen_rtx_MULT (Pmode, to, XEXP (x, 1)), + (ep->offset - ep->previous_offset + + (ep->to_rtx == stack_pointer_rtx + ? update_sp_offset : 0)) + * INTVAL (XEXP (x, 1))); else if (full_p) { HOST_WIDE_INT offset = ep->offset; @@ -459,10 +470,12 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, case LE: case LT: case LEU: case LTU: { rtx new0 = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode, - subst_p, update_p, full_p); + subst_p, update_p, + update_sp_offset, full_p); rtx new1 = XEXP (x, 1) ? lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode, - subst_p, update_p, full_p) : 0; + subst_p, update_p, + update_sp_offset, full_p) : 0; if (new0 != XEXP (x, 0) || new1 != XEXP (x, 1)) return gen_rtx_fmt_ee (code, GET_MODE (x), new0, new1); @@ -475,7 +488,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, if (XEXP (x, 0)) { new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode, - subst_p, update_p, full_p); + subst_p, update_p, + update_sp_offset, full_p); if (new_rtx != XEXP (x, 0)) { /* If this is a REG_DEAD note, it is not valid anymore. @@ -484,7 +498,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, if (REG_NOTE_KIND (x) == REG_DEAD) return (XEXP (x, 1) ? lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode, - subst_p, update_p, full_p) + subst_p, update_p, + update_sp_offset, full_p) : NULL_RTX); x = alloc_reg_note (REG_NOTE_KIND (x), new_rtx, XEXP (x, 1)); @@ -501,7 +516,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, if (XEXP (x, 1)) { new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode, - subst_p, update_p, full_p); + subst_p, update_p, + update_sp_offset, full_p); if (new_rtx != XEXP (x, 1)) return gen_rtx_fmt_ee (GET_CODE (x), GET_MODE (x), @@ -528,8 +544,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, && XEXP (XEXP (x, 1), 0) == XEXP (x, 0)) { rtx new_rtx = lra_eliminate_regs_1 (insn, XEXP (XEXP (x, 1), 1), - mem_mode, - subst_p, update_p, full_p); + mem_mode, subst_p, update_p, + update_sp_offset, full_p); if (new_rtx != XEXP (XEXP (x, 1), 1)) return gen_rtx_fmt_ee (code, GET_MODE (x), XEXP (x, 0), @@ -553,14 +569,16 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, case PARITY: case BSWAP: new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode, - subst_p, update_p, full_p); + subst_p, update_p, + update_sp_offset, full_p); if (new_rtx != XEXP (x, 0)) return gen_rtx_fmt_e (code, GET_MODE (x), new_rtx); return x; case SUBREG: new_rtx = lra_eliminate_regs_1 (insn, SUBREG_REG (x), mem_mode, - subst_p, update_p, full_p); + subst_p, update_p, + update_sp_offset, full_p); if (new_rtx != SUBREG_REG (x)) { @@ -598,12 +616,12 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, replace_equiv_address_nv (x, lra_eliminate_regs_1 (insn, XEXP (x, 0), GET_MODE (x), - subst_p, update_p, full_p)); + subst_p, update_p, update_sp_offset, full_p)); case USE: /* Handle insn_list USE that a call to a pure function may generate. */ new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 0), VOIDmode, - subst_p, update_p, full_p); + subst_p, update_p, update_sp_offset, full_p); if (new_rtx != XEXP (x, 0)) return gen_rtx_USE (GET_MODE (x), new_rtx); return x; @@ -624,7 +642,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, if (*fmt == 'e') { new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, i), mem_mode, - subst_p, update_p, full_p); + subst_p, update_p, + update_sp_offset, full_p); if (new_rtx != XEXP (x, i) && ! copied) { x = shallow_copy_rtx (x); @@ -638,7 +657,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, for (j = 0; j < XVECLEN (x, i); j++) { new_rtx = lra_eliminate_regs_1 (insn, XVECEXP (x, i, j), mem_mode, - subst_p, update_p, full_p); + subst_p, update_p, + update_sp_offset, full_p); if (new_rtx != XVECEXP (x, i, j) && ! copied_vec) { rtvec new_v = gen_rtvec_v (XVECLEN (x, i), @@ -665,7 +685,7 @@ rtx lra_eliminate_regs (rtx x, machine_mode mem_mode, rtx insn ATTRIBUTE_UNUSED) { - return lra_eliminate_regs_1 (NULL, x, mem_mode, true, false, true); + return lra_eliminate_regs_1 (NULL, x, mem_mode, true, false, 0, true); } /* Stack pointer offset before the current insn relative to one at the @@ -850,13 +870,15 @@ remove_reg_equal_offset_note (rtx insn, rtx what) If REPLACE_P is false, just update the offsets while keeping the base register the same. If FIRST_P, use the sp offset for - elimination to sp. Attach the note about used elimination for - insns setting frame pointer to update elimination easy (without - parsing already generated elimination insns to find offset - previously used) in future. */ + elimination to sp. Otherwise, use UPDATE_SP_OFFSET for this. + Attach the note about used elimination for insns setting frame + pointer to update elimination easy (without parsing already + generated elimination insns to find offset previously used) in + future. */ -static void -eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p) +void +eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p, + HOST_WIDE_INT update_sp_offset) { int icode = recog_memoized (insn); rtx old_set = single_set (insn); @@ -986,8 +1008,13 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p) if (! replace_p) { offset += (ep->offset - ep->previous_offset); - if (first_p && ep->to_rtx == stack_pointer_rtx) - offset -= lra_get_insn_recog_data (insn)->sp_offset; + if (ep->to_rtx == stack_pointer_rtx) + { + if (first_p) + offset -= lra_get_insn_recog_data (insn)->sp_offset; + else + offset += update_sp_offset; + } offset = trunc_int_for_mode (offset, GET_MODE (plus_cst_src)); } @@ -1061,7 +1088,7 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p) substed_operand[i] = lra_eliminate_regs_1 (insn, *id->operand_loc[i], VOIDmode, replace_p, ! replace_p && ! first_p, - first_p); + update_sp_offset, first_p); if (substed_operand[i] != orig_operand[i]) validate_p = true; } @@ -1349,7 +1376,7 @@ lra_eliminate_reg_if_possible (rtx *loc) static void process_insn_for_elimination (rtx_insn *insn, bool final_p, bool first_p) { - eliminate_regs_in_insn (insn, final_p, first_p); + eliminate_regs_in_insn (insn, final_p, first_p, 0); if (! final_p) { /* Check that insn changed its code. This is a case when a move diff --git a/gcc/lra-int.h b/gcc/lra-int.h index 5355ebf..92dc40c 100644 --- a/gcc/lra-int.h +++ b/gcc/lra-int.h @@ -328,7 +328,6 @@ extern bitmap_head lra_inheritance_pseudos; extern bitmap_head lra_split_regs; extern bitmap_head lra_subreg_reload_pseudos; extern bitmap_head lra_optional_reload_pseudos; -extern int lra_constraint_new_insn_uid_start; /* lra-constraints.c: */ @@ -339,6 +338,7 @@ extern int lra_constraint_iter; extern bool lra_risky_transformations_p; extern int lra_inheritance_iter; extern int lra_undo_inheritance_iter; +extern bool lra_constrain_insn (rtx_insn *); extern bool lra_constraints (bool); extern void lra_constraints_init (void); extern void lra_constraints_finish (void); @@ -389,13 +389,17 @@ extern bool lra_need_for_spills_p (void); extern void lra_spill (void); extern void lra_final_code_change (void); +/* lra-remat.c: */ + +extern bool lra_remat (void); /* lra-elimination.c: */ extern void lra_debug_elim_table (void); extern int lra_get_elimination_hard_regno (int); -extern rtx lra_eliminate_regs_1 (rtx_insn *, rtx, machine_mode, bool, - bool, bool); +extern rtx lra_eliminate_regs_1 (rtx_insn *, rtx, machine_mode, + bool, bool, HOST_WIDE_INT, bool); +extern void eliminate_regs_in_insn (rtx_insn *insn, bool, bool, HOST_WIDE_INT); extern void lra_eliminate (bool, bool); extern void lra_eliminate_reg_if_possible (rtx *); diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c index 03def82..5ea5ecd 100644 --- a/gcc/lra-lives.c +++ b/gcc/lra-lives.c @@ -252,8 +252,7 @@ make_hard_regno_born (int regno) unsigned int i; lra_assert (regno < FIRST_PSEUDO_REGISTER); - if (TEST_HARD_REG_BIT (lra_no_alloc_regs, regno) - || TEST_HARD_REG_BIT (hard_regs_live, regno)) + if (TEST_HARD_REG_BIT (hard_regs_live, regno)) return; SET_HARD_REG_BIT (hard_regs_live, regno); sparseset_set_bit (start_living, regno); @@ -267,8 +266,7 @@ static void make_hard_regno_dead (int regno) { lra_assert (regno < FIRST_PSEUDO_REGISTER); - if (TEST_HARD_REG_BIT (lra_no_alloc_regs, regno) - || ! TEST_HARD_REG_BIT (hard_regs_live, regno)) + if (! TEST_HARD_REG_BIT (hard_regs_live, regno)) return; sparseset_set_bit (start_dying, regno); CLEAR_HARD_REG_BIT (hard_regs_live, regno); @@ -662,7 +660,6 @@ process_bb_lives (basic_block bb, int &curr_point) sparseset_clear (pseudos_live_through_setjumps); REG_SET_TO_HARD_REG_SET (hard_regs_live, reg_live_out); AND_COMPL_HARD_REG_SET (hard_regs_live, eliminable_regset); - AND_COMPL_HARD_REG_SET (hard_regs_live, lra_no_alloc_regs); EXECUTE_IF_SET_IN_BITMAP (reg_live_out, FIRST_PSEUDO_REGISTER, j, bi) mark_pseudo_live (j, curr_point); diff --git a/gcc/lra-remat.c b/gcc/lra-remat.c new file mode 100644 index 0000000..51a94ca --- /dev/null +++ b/gcc/lra-remat.c @@ -0,0 +1,1212 @@ +/* Rematerialize pseudos values. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Vladimir Makarov . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This code objective is to rematerialize spilled pseudo values. To + do this we calculate available insn candidates. The candidate is + available at some point if there is dominated set of insns with the + same pattern, the insn inputs are not dying or modified on any path + from the set, the outputs are not modified. + + The insns containing memory or spilled pseudos (except for the + rematerialized pseudo) are not considered as such insns are not + profitable in comparison with regular loads of spilled pseudo + values. That simplifies the implementation as we don't need to + deal with memory aliasing. + + To speed up available candidate calculation, we calculate partially + available candidates first and use them for initialization of the + availability. That is because (partial) availability sets are + sparse. + + The rematerialization sub-pass could be improved further in the + following ways: + + o We could make longer live ranges of inputs in the + rematerialization candidates if their hard registers are not used + for other purposes. This could be complicated if we need to + update BB live info information as LRA does not use + DF-infrastructure for compile-time reasons. This problem could + be overcome if constrain making live ranges longer only in BB/EBB + scope. + o We could use cost-based decision to choose rematerialization insn + (currently all insns without memory is can be used). + o We could use other free hard regs for unused output pseudos in + rematerialization candidates although such cases probably will + be very rare. */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "hard-reg-set.h" +#include "rtl.h" +#include "rtl-error.h" +#include "tm_p.h" +#include "target.h" +#include "insn-config.h" +#include "recog.h" +#include "output.h" +#include "regs.h" +#include "hashtab.h" +#include "hash-set.h" +#include "vec.h" +#include "machmode.h" +#include "input.h" +#include "function.h" +#include "expr.h" +#include "predict.h" +#include "dominance.h" +#include "cfg.h" +#include "basic-block.h" +#include "except.h" +#include "df.h" +#include "ira.h" +#include "sparseset.h" +#include "params.h" +#include "df.h" +#include "lra-int.h" + +/* Number of candidates for rematerialization. */ +static unsigned int cands_num; + +/* The following is used for representation of call_used_reg_set in + form array whose elements are hard register numbers with nonzero bit + in CALL_USED_REG_SET. */ +static int call_used_regs_arr_len; +static int call_used_regs_arr[FIRST_PSEUDO_REGISTER]; + +/* Bitmap used for different calculations. */ +static bitmap_head temp_bitmap; + +typedef struct cand *cand_t; +typedef const struct cand *const_cand_t; + +/* Insn candidates for rematerialization. The candidate insn should + have the following properies: + o no any memory (as access to memory is non-profitable) + o no INOUT regs (it means no non-paradoxical subreg of output reg) + o one output spilled pseudo (or reload pseudo of a spilled pseudo) + o all other pseudos are with assigned hard regs. */ +struct cand +{ + /* Index of the candidates in all_cands. */ + int index; + /* The candidate insn. */ + rtx_insn *insn; + /* Insn pseudo regno for rematerialization. */ + int regno; + /* Non-negative if a reload pseudo is in the insn instead of the + pseudo for rematerialization. */ + int reload_regno; + /* Number of the operand containing the regno or its reload + regno. */ + int nop; + /* Next candidate for the same regno. */ + cand_t next_regno_cand; +}; + +/* Vector containing all candidates. */ +static vec all_cands; +/* Map: insn -> candidate representing it. It is null if the insn can + not be used for rematerialization. */ +static cand_t *insn_to_cand; + +/* Map regno -> candidates can be used for the regno + rematerialization. */ +static cand_t *regno_cands; + +/* Data about basic blocks used for the rematerialization + sub-pass. */ +struct remat_bb_data +{ + /* Basic block about which the below data are. */ + basic_block bb; + /* Registers changed in the basic block: */ + bitmap_head changed_regs; + /* Registers becoming dead in the BB. */ + bitmap_head dead_regs; + /* Cands present in the BB whose in/out regs are not changed after + the cands occurence and are not dead (except the reload + regno). */ + bitmap_head gen_cands; + bitmap_head livein_cands; /* cands whose inputs live at the BB start. */ + bitmap_head pavin_cands; /* cands partially available at BB entry. */ + bitmap_head pavout_cands; /* cands partially available at BB exit. */ + bitmap_head avin_cands; /* cands available at the entry of the BB. */ + bitmap_head avout_cands; /* cands available at the exit of the BB. */ +}; + +/* Array for all BB data. Indexed by the corresponding BB index. */ +typedef struct remat_bb_data *remat_bb_data_t; + +/* Basic blocks for data flow problems -- all bocks except the special + ones. */ +static bitmap_head all_blocks; + +/* All basic block data are referred through the following array. */ +static remat_bb_data_t remat_bb_data; + +/* Two small functions for access to the bb data. */ +static inline remat_bb_data_t +get_remat_bb_data (basic_block bb) +{ + return &remat_bb_data[(bb)->index]; +} + +static inline remat_bb_data_t +get_remat_bb_data_by_index (int index) +{ + return &remat_bb_data[index]; +} + + + +/* Recursive hash function for RTL X. */ +static hashval_t +rtx_hash (rtx x) +{ + int i, j; + enum rtx_code code; + const char *fmt; + hashval_t val = 0; + + if (x == 0) + return val; + + code = GET_CODE (x); + val += (int) code + 4095; + + /* Some RTL can be compared nonrecursively. */ + switch (code) + { + case REG: + return val + REGNO (x); + + case LABEL_REF: + return iterative_hash_object (XEXP (x, 0), val); + + case SYMBOL_REF: + return iterative_hash_object (XSTR (x, 0), val); + + case SCRATCH: + case CONST_DOUBLE: + case CONST_INT: + case CONST_VECTOR: + return val; + + default: + break; + } + + /* Hash the elements. */ + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + switch (fmt[i]) + { + case 'w': + val += XWINT (x, i); + break; + + case 'n': + case 'i': + val += XINT (x, i); + break; + + case 'V': + case 'E': + val += XVECLEN (x, i); + + for (j = 0; j < XVECLEN (x, i); j++) + val += rtx_hash (XVECEXP (x, i, j)); + break; + + case 'e': + val += rtx_hash (XEXP (x, i)); + break; + + case 'S': + case 's': + val += htab_hash_string (XSTR (x, i)); + break; + + case 'u': + case '0': + case 't': + break; + + /* It is believed that rtx's at this level will never + contain anything but integers and other rtx's, except for + within LABEL_REFs and SYMBOL_REFs. */ + default: + abort (); + } + } + return val; +} + + + +/* Hash table for the candidates. Different insns (e.g. structurally + the same insns or even insns with different unused output regs) can + be represented by the same candidate in the table. */ +static htab_t cand_table; + +/* Hash function for candidate CAND. */ +static hashval_t +cand_hash (const void *cand) +{ + const_cand_t c = (const_cand_t) cand; + lra_insn_recog_data_t id = lra_get_insn_recog_data (c->insn); + struct lra_static_insn_data *static_id = id->insn_static_data; + int nops = static_id->n_operands; + hashval_t hash = 0; + + for (int i = 0; i < nops; i++) + if (i == c->nop) + hash = iterative_hash_object (c->regno, hash); + else if (static_id->operand[i].type == OP_IN) + hash = iterative_hash_object (*id->operand_loc[i], hash); + return hash; +} + +/* Equal function for candidates CAND1 and CAND2. They are equal if + the corresponding candidate insns have the same code, the same + regno for rematerialization, the same input operands. */ +static int +cand_eq_p (const void *cand1, const void *cand2) +{ + const_cand_t c1 = (const_cand_t) cand1; + const_cand_t c2 = (const_cand_t) cand2; + lra_insn_recog_data_t id1 = lra_get_insn_recog_data (c1->insn); + lra_insn_recog_data_t id2 = lra_get_insn_recog_data (c2->insn); + struct lra_static_insn_data *static_id1 = id1->insn_static_data; + int nops = static_id1->n_operands; + + if (c1->regno != c2->regno + || INSN_CODE (c1->insn) < 0 + || INSN_CODE (c1->insn) != INSN_CODE (c2->insn)) + return false; + gcc_assert (c1->nop == c2->nop); + for (int i = 0; i < nops; i++) + if (i != c1->nop && static_id1->operand[i].type == OP_IN + && *id1->operand_loc[i] != *id2->operand_loc[i]) + return false; + return true; +} + +/* Insert candidate CAND into the table if it is not there yet. + Return candidate which is in the table. */ +static cand_t +insert_cand (cand_t cand) +{ + void **entry_ptr; + + entry_ptr = htab_find_slot (cand_table, cand, INSERT); + if (*entry_ptr == NULL) + *entry_ptr = (void *) cand; + return (cand_t) *entry_ptr; +} + +/* Free candidate CAND memory. */ +static void +free_cand (void *cand) +{ + free (cand); +} + +/* Initiate the candidate table. */ +static void +initiate_cand_table (void) +{ + cand_table = htab_create (8000, cand_hash, cand_eq_p, + (htab_del) free_cand); +} + +/* Finish the candidate table. */ +static void +finish_cand_table (void) +{ + htab_delete (cand_table); +} + + + +/* Return true if X contains memory or UNSPEC. We can not just check + insn operands as memory or unspec might be not an operand itself + but contain an operand. Insn with memory access is not profitable + for rematerialization. Rematerialization of UNSPEC might result in + wrong code generation as the UNPEC effect is unknown + (e.g. generating a label). */ +static bool +bad_for_rematerialization_p (rtx x) +{ + int i, j; + const char *fmt; + enum rtx_code code; + + if (MEM_P (x) || GET_CODE (x) == UNSPEC) + return true; + code = GET_CODE (x); + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + if (bad_for_rematerialization_p (XEXP (x, i))) + return true; + } + else if (fmt[i] == 'E') + { + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if (bad_for_rematerialization_p (XVECEXP (x, i, j))) + return true; + } + } + return false; +} + +/* If INSN can not be used for rematerialization, return negative + value. If INSN can be considered as a candidate for + rematerialization, return value which is the operand number of the + pseudo for which the insn can be used for rematerialization. Here + we consider the insns without any memory, spilled pseudo (except + for the rematerialization pseudo), or dying or unused regs. */ +static int +operand_to_remat (rtx_insn *insn) +{ + lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); + struct lra_static_insn_data *static_id = id->insn_static_data; + struct lra_insn_reg *reg, *found_reg = NULL; + + /* First find a pseudo which can be rematerialized. */ + for (reg = id->regs; reg != NULL; reg = reg->next) + if (reg->type == OP_OUT && ! reg->subreg_p + && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL) + { + /* We permits only one spilled reg. */ + if (found_reg != NULL) + return -1; + found_reg = reg; + } + if (found_reg == NULL) + return -1; + if (found_reg->regno < FIRST_PSEUDO_REGISTER) + return -1; + if (bad_for_rematerialization_p (PATTERN (insn))) + return -1; + /* Check the other regs are not spilled. */ + for (reg = id->regs; reg != NULL; reg = reg->next) + if (found_reg == reg) + continue; + else if (reg->type == OP_INOUT) + return -1; + else if (reg->regno >= FIRST_PSEUDO_REGISTER + && reg_renumber[reg->regno] < 0) + /* Another spilled reg. */ + return -1; + else if (reg->type == OP_IN) + { + if (find_regno_note (insn, REG_DEAD, reg->regno) != NULL) + /* We don't want to make live ranges longer. */ + return -1; + /* Check that there is no output reg as the input one. */ + for (struct lra_insn_reg *reg2 = id->regs; + reg2 != NULL; + reg2 = reg2->next) + if (reg2->type == OP_OUT && reg->regno == reg2->regno) + return -1; + } + /* Find the rematerialization operand. */ + int nop = static_id->n_operands; + for (int i = 0; i < nop; i++) + if (REG_P (*id->operand_loc[i]) + && (int) REGNO (*id->operand_loc[i]) == found_reg->regno) + return i; + return -1; +} + +/* Create candidate for INSN with rematerialization operand NOP and + REGNO. Insert the candidate into the table and set up the + corresponding INSN_TO_CAND element. */ +static void +create_cand (rtx_insn *insn, int nop, int regno) +{ + lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); + rtx reg = *id->operand_loc[nop]; + gcc_assert (REG_P (reg)); + int op_regno = REGNO (reg); + gcc_assert (op_regno >= FIRST_PSEUDO_REGISTER); + cand_t cand = XNEW (struct cand); + cand->insn = insn; + cand->nop = nop; + cand->regno = regno; + cand->reload_regno = op_regno == regno ? -1 : op_regno; + gcc_assert (cand->regno >= 0); + cand_t cand_in_table = insert_cand (cand); + insn_to_cand[INSN_UID (insn)] = cand_in_table; + if (cand != cand_in_table) + free (cand); + else + { + /* A new cand. */ + cand->index = all_cands.length (); + all_cands.safe_push (cand); + cand->next_regno_cand = regno_cands[cand->regno]; + regno_cands[cand->regno] = cand; + } +} + +/* Create rematerialization candidates (inserting them into the + table). */ +static void +create_cands (void) +{ + rtx_insn *insn; + struct potential_cand + { + rtx_insn *insn; + int nop; + }; + struct potential_cand *regno_potential_cand; + + /* Create candidates. */ + regno_potential_cand = XCNEWVEC (struct potential_cand, max_reg_num ()); + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + { + rtx set; + int src_regno, dst_regno; + rtx_insn *insn2; + lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); + int nop = operand_to_remat (insn); + int regno = -1; + + if ((set = single_set (insn)) != NULL + && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)) + && (src_regno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER + && (dst_regno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER + && reg_renumber[dst_regno] < 0 + && (insn2 = regno_potential_cand[src_regno].insn) != NULL + && BLOCK_FOR_INSN (insn2) == BLOCK_FOR_INSN (insn)) + create_cand (insn2, regno_potential_cand[src_regno].nop, dst_regno); + if (nop < 0) + goto fail; + gcc_assert (REG_P (*id->operand_loc[nop])); + regno = REGNO (*id->operand_loc[nop]); + gcc_assert (regno >= FIRST_PSEUDO_REGISTER); + if (reg_renumber[regno] < 0) + create_cand (insn, nop, regno); + else + { + regno_potential_cand[regno].insn = insn; + regno_potential_cand[regno].nop = nop; + goto fail; + } + regno = -1; + fail: + for (struct lra_insn_reg *reg = id->regs; reg != NULL; reg = reg->next) + if (reg->type != OP_IN && reg->regno != regno + && reg->regno >= FIRST_PSEUDO_REGISTER) + regno_potential_cand[reg->regno].insn = NULL; + } + cands_num = all_cands.length (); + free (regno_potential_cand); +} + + + +/* Create and initialize BB data. */ +static void +create_remat_bb_data (void) +{ + basic_block bb; + remat_bb_data_t bb_info; + + remat_bb_data = XNEWVEC (struct remat_bb_data, + last_basic_block_for_fn (cfun)); + FOR_ALL_BB_FN (bb, cfun) + { +#ifdef ENABLE_CHECKING + if (bb->index < 0 || bb->index >= last_basic_block_for_fn (cfun)) + abort (); +#endif + bb_info = get_remat_bb_data (bb); + bb_info->bb = bb; + bitmap_initialize (&bb_info->changed_regs, ®_obstack); + bitmap_initialize (&bb_info->dead_regs, ®_obstack); + bitmap_initialize (&bb_info->gen_cands, ®_obstack); + bitmap_initialize (&bb_info->livein_cands, ®_obstack); + bitmap_initialize (&bb_info->pavin_cands, ®_obstack); + bitmap_initialize (&bb_info->pavout_cands, ®_obstack); + bitmap_initialize (&bb_info->avin_cands, ®_obstack); + bitmap_initialize (&bb_info->avout_cands, ®_obstack); + } +} + +/* Dump all candidates to DUMP_FILE. */ +static void +dump_cands (FILE *dump_file) +{ + int i; + cand_t cand; + + fprintf (dump_file, "\nCands:\n"); + for (i = 0; i < (int) cands_num; i++) + { + cand = all_cands[i]; + fprintf (dump_file, "%d (nop=%d, remat_regno=%d, reload_regno=%d):\n", + i, cand->nop, cand->regno, cand->reload_regno); + print_inline_rtx (dump_file, cand->insn, 6); + fprintf (dump_file, "\n"); + } +} + +/* Dump all candidates and BB data. */ +static void +dump_candidates_and_remat_bb_data (void) +{ + basic_block bb; + + if (lra_dump_file == NULL) + return; + dump_cands (lra_dump_file); + FOR_EACH_BB_FN (bb, cfun) + { + fprintf (lra_dump_file, "\nBB %d:\n", bb->index); + /* Livein */ + fprintf (lra_dump_file, " register live in:"); + dump_regset (df_get_live_in (bb), lra_dump_file); + putc ('\n', lra_dump_file); + /* Liveout */ + fprintf (lra_dump_file, " register live out:"); + dump_regset (df_get_live_out (bb), lra_dump_file); + putc ('\n', lra_dump_file); + /* Changed/dead regs: */ + fprintf (lra_dump_file, " changed regs:"); + dump_regset (&get_remat_bb_data (bb)->changed_regs, lra_dump_file); + putc ('\n', lra_dump_file); + fprintf (lra_dump_file, " dead regs:"); + dump_regset (&get_remat_bb_data (bb)->dead_regs, lra_dump_file); + putc ('\n', lra_dump_file); + lra_dump_bitmap_with_title ("cands generated in BB", + &get_remat_bb_data (bb)->gen_cands, bb->index); + lra_dump_bitmap_with_title ("livein cands in BB", + &get_remat_bb_data (bb)->livein_cands, bb->index); + lra_dump_bitmap_with_title ("pavin cands in BB", + &get_remat_bb_data (bb)->pavin_cands, bb->index); + lra_dump_bitmap_with_title ("pavout cands in BB", + &get_remat_bb_data (bb)->pavout_cands, bb->index); + lra_dump_bitmap_with_title ("avin cands in BB", + &get_remat_bb_data (bb)->avin_cands, bb->index); + lra_dump_bitmap_with_title ("avout cands in BB", + &get_remat_bb_data (bb)->avout_cands, bb->index); + } +} + +/* Free all BB data. */ +static void +finish_remat_bb_data (void) +{ + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + bitmap_clear (&get_remat_bb_data (bb)->avout_cands); + bitmap_clear (&get_remat_bb_data (bb)->avin_cands); + bitmap_clear (&get_remat_bb_data (bb)->pavout_cands); + bitmap_clear (&get_remat_bb_data (bb)->pavin_cands); + bitmap_clear (&get_remat_bb_data (bb)->livein_cands); + bitmap_clear (&get_remat_bb_data (bb)->gen_cands); + bitmap_clear (&get_remat_bb_data (bb)->dead_regs); + bitmap_clear (&get_remat_bb_data (bb)->changed_regs); + } + free (remat_bb_data); +} + + + +/* Update changed_regs and dead_regs of BB from INSN. */ +static void +set_bb_regs (basic_block bb, rtx_insn *insn) +{ + lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); + struct lra_insn_reg *reg; + + for (reg = id->regs; reg != NULL; reg = reg->next) + if (reg->type != OP_IN) + bitmap_set_bit (&get_remat_bb_data (bb)->changed_regs, reg->regno); + else + { + if (find_regno_note (insn, REG_DEAD, (unsigned) reg->regno) != NULL) + bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, reg->regno); + } + if (CALL_P (insn)) + for (int i = 0; i < call_used_regs_arr_len; i++) + bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, + call_used_regs_arr[i]); +} + +/* Calculate changed_regs and dead_regs for each BB. */ +static void +calculate_local_reg_remat_bb_data (void) +{ + basic_block bb; + rtx_insn *insn; + + FOR_EACH_BB_FN (bb, cfun) + FOR_BB_INSNS (bb, insn) + if (INSN_P (insn)) + set_bb_regs (bb, insn); +} + + + +/* Return true if REGNO is an input operand of INSN. */ +static bool +input_regno_present_p (rtx_insn *insn, int regno) +{ + lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); + struct lra_insn_reg *reg; + + for (reg = id->regs; reg != NULL; reg = reg->next) + if (reg->type == OP_IN && reg->regno == regno) + return true; + return false; +} + +/* Return true if a call used register is an input operand of INSN. */ +static bool +call_used_input_regno_present_p (rtx_insn *insn) +{ + lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); + struct lra_insn_reg *reg; + + for (reg = id->regs; reg != NULL; reg = reg->next) + if (reg->type == OP_IN && reg->regno <= FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (call_used_reg_set, reg->regno)) + return true; + return false; +} + +/* Calculate livein_cands for each BB. */ +static void +calculate_livein_cands (void) +{ + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + bitmap livein_regs = df_get_live_in (bb); + bitmap livein_cands = &get_remat_bb_data (bb)->livein_cands; + for (unsigned int i = 0; i < cands_num; i++) + { + cand_t cand = all_cands[i]; + lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn); + struct lra_insn_reg *reg; + + for (reg = id->regs; reg != NULL; reg = reg->next) + if (reg->type == OP_IN && ! bitmap_bit_p (livein_regs, reg->regno)) + break; + if (reg == NULL) + bitmap_set_bit (livein_cands, i); + } + } +} + +/* Calculate gen_cands for each BB. */ +static void +calculate_gen_cands (void) +{ + basic_block bb; + bitmap gen_cands; + bitmap_head gen_insns; + rtx_insn *insn; + + bitmap_initialize (&gen_insns, ®_obstack); + FOR_EACH_BB_FN (bb, cfun) + { + gen_cands = &get_remat_bb_data (bb)->gen_cands; + bitmap_clear (&gen_insns); + FOR_BB_INSNS (bb, insn) + if (INSN_P (insn)) + { + lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); + struct lra_insn_reg *reg; + unsigned int uid; + bitmap_iterator bi; + cand_t cand; + rtx set; + int src_regno = -1, dst_regno = -1; + + if ((set = single_set (insn)) != NULL + && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))) + { + src_regno = REGNO (SET_SRC (set)); + dst_regno = REGNO (SET_DEST (set)); + } + + /* Update gen_cands: */ + bitmap_clear (&temp_bitmap); + for (reg = id->regs; reg != NULL; reg = reg->next) + if (reg->type != OP_IN + || find_regno_note (insn, REG_DEAD, reg->regno) != NULL) + EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi) + { + rtx_insn *insn2 = lra_insn_recog_data[uid]->insn; + + cand = insn_to_cand[INSN_UID (insn2)]; + gcc_assert (cand != NULL); + /* Ignore the reload insn. */ + if (src_regno == cand->reload_regno + && dst_regno == cand->regno) + continue; + if (cand->regno == reg->regno + || input_regno_present_p (insn2, reg->regno)) + { + bitmap_clear_bit (gen_cands, cand->index); + bitmap_set_bit (&temp_bitmap, uid); + } + } + + if (CALL_P (insn)) + EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi) + { + rtx_insn *insn2 = lra_insn_recog_data[uid]->insn; + + cand = insn_to_cand[INSN_UID (insn2)]; + gcc_assert (cand != NULL); + if (call_used_input_regno_present_p (insn2)) + { + bitmap_clear_bit (gen_cands, cand->index); + bitmap_set_bit (&temp_bitmap, uid); + } + } + bitmap_and_compl_into (&gen_insns, &temp_bitmap); + + cand = insn_to_cand[INSN_UID (insn)]; + if (cand != NULL) + { + bitmap_set_bit (gen_cands, cand->index); + bitmap_set_bit (&gen_insns, INSN_UID (insn)); + } + } + } + bitmap_clear (&gen_insns); +} + + + +/* The common transfer function used by the DF equation solver to + propagate (partial) availability info BB_IN to BB_OUT through block + with BB_INDEX according to the following equation: + + bb.out = ((bb.in & bb.livein) - bb.killed) OR bb.gen +*/ +static bool +cand_trans_fun (int bb_index, bitmap bb_in, bitmap bb_out) +{ + remat_bb_data_t bb_info; + bitmap bb_livein, bb_changed_regs, bb_dead_regs; + unsigned int cid; + bitmap_iterator bi; + + bb_info = get_remat_bb_data_by_index (bb_index); + bb_livein = &bb_info->livein_cands; + bb_changed_regs = &bb_info->changed_regs; + bb_dead_regs = &bb_info->dead_regs; + /* Calculate killed avin cands -- cands whose regs are changed or + becoming dead in the BB. We calculate it here as we hope that + repeated calculations are compensated by smaller size of BB_IN in + comparison with all candidates number. */ + bitmap_clear (&temp_bitmap); + EXECUTE_IF_SET_IN_BITMAP (bb_in, 0, cid, bi) + { + cand_t cand = all_cands[cid]; + lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn); + struct lra_insn_reg *reg; + + if (! bitmap_bit_p (bb_livein, cid)) + { + bitmap_set_bit (&temp_bitmap, cid); + continue; + } + for (reg = id->regs; reg != NULL; reg = reg->next) + /* Ignore all outputs which are not the regno for + rematerialization. */ + if (reg->type == OP_OUT && reg->regno != cand->regno) + continue; + else if (bitmap_bit_p (bb_changed_regs, reg->regno) + || bitmap_bit_p (bb_dead_regs, reg->regno)) + { + bitmap_set_bit (&temp_bitmap, cid); + break; + } + } + return bitmap_ior_and_compl (bb_out, + &bb_info->gen_cands, bb_in, &temp_bitmap); +} + + + +/* The transfer function used by the DF equation solver to propagate + partial candidate availability info through block with BB_INDEX + according to the following equation: + + bb.pavout = ((bb.pavin & bb.livein) - bb.killed) OR bb.gen +*/ +static bool +cand_pav_trans_fun (int bb_index) +{ + remat_bb_data_t bb_info; + + bb_info = get_remat_bb_data_by_index (bb_index); + return cand_trans_fun (bb_index, &bb_info->pavin_cands, + &bb_info->pavout_cands); +} + +/* The confluence function used by the DF equation solver to set up + cand_pav info for a block BB without predecessor. */ +static void +cand_pav_con_fun_0 (basic_block bb) +{ + bitmap_clear (&get_remat_bb_data (bb)->pavin_cands); +} + +/* The confluence function used by the DF equation solver to propagate + partial candidate availability info from predecessor to successor + on edge E (pred->bb) according to the following equation: + + bb.pavin_cands = 0 for entry block | OR (pavout_cands of predecessors) + */ +static bool +cand_pav_con_fun_n (edge e) +{ + basic_block pred = e->src; + basic_block bb = e->dest; + remat_bb_data_t bb_info; + bitmap bb_pavin, pred_pavout; + + bb_info = get_remat_bb_data (bb); + bb_pavin = &bb_info->pavin_cands; + pred_pavout = &get_remat_bb_data (pred)->pavout_cands; + return bitmap_ior_into (bb_pavin, pred_pavout); +} + + + +/* The transfer function used by the DF equation solver to propagate + candidate availability info through block with BB_INDEX according + to the following equation: + + bb.avout = ((bb.avin & bb.livein) - bb.killed) OR bb.gen +*/ +static bool +cand_av_trans_fun (int bb_index) +{ + remat_bb_data_t bb_info; + + bb_info = get_remat_bb_data_by_index (bb_index); + return cand_trans_fun (bb_index, &bb_info->avin_cands, + &bb_info->avout_cands); +} + +/* The confluence function used by the DF equation solver to set up + cand_av info for a block BB without predecessor. */ +static void +cand_av_con_fun_0 (basic_block bb) +{ + bitmap_clear (&get_remat_bb_data (bb)->avin_cands); +} + +/* The confluence function used by the DF equation solver to propagate + cand_av info from predecessor to successor on edge E (pred->bb) + according to the following equation: + + bb.avin_cands = 0 for entry block | AND (avout_cands of predecessors) + */ +static bool +cand_av_con_fun_n (edge e) +{ + basic_block pred = e->src; + basic_block bb = e->dest; + remat_bb_data_t bb_info; + bitmap bb_avin, pred_avout; + + bb_info = get_remat_bb_data (bb); + bb_avin = &bb_info->avin_cands; + pred_avout = &get_remat_bb_data (pred)->avout_cands; + return bitmap_and_into (bb_avin, pred_avout); +} + +/* Calculate available candidates for each BB. */ +static void +calculate_global_remat_bb_data (void) +{ + basic_block bb; + + df_simple_dataflow + (DF_FORWARD, NULL, cand_pav_con_fun_0, cand_pav_con_fun_n, + cand_pav_trans_fun, &all_blocks, + df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD)); + /* Initialize avin by pavin. */ + FOR_EACH_BB_FN (bb, cfun) + bitmap_copy (&get_remat_bb_data (bb)->avin_cands, + &get_remat_bb_data (bb)->pavin_cands); + df_simple_dataflow + (DF_FORWARD, NULL, cand_av_con_fun_0, cand_av_con_fun_n, + cand_av_trans_fun, &all_blocks, + df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD)); +} + + + +/* Setup sp offset attribute to SP_OFFSET for all INSNS. */ +static void +change_sp_offset (rtx_insn *insns, HOST_WIDE_INT sp_offset) +{ + for (rtx_insn *insn = insns; insn != NULL; insn = NEXT_INSN (insn)) + eliminate_regs_in_insn (insn, false, false, sp_offset); +} + +/* Return start hard register of REG (can be a hard or a pseudo reg) + or -1 (if it is a spilled pseudo). Return number of hard registers + occupied by REG through parameter NREGS if the start hard reg is + not negative. */ +static int +get_hard_regs (struct lra_insn_reg *reg, int &nregs) +{ + int regno = reg->regno; + int hard_regno = regno < FIRST_PSEUDO_REGISTER ? regno : reg_renumber[regno]; + + if (hard_regno >= 0) + nregs = hard_regno_nregs[hard_regno][reg->biggest_mode]; + return hard_regno; +} + +/* Insert rematerialization insns using the data-flow data calculated + earlier. */ +static bool +do_remat (void) +{ + rtx_insn *insn; + basic_block bb; + bitmap_head avail_cands; + bool changed_p = false; + /* Living hard regs and hard registers of living pseudos. */ + HARD_REG_SET live_hard_regs; + + bitmap_initialize (&avail_cands, ®_obstack); + FOR_EACH_BB_FN (bb, cfun) + { + REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_out (bb)); + bitmap_and (&avail_cands, &get_remat_bb_data (bb)->avin_cands, + &get_remat_bb_data (bb)->livein_cands); + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); + struct lra_insn_reg *reg; + cand_t cand; + unsigned int cid; + bitmap_iterator bi; + rtx set; + int src_regno = -1, dst_regno = -1; + + if ((set = single_set (insn)) != NULL + && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))) + { + src_regno = REGNO (SET_SRC (set)); + dst_regno = REGNO (SET_DEST (set)); + } + + cand = NULL; + /* Check possibility of rematerialization (hard reg or + unpsilled pseudo <- spilled pseudo): */ + if (dst_regno >= 0 && src_regno >= FIRST_PSEUDO_REGISTER + && reg_renumber[src_regno] < 0 + && (dst_regno < FIRST_PSEUDO_REGISTER + || reg_renumber[dst_regno] >= 0)) + { + for (cand = regno_cands[src_regno]; + cand != NULL; + cand = cand->next_regno_cand) + if (bitmap_bit_p (&avail_cands, cand->index)) + break; + } + int i, hard_regno, nregs; + rtx_insn *remat_insn = NULL; + HOST_WIDE_INT cand_sp_offset = 0; + if (cand != NULL) + { + lra_insn_recog_data_t cand_id = lra_get_insn_recog_data (cand->insn); + rtx saved_op = *cand_id->operand_loc[cand->nop]; + + /* Check clobbers do not kill something living. */ + gcc_assert (REG_P (saved_op)); + int ignore_regno = REGNO (saved_op); + + for (reg = cand_id->regs; reg != NULL; reg = reg->next) + if (reg->type != OP_IN && reg->regno != ignore_regno) + { + hard_regno = get_hard_regs (reg, nregs); + gcc_assert (hard_regno >= 0); + for (i = 0; i < nregs; i++) + if (TEST_HARD_REG_BIT (live_hard_regs, hard_regno + i)) + break; + if (i < nregs) + break; + } + + if (reg == NULL) + { + *cand_id->operand_loc[cand->nop] = SET_DEST (set); + lra_update_insn_regno_info (cand->insn); + bool ok_p = lra_constrain_insn (cand->insn); + if (ok_p) + { + rtx remat_pat = copy_insn (PATTERN (cand->insn)); + + start_sequence (); + emit_insn (remat_pat); + remat_insn = get_insns (); + end_sequence (); + if (recog_memoized (remat_insn) < 0) + remat_insn = NULL; + cand_sp_offset = cand_id->sp_offset; + } + *cand_id->operand_loc[cand->nop] = saved_op; + lra_update_insn_regno_info (cand->insn); + } + } + + /* Update avail_cands (see analogous code for + calculate_gen_cands). */ + for (reg = id->regs; reg != NULL; reg = reg->next) + if (reg->type != OP_IN + || find_regno_note (insn, REG_DEAD, reg->regno) != NULL) + EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi) + { + cand = all_cands[cid]; + + /* Ignore the reload insn. */ + if (src_regno == cand->reload_regno + && dst_regno == cand->regno) + continue; + if (cand->regno == reg->regno + || input_regno_present_p (cand->insn, reg->regno)) + bitmap_clear_bit (&avail_cands, cand->index); + } + + if (CALL_P (insn)) + EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi) + { + cand = all_cands[cid]; + + if (call_used_input_regno_present_p (cand->insn)) + bitmap_clear_bit (&avail_cands, cand->index); + } + + if ((cand = insn_to_cand[INSN_UID (insn)]) != NULL) + bitmap_set_bit (&avail_cands, cand->index); + + if (remat_insn != NULL) + { + HOST_WIDE_INT sp_offset_change = cand_sp_offset - id->sp_offset; + if (sp_offset_change != 0) + change_sp_offset (remat_insn, sp_offset_change); + lra_process_new_insns (insn, remat_insn, NULL, + "Inserting rematerialization insn"); + lra_set_insn_deleted (insn); + changed_p = true; + continue; + } + + /* Update live hard regs: */ + for (reg = id->regs; reg != NULL; reg = reg->next) + if (reg->type == OP_IN + && find_regno_note (insn, REG_DEAD, reg->regno) != NULL) + { + if ((hard_regno = get_hard_regs (reg, nregs)) < 0) + continue; + for (i = 0; i < nregs; i++) + CLEAR_HARD_REG_BIT (live_hard_regs, hard_regno + i); + } + else if (reg->type != OP_IN + && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL) + { + if ((hard_regno = get_hard_regs (reg, nregs)) < 0) + continue; + for (i = 0; i < nregs; i++) + SET_HARD_REG_BIT (live_hard_regs, hard_regno + i); + } + } + } + bitmap_clear (&avail_cands); + return changed_p; +} + + + +/* Entry point of the rematerialization sub-pass. Return true if we + did any rematerialization. */ +bool +lra_remat (void) +{ + basic_block bb; + bool result; + int max_regno = max_reg_num (); + + if (! flag_lra_remat) + return false; + timevar_push (TV_LRA_REMAT); + insn_to_cand = XCNEWVEC (cand_t, get_max_uid ()); + regno_cands = XCNEWVEC (cand_t, max_regno); + all_cands.create (8000); + call_used_regs_arr_len = 0; + for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (call_used_regs[i]) + call_used_regs_arr[call_used_regs_arr_len++] = i; + initiate_cand_table (); + create_cands (); + create_remat_bb_data (); + bitmap_initialize (&temp_bitmap, ®_obstack); + calculate_local_reg_remat_bb_data (); + calculate_livein_cands (); + calculate_gen_cands (); + bitmap_initialize (&all_blocks, ®_obstack); + FOR_ALL_BB_FN (bb, cfun) + bitmap_set_bit (&all_blocks, bb->index); + calculate_global_remat_bb_data (); + dump_candidates_and_remat_bb_data (); + result = do_remat (); + all_cands.release (); + bitmap_clear (&temp_bitmap); + finish_remat_bb_data (); + finish_cand_table (); + bitmap_clear (&all_blocks); + free (regno_cands); + free (insn_to_cand); + timevar_pop (TV_LRA_REMAT); + return result; +} diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c index a4ffc85..f21a3ab 100644 --- a/gcc/lra-spills.c +++ b/gcc/lra-spills.c @@ -445,7 +445,7 @@ remove_pseudos (rtx *loc, rtx_insn *insn) { rtx x = lra_eliminate_regs_1 (insn, pseudo_slots[i].mem, GET_MODE (pseudo_slots[i].mem), - false, false, true); + 0, false, false, true); *loc = x != pseudo_slots[i].mem ? x : copy_rtx (x); } return; diff --git a/gcc/lra.c b/gcc/lra.c index 3ae47e8..a140f2a 100644 --- a/gcc/lra.c +++ b/gcc/lra.c @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see generated; o Some pseudos might be spilled to assign hard registers to new reload pseudos; + o Recalculating spilled pseudo values (rematerialization); o Changing spilled pseudos to stack memory or their equivalences; o Allocation stack memory changes the address displacement and new iteration is needed. @@ -57,19 +58,26 @@ along with GCC; see the file COPYING3. If not see ----------- | ---------------- | | | | | V New | - ---------------- No ------------ pseudos ------------------- - | Spilled pseudo | change |Constraints:| or insns | Inheritance/split | - | to memory |<-------| RTL |--------->| transformations | - | substitution | | transfor- | | in EBB scope | - ---------------- | mations | ------------------- - | ------------ - V - ------------------------- - | Hard regs substitution, | - | devirtalization, and |------> Finish - | restoring scratches got | - | memory | - ------------------------- + | ------------ pseudos ------------------- + | |Constraints:| or insns | Inheritance/split | + | | RTL |--------->| transformations | + | | transfor- | | in EBB scope | + | substi- | mations | ------------------- + | tutions ------------ + | | No change + ---------------- V + | Spilled pseudo | ------------------- + | to memory |<----| Rematerialization | + | substitution | ------------------- + ---------------- + | No susbtitions + V + ------------------------- + | Hard regs substitution, | + | devirtalization, and |------> Finish + | restoring scratches got | + | memory | + ------------------------- To speed up the process: o We process only insns affected by changes on previous @@ -849,38 +857,38 @@ collect_non_operand_hard_regs (rtx *x, lra_insn_recog_data_t data, { if ((regno = REGNO (op)) >= FIRST_PSEUDO_REGISTER) return list; + /* Process all regs even unallocatable ones as we need info + about all regs for rematerialization pass. */ for (last = regno + hard_regno_nregs[regno][mode]; regno < last; regno++) - if (! TEST_HARD_REG_BIT (lra_no_alloc_regs, regno) - || TEST_HARD_REG_BIT (eliminable_regset, regno)) - { - for (curr = list; curr != NULL; curr = curr->next) - if (curr->regno == regno && curr->subreg_p == subreg_p - && curr->biggest_mode == mode) - { - if (curr->type != type) - curr->type = OP_INOUT; - if (curr->early_clobber != early_clobber) - curr->early_clobber = true; - break; - } - if (curr == NULL) + { + for (curr = list; curr != NULL; curr = curr->next) + if (curr->regno == regno && curr->subreg_p == subreg_p + && curr->biggest_mode == mode) { - /* This is a new hard regno or the info can not be - integrated into the found structure. */ + if (curr->type != type) + curr->type = OP_INOUT; + if (curr->early_clobber != early_clobber) + curr->early_clobber = true; + break; + } + if (curr == NULL) + { + /* This is a new hard regno or the info can not be + integrated into the found structure. */ #ifdef STACK_REGS - early_clobber - = (early_clobber - /* This clobber is to inform popping floating - point stack only. */ - && ! (FIRST_STACK_REG <= regno - && regno <= LAST_STACK_REG)); + early_clobber + = (early_clobber + /* This clobber is to inform popping floating + point stack only. */ + && ! (FIRST_STACK_REG <= regno + && regno <= LAST_STACK_REG)); #endif - list = new_insn_reg (data->insn, regno, type, mode, subreg_p, - early_clobber, list); - } - } + list = new_insn_reg (data->insn, regno, type, mode, subreg_p, + early_clobber, list); + } + } return list; } switch (code) @@ -1456,10 +1464,8 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, int uid, if (REG_P (x)) { regno = REGNO (x); - if (regno < FIRST_PSEUDO_REGISTER - && TEST_HARD_REG_BIT (lra_no_alloc_regs, regno) - && ! TEST_HARD_REG_BIT (eliminable_regset, regno)) - return; + /* Process all regs even unallocatable ones as we need info about + all regs for rematerialization pass. */ expand_reg_info (); if (bitmap_set_bit (&lra_reg_info[regno].insn_bitmap, uid)) { @@ -2152,9 +2158,6 @@ bitmap_head lra_optional_reload_pseudos; pass. */ bitmap_head lra_subreg_reload_pseudos; -/* First UID of insns generated before a new spill pass. */ -int lra_constraint_new_insn_uid_start; - /* File used for output of LRA debug information. */ FILE *lra_dump_file; @@ -2252,7 +2255,6 @@ lra (FILE *f) lra_curr_reload_num = 0; push_insns (get_last_insn (), NULL); /* It is needed for the 1st coalescing. */ - lra_constraint_new_insn_uid_start = get_max_uid (); bitmap_initialize (&lra_inheritance_pseudos, ®_obstack); bitmap_initialize (&lra_split_regs, ®_obstack); bitmap_initialize (&lra_optional_reload_pseudos, ®_obstack); @@ -2345,12 +2347,21 @@ lra (FILE *f) lra_create_live_ranges (lra_reg_spill_p); live_p = true; } + /* Now we know what pseudos should be spilled. Try to + rematerialize them first. */ + if (lra_remat ()) + { + /* We need full live info -- see the comment above. */ + lra_create_live_ranges (lra_reg_spill_p); + live_p = true; + if (! lra_need_for_spills_p ()) + break; + } lra_spill (); /* Assignment of stack slots changes elimination offsets for some eliminations. So update the offsets here. */ lra_eliminate (false, false); lra_constraint_new_regno_start = max_reg_num (); - lra_constraint_new_insn_uid_start = get_max_uid (); lra_assignment_iter_after_spill = 0; } restore_scratches (); diff --git a/gcc/opts.c b/gcc/opts.c index 195cadd..4df7389 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -500,6 +500,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_2_PLUS, OPT_fipa_icf, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fisolate_erroneous_paths_dereference, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fuse_caller_save, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_flra_remat, NULL, 1 }, /* -O3 optimizations. */ { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, diff --git a/gcc/timevar.def b/gcc/timevar.def index f3e7b88..95c84ad 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -237,6 +237,7 @@ DEFTIMEVAR (TV_LRA_INHERITANCE , "LRA reload inheritance") DEFTIMEVAR (TV_LRA_CREATE_LIVE_RANGES, "LRA create live ranges") DEFTIMEVAR (TV_LRA_ASSIGN , "LRA hard reg assignment") DEFTIMEVAR (TV_LRA_COALESCE , "LRA coalesce pseudo regs") +DEFTIMEVAR (TV_LRA_REMAT , "LRA rematerialization") DEFTIMEVAR (TV_RELOAD , "reload") DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs") DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload") -- 2.7.4