From 312866af43c7d34fa7b50f97f20058108888e844 Mon Sep 17 00:00:00 2001 From: hubicka Date: Sun, 19 Aug 2001 23:46:10 +0000 Subject: [PATCH] * final.c (compute_alignments): New function. (init_insn_lengths): Do not care label_align. (LABEL_ALIGN_AFTER_BARRIER): Default to 1. (LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Default to 0. (JUMP_ALIGN, JUMP_ALIGN_MAX_SKIP): New. (shorted_branches): Realloc label_align array; do not call init_insn_lengths; Do not care about loop alignments. * output.h (compute_alignments): Declare. * toplev.c (rest_of_compilation): Call compute_alignments. * tm.texi (JUMP_ALIGN, JUMP_ALIGN_MAX_SKIP): Document. * predict.c (block_info_def): Add npredecesors, remove nvisited; change visited to tovisit. (propagate_freq): Use faster traversing algorithm. (estimate_loops_at_level, estimate_bb_frequencies): Change visited to tovisit; reverse meaning. * predict.c (struct block_info_def): Remove nvisited. (propagate_freq): Use EDGE_DFS_BACK to detect irreducible regions. (estimate_bb_frequencies): Call mark_dfs_back_edges. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@45042 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 23 ++++++++ gcc/doc/tm.texi | 20 +++++-- gcc/final.c | 175 +++++++++++++++++++++++++++++++++++++++----------------- gcc/output.h | 3 + gcc/predict.c | 88 ++++++++++++++++------------ gcc/toplev.c | 1 + 6 files changed, 214 insertions(+), 96 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 54b09bcc..d37e8f4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +Mon Aug 20 01:44:50 CEST 2001 Jan Hubicka + + * final.c (compute_alignments): New function. + (init_insn_lengths): Do not care label_align. + (LABEL_ALIGN_AFTER_BARRIER): Default to 1. + (LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Default to 0. + (JUMP_ALIGN, JUMP_ALIGN_MAX_SKIP): New. + (shorted_branches): Realloc label_align array; do + not call init_insn_lengths; Do not care about loop alignments. + * output.h (compute_alignments): Declare. + * toplev.c (rest_of_compilation): Call compute_alignments. + * tm.texi (JUMP_ALIGN, JUMP_ALIGN_MAX_SKIP): Document. + + * predict.c (block_info_def): Add npredecesors, remove nvisited; + change visited to tovisit. + (propagate_freq): Use faster traversing algorithm. + (estimate_loops_at_level, estimate_bb_frequencies): Change visited + to tovisit; reverse meaning. + + * predict.c (struct block_info_def): Remove nvisited. + (propagate_freq): Use EDGE_DFS_BACK to detect irreducible regions. + (estimate_bb_frequencies): Call mark_dfs_back_edges. + 2001-08-19 Geoffrey Keating * doc/invoke.texi (MIPS Options): Document -mfused-madd. diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 8f567db..80e8e75 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -7226,10 +7226,10 @@ the target supports DWARF 2 frame unwind information. This describes commands for alignment. @table @code -@findex LABEL_ALIGN_AFTER_BARRIER -@item LABEL_ALIGN_AFTER_BARRIER (@var{label}) -The alignment (log base 2) to put in front of @var{label}, which follows -a @code{BARRIER}. +@findex JUMP_ALIGN +@item JUMP_ALIGN (@var{label}) +The alignment (log base 2) to put in front of @var{label}, which is +a common destination of jumps and has no fallthru incomming edge. This macro need not be defined if you don't want any special alignment to be done at such a time. Most machine descriptions do not currently @@ -7238,8 +7238,16 @@ define the macro. Unless it's necessary to inspect the @var{label} parameter, it is better to set the variable @var{align_jumps} in the target's @code{OVERRIDE_OPTIONS}. Otherwise, you should try to honour the user's -selection in @var{align_jumps} in a @code{LABEL_ALIGN_AFTER_BARRIER} -implementation. +selection in @var{align_jumps} in a @code{JUMP_ALIGN} implementation. + +@findex LABEL_ALIGN_AFTER_BARRIER +@item LABEL_ALIGN_AFTER_BARRIER (@var{label}) +The alignment (log base 2) to put in front of @var{label}, which follows +a @code{BARRIER}. + +This macro need not be defined if you don't want any special alignment +to be done at such a time. Most machine descriptions do not currently +define the macro. @findex LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP @item LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP diff --git a/gcc/final.c b/gcc/final.c index ec57842..ed4b6ff 100644 --- a/gcc/final.c +++ b/gcc/final.c @@ -643,11 +643,6 @@ static struct label_alignment *label_align; void init_insn_lengths () { - if (label_align) - { - free (label_align); - label_align = 0; - } if (uid_shuid) { free (uid_shuid); @@ -791,11 +786,19 @@ get_attr_length (insn) #endif #ifndef LABEL_ALIGN_AFTER_BARRIER -#define LABEL_ALIGN_AFTER_BARRIER(LABEL) align_jumps_log +#define LABEL_ALIGN_AFTER_BARRIER(LABEL) 1 #endif #ifndef LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP -#define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP (align_jumps-1) +#define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP 0 +#endif + +#ifndef JUMP_ALIGN +#define JUMP_ALIGN(LABEL) align_jumps_log +#endif + +#ifndef JUMP_ALIGN_MAX_SKIP +#define JUMP_ALIGN_MAX_SKIP (align_jumps-1) #endif #ifndef ADDR_VEC_ALIGN @@ -946,6 +949,88 @@ insn_current_reference_address (branch) } #endif /* HAVE_ATTR_length */ +void +compute_alignments () +{ + int i; + int log, max_skip, max_log; + + if (label_align) + { + free (label_align); + label_align = 0; + } + + max_labelno = max_label_num (); + min_labelno = get_first_label_num (); + label_align = (struct label_alignment *) + xcalloc (max_labelno - min_labelno + 1, sizeof (struct label_alignment)); + + /* If not optimizing or optimizing for size, don't assign any alignments. */ + if (optimize || optimize_size) + return; + + for (i = 0; i < n_basic_blocks; i++) + { + basic_block bb = BASIC_BLOCK (i); + rtx label = bb->head; + int fallthru_frequency = 0, branch_frequency = 0, has_fallthru = 0; + edge e; + + if (GET_CODE (label) != CODE_LABEL) + continue; + max_log = LABEL_ALIGN (label); + max_skip = LABEL_ALIGN_MAX_SKIP; + + for (e = bb->pred; e; e = e->pred_next) + { + if (e->flags & EDGE_FALLTHRU) + has_fallthru = 1, fallthru_frequency += EDGE_FREQUENCY (e); + else + branch_frequency += EDGE_FREQUENCY (e); + } + + /* There are two purposes to align block with no fallthru incomming edge: + 1) to avoid fetch stalls when branch destination is near cache boundary + 2) to improve cache effciency in case the previous block is not executed + (so it does not need to be in the cache). + + We to catch first case, we align frequently executed blocks. + To catch the second, we align blocks that are executed more frequently + than the predecesor and the predecesor is likely to not be executed + when function is called. */ + + if (!has_fallthru + && (branch_frequency > BB_FREQ_MAX / 10 + || (bb->frequency > BASIC_BLOCK (i - 1)->frequency * 10 + && (BASIC_BLOCK (i - 1)->frequency + <= ENTRY_BLOCK_PTR->frequency / 2)))) + { + log = JUMP_ALIGN (label); + if (max_log < log) + { + max_log = log; + max_skip = JUMP_ALIGN_MAX_SKIP; + } + } + /* In case block is frequent and reached mostly by non-fallthru edge, + align it. It is most likely an first block of loop. */ + if (has_fallthru + && branch_frequency + fallthru_frequency > BB_FREQ_MAX / 10 + && branch_frequency > fallthru_frequency * 5) + { + log = LOOP_ALIGN (label); + if (max_log < log) + { + max_log = log; + max_skip = LOOP_ALIGN_MAX_SKIP; + } + } + LABEL_TO_ALIGNMENT (label) = max_log; + LABEL_TO_MAX_SKIP (label) = max_skip; + } +} + /* Make a pass over all insns and compute their actual lengths by shortening any branches of variable length if possible. */ @@ -983,21 +1068,34 @@ shorten_branches (first) #endif - /* We must do some computations even when not actually shortening, in - order to get the alignment information for the labels. */ - - init_insn_lengths (); - /* Compute maximum UID and allocate label_align / uid_shuid. */ max_uid = get_max_uid (); - max_labelno = max_label_num (); - min_labelno = get_first_label_num (); - label_align = (struct label_alignment *) - xcalloc ((max_labelno - min_labelno + 1), sizeof (struct label_alignment)); - uid_shuid = (int *) xmalloc (max_uid * sizeof *uid_shuid); + if (max_labelno != max_label_num ()) + { + int old = max_labelno; + int n_labels; + int n_old_labels; + + max_labelno = max_label_num (); + + n_labels = max_labelno - min_labelno + 1; + n_old_labels = old - min_labelno + 1; + + label_align = (struct label_alignment *) xrealloc + (label_align, n_labels * sizeof (struct label_alignment)); + + /* Range of labels grows monotonically in the function. Abort here + means that the initialization of array got lost. */ + if (n_old_labels > n_labels) + abort (); + + memset (label_align + n_old_labels, 0, + (n_labels - n_old_labels) * sizeof (struct label_alignment)); + } + /* Initialize label_align and set up uid_shuid to be strictly monotonically rising with insn order. */ /* We use max_log here to keep track of the maximum alignment we want to @@ -1023,6 +1121,14 @@ shorten_branches (first) else if (GET_CODE (insn) == CODE_LABEL) { rtx next; + + /* Merge in alignments computed by compute_alignments. */ + log = LABEL_TO_ALIGNMENT (insn); + if (max_log < log) + { + max_log = log; + max_skip = LABEL_TO_MAX_SKIP (insn); + } log = LABEL_ALIGN (insn); if (max_log < log) @@ -1074,41 +1180,6 @@ shorten_branches (first) break; } } - /* Again, we allow NOTE_INSN_LOOP_BEG - INSN - CODE_LABEL - sequences in order to handle reorg output efficiently. */ - else if (GET_CODE (insn) == NOTE - && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG) - { - rtx label; - int nest = 0; - - /* Search for the label that starts the loop. - Don't skip past the end of the loop, since that could - lead to putting an alignment where it does not belong. - However, a label after a nested (non-)loop would be OK. */ - for (label = insn; label; label = NEXT_INSN (label)) - { - if (GET_CODE (label) == NOTE - && NOTE_LINE_NUMBER (label) == NOTE_INSN_LOOP_BEG) - nest++; - else if (GET_CODE (label) == NOTE - && NOTE_LINE_NUMBER (label) == NOTE_INSN_LOOP_END - && --nest == 0) - break; - else if (GET_CODE (label) == CODE_LABEL) - { - log = LOOP_ALIGN (label); - if (max_log < log) - { - max_log = log; - max_skip = LOOP_ALIGN_MAX_SKIP; - } - break; - } - } - } - else - continue; } #ifdef HAVE_ATTR_length diff --git a/gcc/output.h b/gcc/output.h index 4c4c5ae..f1426ec 100644 --- a/gcc/output.h +++ b/gcc/output.h @@ -20,6 +20,9 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/* Compute branch alignments based on frequency information in the CFG. */ +extern void compute_alignments PARAMS ((void)); + /* Initialize data in final at the beginning of a compilation. */ extern void init_final PARAMS ((const char *)); diff --git a/gcc/predict.c b/gcc/predict.c index 63819e3..14dfc7d 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -606,8 +606,11 @@ typedef struct block_info_def /* To keep queue of basic blocks to process. */ basic_block next; - /* True if block already converted. */ - int visited:1; + /* True if block needs to be visited in prop_freqency. */ + int tovisit:1; + + /* Number of predecesors we need to visit first. */ + int npredecesors; } *block_info; /* Similar information for edges. */ @@ -634,6 +637,27 @@ propagate_freq (head) basic_block last = bb; edge e; basic_block nextbb; + int n; + + /* For each basic block we need to visit count number of his predecesors + we need to visit first. */ + for (n = 0; n < n_basic_blocks; n++) + { + basic_block bb = BASIC_BLOCK (n); + if (BLOCK_INFO (bb)->tovisit) + { + int count = 0; + for (e = bb->pred; e; e = e->pred_next) + if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK)) + count++; + else if (BLOCK_INFO (e->src)->tovisit + && rtl_dump_file && !EDGE_INFO (e)->back_edge) + fprintf (rtl_dump_file, + "Irreducible region hit, ignoring edge to %i->%i\n", + e->src->index, bb->index); + BLOCK_INFO (bb)->npredecesors = count; + } + } BLOCK_INFO (head)->frequency = 1; for (; bb; bb = nextbb) @@ -646,31 +670,16 @@ propagate_freq (head) /* Compute frequency of basic block. */ if (bb != head) { +#ifdef ENABLE_CHECKING for (e = bb->pred; e; e = e->pred_next) - if (!BLOCK_INFO (e->src)->visited && !(e->flags & EDGE_DFS_BACK)) - break; - - /* We haven't proceeded all predecessors of edge e yet. */ - if (e) - { - if (!nextbb) - nextbb = e->dest; - else - BLOCK_INFO (last)->next = e->dest; - last = e->dest; - continue; - } - if (rtl_dump_file) - for (e = bb->pred; e; e = e->pred_next) - if (!BLOCK_INFO (e->src)->visited && !EDGE_INFO (e)->back_edge) - fprintf (rtl_dump_file, - "Irreducible region hit, ignoring edge to %i->%i\n", - e->src->index, bb->index); + if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK)) + abort (); +#endif for (e = bb->pred; e; e = e->pred_next) if (EDGE_INFO (e)->back_edge) cyclic_probability += EDGE_INFO (e)->back_edge_prob; - else if (BLOCK_INFO (e->src)->visited) + else if (!(e->flags & EDGE_DFS_BACK)) frequency += (e->probability * BLOCK_INFO (e->src)->frequency / REG_BR_PROB_BASE); @@ -681,7 +690,7 @@ propagate_freq (head) BLOCK_INFO (bb)->frequency = frequency / (1 - cyclic_probability); } - BLOCK_INFO (bb)->visited = 1; + BLOCK_INFO (bb)->tovisit = 0; /* Compute back edge frequencies. */ for (e = bb->succ; e; e = e->succ_next) @@ -692,16 +701,19 @@ propagate_freq (head) /* Propagate to successor blocks. */ for (e = bb->succ; e; e = e->succ_next) - if (!EDGE_INFO (e)->back_edge - && !BLOCK_INFO (e->dest)->visited - && !BLOCK_INFO (e->dest)->next && e->dest != last) + if (!(e->flags & EDGE_DFS_BACK) + && BLOCK_INFO (e->dest)->npredecesors) { - if (!nextbb) - nextbb = e->dest; - else - BLOCK_INFO (last)->next = e->dest; - last = e->dest; - } + BLOCK_INFO (e->dest)->npredecesors--; + if (!BLOCK_INFO (e->dest)->npredecesors) + { + if (!nextbb) + nextbb = e->dest; + else + BLOCK_INFO (last)->next = e->dest; + last = e->dest; + } + } } } @@ -739,8 +751,8 @@ estimate_loops_at_level (first_loop) for (l = loop->shared ? first_loop : loop; l != loop->next; l = l->next) if (loop->header == l->header) EXECUTE_IF_SET_IN_SBITMAP (l->nodes, 0, n, - BLOCK_INFO (BASIC_BLOCK (n))->visited = - 0); + BLOCK_INFO (BASIC_BLOCK (n))->tovisit = 1 + ); propagate_freq (loop->header); } } @@ -848,7 +860,7 @@ estimate_bb_frequencies (loops) else bb = BASIC_BLOCK (i); bb->aux = bi + i + 2; - BLOCK_INFO (bb)->visited = 1; + BLOCK_INFO (bb)->tovisit = 0; for (e = bb->succ; e; e = e->succ_next) { e->aux = ei + edgenum, edgenum++; @@ -862,9 +874,9 @@ estimate_bb_frequencies (loops) /* Now fake loop around whole function to finalize probabilities. */ for (i = 0; i < n_basic_blocks; i++) - BLOCK_INFO (BASIC_BLOCK (i))->visited = 0; - BLOCK_INFO (ENTRY_BLOCK_PTR)->visited = 0; - BLOCK_INFO (EXIT_BLOCK_PTR)->visited = 0; + BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1; + BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1; + BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1; propagate_freq (ENTRY_BLOCK_PTR); for (i = 0; i < n_basic_blocks; i++) diff --git a/gcc/toplev.c b/gcc/toplev.c index 8f2e8f5..4898926 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -3608,6 +3608,7 @@ rest_of_compilation (decl) close_dump_file (DFI_bbro, print_rtl_with_bb, insns); timevar_pop (TV_REORDER_BLOCKS); } + compute_alignments (); /* If a machine dependent reorganization is needed, call it. */ #ifdef MACHINE_DEPENDENT_REORG -- 2.7.4