1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990-2013 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 based on some ideas from Dain Samples of UC Berkeley.
5 Further mangling by Bob Manson, Cygnus Support.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Generate basic block profile instrumentation and auxiliary files.
24 Profile generation is optimized, so that not all arcs in the basic
25 block graph need instrumenting. First, the BB graph is closed with
26 one entry (function start), and one exit (function exit). Any
27 ABNORMAL_EDGE cannot be instrumented (because there is no control
28 path to place the code). We close the graph by inserting fake
29 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
30 edges that do not go to the exit_block. We ignore such abnormal
31 edges. Naturally these fake edges are never directly traversed,
32 and so *cannot* be directly instrumented. Some other graph
33 massaging is done. To optimize the instrumentation we generate the
34 BB minimal span tree, only edges that are not on the span tree
35 (plus the entry point) need instrumenting. From that information
36 all other edge counts can be deduced. By construction all fake
37 edges must be on the spanning tree. We also attempt to place
38 EDGE_CRITICAL edges on the spanning tree.
40 The auxiliary files generated are <dumpbase>.gcno (at compile time)
41 and <dumpbase>.gcda (at run time). The format is
42 described in full in gcov-io.h. */
44 /* ??? Register allocation should use basic block execution counts to
45 give preference to the most commonly executed blocks. */
47 /* ??? Should calculate branch probabilities before instrumenting code, since
48 then we can use arc counts to help decide which arcs to instrument. */
52 #include "coretypes.h"
59 #include "basic-block.h"
60 #include "diagnostic-core.h"
62 #include "value-prof.h"
64 #include "tree-flow.h"
/* NOTE(review): this listing is incomplete -- the enclosing "struct
   bb_info" definition is not visible here (the left-margin content
   numbering skips lines).  Code tokens kept byte-for-byte.  */
/* Nonzero when this block's execution count is known.  */
71 unsigned int count_valid : 1;
73 /* Number of successor and predecessor edges. */
/* Per-block solver state lives in basic_block->aux, allocated by
   alloc_aux_for_blocks (see compute_branch_probabilities).  */
78 #define BB_INFO(b) ((struct bb_info *) (b)->aux)
81 /* Counter summary from the last set of coverage counts read. */
/* NULL until get_exec_counts has read a summary from the gcda data.  */
83 const struct gcov_ctr_summary *profile_info;
85 /* Number of data points in the working set summary array. Using 128
86 provides information for at least every 1% increment of the total
87 profile size. The last entry is hardwired to 99.9% of the total. */
88 #define NUM_GCOV_WORKING_SETS 128
90 /* Counter working set information computed from the current counter
91 summary. Not initialized unless profile_info summary is non-NULL. */
/* Filled in by compute_working_sets (); entry I describes (roughly) the
   (I+1)/NUM_GCOV_WORKING_SETS fraction of total execution count.  */
92 static gcov_working_set_t gcov_working_sets[NUM_GCOV_WORKING_SETS];
94 /* Collect statistics on the performance of this pass for the entire source
   file.  Accumulated across every function this pass instruments or
   annotates during the compilation.  */
97 static int total_num_blocks;
98 static int total_num_edges;
99 static int total_num_edges_ignored;
100 static int total_num_edges_instrumented;
101 static int total_num_blocks_created;
102 static int total_num_passes;
103 static int total_num_times_called;
/* Histogram of measured branch probabilities, in twenty 5% buckets
   (see "index = prob * 20 / REG_BR_PROB_BASE" in
   compute_branch_probabilities).  */
104 static int total_hist_br_prob[20];
105 static int total_num_branches;
107 /* Forward declarations. */
108 static void find_spanning_tree (struct edge_list *);
/* Add edge instrumentation code to the flow graph: for every edge in
   EL that is neither ignored nor on the spanning tree, emit a GIMPLE
   edge-counter update (gimple_gen_edge_profiler).  Returns the number
   of edges instrumented.
   NOTE(review): the original header comment referred to parameters
   F/NUM_BLOCKS that do not exist in this signature (stale text from an
   older RTL interface) -- rewritten.
   NOTE(review): lines are missing from this listing (content numbering
   skips, e.g. the "if (dump_file)" guard before the fprintf and the
   braces); code tokens kept byte-for-byte.  */
116 instrument_edges (struct edge_list *el)
118 unsigned num_instr_edges = 0;
119 int num_edges = NUM_EDGES (el);
/* Walk all blocks including ENTRY, since its outgoing edge may need a
   counter too.  */
122 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
127 FOR_EACH_EDGE (e, ei, bb->succs)
129 struct edge_info *inf = EDGE_INFO (e);
131 if (!inf->ignore && !inf->on_tree)
/* Abnormal edges can never be instrumented: there is no control path
   on which to place the counter code.  */
133 gcc_assert (!(e->flags & EDGE_ABNORMAL));
135 fprintf (dump_file, "Edge %d to %d instrumented%s\n",
136 e->src->index, e->dest->index,
137 EDGE_CRITICAL_P (e) ? " (and split)" : "");
138 gimple_gen_edge_profiler (num_instr_edges++, e);
143 total_num_blocks_created += num_edges;
145 fprintf (dump_file, "%d edges instrumented\n", num_instr_edges);
146 return num_instr_edges;
149 /* Add code to measure histograms for values in list VALUES. */
/* For each histogram, allocate its counters and dispatch on the
   histogram type to the matching GIMPLE profiler generator.
   NOTE(review): lines are missing from this listing (content numbering
   skips) -- in particular the "switch (hist->type)" head, the "break"
   statements, some case labels (e.g. the HIST_TYPE_POW2 and
   HIST_TYPE_IOR labels), and the continue taken when counter
   allocation fails.  Code tokens kept byte-for-byte.  */
151 instrument_values (histogram_values values)
155 /* Emit code to generate the histograms before the insns. */
157 for (i = 0; i < values.length (); i++)
159 histogram_value hist = values[i];
/* Map the histogram type to its gcov counter kind.  */
160 unsigned t = COUNTER_FOR_HIST_TYPE (hist->type);
/* Skip this histogram if its counters cannot be allocated.  */
162 if (!coverage_counter_alloc (t, hist->n_counters))
167 case HIST_TYPE_INTERVAL:
168 gimple_gen_interval_profiler (hist, t, 0);
172 gimple_gen_pow2_profiler (hist, t, 0);
175 case HIST_TYPE_SINGLE_VALUE:
176 gimple_gen_one_value_profiler (hist, t, 0);
179 case HIST_TYPE_CONST_DELTA:
180 gimple_gen_const_delta_profiler (hist, t, 0);
183 case HIST_TYPE_INDIR_CALL:
184 gimple_gen_ic_profiler (hist, t, 0);
187 case HIST_TYPE_AVERAGE:
188 gimple_gen_average_profiler (hist, t, 0);
192 gimple_gen_ior_profiler (hist, t, 0);
202 /* Compute the working set information from the counter histogram in
203 the profile summary. This is an array of information corresponding to a
204 range of percentages of the total execution count (sum_all), and includes
205 the number of counters required to cover that working set percentage and
206 the minimum counter value in that working set. */
/* NOTE(review): lines are missing from this listing (content numbering
   skips) -- e.g. the function's return type line, braces, the early
   return when profile_info is NULL, the gcov_working_sets memset, the
   h_ix declaration, and the dump_file guards.  Code tokens kept
   byte-for-byte.  */
209 compute_working_sets (void)
211 gcov_type working_set_cum_values[NUM_GCOV_WORKING_SETS];
212 gcov_type ws_cum_hotness_incr;
213 gcov_type cum, tmp_cum;
214 const gcov_bucket_type *histo_bucket;
215 unsigned ws_ix, c_num, count, pctinc, pct;
217 gcov_working_set_t *ws_info;
222 /* Compute the amount of sum_all that the cumulative hotness grows
223 by in each successive working set entry, which depends on the
224 number of working set entries. */
225 ws_cum_hotness_incr = profile_info->sum_all / NUM_GCOV_WORKING_SETS;
227 /* Next fill in an array of the cumulative hotness values corresponding
228 to each working set summary entry we are going to compute below.
229 Skip 0% statistics, which can be extrapolated from the
230 rest of the summary data. */
231 cum = ws_cum_hotness_incr;
232 for (ws_ix = 0; ws_ix < NUM_GCOV_WORKING_SETS;
233 ws_ix++, cum += ws_cum_hotness_incr)
234 working_set_cum_values[ws_ix] = cum;
235 /* The last summary entry is reserved for (roughly) 99.9% of the
236 working set. Divide by 1024 so it becomes a shift, which gives
237 almost exactly 99.9%. */
238 working_set_cum_values[NUM_GCOV_WORKING_SETS-1]
239 = profile_info->sum_all - profile_info->sum_all/1024;
241 /* Next, walk through the histogram in descending order of hotness
242 and compute the statistics for the working set summary array.
243 As histogram entries are accumulated, we check to see which
244 working set entries have had their expected cum_value reached
245 and fill them in, walking the working set entries in increasing
246 size of cum_value. */
247 ws_ix = 0; /* The current entry into the working set array. */
248 cum = 0; /* The current accumulated counter sum. */
249 count = 0; /* The current accumulated count of block counters. */
250 for (h_ix = GCOV_HISTOGRAM_SIZE - 1;
251 h_ix >= 0 && ws_ix < NUM_GCOV_WORKING_SETS; h_ix--)
253 histo_bucket = &profile_info->histogram[h_ix];
255 /* If we haven't reached the required cumulative counter value for
256 the current working set percentage, simply accumulate this histogram
257 entry into the running sums and continue to the next histogram
   entry.  */
259 if (cum + histo_bucket->cum_value < working_set_cum_values[ws_ix])
261 cum += histo_bucket->cum_value;
262 count += histo_bucket->num_counters;
266 /* If adding the current histogram entry's cumulative counter value
267 causes us to exceed the current working set size, then estimate
268 how many of this histogram entry's counter values are required to
269 reach the working set size, and fill in working set entries
270 as we reach their expected cumulative value. */
271 for (c_num = 0, tmp_cum = cum;
272 c_num < histo_bucket->num_counters && ws_ix < NUM_GCOV_WORKING_SETS;
276 /* If we haven't reached the last histogram entry counter, add
277 in the minimum value again. This will underestimate the
278 cumulative sum so far, because many of the counter values in this
279 entry may have been larger than the minimum. We could add in the
280 average value every time, but that would require an expensive
   divide operation.  */
282 if (c_num + 1 < histo_bucket->num_counters)
283 tmp_cum += histo_bucket->min_value;
284 /* If we have reached the last histogram entry counter, then add
285 in the entire cumulative value. */
287 tmp_cum = cum + histo_bucket->cum_value;
289 /* Next walk through successive working set entries and fill in
290 the statistics for any whose size we have reached by accumulating
291 this histogram counter. */
292 while (ws_ix < NUM_GCOV_WORKING_SETS
293 && tmp_cum >= working_set_cum_values[ws_ix])
295 gcov_working_sets[ws_ix].num_counters = count;
296 gcov_working_sets[ws_ix].min_counter
297 = histo_bucket->min_value;
301 /* Finally, update the running cumulative value since we were
302 using a temporary above. */
303 cum += histo_bucket->cum_value;
/* Every working-set entry must have been filled in by now.  */
305 gcc_assert (ws_ix == NUM_GCOV_WORKING_SETS);
/* Dump the computed working sets (presumably guarded by a dump_file
   check on a missing line -- TODO confirm).  */
309 fprintf (dump_file, "Counter working sets:\n");
310 /* Multiply the percentage by 100 to avoid float. */
311 pctinc = 100 * 100 / NUM_GCOV_WORKING_SETS;
312 for (ws_ix = 0, pct = pctinc; ws_ix < NUM_GCOV_WORKING_SETS;
313 ws_ix++, pct += pctinc)
315 if (ws_ix == NUM_GCOV_WORKING_SETS - 1)
317 ws_info = &gcov_working_sets[ws_ix];
318 /* Print out the percentage using int arithmetic to avoid float. */
319 fprintf (dump_file, "\t\t%u.%02u%%: num counts=%u, min counter="
320 HOST_WIDEST_INT_PRINT_DEC "\n",
321 pct / 100, pct - (pct / 100 * 100),
322 ws_info->num_counters,
323 (HOST_WIDEST_INT)ws_info->min_counter);
328 /* Given the desired percentage of the full profile (sum_all from the
329 summary), multiplied by 10 to avoid float in PCT_TIMES_10, returns
330 the corresponding working set information. If an exact match for
331 the percentage isn't found, the closest value is used. */
/* NOTE(review): lines are missing from this listing -- in particular a
   guard between the two trailing returns (presumably "if (!i)" so that
   index 0 maps to the first entry) -- TODO confirm against the full
   source.  Code tokens kept byte-for-byte.  */
334 find_working_set (unsigned pct_times_10)
/* PCT_TIMES_10 is a per-mille value; 1000 == 100%.  */
339 gcc_assert (pct_times_10 <= 1000);
/* 99.9% and above map to the last, hardwired ~99.9% entry.  */
340 if (pct_times_10 >= 999)
341 return &gcov_working_sets[NUM_GCOV_WORKING_SETS - 1];
342 i = pct_times_10 * NUM_GCOV_WORKING_SETS / 1000;
344 return &gcov_working_sets[0];
345 return &gcov_working_sets[i - 1];
348 /* Computes hybrid profile for all matching entries in da_file.
350 CFG_CHECKSUM is the precomputed checksum for the CFG. */
/* Reads the arc counters for this function from the coverage data,
   setting the file-scope PROFILE_INFO summary as a side effect, and
   computes the working sets.  NOTE(review): lines are missing from
   this listing (return type, declarations of counts/bb, the NULL-check
   on counts, and the final return) -- code tokens kept byte-for-byte.  */
353 get_exec_counts (unsigned cfg_checksum, unsigned lineno_checksum)
355 unsigned num_edges = 0;
359 /* Count the edges to be (possibly) instrumented. */
360 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
365 FOR_EACH_EDGE (e, ei, bb->succs)
/* Only edges off the spanning tree carry counters.  */
366 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
370 counts = get_coverage_counts (GCOV_COUNTER_ARCS, num_edges, cfg_checksum,
371 lineno_checksum, &profile_info);
375 compute_working_sets();
377 if (dump_file && profile_info)
378 fprintf(dump_file, "Merged %u profiles with maximal count %u.\n",
379 profile_info->runs, (unsigned) profile_info->sum_max);
/* Return true if any non-ignored edge in EDGES has an impossible
   (negative) count that cannot be explained by a fake edge after a
   call.  NOTE(review): lines are missing from this listing (the
   function's return type and comment header, the "if (e->count < 0"
   test line, the dump_file guard, and the returns) -- code tokens kept
   byte-for-byte.  */
386 is_edge_inconsistent (vec<edge, va_gc> *edges)
390 FOR_EACH_EDGE (e, ei, edges)
392 if (!EDGE_INFO (e)->ignore)
/* Negative counts are tolerated only on fake edges leaving a call
   (setjmp/fork-style multiple returns).  */
395 && (!(e->flags & EDGE_FAKE)
396 || !block_ends_with_call_p (e->src)))
401 "Edge %i->%i is inconsistent, count"HOST_WIDEST_INT_PRINT_DEC,
402 e->src->index, e->dest->index, e->count);
403 dump_bb (dump_file, e->src, 0, TDF_DETAILS);
404 dump_bb (dump_file, e->dest, 0, TDF_DETAILS);
/* Walk all successor edges of all blocks and repair impossible
   (negative) edge counts.  NOTE(review): the listing omits the body of
   the inner loop (presumably clamping "e->count" to zero -- TODO
   confirm against the full source) as well as the return type, header
   comment, and braces.  Code tokens kept byte-for-byte.  */
414 correct_negative_edge_counts (void)
420 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
422 FOR_EACH_EDGE (e, ei, bb->succs)
430 /* Check consistency.
431 Return true if inconsistency is found. */
/* A function profile is consistent when every block count is
   non-negative and equals the sum of its incoming edge counts and
   (except for blocks ending in calls that reach EXIT) the sum of its
   outgoing edge counts.  NOTE(review): lines are missing from this
   listing (the FOR_EACH_BB loop head, braces, the negative-count test,
   early returns, and fprintf argument lines) -- code tokens kept
   byte-for-byte.  */
433 is_inconsistent (void)
436 bool inconsistent = false;
439 inconsistent |= is_edge_inconsistent (bb->preds);
/* Without a dump file there is no point collecting further diagnostics;
   bail out at the first inconsistency.  */
440 if (!dump_file && inconsistent)
442 inconsistent |= is_edge_inconsistent (bb->succs);
443 if (!dump_file && inconsistent)
449 fprintf (dump_file, "BB %i count is negative "
450 HOST_WIDEST_INT_PRINT_DEC,
453 dump_bb (dump_file, bb, 0, TDF_DETAILS);
457 if (bb->count != sum_edge_counts (bb->preds))
461 fprintf (dump_file, "BB %i count does not match sum of incoming edges "
462 HOST_WIDEST_INT_PRINT_DEC" should be " HOST_WIDEST_INT_PRINT_DEC,
465 sum_edge_counts (bb->preds));
466 dump_bb (dump_file, bb, 0, TDF_DETAILS);
/* A call block may legitimately disagree with its outgoing sum when a
   fake edge to EXIT models a non-returning path.  */
470 if (bb->count != sum_edge_counts (bb->succs) &&
471 ! (find_edge (bb, EXIT_BLOCK_PTR) != NULL && block_ends_with_call_p (bb)))
475 fprintf (dump_file, "BB %i count does not match sum of outgoing edges "
476 HOST_WIDEST_INT_PRINT_DEC" should be " HOST_WIDEST_INT_PRINT_DEC,
479 sum_edge_counts (bb->succs));
480 dump_bb (dump_file, bb, 0, TDF_DETAILS);
484 if (!dump_file && inconsistent)
491 /* Set each basic block count to the sum of its outgoing edge counts */
/* NOTE(review): the function's signature line (presumably
   "static void set_bb_counts (void)" -- TODO confirm) is missing from
   this listing, along with the braces.  Code tokens kept
   byte-for-byte.  */
496 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
498 bb->count = sum_edge_counts (bb->succs);
/* Negative counts should have been repaired before this point.  */
499 gcc_assert (bb->count >= 0);
503 /* Reads profile data and returns total number of edge counts read */
/* Assigns the counters in EXEC_COUNTS, in order, to the non-ignored
   edges that are not on the spanning tree, marking each such edge
   count valid and decrementing the unresolved succ/pred tallies used
   by the graph solver.  NOTE(review): lines are missing from this
   listing (return type, num_edges declaration/return, braces, the
   NULL-check on exec_counts, and dump_file guards) -- code tokens kept
   byte-for-byte.  */
505 read_profile_edge_counts (gcov_type *exec_counts)
509 int exec_counts_pos = 0;
510 /* For each edge not on the spanning tree, set its execution count from
   the .da file.  */
512 /* The first count in the .da file is the number of times that the function
513 was entered. This is the exec_count for block zero. */
515 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
520 FOR_EACH_EDGE (e, ei, bb->succs)
521 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
526 e->count = exec_counts[exec_counts_pos++];
/* An edge executed more often than the hottest counter in the whole
   program indicates corrupted data.  */
527 if (e->count > profile_info->sum_max)
529 if (flag_profile_correction)
/* Warn only once per compilation, then keep going.  */
531 static bool informed = 0;
533 inform (input_location,
534 "corrupted profile info: edge count exceeds maximal count");
538 error ("corrupted profile info: edge from %i to %i exceeds maximal count",
539 bb->index, e->dest->index);
545 EDGE_INFO (e)->count_valid = 1;
546 BB_INFO (bb)->succ_count--;
547 BB_INFO (e->dest)->pred_count--;
550 fprintf (dump_file, "\nRead edge from %i to %i, count:",
551 bb->index, e->dest->index);
552 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
553 (HOST_WIDEST_INT) e->count);
/* Fixed-point scale for the overlap measure below.  */
561 #define OVERLAP_BASE 10000
563 /* Compare the static estimated profile to the actual profile, and
564 return the "degree of overlap" measure between them.
566 Degree of overlap is a number between 0 and OVERLAP_BASE. It is
567 the sum of each basic block's minimum relative weights between
568 two profiles. And overlap of OVERLAP_BASE means two profiles are
   identical.  */
/* NOTE(review): lines are missing from this listing (return type,
   declarations of overlap/bb, braces, the early return value, and the
   final return of overlap) -- code tokens kept byte-for-byte.  */
572 compute_frequency_overlap (void)
574 gcov_type count_total = 0, freq_total = 0;
/* First pass: totals for normalization.  */
578 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
580 count_total += bb->count;
581 freq_total += bb->frequency;
/* Avoid dividing by zero when either profile is empty.  */
584 if (count_total == 0 || freq_total == 0)
/* Second pass: accumulate the per-block minimum relative weight.  */
587 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
588 overlap += MIN (bb->count * OVERLAP_BASE / count_total,
589 bb->frequency * OVERLAP_BASE / freq_total);
594 /* Compute the branch probabilities for the various branches.
595 Annotate them accordingly.
597 CFG_CHECKSUM is the precomputed checksum for the CFG. */
/* Reads the measured edge counts, solves the remaining counts by flow
   conservation over the spanning tree, repairs or rejects inconsistent
   data, and converts counts to edge probabilities.  NOTE(review):
   lines are missing from this listing throughout (return type, many
   declarations, braces, dump_file guards, the solver's changes/passes
   bookkeeping, and several loop bodies); code tokens kept
   byte-for-byte.  */
600 compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum)
607 int hist_br_prob[20];
609 gcov_type *exec_counts = get_exec_counts (cfg_checksum, lineno_checksum);
610 int inconsistent = 0;
612 /* Very simple sanity checks so we catch bugs in our profiling code. */
615 if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
617 error ("corrupted profile info: run_max * runs < sum_max");
621 if (profile_info->sum_all < profile_info->sum_max)
623 error ("corrupted profile info: sum_all is smaller than sum_max");
627 /* Attach extra info block to each bb. */
628 alloc_aux_for_blocks (sizeof (struct bb_info));
/* Count, per block, the non-ignored successor and predecessor edges
   the solver must still determine.  */
629 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
634 FOR_EACH_EDGE (e, ei, bb->succs)
635 if (!EDGE_INFO (e)->ignore)
636 BB_INFO (bb)->succ_count++;
637 FOR_EACH_EDGE (e, ei, bb->preds)
638 if (!EDGE_INFO (e)->ignore)
639 BB_INFO (bb)->pred_count++;
642 /* Avoid predicting entry on exit nodes. */
643 BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
644 BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
646 num_edges = read_profile_edge_counts (exec_counts);
649 fprintf (dump_file, "\n%d edge counts read\n", num_edges);
651 /* For every block in the file,
652 - if every exit/entrance edge has a known count, then set the block count
653 - if the block count is known, and every exit/entrance edge but one has
654 a known execution count, then set the count of the remaining edge
656 As edge counts are set, decrement the succ/pred count, but don't delete
657 the edge, that way we can easily tell when all edges are known, or only
658 one edge is unknown. */
660 /* The order that the basic blocks are iterated through is important.
661 Since the code that finds spanning trees starts with block 0, low numbered
662 edges are put on the spanning tree in preference to high numbered edges.
663 Hence, most instrumented edges are at the end. Graph solving works much
664 faster if we propagate numbers from the end to the start.
666 This takes an average of slightly more than 3 passes. */
674 FOR_BB_BETWEEN (bb, EXIT_BLOCK_PTR, NULL, prev_bb)
676 struct bb_info *bi = BB_INFO (bb);
677 if (! bi->count_valid)
/* All outgoing edges known: block count is their sum.  */
679 if (bi->succ_count == 0)
685 FOR_EACH_EDGE (e, ei, bb->succs)
/* All incoming edges known: block count is their sum.  */
691 else if (bi->pred_count == 0)
697 FOR_EACH_EDGE (e, ei, bb->preds)
/* Block count known and exactly one outgoing edge unknown: deduce it
   by conservation.  */
706 if (bi->succ_count == 1)
712 /* One of the counts will be invalid, but it is zero,
713 so adding it in also doesn't hurt. */
714 FOR_EACH_EDGE (e, ei, bb->succs)
717 /* Search for the invalid edge, and set its count. */
718 FOR_EACH_EDGE (e, ei, bb->succs)
719 if (! EDGE_INFO (e)->count_valid && ! EDGE_INFO (e)->ignore)
722 /* Calculate count for remaining edge by conservation. */
723 total = bb->count - total;
726 EDGE_INFO (e)->count_valid = 1;
730 BB_INFO (e->dest)->pred_count--;
/* Symmetric case: one incoming edge unknown.  */
733 if (bi->pred_count == 1)
739 /* One of the counts will be invalid, but it is zero,
740 so adding it in also doesn't hurt. */
741 FOR_EACH_EDGE (e, ei, bb->preds)
744 /* Search for the invalid edge, and set its count. */
745 FOR_EACH_EDGE (e, ei, bb->preds)
746 if (!EDGE_INFO (e)->count_valid && !EDGE_INFO (e)->ignore)
749 /* Calculate count for remaining edge by conservation. */
750 total = bb->count - total + e->count;
753 EDGE_INFO (e)->count_valid = 1;
757 BB_INFO (e->src)->succ_count--;
/* Compare the solved counts against the static frequency estimate.  */
765 int overlap = compute_frequency_overlap ();
766 gimple_dump_cfg (dump_file, dump_flags);
767 fprintf (dump_file, "Static profile overlap: %d.%d%%\n",
768 overlap / (OVERLAP_BASE / 100),
769 overlap % (OVERLAP_BASE / 100));
772 total_num_passes += passes;
774 fprintf (dump_file, "Graph solving took %d passes.\n\n", passes);
776 /* If the graph has been correctly solved, every block will have a
777 succ and pred count of zero. */
780 gcc_assert (!BB_INFO (bb)->succ_count && !BB_INFO (bb)->pred_count);
783 /* Check for inconsistent basic block counts */
784 inconsistent = is_inconsistent ();
788 if (flag_profile_correction)
790 /* Inconsistency detected. Make it flow-consistent. */
791 static int informed = 0;
795 inform (input_location, "correcting inconsistent profile data");
797 correct_negative_edge_counts ();
798 /* Set bb counts to the sum of the outgoing edge counts */
801 fprintf (dump_file, "\nCalling mcf_smooth_cfg\n");
805 error ("corrupted profile info: profile data is not flow-consistent");
808 /* For every edge, calculate its branch probability and add a reg_note
809 to the branch insn to indicate this. */
811 for (i = 0; i < 20; i++)
815 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
822 error ("corrupted profile info: number of iterations for basic block %d thought to be %i",
823 bb->index, (int)bb->count);
826 FOR_EACH_EDGE (e, ei, bb->succs)
828 /* Function may return twice in the case the called function is
829 setjmp or calls fork, but we can't represent this by extra
830 edge from the entry, since extra edge from the exit is
831 already present. We get negative frequency from the entry
   point.  */
834 && e->dest == EXIT_BLOCK_PTR)
835 || (e->count > bb->count
836 && e->dest != EXIT_BLOCK_PTR))
838 if (block_ends_with_call_p (bb))
839 e->count = e->count < 0 ? 0 : bb->count;
/* Any remaining out-of-range edge count is corrupted data; split the
   block count evenly as a fallback.  */
841 if (e->count < 0 || e->count > bb->count)
843 error ("corrupted profile info: number of executions for edge %d-%d thought to be %i",
844 e->src->index, e->dest->index,
846 e->count = bb->count / 2;
/* Convert counts to probabilities, rounding to nearest.  */
851 FOR_EACH_EDGE (e, ei, bb->succs)
852 e->probability = (e->count * REG_BR_PROB_BASE + bb->count / 2) / bb->count;
853 if (bb->index >= NUM_FIXED_BLOCKS
854 && block_ends_with_condjump_p (bb)
855 && EDGE_COUNT (bb->succs) >= 2)
861 /* Find the branch edge. It is possible that we do have fake
   edges here.  */
863 FOR_EACH_EDGE (e, ei, bb->succs)
864 if (!(e->flags & (EDGE_FAKE | EDGE_FALLTHRU)))
867 prob = e->probability;
/* Bucket the probability into one of 20 5% histogram bins.  */
868 index = prob * 20 / REG_BR_PROB_BASE;
872 hist_br_prob[index]++;
877 /* As a last resort, distribute the probabilities evenly.
878 Use simple heuristics that if there are normal edges,
879 give all abnormals frequency of 0, otherwise distribute the
880 frequency over abnormals (this is the case of noreturn
   calls).  */
882 else if (profile_status == PROFILE_ABSENT)
886 FOR_EACH_EDGE (e, ei, bb->succs)
887 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
891 FOR_EACH_EDGE (e, ei, bb->succs)
892 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
893 e->probability = REG_BR_PROB_BASE / total;
899 total += EDGE_COUNT (bb->succs);
900 FOR_EACH_EDGE (e, ei, bb->succs)
901 e->probability = REG_BR_PROB_BASE / total;
903 if (bb->index >= NUM_FIXED_BLOCKS
904 && block_ends_with_condjump_p (bb)
905 && EDGE_COUNT (bb->succs) >= 2)
910 profile_status = PROFILE_READ;
911 compute_function_frequency ();
915 fprintf (dump_file, "%d branches\n", num_branches);
/* Report the branch-probability histogram folded symmetrically, so
   bucket i pairs with bucket 19-i.  */
917 for (i = 0; i < 10; i++)
918 fprintf (dump_file, "%d%% branches in range %d-%d%%\n",
919 (hist_br_prob[i] + hist_br_prob[19-i]) * 100 / num_branches,
922 total_num_branches += num_branches;
923 for (i = 0; i < 20; i++)
924 total_hist_br_prob[i] += hist_br_prob[i];
926 fputc ('\n', dump_file);
927 fputc ('\n', dump_file);
930 free_aux_for_blocks ();
933 /* Load value histograms values whose description is stored in VALUES array
   from the coverage data and attach them to the corresponding
   statements.
936 CFG_CHECKSUM is the precomputed checksum for the CFG. */
/* NOTE(review): lines are missing from this listing (return type,
   braces, the "any" flag handling, and the else branches) -- code
   tokens kept byte-for-byte.  */
939 compute_value_histograms (histogram_values values, unsigned cfg_checksum,
940 unsigned lineno_checksum)
942 unsigned i, j, t, any;
943 unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS];
944 gcov_type *histogram_counts[GCOV_N_VALUE_COUNTERS];
/* Per-type cursor advancing through each counter array.  */
945 gcov_type *act_count[GCOV_N_VALUE_COUNTERS];
946 gcov_type *aact_count;
/* First total the counters required for each histogram type.  */
948 for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
949 n_histogram_counters[t] = 0;
951 for (i = 0; i < values.length (); i++)
953 histogram_value hist = values[i];
954 n_histogram_counters[(int) hist->type] += hist->n_counters;
/* Then read each type's counter array from the coverage data.  */
958 for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
960 if (!n_histogram_counters[t])
962 histogram_counts[t] = NULL;
966 histogram_counts[t] =
967 get_coverage_counts (COUNTER_FOR_HIST_TYPE (t),
968 n_histogram_counters[t], cfg_checksum,
969 lineno_checksum, NULL);
970 if (histogram_counts[t])
972 act_count[t] = histogram_counts[t];
/* Finally hand each histogram its slice of counters.  */
977 for (i = 0; i < values.length (); i++)
979 histogram_value hist = values[i];
980 gimple stmt = hist->hvalue.stmt;
982 t = (int) hist->type;
984 aact_count = act_count[t];
985 act_count[t] += hist->n_counters;
987 gimple_add_histogram_value (cfun, stmt, hist);
988 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
989 for (j = 0; j < hist->n_counters; j++)
990 hist->hvalue.counters[j] = aact_count[j];
993 for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
994 free (histogram_counts[t]);
997 /* When passed NULL as file_name, initialize.
998 When passed something else, output the necessary commands to change
999 line to LINE and offset to FILE_NAME. */
/* Writes GCOV_TAG_LINES records incrementally: a new record is opened
   (via *OFFSET) for block BB whenever the file or line changes, the
   file name is emitted only when it differs from the previous call.
   NOTE(review): lines are missing from this listing (return type, the
   early return after re-initialization, braces, the "if (*offset == 0)"
   and name_differs/line_differs conditionals, and the prev_line
   update) -- code tokens kept byte-for-byte.  */
1001 output_location (char const *file_name, int line,
1002 gcov_position_t *offset, basic_block bb)
/* Persistent across calls: the last location emitted.  */
1004 static char const *prev_file_name;
1005 static int prev_line;
1006 bool name_differs, line_differs;
1010 prev_file_name = NULL;
1015 name_differs = !prev_file_name || filename_cmp (file_name, prev_file_name);
1016 line_differs = prev_line != line;
1018 if (name_differs || line_differs)
1022 *offset = gcov_write_tag (GCOV_TAG_LINES);
1023 gcov_write_unsigned (bb->index);
/* A fresh record forces both the name and the line out.  */
1024 name_differs = line_differs=true;
1027 /* If this is a new source file, then output the
1028 file's name to the .bb file. */
1031 prev_file_name = file_name;
/* A zero "line number" introduces a file-name string.  */
1032 gcov_write_unsigned (0);
1033 gcov_write_string (prev_file_name);
1037 gcov_write_unsigned (line);
1043 /* Instrument and/or analyze program behavior based on the program CFG.
1045 This function creates a representation of the control flow graph (of
1046 the function being compiled) that is suitable for the instrumentation
1047 of edges and/or converting measured edge counts to counts on the
1050 When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
1051 the flow graph that are needed to reconstruct the dynamic behavior of the
1052 flow graph. This data is written to the gcno file for gcov.
1054 When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
1055 information from the gcda file containing edge count information from
1056 previous executions of the function being compiled. In this case, the
1057 control flow graph is annotated with actual execution counts by
1058 compute_branch_probabilities().
1060 Main entry point of this file. */
1067 unsigned num_edges, ignored_edges;
1068 unsigned num_instrumented;
1069 struct edge_list *el;
1070 histogram_values values = histogram_values();
1071 unsigned cfg_checksum, lineno_checksum;
1073 total_num_times_called++;
1075 flow_call_edges_add (NULL);
1076 add_noreturn_fake_exit_edges ();
1078 /* We can't handle cyclic regions constructed using abnormal edges.
1079 To avoid these we replace every source of abnormal edge by a fake
1080 edge from entry node and every destination by fake edge to exit.
1081 This keeps graph acyclic and our calculation exact for all normal
1082 edges except for exit and entrance ones.
1084 We also add fake exit edges for each call and asm statement in the
1085 basic, since it may not return. */
1089 int need_exit_edge = 0, need_entry_edge = 0;
1090 int have_exit_edge = 0, have_entry_edge = 0;
1094 /* Functions returning multiple times are not handled by extra edges.
1095 Instead we simply allow negative counts on edges from exit to the
1096 block past call and corresponding probabilities. We can't go
1097 with the extra edges because that would result in flowgraph that
1098 needs to have fake edges outside the spanning tree. */
1100 FOR_EACH_EDGE (e, ei, bb->succs)
1102 gimple_stmt_iterator gsi;
1105 /* It may happen that there are compiler generated statements
1106 without a locus at all. Go through the basic block from the
1107 last to the first statement looking for a locus. */
1108 for (gsi = gsi_last_nondebug_bb (bb);
1110 gsi_prev_nondebug (&gsi))
1112 last = gsi_stmt (gsi);
1113 if (gimple_has_location (last))
1117 /* Edge with goto locus might get wrong coverage info unless
1118 it is the only edge out of BB.
1119 Don't do that when the locuses match, so
1120 if (blah) goto something;
1121 is not computed twice. */
1123 && gimple_has_location (last)
1124 && LOCATION_LOCUS (e->goto_locus) != UNKNOWN_LOCATION
1125 && !single_succ_p (bb)
1126 && (LOCATION_FILE (e->goto_locus)
1127 != LOCATION_FILE (gimple_location (last))
1128 || (LOCATION_LINE (e->goto_locus)
1129 != LOCATION_LINE (gimple_location (last)))))
1131 basic_block new_bb = split_edge (e);
1132 edge ne = single_succ_edge (new_bb);
1133 ne->goto_locus = e->goto_locus;
1135 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
1136 && e->dest != EXIT_BLOCK_PTR)
1138 if (e->dest == EXIT_BLOCK_PTR)
1141 FOR_EACH_EDGE (e, ei, bb->preds)
1143 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
1144 && e->src != ENTRY_BLOCK_PTR)
1145 need_entry_edge = 1;
1146 if (e->src == ENTRY_BLOCK_PTR)
1147 have_entry_edge = 1;
1150 if (need_exit_edge && !have_exit_edge)
1153 fprintf (dump_file, "Adding fake exit edge to bb %i\n",
1155 make_edge (bb, EXIT_BLOCK_PTR, EDGE_FAKE);
1157 if (need_entry_edge && !have_entry_edge)
1160 fprintf (dump_file, "Adding fake entry edge to bb %i\n",
1162 make_edge (ENTRY_BLOCK_PTR, bb, EDGE_FAKE);
1163 /* Avoid bbs that have both fake entry edge and also some
1164 exit edge. One of those edges wouldn't be added to the
1165 spanning tree, but we can't instrument any of them. */
1166 if (have_exit_edge || need_exit_edge)
1168 gimple_stmt_iterator gsi;
1172 gsi = gsi_after_labels (bb);
1173 gcc_checking_assert (!gsi_end_p (gsi));
1174 first = gsi_stmt (gsi);
1175 if (is_gimple_debug (first))
1177 gsi_next_nondebug (&gsi);
1178 gcc_checking_assert (!gsi_end_p (gsi));
1179 first = gsi_stmt (gsi);
1181 /* Don't split the bbs containing __builtin_setjmp_receiver
1182 or __builtin_setjmp_dispatcher calls. These are very
1183 special and don't expect anything to be inserted before
1185 if (!is_gimple_call (first)
1186 || (fndecl = gimple_call_fndecl (first)) == NULL
1187 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL
1188 || (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_SETJMP_RECEIVER
1189 && (DECL_FUNCTION_CODE (fndecl)
1190 != BUILT_IN_SETJMP_DISPATCHER)))
1193 fprintf (dump_file, "Splitting bb %i after labels\n",
1195 split_block_after_labels (bb);
1201 el = create_edge_list ();
1202 num_edges = NUM_EDGES (el);
1203 alloc_aux_for_edges (sizeof (struct edge_info));
1205 /* The basic blocks are expected to be numbered sequentially. */
/* NOTE(review): this span is the tail of branch_prob (the function opens
   before this excerpt, and the extraction elides many interior lines,
   including braces).  Visible phases: mark ignorable edges, build the
   spanning tree, accumulate statistics, compute checksums, write the
   .gcno records, and insert edge-counting instrumentation.  */
1209 for (i = 0 ; i < num_edges ; i++)
1211 edge e = INDEX_EDGE (el, i);
1214 /* Mark edges we've replaced by fake edges above as ignored. */
1215 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
1216 && e->src != ENTRY_BLOCK_PTR && e->dest != EXIT_BLOCK_PTR)
1218 EDGE_INFO (e)->ignore = 1;
1223 /* Create spanning tree from basic block graph, mark each edge that is
1224 on the spanning tree. We insert as many abnormal and critical edges
1225 as possible to minimize number of edge splits necessary. */
1227 find_spanning_tree (el);
1229 /* Fake edges that are not on the tree will not be instrumented, so
1230 mark them ignored. */
1231 for (num_instrumented = i = 0; i < num_edges; i++)
1233 edge e = INDEX_EDGE (el, i);
1234 struct edge_info *inf = EDGE_INFO (e);
/* Edges already ignored or on the spanning tree need no counter; the
   branches taken for other cases are elided here — presumably fake
   off-tree edges get ignored and the rest are counted in
   num_instrumented (TODO confirm against full source).  */
1236 if (inf->ignore || inf->on_tree)
1238 else if (e->flags & EDGE_FAKE)
/* Accumulate per-function numbers into the file-level totals that
   end_branch_prob later dumps; the fprintf calls are dump output
   (their if (dump_file) guards appear to be elided).  */
1247 total_num_blocks += n_basic_blocks;
1249 fprintf (dump_file, "%d basic blocks\n", n_basic_blocks);
1251 total_num_edges += num_edges;
1253 fprintf (dump_file, "%d edges\n", num_edges);
1255 total_num_edges_ignored += ignored_edges;
1257 fprintf (dump_file, "%d ignored edges\n", ignored_edges);
1259 total_num_edges_instrumented += num_instrumented;
1261 fprintf (dump_file, "%d instrumentation edges\n", num_instrumented);
1263 /* Compute two different checksums. Note that we want to compute
1264 the checksum in only once place, since it depends on the shape
1265 of the control flow which can change during
1266 various transformations. */
1267 cfg_checksum = coverage_compute_cfg_checksum ();
1268 lineno_checksum = coverage_compute_lineno_checksum ();
1270 /* Write the data from which gcov can reconstruct the basic block
1271 graph and function line numbers (the gcno file). */
1272 if (coverage_begin_function (lineno_checksum, cfg_checksum))
1274 gcov_position_t offset;
1276 /* Basic block flags */
/* One zero flag word per basic block under the BLOCKS tag.  */
1277 offset = gcov_write_tag (GCOV_TAG_BLOCKS);
1278 for (i = 0; i != (unsigned) (n_basic_blocks); i++)
1279 gcov_write_unsigned (0);
1280 gcov_write_length (offset);
/* For each block, emit an ARCS record: source block index followed by
   (dest index, flag bits) per successor edge.  */
1283 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1288 offset = gcov_write_tag (GCOV_TAG_ARCS);
1289 gcov_write_unsigned (bb->index);
1291 FOR_EACH_EDGE (e, ei, bb->succs)
1293 struct edge_info *i = EDGE_INFO (e);
/* The !i->ignore guard around this edge's output appears to be
   elided here — TODO confirm against full source.  */
1296 unsigned flag_bits = 0;
1299 flag_bits |= GCOV_ARC_ON_TREE;
1300 if (e->flags & EDGE_FAKE)
1301 flag_bits |= GCOV_ARC_FAKE;
1302 if (e->flags & EDGE_FALLTHRU)
1303 flag_bits |= GCOV_ARC_FALLTHROUGH;
1304 /* On trees we don't have fallthru flags, but we can
1305 recompute them from CFG shape. */
1306 if (e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)
1307 && e->src->next_bb == e->dest)
1308 flag_bits |= GCOV_ARC_FALLTHROUGH;
1310 gcov_write_unsigned (e->dest->index);
1311 gcov_write_unsigned (flag_bits);
1315 gcov_write_length (offset);
/* Line-number records: map each block's statements to source lines.  */
1319 /* Initialize the output. */
1320 output_location (NULL, 0, NULL, NULL);
1324 gimple_stmt_iterator gsi;
1325 gcov_position_t offset = 0;
/* The first real block is credited with the function's own
   declaration location.  */
1327 if (bb == ENTRY_BLOCK_PTR->next_bb)
1329 expanded_location curr_location =
1330 expand_location (DECL_SOURCE_LOCATION (current_function_decl));
1331 output_location (curr_location.file, curr_location.line,
1335 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1337 gimple stmt = gsi_stmt (gsi);
1338 if (gimple_has_location (stmt))
1339 output_location (gimple_filename (stmt), gimple_lineno (stmt),
1343 /* Notice GOTO expressions eliminated while constructing the CFG. */
1344 if (single_succ_p (bb)
1345 && LOCATION_LOCUS (single_succ_edge (bb)->goto_locus)
1346 != UNKNOWN_LOCATION)
1348 expanded_location curr_location
1349 = expand_location (single_succ_edge (bb)->goto_locus);
1350 output_location (curr_location.file, curr_location.line,
1356 /* A file of NULL indicates the end of run. */
1357 gcov_write_unsigned (0);
1358 gcov_write_string (NULL);
1359 gcov_write_length (offset);
/* Select values (e.g. for value profiling) and, when reading a prior
   profile back in, compute the branch probabilities from it.  */
1364 if (flag_profile_values)
1365 gimple_find_values_to_profile (&values);
1367 if (flag_branch_probabilities)
1369 compute_branch_probabilities (cfg_checksum, lineno_checksum);
1370 if (flag_profile_values)
1371 compute_value_histograms (values, cfg_checksum, lineno_checksum);
/* The fake edges added to close the graph were only needed for the
   spanning-tree construction; drop them before instrumenting.  */
1374 remove_fake_edges ();
1376 /* For each edge not on the spanning tree, add counting code. */
1377 if (profile_arc_flag
1378 && coverage_counter_alloc (GCOV_COUNTER_ARCS, num_instrumented))
1380 unsigned n_instrumented;
1382 gimple_init_edge_profiler ();
1384 n_instrumented = instrument_edges (el);
/* Sanity check: instrument_edges must agree with the count made
   during the marking pass above.  */
1386 gcc_assert (n_instrumented == num_instrumented);
1388 if (flag_profile_values)
1389 instrument_values (values);
1391 /* Commit changes done by instrumentation. */
1392 gsi_commit_edge_inserts ();
/* Release per-edge aux data and the edge list built earlier.  */
1395 free_aux_for_edges ();
1398 free_edge_list (el);
1399 coverage_end_function (lineno_checksum, cfg_checksum);
1402 /* Union find algorithm implementation for the basic blocks using
/* NOTE(review): the extraction elides the return-type line before the
   signature and the function's closing lines (including the final
   return of GROUP).
   find_group: return the representative of BB's union-find set.  The
   parent link is stored in bb->aux; a root is a block whose aux points
   to itself.  Path compression is applied on the second walk.  */
1406 find_group (basic_block bb)
1408 basic_block group = bb, bb1;
/* First walk: follow parent links up to the set's root.  */
1410 while ((basic_block) group->aux != group)
1411 group = (basic_block) group->aux;
1413 /* Compress path. */
/* Second walk: repoint every node on the path directly at the root.
   The step that advances BB (presumably bb = bb1) is elided here —
   TODO confirm against full source.  */
1414 while ((basic_block) bb->aux != group)
1416 bb1 = (basic_block) bb->aux;
1417 bb->aux = (void *) group;
/* union_groups: merge the union-find sets containing BB1 and BB2.
   NOTE(review): the lines that actually link one representative to the
   other are elided after the assert; the return type line before the
   signature is also elided.  */
1424 union_groups (basic_block bb1, basic_block bb2)
1426 basic_block bb1g = find_group (bb1);
1427 basic_block bb2g = find_group (bb2);
1429 /* ??? I don't have a place for the rank field. OK. Lets go w/o it,
1430 this code is unlikely going to be performance problem anyway. */
/* Callers only union blocks known to be in different sets, so the two
   representatives must differ.  */
1431 gcc_assert (bb1g != bb2g);
1436 /* This function searches all of the edges in the program flow graph, and puts
1437 as many bad edges as possible onto the spanning tree. Bad edges include
1438 abnormals edges, which can't be instrumented at the moment. Since it is
1439 possible for fake edges to form a cycle, we will have to develop some
1440 better way in the future. Also put critical edges to the tree, since they
1441 are more expensive to instrument. */
/* NOTE(review): the extraction elides the return-type line, braces, and
   some statements (e.g. the loop body initializing bb->aux and the
   if (dump_file) guards before the fprintf calls).
   Three passes, in priority order, each adding an edge to the tree only
   when its endpoints are in different union-find sets (no cycle):
   1. abnormal/fake edges and edges into the exit block,
   2. critical edges,
   3. all remaining edges.  */
1444 find_spanning_tree (struct edge_list *el)
1447 int num_edges = NUM_EDGES (el);
1450 /* We use aux field for standard union-find algorithm. */
1451 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1454 /* Add fake edge exit to entry we can't instrument. */
1455 union_groups (EXIT_BLOCK_PTR, ENTRY_BLOCK_PTR);
1457 /* First add all abnormal edges to the tree unless they form a cycle. Also
1458 add all edges to EXIT_BLOCK_PTR to avoid inserting profiling code behind
1459 setting return value from function. */
1460 for (i = 0; i < num_edges; i++)
1462 edge e = INDEX_EDGE (el, i);
1463 if (((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_FAKE))
1464 || e->dest == EXIT_BLOCK_PTR)
1465 && !EDGE_INFO (e)->ignore
1466 && (find_group (e->src) != find_group (e->dest)))
1469 fprintf (dump_file, "Abnormal edge %d to %d put to tree\n",
1470 e->src->index, e->dest->index);
1471 EDGE_INFO (e)->on_tree = 1;
1472 union_groups (e->src, e->dest);
1476 /* Now insert all critical edges to the tree unless they form a cycle. */
1477 for (i = 0; i < num_edges; i++)
1479 edge e = INDEX_EDGE (el, i);
1480 if (EDGE_CRITICAL_P (e) && !EDGE_INFO (e)->ignore
1481 && find_group (e->src) != find_group (e->dest))
1484 fprintf (dump_file, "Critical edge %d to %d put to tree\n",
1485 e->src->index, e->dest->index);
1486 EDGE_INFO (e)->on_tree = 1;
1487 union_groups (e->src, e->dest);
1491 /* And now the rest. */
1492 for (i = 0; i < num_edges; i++)
1494 edge e = INDEX_EDGE (el, i);
1495 if (!EDGE_INFO (e)->ignore
1496 && find_group (e->src) != find_group (e->dest))
1499 fprintf (dump_file, "Normal edge %d to %d put to tree\n",
1500 e->src->index, e->dest->index);
1501 EDGE_INFO (e)->on_tree = 1;
1502 union_groups (e->src, e->dest);
/* The union-find borrowed bb->aux; clear it for later passes.  */
1506 clear_aux_for_blocks ();
1509 /* Perform file-level initialization for branch-prob processing. */
/* Reset the file-level statistics that branch_prob accumulates per
   function and end_branch_prob dumps.  NOTE(review): the return-type
   line, the declaration of I, and the closing brace are elided by the
   extraction.  */
1512 init_branch_prob (void)
1516 total_num_blocks = 0;
1517 total_num_edges = 0;
1518 total_num_edges_ignored = 0;
1519 total_num_edges_instrumented = 0;
1520 total_num_blocks_created = 0;
1521 total_num_passes = 0;
1522 total_num_times_called = 0;
1523 total_num_branches = 0;
/* Branch-probability histogram: 20 buckets (end_branch_prob reports
   them pairwise as ten 5% ranges).  */
1524 for (i = 0; i < 20; i++)
1525 total_hist_br_prob[i] = 0;
1528 /* Performs file-level cleanup after branch-prob processing
/* Dump the accumulated file-level statistics to dump_file.
   NOTE(review): the extraction elides the if (dump_file) guard, braces,
   and the function's closing lines, which run past this excerpt.  */
1532 end_branch_prob (void)
1536 fprintf (dump_file, "\n");
1537 fprintf (dump_file, "Total number of blocks: %d\n",
1539 fprintf (dump_file, "Total number of edges: %d\n", total_num_edges);
1540 fprintf (dump_file, "Total number of ignored edges: %d\n",
1541 total_num_edges_ignored);
1542 fprintf (dump_file, "Total number of instrumented edges: %d\n",
1543 total_num_edges_instrumented);
1544 fprintf (dump_file, "Total number of blocks created: %d\n",
1545 total_num_blocks_created);
1546 fprintf (dump_file, "Total number of graph solution passes: %d\n",
/* Average passes per call, rounded to nearest via the
   (x + n/2) / n idiom; guarded against division by zero.  */
1548 if (total_num_times_called != 0)
1549 fprintf (dump_file, "Average number of graph solution passes: %d\n",
1550 (total_num_passes + (total_num_times_called >> 1))
1551 / total_num_times_called);
1552 fprintf (dump_file, "Total number of branches: %d\n",
1553 total_num_branches);
1554 if (total_num_branches)
/* Fold the 20-bucket histogram symmetrically (i and 19-i) into ten
   5% probability ranges and print each as a percentage of all
   branches.  */
1558 for (i = 0; i < 10; i++)
1559 fprintf (dump_file, "%d%% branches in range %d-%d%%\n",
1560 (total_hist_br_prob[i] + total_hist_br_prob[19-i]) * 100
1561 / total_num_branches, 5*i, 5*i+5);