1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2007, 2009
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 #include "libiberty.h"
25 #include "search_list.h"
38 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
40 static void scale_and_align_entries (void);
41 static void print_header (int);
42 static void print_line (Sym *, double);
43 static int cmp_time (const PTR, const PTR);
45 /* Declarations of automatically generated functions to output blurbs. */
46 extern void flat_blurb (FILE * fp);
48 static histogram *find_histogram (bfd_vma lowpc, bfd_vma highpc);
49 static histogram *find_histogram_for_pc (bfd_vma pc);
51 histogram * histograms;
52 unsigned num_histograms;
54 static char hist_dimension[16] = "seconds";
55 static char hist_dimension_abbrev = 's';
57 static double accum_time; /* Accumulated time so far for print_line(). */
58 static double total_time; /* Total time for all routines. */
60 /* Table of SI prefixes for powers of 10 (used to automatically
61 scale some of the values in the flat profile). */
69 { 'T', 1e-12 }, /* tera */
70 { 'G', 1e-09 }, /* giga */
71 { 'M', 1e-06 }, /* mega */
72 { 'K', 1e-03 }, /* kilo */
74 { 'm', 1e+03 }, /* milli */
75 { 'u', 1e+06 }, /* micro */
76 { 'n', 1e+09 }, /* nano */
77 { 'p', 1e+12 }, /* pico */
78 { 'f', 1e+15 }, /* femto */
79 { 'a', 1e+18 } /* ato */
82 /* Reads just the header part of histogram record into
83 *RECORD from IFP. FILENAME is the name of IFP and
84 is provided for formatting error messages only.
86 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
87 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
88 that the new histogram is compatible with already-set values
89 of those variables and emits an error if that's not so. */
91 read_histogram_header (histogram *record,
92 FILE *ifp, const char *filename,
95 unsigned int profrate;
96 char n_hist_dimension[15];
97 char n_hist_dimension_abbrev;
100 if (gmon_io_read_vma (ifp, &record->lowpc)
101 || gmon_io_read_vma (ifp, &record->highpc)
102 || gmon_io_read_32 (ifp, &record->num_bins)
103 || gmon_io_read_32 (ifp, &profrate)
104 || gmon_io_read (ifp, n_hist_dimension, 15)
105 || gmon_io_read (ifp, &n_hist_dimension_abbrev, 1))
107 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
113 n_hist_scale = (double)((record->highpc - record->lowpc) / sizeof (UNIT))
118 /* We don't try to veryfy profrate is the same for all histogram
119 records. If we have two histogram records for the same
120 address range and profiling samples is done as often
121 as possible as opposed on timer, then the actual profrate will
122 be slightly different. Most of the time the difference does not
123 matter and insisting that profiling rate is exactly the same
124 will only create inconvenient. */
126 memcpy (hist_dimension, n_hist_dimension, 15);
127 hist_dimension_abbrev = n_hist_dimension_abbrev;
128 hist_scale = n_hist_scale;
132 if (strncmp (n_hist_dimension, hist_dimension, 15) != 0)
135 _("%s: dimension unit changed between histogram records\n"
138 whoami, whoami, hist_dimension, whoami, n_hist_dimension);
142 if (n_hist_dimension_abbrev != hist_dimension_abbrev)
145 _("%s: dimension abbreviation changed between histogram records\n"
148 whoami, whoami, hist_dimension_abbrev, whoami, n_hist_dimension_abbrev);
152 /* The only reason we require the same scale for histograms is that
153 there's code (notably printing code), that prints units,
154 and it would be very confusing to have one unit mean different
155 things for different functions. */
156 if (fabs (hist_scale - n_hist_scale) > 0.000001)
159 _("%s: different scales in histogram records"),
166 /* Read the histogram from file IFP. FILENAME is the name of IFP and
167 is provided for formatting error messages only. */
170 hist_read_rec (FILE * ifp, const char *filename)
172 bfd_vma lowpc, highpc;
174 histogram *record, *existing_record;
177 /* 1. Read the header and see if there's existing record for the
178 same address range and that there are no overlapping records. */
179 read_histogram_header (&n_record, ifp, filename, num_histograms == 0);
181 existing_record = find_histogram (n_record.lowpc, n_record.highpc);
184 record = existing_record;
188 /* If this record overlaps, but does not completely match an existing
189 record, it's an error. */
190 lowpc = n_record.lowpc;
191 highpc = n_record.highpc;
192 hist_clip_symbol_address (&lowpc, &highpc);
196 _("%s: overlapping histogram records\n"),
201 /* This is new record. Add it to global array and allocate space for
203 histograms = (struct histogram *)
204 xrealloc (histograms, sizeof (histogram) * (num_histograms + 1));
205 memcpy (histograms + num_histograms,
206 &n_record, sizeof (histogram));
207 record = &histograms[num_histograms];
210 record->sample = (int *) xmalloc (record->num_bins
211 * sizeof (record->sample[0]));
212 memset (record->sample, 0, record->num_bins * sizeof (record->sample[0]));
215 /* 2. We have either a new record (with zeroed histogram data), or an existing
216 record with some data in the histogram already. Read new data into the
217 record, adding hit counts. */
220 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
221 (unsigned long) record->lowpc, (unsigned long) record->highpc,
224 for (i = 0; i < record->num_bins; ++i)
227 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
230 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
231 whoami, filename, i, record->num_bins);
234 record->sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
236 printf ("[hist_read_rec] 0x%lx: %u\n",
237 (unsigned long) (record->lowpc
238 + i * (record->highpc - record->lowpc)
245 /* Write all execution histograms file OFP. FILENAME is the name
246 of OFP and is provided for formatting error-messages only. */
249 hist_write_hist (FILE * ofp, const char *filename)
254 for (r = 0; r < num_histograms; ++r)
256 histogram *record = &histograms[r];
260 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
261 || gmon_io_write_vma (ofp, record->lowpc)
262 || gmon_io_write_vma (ofp, record->highpc)
263 || gmon_io_write_32 (ofp, record->num_bins)
264 || gmon_io_write_32 (ofp, hz)
265 || gmon_io_write (ofp, hist_dimension, 15)
266 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
272 for (i = 0; i < record->num_bins; ++i)
274 bfd_put_16 (core_bfd, (bfd_vma) record->sample[i], (bfd_byte *) &count[0]);
276 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
285 /* Calculate scaled entry point addresses (to save time in
286 hist_assign_samples), and, on architectures that have procedure
287 entry masks at the start of a function, possibly push the scaled
288 entry points over the procedure entry mask, if it turns out that
289 the entry point is in one bin and the code for a routine is in the
293 scale_and_align_entries ()
296 bfd_vma bin_of_entry;
299 for (sym = symtab.base; sym < symtab.limit; sym++)
301 histogram *r = find_histogram_for_pc (sym->addr);
303 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
307 bin_of_entry = (sym->hist.scaled_addr - r->lowpc) / hist_scale;
308 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - r->lowpc)
310 if (bin_of_entry < bin_of_code)
313 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
314 (unsigned long) sym->hist.scaled_addr,
315 (unsigned long) (sym->hist.scaled_addr
317 sym->hist.scaled_addr += UNITS_TO_CODE;
324 /* Assign samples to the symbol to which they belong.
326 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
327 which may overlap one more symbol address ranges. If a symbol
328 overlaps with the bin's address range by O percent, then O percent
329 of the bin's count is credited to that symbol.
331 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
332 with respect to the symbol's address range [SYM_LOW_PC,
333 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
334 the distance (in UNITs) between the arrows, the fraction of the
335 sample that is to be credited to the symbol which starts at
338 sym_low_pc sym_high_pc
342 +-----------------------------------------------+
344 | ->| |<- ->| |<- ->| |<- |
346 +---------+ +---------+ +---------+
350 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
352 For the VAX we assert that samples will never fall in the first two
353 bytes of any routine, since that is the entry mask, thus we call
354 scale_and_align_entries() to adjust the entry points if the entry
355 mask falls in one bin but the code for the routine doesn't start
356 until the next bin. In conjunction with the alignment of routine
357 addresses, this should allow us to have only one sample for every
358 four bytes of text space and never have any overlap (the two end
362 hist_assign_samples_1 (histogram *r)
364 bfd_vma bin_low_pc, bin_high_pc;
365 bfd_vma sym_low_pc, sym_high_pc;
366 bfd_vma overlap, addr;
367 unsigned int bin_count;
368 unsigned int i, j, k;
369 double count_time, credit;
371 bfd_vma lowpc = r->lowpc / sizeof (UNIT);
373 /* Iterate over all sample bins. */
374 for (i = 0, k = 1; i < r->num_bins; ++i)
376 bin_count = r->sample[i];
380 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
381 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
382 count_time = bin_count;
386 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
387 (unsigned long) (sizeof (UNIT) * bin_low_pc),
388 (unsigned long) (sizeof (UNIT) * bin_high_pc),
390 total_time += count_time;
392 /* Credit all symbols that are covered by bin I. */
393 /* PR gprof/13325: Make sure that J does not go below I. */
394 for (j = k - 1; j < symtab.len; k = ++j)
396 sym_low_pc = symtab.base[j].hist.scaled_addr;
397 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
399 /* If high end of bin is below entry address,
401 if (bin_high_pc < sym_low_pc)
404 /* If low end of bin is above high end of symbol,
405 go for next symbol. */
406 if (bin_low_pc >= sym_high_pc)
410 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
415 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
416 (unsigned long) symtab.base[j].addr,
417 (unsigned long) (sizeof (UNIT) * sym_high_pc),
418 symtab.base[j].name, overlap * count_time / hist_scale,
421 addr = symtab.base[j].addr;
422 credit = overlap * count_time / hist_scale;
424 /* Credit symbol if it appears in INCL_FLAT or that
425 table is empty and it does not appear it in
427 if (sym_lookup (&syms[INCL_FLAT], addr)
428 || (syms[INCL_FLAT].len == 0
429 && !sym_lookup (&syms[EXCL_FLAT], addr)))
431 symtab.base[j].hist.time += credit;
435 total_time -= credit;
441 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
445 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
447 hist_assign_samples ()
451 scale_and_align_entries ();
453 for (i = 0; i < num_histograms; ++i)
454 hist_assign_samples_1 (&histograms[i]);
458 /* Print header for flag histogram profile. */
461 print_header (int prefix)
465 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
467 if (bsd_style_output)
469 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
470 (long) hist_scale * (long) sizeof (UNIT));
471 if (total_time > 0.0)
473 printf (_(" for %.2f%% of %.2f %s\n\n"),
474 100.0 / total_time, total_time / hz, hist_dimension);
479 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
482 if (total_time <= 0.0)
484 printf (_(" no time accumulated\n\n"));
486 /* This doesn't hurt since all the numerators will be zero. */
490 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
491 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
493 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
494 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
500 print_line (Sym *sym, double scale)
502 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
505 accum_time += sym->hist.time;
507 if (bsd_style_output)
508 printf ("%5.1f %10.2f %8.2f",
509 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
510 accum_time / hz, sym->hist.time / hz);
512 printf ("%6.2f %9.2f %8.2f",
513 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
514 accum_time / hz, sym->hist.time / hz);
516 if (sym->ncalls != 0)
517 printf (" %8lu %8.2f %8.2f ",
518 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
519 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
521 printf (" %8.8s %8.8s %8.8s ", "", "", "");
523 if (bsd_style_output)
526 print_name_only (sym);
532 /* Compare LP and RP. The primary comparison key is execution time,
533 the secondary is number of invocation, and the tertiary is the
534 lexicographic order of the function names. */
537 cmp_time (const PTR lp, const PTR rp)
539 const Sym *left = *(const Sym **) lp;
540 const Sym *right = *(const Sym **) rp;
543 time_diff = right->hist.time - left->hist.time;
551 if (right->ncalls > left->ncalls)
554 if (right->ncalls < left->ncalls)
557 return strcmp (left->name, right->name);
561 /* Print the flat histogram profile. */
566 Sym **time_sorted_syms, *top_dog, *sym;
567 unsigned int sym_index;
573 first_output = FALSE;
579 if (bsd_style_output)
581 if (print_descriptions)
583 printf (_("\n\n\nflat profile:\n"));
589 printf (_("Flat profile:\n"));
592 /* Sort the symbol table by time (call-count and name as secondary
593 and tertiary keys). */
594 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
596 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
597 time_sorted_syms[sym_index] = &symtab.base[sym_index];
599 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
601 if (bsd_style_output)
603 log_scale = 5; /* Milli-seconds is BSD-default. */
607 /* Search for symbol with highest per-call
608 execution time and scale accordingly. */
613 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
615 sym = time_sorted_syms[sym_index];
617 if (sym->ncalls != 0)
621 call_time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
623 if (call_time > top_time)
626 top_time = call_time;
631 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
635 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
637 double scaled_value = SItab[log_scale].scale * top_time;
639 if (scaled_value >= 1.0 && scaled_value < 1000.0)
645 /* For now, the dimension is always seconds. In the future, we
646 may also want to support other (pseudo-)dimensions (such as
647 I-cache misses etc.). */
648 print_header (SItab[log_scale].prefix);
650 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
652 addr = time_sorted_syms[sym_index]->addr;
654 /* Print symbol if its in INCL_FLAT table or that table
655 is empty and the symbol is not in EXCL_FLAT. */
656 if (sym_lookup (&syms[INCL_FLAT], addr)
657 || (syms[INCL_FLAT].len == 0
658 && !sym_lookup (&syms[EXCL_FLAT], addr)))
659 print_line (time_sorted_syms[sym_index], SItab[log_scale].scale);
662 free (time_sorted_syms);
664 if (print_descriptions && !bsd_style_output)
669 hist_check_address (unsigned address)
673 for (i = 0; i < num_histograms; ++i)
674 if (histograms[i].lowpc <= address && address < histograms[i].highpc)
681 #define min(a,b) (((a)<(b)) ? (a) : (b))
684 #define max(a,b) (((a)>(b)) ? (a) : (b))
688 hist_clip_symbol_address (bfd_vma *p_lowpc, bfd_vma *p_highpc)
693 if (num_histograms == 0)
695 *p_highpc = *p_lowpc;
699 for (i = 0; i < num_histograms; ++i)
701 bfd_vma common_low, common_high;
702 common_low = max (histograms[i].lowpc, *p_lowpc);
703 common_high = min (histograms[i].highpc, *p_highpc);
705 if (common_low < common_high)
710 _("%s: found a symbol that covers "
711 "several histogram records"),
717 *p_lowpc = common_low;
718 *p_highpc = common_high;
723 *p_highpc = *p_lowpc;
726 /* Find and return exising histogram record having the same lowpc and
727 highpc as passed via the parameters. Return NULL if nothing is found.
728 The return value is valid until any new histogram is read. */
730 find_histogram (bfd_vma lowpc, bfd_vma highpc)
733 for (i = 0; i < num_histograms; ++i)
735 if (histograms[i].lowpc == lowpc && histograms[i].highpc == highpc)
736 return &histograms[i];
741 /* Given a PC, return histogram record which address range include this PC.
742 Return NULL if there's no such record. */
744 find_histogram_for_pc (bfd_vma pc)
747 for (i = 0; i < num_histograms; ++i)
749 if (histograms[i].lowpc <= pc && pc < histograms[i].highpc)
750 return &histograms[i];