1 /* cg_print.c - Print routines for displaying call graphs.
3 Copyright 2000, 2001, 2002, 2004, 2007, 2009, 2011
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 #include "libiberty.h"
25 #include "filenames.h"
26 #include "search_list.h"
35 /* Return value of comparison functions used to sort tables. */
40 static void print_header (void);
41 static void print_cycle (Sym *);
42 static int cmp_member (Sym *, Sym *);
43 static void sort_members (Sym *);
44 static void print_members (Sym *);
45 static int cmp_arc (Arc *, Arc *);
46 static void sort_parents (Sym *);
47 static void print_parents (Sym *);
48 static void sort_children (Sym *);
49 static void print_children (Sym *);
50 static void print_line (Sym *);
51 static int cmp_name (const PTR, const PTR);
52 static int cmp_arc_count (const PTR, const PTR);
53 static int cmp_fun_nuses (const PTR, const PTR);
54 static void order_and_dump_functions_by_arcs
55 (Arc **, unsigned long, int, Arc **, unsigned long *);
57 /* Declarations of automatically generated functions to output blurbs. */
58 extern void bsd_callg_blurb (FILE * fp);
59 extern void fsf_callg_blurb (FILE * fp);
61 double print_time = 0.0;
72 if (!bsd_style_output)
74 if (print_descriptions)
75 printf (_("\t\t Call graph (explanation follows)\n\n"));
77 printf (_("\t\t\tCall graph\n\n"));
80 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
81 (long) hist_scale * (long) sizeof (UNIT));
84 printf (_(" for %.2f%% of %.2f seconds\n\n"),
85 100.0 / print_time, print_time / hz);
88 printf (_(" no time propagated\n\n"));
90 /* This doesn't hurt, since all the numerators will be 0.0. */
96 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
97 "", "", "", "", _("called"), _("total"), _("parents"));
98 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
100 /* xgettext:no-c-format */
102 _("self"), _("descendants"), _("called"), _("self"),
103 _("name"), _("index"));
104 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
105 "", "", "", "", _("called"), _("total"), _("children"));
110 printf (_("index %% time self children called name\n"));
114 /* Print a cycle header. */
117 print_cycle (Sym *cyc)
121 sprintf (buf, "[%d]", cyc->cg.index);
122 printf (bsd_style_output
123 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
124 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
125 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
126 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
128 if (cyc->cg.self_calls != 0)
129 printf ("+%-7lu", cyc->cg.self_calls);
131 printf (" %7.7s", "");
133 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
136 /* Compare LEFT and RIGHT membmer. Major comparison key is
137 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
140 cmp_member (Sym *left, Sym *right)
142 double left_time = left->cg.prop.self + left->cg.prop.child;
143 double right_time = right->cg.prop.self + right->cg.prop.child;
144 unsigned long left_calls = left->ncalls + left->cg.self_calls;
145 unsigned long right_calls = right->ncalls + right->cg.self_calls;
147 if (left_time > right_time)
150 if (left_time < right_time)
153 if (left_calls > right_calls)
156 if (left_calls < right_calls)
162 /* Sort members of a cycle. */
165 sort_members (Sym *cyc)
167 Sym *todo, *doing, *prev;
169 /* Detach cycle members from cyclehead,
170 and insertion sort them back on. */
171 todo = cyc->cg.cyc.next;
172 cyc->cg.cyc.next = 0;
174 for (doing = todo; doing != NULL; doing = todo)
176 todo = doing->cg.cyc.next;
178 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
180 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
184 doing->cg.cyc.next = prev->cg.cyc.next;
185 prev->cg.cyc.next = doing;
189 /* Print the members of a cycle. */
192 print_members (Sym *cyc)
198 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
200 printf (bsd_style_output
201 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
206 if (member->cg.self_calls != 0)
207 printf ("+%-7lu", member->cg.self_calls);
209 printf (" %7.7s", "");
217 /* Compare two arcs to/from the same child/parent.
218 - if one arc is a self arc, it's least.
219 - if one arc is within a cycle, it's less than.
220 - if both arcs are within a cycle, compare arc counts.
221 - if neither arc is within a cycle, compare with
222 time + child_time as major key
223 arc count as minor key. */
226 cmp_arc (Arc *left, Arc *right)
228 Sym *left_parent = left->parent;
229 Sym *left_child = left->child;
230 Sym *right_parent = right->parent;
231 Sym *right_child = right->child;
232 double left_time, right_time;
235 printf ("[cmp_arc] ");
236 print_name (left_parent);
238 print_name (left_child);
239 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
240 left->count, left_child->ncalls);
241 printf ("[cmp_arc] ");
242 print_name (right_parent);
244 print_name (right_child);
245 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
246 right->count, right_child->ncalls);
250 if (left_parent == left_child)
251 return LESSTHAN; /* Left is a self call. */
253 if (right_parent == right_child)
254 return GREATERTHAN; /* Right is a self call. */
256 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
257 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
259 /* Left is a call within a cycle. */
260 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
261 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
263 /* Right is a call within the cycle, too. */
264 if (left->count < right->count)
267 if (left->count > right->count)
274 /* Right isn't a call within the cycle. */
280 /* Left isn't a call within a cycle. */
281 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
282 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
284 /* Right is a call within a cycle. */
289 /* Neither is a call within a cycle. */
290 left_time = left->time + left->child_time;
291 right_time = right->time + right->child_time;
293 if (left_time < right_time)
296 if (left_time > right_time)
299 if (left->count < right->count)
302 if (left->count > right->count)
312 sort_parents (Sym * child)
314 Arc *arc, *detached, sorted, *prev;
316 /* Unlink parents from child, then insertion sort back on to
318 *arc the arc you have detached and are inserting.
319 *detached the rest of the arcs to be sorted.
320 sorted arc list onto which you insertion sort.
321 *prev arc before the arc you are comparing. */
322 sorted.next_parent = 0;
324 for (arc = child->cg.parents; arc; arc = detached)
326 detached = arc->next_parent;
328 /* Consider *arc as disconnected; insert it into sorted. */
329 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
331 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
335 arc->next_parent = prev->next_parent;
336 prev->next_parent = arc;
339 /* Reattach sorted arcs to child. */
340 child->cg.parents = sorted.next_parent;
345 print_parents (Sym *child)
351 if (child->cg.cyc.head != 0)
352 cycle_head = child->cg.cyc.head;
356 if (!child->cg.parents)
358 printf (bsd_style_output
359 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
360 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
361 "", "", "", "", "", "");
365 sort_parents (child);
367 for (arc = child->cg.parents; arc; arc = arc->next_parent)
369 parent = arc->parent;
370 if (child == parent || (child->cg.cyc.num != 0
371 && parent->cg.cyc.num == child->cg.cyc.num))
373 /* Selfcall or call among siblings. */
374 printf (bsd_style_output
375 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
376 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
384 /* Regular parent of child. */
385 printf (bsd_style_output
386 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
387 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
389 arc->time / hz, arc->child_time / hz,
390 arc->count, cycle_head->ncalls);
399 sort_children (Sym *parent)
401 Arc *arc, *detached, sorted, *prev;
403 /* Unlink children from parent, then insertion sort back on to
405 *arc the arc you have detached and are inserting.
406 *detached the rest of the arcs to be sorted.
407 sorted arc list onto which you insertion sort.
408 *prev arc before the arc you are comparing. */
409 sorted.next_child = 0;
411 for (arc = parent->cg.children; arc; arc = detached)
413 detached = arc->next_child;
415 /* Consider *arc as disconnected; insert it into sorted. */
416 for (prev = &sorted; prev->next_child; prev = prev->next_child)
418 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
422 arc->next_child = prev->next_child;
423 prev->next_child = arc;
426 /* Reattach sorted children to parent. */
427 parent->cg.children = sorted.next_child;
432 print_children (Sym *parent)
437 sort_children (parent);
438 arc = parent->cg.children;
440 for (arc = parent->cg.children; arc; arc = arc->next_child)
443 if (child == parent || (child->cg.cyc.num != 0
444 && child->cg.cyc.num == parent->cg.cyc.num))
446 /* Self call or call to sibling. */
447 printf (bsd_style_output
448 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
449 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
450 "", "", "", "", arc->count, "");
456 /* Regular child of parent. */
457 printf (bsd_style_output
458 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
459 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
461 arc->time / hz, arc->child_time / hz,
462 arc->count, child->cg.cyc.head->ncalls);
475 sprintf (buf, "[%d]", np->cg.index);
476 printf (bsd_style_output
477 ? "%-6.6s %5.1f %7.2f %11.2f"
478 : "%-6.6s %5.1f %7.2f %7.2f", buf,
479 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
480 np->cg.prop.self / hz, np->cg.prop.child / hz);
482 if ((np->ncalls + np->cg.self_calls) != 0)
484 printf (" %7lu", np->ncalls);
486 if (np->cg.self_calls != 0)
487 printf ("+%-7lu ", np->cg.self_calls);
489 printf (" %7.7s ", "");
493 printf (" %7.7s %7.7s ", "", "");
501 /* Print dynamic call graph. */
504 cg_print (Sym ** timesortsym)
506 unsigned int sym_index;
509 if (print_descriptions && bsd_style_output)
510 bsd_callg_blurb (stdout);
514 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
516 parent = timesortsym[sym_index];
518 if ((ignore_zeros && parent->ncalls == 0
519 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
520 && parent->cg.prop.child == 0)
521 || !parent->cg.print_flag
522 || (line_granularity && ! parent->is_func))
525 if (!parent->name && parent->cg.cyc.num != 0)
528 print_cycle (parent);
529 print_members (parent);
533 print_parents (parent);
535 print_children (parent);
538 if (bsd_style_output)
541 printf ("-----------------------------------------------\n");
543 if (bsd_style_output)
549 if (print_descriptions && !bsd_style_output)
550 fsf_callg_blurb (stdout);
555 cmp_name (const PTR left, const PTR right)
557 const Sym **npp1 = (const Sym **) left;
558 const Sym **npp2 = (const Sym **) right;
560 return strcmp ((*npp1)->name, (*npp2)->name);
567 unsigned int sym_index;
568 unsigned int nnames, todo, i, j;
569 int col, starting_col;
570 Sym **name_sorted_syms, *sym;
571 const char *filename;
573 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
575 /* Now, sort regular function name
576 alphabetically to create an index. */
577 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
579 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
581 if (ignore_zeros && symtab.base[sym_index].ncalls == 0
582 && symtab.base[sym_index].hist.time == 0)
585 name_sorted_syms[nnames++] = &symtab.base[sym_index];
588 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
590 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
591 name_sorted_syms[todo++] = &cycle_header[sym_index];
594 printf (_("Index by function name\n\n"));
595 sym_index = (todo + 2) / 3;
597 for (i = 0; i < sym_index; i++)
602 for (j = i; j < todo; j += sym_index)
604 sym = name_sorted_syms[j];
606 if (sym->cg.print_flag)
607 sprintf (buf, "[%d]", sym->cg.index);
609 sprintf (buf, "(%d)", sym->cg.index);
613 if (bsd_style_output)
615 printf ("%6.6s %-19.19s", buf, sym->name);
621 for (; col < starting_col + 5; ++col)
624 printf (" %s ", buf);
625 col += print_name_only (sym);
627 if (!line_granularity && sym->is_static && sym->file)
629 filename = sym->file->name;
633 filename = strrchr (filename, '/');
638 filename = sym->file->name;
641 printf (" (%s)", filename);
642 col += strlen (filename) + 3;
648 if (bsd_style_output)
650 printf ("%6.6s ", buf);
651 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
652 printf ("%-19.19s", buf);
657 for (; col < starting_col + 5; ++col)
659 printf (" %s ", buf);
660 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
666 starting_col += column_width;
672 free (name_sorted_syms);
675 /* Compare two arcs based on their usage counts.
676 We want to sort in descending order. */
679 cmp_arc_count (const PTR left, const PTR right)
681 const Arc **npp1 = (const Arc **) left;
682 const Arc **npp2 = (const Arc **) right;
684 if ((*npp1)->count > (*npp2)->count)
686 else if ((*npp1)->count < (*npp2)->count)
692 /* Compare two funtions based on their usage counts.
693 We want to sort in descending order. */
696 cmp_fun_nuses (const PTR left, const PTR right)
698 const Sym **npp1 = (const Sym **) left;
699 const Sym **npp2 = (const Sym **) right;
701 if ((*npp1)->nuses > (*npp2)->nuses)
703 else if ((*npp1)->nuses < (*npp2)->nuses)
709 /* Print a suggested function ordering based on the profiling data.
711 We perform 4 major steps when ordering functions:
713 * Group unused functions together and place them at the
714 end of the function order.
716 * Search the highest use arcs (those which account for 90% of
717 the total arc count) for functions which have several parents.
719 Group those with the most call sites together (currently the
720 top 1.25% which have at least five different call sites).
722 These are emitted at the start of the function order.
724 * Use a greedy placement algorithm to place functions which
725 occur in the top 99% of the arcs in the profile. Some provisions
726 are made to handle high usage arcs where the parent and/or
727 child has already been placed.
729 * Run the same greedy placement algorithm on the remaining
730 arcs to place the leftover functions.
733 The various "magic numbers" should (one day) be tuneable by command
734 line options. They were arrived at by benchmarking a few applications
735 with various values to see which values produced better overall function
738 Of course, profiling errors, machine limitations (PA long calls), and
739 poor cutoff values for the placement algorithm may limit the usefullness
740 of the resulting function order. Improvements would be greatly appreciated.
744 * Place the functions with many callers near the middle of the
745 list to reduce long calls.
747 * Propagate arc usage changes as functions are placed. Ie if
748 func1 and func2 are placed together, arcs to/from those arcs
749 to the same parent/child should be combined, then resort the
750 arcs to choose the next one.
752 * Implement some global positioning algorithm to place the
753 chains made by the greedy local positioning algorithm. Probably
754 by examining arcs which haven't been placed yet to tie two
757 * Take a function's size and time into account in the algorithm;
758 size in particular is important on the PA (long calls). Placing
759 many small functions onto their own page may be wise.
761 * Use better profiling information; many published algorithms
762 are based on call sequences through time, rather than just
765 * Prodecure cloning could improve performance when a small number
766 of arcs account for most of the calls to a particular function.
768 * Use relocation information to avoid moving unused functions
769 completely out of the code stream; this would avoid severe lossage
770 when the profile data bears little resemblance to actual runs.
772 * Propagation of arc usages should also improve .o link line
773 ordering which shares the same arc placement algorithm with
774 the function ordering code (in fact it is a degenerate case
775 of function ordering). */
778 cg_print_function_ordering (void)
780 unsigned long sym_index;
781 unsigned long arc_index;
782 unsigned long used, unused, scratch_index;
783 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
785 unsigned long long total_arcs, tmp_arcs_count;
787 unsigned long total_arcs, tmp_arcs_count;
789 Sym **unused_syms, **used_syms, **scratch_syms;
790 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
796 unplaced_arc_count = 0;
798 scratch_arc_count = 0;
800 /* First group all the unused functions together. */
801 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
804 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
808 /* Walk through all the functions; mark those which are never
809 called as placed (we'll emit them as a group later). */
810 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
812 if (symtab.base[sym_index].ncalls == 0)
814 unused_syms[unused++] = &symtab.base[sym_index];
815 symtab.base[sym_index].has_been_placed = 1;
819 used_syms[used++] = &symtab.base[sym_index];
820 symtab.base[sym_index].has_been_placed = 0;
821 symtab.base[sym_index].next = 0;
822 symtab.base[sym_index].prev = 0;
823 symtab.base[sym_index].nuses = 0;
827 /* Sort the arcs from most used to least used. */
828 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
830 /* Compute the total arc count. Also mark arcs as unplaced.
832 Note we don't compensate for overflow if that happens!
833 Overflow is much less likely when this file is compiled
834 with GCC as it can double-wide integers via long long. */
836 for (arc_index = 0; arc_index < numarcs; arc_index++)
838 total_arcs += arcs[arc_index]->count;
839 arcs[arc_index]->has_been_placed = 0;
842 /* We want to pull out those functions which are referenced
843 by many highly used arcs and emit them as a group. This
844 could probably use some tuning. */
846 for (arc_index = 0; arc_index < numarcs; arc_index++)
848 tmp_arcs_count += arcs[arc_index]->count;
850 /* Count how many times each parent and child are used up
851 to our threshhold of arcs (90%). */
852 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
855 arcs[arc_index]->child->nuses++;
858 /* Now sort a temporary symbol table based on the number of
859 times each function was used in the highest used arcs. */
860 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
861 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
863 /* Now pick out those symbols we're going to emit as
864 a group. We take up to 1.25% of the used symbols. */
865 for (sym_index = 0; sym_index < used / 80; sym_index++)
867 Sym *sym = scratch_syms[sym_index];
870 /* If we hit symbols that aren't used from many call sites,
871 then we can quit. We choose five as the low limit for
872 no particular reason. */
876 /* We're going to need the arcs between these functions.
877 Unfortunately, we don't know all these functions
878 until we're done. So we keep track of all the arcs
879 to the functions we care about, then prune out those
880 which are uninteresting.
882 An interesting variation would be to quit when we found
883 multi-call site functions which account for some percentage
885 arc = sym->cg.children;
889 if (arc->parent != arc->child)
890 scratch_arcs[scratch_arc_count++] = arc;
891 arc->has_been_placed = 1;
892 arc = arc->next_child;
895 arc = sym->cg.parents;
899 if (arc->parent != arc->child)
900 scratch_arcs[scratch_arc_count++] = arc;
901 arc->has_been_placed = 1;
902 arc = arc->next_parent;
905 /* Keep track of how many symbols we're going to place. */
906 scratch_index = sym_index;
908 /* A lie, but it makes identifying
909 these functions easier later. */
910 sym->has_been_placed = 1;
913 /* Now walk through the temporary arcs and copy
914 those we care about into the high arcs array. */
915 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
917 Arc *arc = scratch_arcs[arc_index];
919 /* If this arc refers to highly used functions, then
920 then we want to keep it. */
921 if (arc->child->has_been_placed
922 && arc->parent->has_been_placed)
924 high_arcs[high_arc_count++] = scratch_arcs[arc_index];
926 /* We need to turn of has_been_placed since we're going to
927 use the main arc placement algorithm on these arcs. */
928 arc->child->has_been_placed = 0;
929 arc->parent->has_been_placed = 0;
933 /* Dump the multi-site high usage functions which are not
934 going to be ordered by the main ordering algorithm. */
935 for (sym_index = 0; sym_index < scratch_index; sym_index++)
937 if (scratch_syms[sym_index]->has_been_placed)
938 printf ("%s\n", scratch_syms[sym_index]->name);
941 /* Now we can order the multi-site high use
942 functions based on the arcs between them. */
943 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
944 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
945 unplaced_arcs, &unplaced_arc_count);
947 /* Order and dump the high use functions left,
948 these typically have only a few call sites. */
949 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
950 unplaced_arcs, &unplaced_arc_count);
952 /* Now place the rarely used functions. */
953 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
954 scratch_arcs, &scratch_arc_count);
956 /* Output any functions not emitted by the order_and_dump calls. */
957 for (sym_index = 0; sym_index < used; sym_index++)
958 if (used_syms[sym_index]->has_been_placed == 0)
959 printf("%s\n", used_syms[sym_index]->name);
961 /* Output the unused functions. */
962 for (sym_index = 0; sym_index < unused; sym_index++)
963 printf("%s\n", unused_syms[sym_index]->name);
965 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
968 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
977 free (unplaced_arcs);
980 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
981 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
983 If ALL is nonzero, then place all functions referenced by THE_ARCS,
984 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
988 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
989 unplaced_arcs, unplaced_arc_count)
991 unsigned long arc_count;
994 unsigned long *unplaced_arc_count;
997 unsigned long long tmp_arcs, total_arcs;
999 unsigned long tmp_arcs, total_arcs;
1001 unsigned int arc_index;
1003 /* If needed, compute the total arc count.
1005 Note we don't compensate for overflow if that happens! */
1009 for (arc_index = 0; arc_index < arc_count; arc_index++)
1010 total_arcs += the_arcs[arc_index]->count;
1017 for (arc_index = 0; arc_index < arc_count; arc_index++)
1020 Sym *child, *parent;
1022 tmp_arcs += the_arcs[arc_index]->count;
1024 /* Ignore this arc if it's already been placed. */
1025 if (the_arcs[arc_index]->has_been_placed)
1028 child = the_arcs[arc_index]->child;
1029 parent = the_arcs[arc_index]->parent;
1031 /* If we're not using all arcs, and this is a rarely used
1032 arc, then put it on the unplaced_arc list. Similarly
1033 if both the parent and child of this arc have been placed. */
1034 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1035 || child->has_been_placed || parent->has_been_placed)
1037 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1041 /* If all slots in the parent and child are full, then there isn't
1042 anything we can do right now. We'll place this arc on the
1043 unplaced arc list in the hope that a global positioning
1044 algorithm can use it to place function chains. */
1045 if (parent->next && parent->prev && child->next && child->prev)
1047 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1051 /* If the parent is unattached, then find the closest
1052 place to attach it onto child's chain. Similarly
1053 for the opposite case. */
1054 if (!parent->next && !parent->prev)
1061 /* Walk to the beginning and end of the child's chain. */
1074 /* Choose the closest. */
1075 child = next_count < prev_count ? next : prev;
1077 else if (! child->next && !child->prev)
1096 parent = prev_count < next_count ? prev : next;
1100 /* Couldn't find anywhere to attach the functions,
1101 put the arc on the unplaced arc list. */
1102 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1106 /* Make sure we don't tie two ends together. */
1126 /* This would tie two ends together. */
1127 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1133 /* Must attach to the parent's prev field. */
1136 /* parent-prev and child-next */
1137 parent->prev = child;
1138 child->next = parent;
1139 the_arcs[arc_index]->has_been_placed = 1;
1142 else if (parent->prev)
1144 /* Must attach to the parent's next field. */
1147 /* parent-next and child-prev */
1148 parent->next = child;
1149 child->prev = parent;
1150 the_arcs[arc_index]->has_been_placed = 1;
1155 /* Can attach to either field in the parent, depends
1156 on where we've got space in the child. */
1159 /* parent-prev and child-next. */
1160 parent->prev = child;
1161 child->next = parent;
1162 the_arcs[arc_index]->has_been_placed = 1;
1166 /* parent-next and child-prev. */
1167 parent->next = child;
1168 child->prev = parent;
1169 the_arcs[arc_index]->has_been_placed = 1;
1174 /* Dump the chains of functions we've made. */
1175 for (arc_index = 0; arc_index < arc_count; arc_index++)
1178 if (the_arcs[arc_index]->parent->has_been_placed
1179 || the_arcs[arc_index]->child->has_been_placed)
1182 sym = the_arcs[arc_index]->parent;
1184 /* If this symbol isn't attached to any other
1185 symbols, then we've got a rarely used arc.
1187 Skip it for now, we'll deal with them later. */
1188 if (sym->next == NULL
1189 && sym->prev == NULL)
1192 /* Get to the start of this chain. */
1198 /* Mark it as placed. */
1199 sym->has_been_placed = 1;
1200 printf ("%s\n", sym->name);
1205 /* If we want to place all the arcs, then output
1206 those which weren't placed by the main algorithm. */
1208 for (arc_index = 0; arc_index < arc_count; arc_index++)
1211 if (the_arcs[arc_index]->parent->has_been_placed
1212 || the_arcs[arc_index]->child->has_been_placed)
1215 sym = the_arcs[arc_index]->parent;
1217 sym->has_been_placed = 1;
1218 printf ("%s\n", sym->name);
1222 /* Compare two function_map structs based on file name.
1223 We want to sort in ascending order. */
1226 cmp_symbol_map (const void * l, const void * r)
1228 return filename_cmp (((struct function_map *) l)->file_name,
1229 ((struct function_map *) r)->file_name);
1232 /* Print a suggested .o ordering for files on a link line based
1233 on profiling information. This uses the function placement
1234 code for the bulk of its work. */
1237 cg_print_file_ordering (void)
1239 unsigned long scratch_arc_count;
1240 unsigned long arc_index;
1241 unsigned long sym_index;
1245 scratch_arc_count = 0;
1247 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1248 for (arc_index = 0; arc_index < numarcs; arc_index++)
1250 if (! arcs[arc_index]->parent->mapped
1251 || ! arcs[arc_index]->child->mapped)
1252 arcs[arc_index]->has_been_placed = 1;
1255 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1256 scratch_arcs, &scratch_arc_count);
1258 /* Output .o's not handled by the main placement algorithm. */
1259 for (sym_index = 0; sym_index < symtab.len; sym_index++)
1261 if (symtab.base[sym_index].mapped
1262 && ! symtab.base[sym_index].has_been_placed)
1263 printf ("%s\n", symtab.base[sym_index].name);
1266 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1268 /* Now output any .o's that didn't have any text symbols. */
1270 for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1272 unsigned int index2;
1274 /* Don't bother searching if this symbol
1275 is the same as the previous one. */
1276 if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1279 for (index2 = 0; index2 < symtab.len; index2++)
1281 if (! symtab.base[index2].mapped)
1284 if (!filename_cmp (symtab.base[index2].name,
1285 symbol_map[sym_index].file_name))
1289 /* If we didn't find it in the symbol table, then it must
1290 be a .o with no text symbols. Output it last. */
1291 if (index2 == symtab.len)
1292 printf ("%s\n", symbol_map[sym_index].file_name);
1293 last = symbol_map[sym_index].file_name;