From b630a10e9460ef2a3c08e99e2a7254982fdf23fc Mon Sep 17 00:00:00 2001 From: ebotcazou Date: Sat, 16 Apr 2011 10:43:04 +0000 Subject: [PATCH] * lto.c (lto_balanced_map): Fix typos in head comment. (lto_promote_cross_file_statics): Fix long lines and remove redundant test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@172584 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/lto/ChangeLog | 28 ++++++++++------ gcc/lto/lto.c | 98 +++++++++++++++++++++++++++++-------------------------- 2 files changed, 70 insertions(+), 56 deletions(-) diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index 6c87a38..507e9b2 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,9 @@ +2011-04-16 Eric Botcazou + + * lto.c (lto_balanced_map): Fix typos in head comment. + (lto_promote_cross_file_statics): Fix long lines and remove redundant + test. + 2011-04-16 Jan Hubicka * lto.c (lto_balanced_map): Update. @@ -5,7 +11,8 @@ 2011-04-14 Jan Hubicka * lto.c: Include ipa-inline.h - (add_cgraph_node_to_partition, undo_partition): Use inline_summary accessor. + (add_cgraph_node_to_partition, undo_partition): Use inline_summary + accessor. (ipa_node_duplication_hook): Fix declaration. * Make-lang.in (lto.o): Update dependencies. @@ -77,8 +84,8 @@ PR lto/45721 PR lto/45375 - * lto.c (partition_cgraph_node_p, partition_varpool_node_p): Weakrefs are - not partitioned. + * lto.c (partition_cgraph_node_p, partition_varpool_node_p): Weakrefs + are not partitioned. 2010-12-22 Nathan Froyd @@ -376,11 +383,12 @@ * lto.c (add_cgraph_node_to_partition): Forward declare; walk also nodes from same comdat group as well as all comdat functions referenced here. - (add_varpool_node_to_partition, add_references_to_partition): New function. - (lto_1_1_map): Skip COMDAT fnctions/variables; use add_varpool_node_to_partition; - clear aux flags when done. - (lto_promote_cross_file_statics): Do not promote stuff that gets duplicated to - each ltrans. + (add_varpool_node_to_partition, add_references_to_partition): New + function. + (lto_1_1_map): Skip COMDAT fnctions/variables; use + add_varpool_node_to_partition; clear aux flags when done. + (lto_promote_cross_file_statics): Do not promote stuff that gets + duplicated to each ltrans. 2010-07-04 Jan Hubicka @@ -588,8 +596,8 @@ 2010-04-30 Jan Hubicka - * lto.c (get_filename_for_set): Look for cgraph node and if none found, use - default name. + * lto.c (get_filename_for_set): Look for cgraph node and if none found, + use default name. (lto_wpa_write_files): Write any non-empty partition. 2010-04-30 Jan Hubicka diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c index f515aaa..8215c02 100644 --- a/gcc/lto/lto.c +++ b/gcc/lto/lto.c @@ -1,5 +1,5 @@ /* Top-level LTO routines. - Copyright 2009, 2010 Free Software Foundation, Inc. + Copyright 2009, 2010, 2011 Free Software Foundation, Inc. Contributed by CodeSourcery, Inc. This file is part of GCC. @@ -962,40 +962,43 @@ lto_1_to_1_map (void) } -/* Group cgraph nodes in qually sized partitions. +/* Group cgraph nodes into equally-sized partitions. - The algorithm deciding paritions are simple: nodes are taken in predefined - order. The order correspond to order we wish to have functions in final - output. In future this will be given by function reordering pass, but at - the moment we use topological order that serve a good approximation. + The partitioning algorithm is simple: nodes are taken in predefined order. + The order corresponds to the order we want functions to have in the final + output. In the future this will be given by function reordering pass, but + at the moment we use the topological order, which is a good approximation. - The goal is to partition this linear order into intervals (partitions) such - that all partitions have approximately the same size and that the number of - callgraph or IPA reference edgess crossing boundaries is minimal. + The goal is to partition this linear order into intervals (partitions) so + that all the partitions have approximately the same size and the number of + callgraph or IPA reference edges crossing boundaries is minimal. This is a lot faster (O(n) in size of callgraph) than algorithms doing - priority based graph clustering that are generally O(n^2) and since WHOPR - is designed to make things go well across partitions, it leads to good results. - - We compute the expected size of partition as - max (total_size / lto_partitions, min_partition_size). - We use dynamic expected size of partition, so small programs - are partitioning into enough partitions to allow use of multiple CPUs while - large programs are not partitioned too much. Creating too many partition - increase streaming overhead significandly. - - In the future we would like to bound maximal size of partition to avoid - ltrans stage consuming too much memory. At the moment however WPA stage is - most memory intensive phase at large benchmark since too many types and - declarations are read into memory. - - The function implement simple greedy algorithm. Nodes are begin added into - current partition until 3/4th of expected partition size is reached. - After this threshold we keep track of boundary size (number of edges going to - other partitions) and continue adding functions until the current partition - grows into a double of expected partition size. Then the process is undone - till the point when minimal ration of boundary size and in partition calls - was reached. */ + priority-based graph clustering that are generally O(n^2) and, since + WHOPR is designed to make things go well across partitions, it leads + to good results. + + We compute the expected size of a partition as: + + max (total_size / lto_partitions, min_partition_size) + + We use dynamic expected size of partition so small programs are partitioned + into enough partitions to allow use of multiple CPUs, while large programs + are not partitioned too much. Creating too many partitions significantly + increases the streaming overhead. + + In the future, we would like to bound the maximal size of partitions so as + to prevent the LTRANS stage from consuming too much memory. At the moment, + however, the WPA stage is the most memory intensive for large benchmarks, + since too many types and declarations are read into memory. + + The function implements a simple greedy algorithm. Nodes are being added + to the current partition until after 3/4 of the expected partition size is + reached. Past this threshold, we keep track of boundary size (number of + edges going to other partitions) and continue adding functions until after + the current partition has grown to twice the expected partition size. Then + the process is undone to the point where the minimal ratio of boundary size + and in-partition calls was reached. */ static void lto_balanced_map (void) @@ -1330,7 +1333,8 @@ lto_promote_cross_file_statics (void) n_sets = VEC_length (ltrans_partition, ltrans_partitions); for (i = 0; i < n_sets; i++) { - ltrans_partition part = VEC_index (ltrans_partition, ltrans_partitions, i); + ltrans_partition part + = VEC_index (ltrans_partition, ltrans_partitions, i); set = part->cgraph_set; vset = part->varpool_set; @@ -1361,16 +1365,15 @@ lto_promote_cross_file_statics (void) promote_var (vnode); } - /* We export initializers of read-only var into each partition - referencing it. Folding might take declarations from the - initializers and use it; so everything referenced from the - initializers needs can be accessed from this partition after - folding. + /* We export the initializer of a read-only var into each partition + referencing the var. Folding might take declarations from the + initializer and use them, so everything referenced from the + initializer can be accessed from this partition after folding. This means that we need to promote all variables and functions - referenced from all initializers from readonly vars referenced - from this partition that are not in this partition. - This needs to be done recursively. */ + referenced from all initializers of read-only vars referenced + from this partition that are not in this partition. This needs + to be done recursively. */ for (vnode = varpool_nodes; vnode; vnode = vnode->next) if (const_value_known_p (vnode->decl) && DECL_INITIAL (vnode->decl) @@ -1378,13 +1381,16 @@ lto_promote_cross_file_statics (void) && referenced_from_this_partition_p (&vnode->ref_list, set, vset) && !pointer_set_insert (inserted, vnode)) VEC_safe_push (varpool_node_ptr, heap, promoted_initializers, vnode); + while (!VEC_empty (varpool_node_ptr, promoted_initializers)) { int i; struct ipa_ref *ref; vnode = VEC_pop (varpool_node_ptr, promoted_initializers); - for (i = 0; ipa_ref_list_reference_iterate (&vnode->ref_list, i, ref); i++) + for (i = 0; + ipa_ref_list_reference_iterate (&vnode->ref_list, i, ref); + i++) { if (ref->refered_type == IPA_REF_CGRAPH) { @@ -1399,17 +1405,17 @@ lto_promote_cross_file_statics (void) struct varpool_node *v = ipa_ref_varpool_node (ref); if (varpool_node_in_set_p (v, vset)) continue; - /* Constant pool references use internal labels and thus can not - be made global. It is sensible to keep those ltrans local to - allow better optimization. */ + + /* Constant pool references use internal labels and thus + cannot be made global. It is sensible to keep those + ltrans local to allow better optimization. */ if (DECL_IN_CONSTANT_POOL (v->decl)) { if (!pointer_set_insert (inserted, vnode)) VEC_safe_push (varpool_node_ptr, heap, promoted_initializers, v); } - else if (!DECL_IN_CONSTANT_POOL (v->decl) - && !v->externally_visible && v->analyzed) + else if (!v->externally_visible && v->analyzed) { if (promote_var (v) && DECL_INITIAL (v->decl) -- 2.7.4