From 9e179a64d573527c3a3f934ba88746a203036000 Mon Sep 17 00:00:00 2001 From: hubicka Date: Fri, 29 Mar 2013 18:07:34 +0000 Subject: [PATCH] * lto-cgraph.c (output_profile_summary, input_profile_summary): Use gcov streaming; stream hot bb threshold to ltrans. * predict.c (get_hot_bb_threshold): Break out from .... (maybe_hot_count_p): ... here. (set_hot_bb_threshold): New function. * lto-section-in.c (lto_section_name): Add profile. * profile.h (get_hot_bb_threshold, set_hot_bb_threshold): Declare. * ipa.c: Include hash-table.h, tree-inline.h, profile.h, lto-streamer.h and data-streamer.h (histogram_entry): New structure. (histogram, histogram_pool): New global vars. (histogram_hash): New structure. (histogram_hash::hash): New method. (histogram_hash::equal): Likewise. (account_time_size): New function. (cmp_counts): New function. (dump_histogram): New function. (ipa_profile_generate_summary): New function. (ipa_profile_write_summary): New function. (ipa_profile_read_summary): New function. (ipa_profile): Decide on threshold. (pass_ipa_profile): Add ipa_profile_write_summary and ipa_profile_read_summary. * Makefile.in (ipa.o): Update dependencies. * lto-streamer.h (LTO_section_ipa_profile): New section. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@197243 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 27 +++++ gcc/Makefile.in | 3 +- gcc/ipa.c | 276 ++++++++++++++++++++++++++++++++++++++++++++++++++- gcc/lto-cgraph.c | 31 ++++-- gcc/lto-section-in.c | 1 + gcc/lto-streamer.h | 1 + gcc/predict.c | 35 +++++-- gcc/profile.h | 4 + 8 files changed, 354 insertions(+), 24 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0fd0f95..3c53513 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,30 @@ +2013-03-29 Jan Hubicka + + * lto-cgraph.c (output_profile_summary, input_profile_summary): Use + gcov streaming; stream hot bb threshold to ltrans. + * predict.c (get_hot_bb_threshold): Break out from .... + (maybe_hot_count_p): ... here. 
+ (set_hot_bb_threshold): New function. + * lto-section-in.c (lto_section_name): Add profile. + * profile.h (get_hot_bb_threshold, set_hot_bb_threshold): Declare. + * ipa.c: Include hash-table.h, tree-inline.h, profile.h, lto-streamer.h + and data-streamer.h + (histogram_entry): New structure. + (histogram, histogram_pool): New global vars. + (histogram_hash): New structure. + (histogram_hash::hash): New method. + (histogram_hash::equal): Likewise. + (account_time_size): New function. + (cmp_counts): New function. + (dump_histogram): New function. + (ipa_profile_generate_summary): New function. + (ipa_profile_write_summary): New function. + (ipa_profile_read_summary): New function. + (ipa_profile): Decide on threshold. + (pass_ipa_profile): Add ipa_profile_write_summary and ipa_profile_read_summary. + * Makefile.in (ipa.o): Update dependencies. + * lto-streamer.h (LTO_section_ipa_profile): New section. + 2013-03-29 Gabriel Dos Reis * tree.h (VAR_P): New. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 4453377..19377a9 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2903,7 +2903,8 @@ varpool.o : varpool.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_FLOW_H) ipa.o : ipa.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(CGRAPH_H) \ $(TREE_PASS_H) $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \ - $(IPA_UTILS_H) + $(IPA_UTILS_H) tree-inline.h $(HASH_TABLE_H) profile.h $(PARAMS_H) \ + $(LTO_STREAMER_H) $(DATA_STREAMER_H) ipa-prop.o : ipa-prop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ langhooks.h $(GGC_H) $(TARGET_H) $(CGRAPH_H) $(IPA_PROP_H) $(DIAGNOSTIC_H) \ $(TREE_FLOW_H) $(TM_H) $(TREE_PASS_H) $(FLAGS_H) $(TREE_H) \ diff --git a/gcc/ipa.c b/gcc/ipa.c index a9b8fb4..0ea73dd 100644 --- a/gcc/ipa.c +++ b/gcc/ipa.c @@ -32,6 +32,12 @@ along with GCC; see the file COPYING3. 
If not see #include "ipa-utils.h" #include "pointer-set.h" #include "ipa-inline.h" +#include "hash-table.h" +#include "tree-inline.h" +#include "profile.h" +#include "params.h" +#include "lto-streamer.h" +#include "data-streamer.h" /* Look for all functions inlined to NODE and update their inlined_to pointers to INLINED_TO. */ @@ -1040,6 +1046,201 @@ struct ipa_opt_pass_d pass_ipa_whole_program_visibility = NULL, /* variable_transform */ }; +/* Entry in the histogram. */ + +struct histogram_entry +{ + gcov_type count; + int time; + int size; +}; + +/* Histogram of profile values. + The histogram is represented as an ordered vector of entries allocated via + histogram_pool. During construction a separate hashtable is kept to lookup + duplicate entries. */ + +vec<histogram_entry *> histogram; +static alloc_pool histogram_pool; + +/* Hashtable support for storing histogram entries hashed by their count. */ + +struct histogram_hash : typed_noop_remove <histogram_entry> +{ + typedef histogram_entry value_type; + typedef histogram_entry compare_type; + static inline hashval_t hash (const value_type *); + static inline int equal (const value_type *, const compare_type *); +}; + +inline hashval_t +histogram_hash::hash (const histogram_entry *val) +{ + return val->count; +} + +inline int +histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2) +{ + return val->count == val2->count; +} + +/* Account TIME and SIZE executed COUNT times into HISTOGRAM. + HASHTABLE is the on-side hash kept to avoid duplicates.
*/ + +static void +account_time_size (hash_table <histogram_hash> hashtable, + vec<histogram_entry *> &histogram, + gcov_type count, int time, int size) +{ + histogram_entry key = {count, 0, 0}; + histogram_entry **val = hashtable.find_slot (&key, INSERT); + + if (!*val) + { + *val = (histogram_entry *) pool_alloc (histogram_pool); + **val = key; + histogram.safe_push (*val); + } + (*val)->time += time; + (*val)->size += size; +} + +int +cmp_counts (const void *v1, const void *v2) +{ + const histogram_entry *h1 = *(const histogram_entry * const *)v1; + const histogram_entry *h2 = *(const histogram_entry * const *)v2; + if (h1->count < h2->count) + return 1; + if (h1->count > h2->count) + return -1; + return 0; +} + +/* Dump HISTOGRAM to FILE. */ + +static void +dump_histogram (FILE *file, vec<histogram_entry *> histogram) +{ + unsigned int i; + gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0; + + fprintf (file, "Histogram:\n"); + for (i = 0; i < histogram.length (); i++) + { + overall_time += histogram[i]->count * histogram[i]->time; + overall_size += histogram[i]->size; + } + if (!overall_time) + overall_time = 1; + if (!overall_size) + overall_size = 1; + for (i = 0; i < histogram.length (); i++) + { + cumulated_time += histogram[i]->count * histogram[i]->time; + cumulated_size += histogram[i]->size; + fprintf (file, " "HOST_WIDEST_INT_PRINT_DEC": time:%i (%2.2f) size:%i (%2.2f)\n", + (HOST_WIDEST_INT) histogram[i]->count, + histogram[i]->time, + cumulated_time * 100.0 / overall_time, + histogram[i]->size, + cumulated_size * 100.0 / overall_size); + } +} + +/* Collect histogram from CFG profiles.
*/ + +static void +ipa_profile_generate_summary (void) +{ + struct cgraph_node *node; + gimple_stmt_iterator gsi; + hash_table <histogram_hash> hashtable; + basic_block bb; + + hashtable.create (10); + histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry), + 10); + + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) + FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->symbol.decl)) + { + int time = 0; + int size = 0; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + time += estimate_num_insns (gsi_stmt (gsi), &eni_time_weights); + size += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights); + } + account_time_size (hashtable, histogram, bb->count, time, size); + } + hashtable.dispose (); + histogram.qsort (cmp_counts); +} + +/* Serialize the ipa info for lto. */ + +static void +ipa_profile_write_summary (void) +{ + struct lto_simple_output_block *ob + = lto_create_simple_output_block (LTO_section_ipa_profile); + unsigned int i; + + streamer_write_uhwi_stream (ob->main_stream, histogram.length ()); + for (i = 0; i < histogram.length (); i++) + { + streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count); + streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time); + streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size); + } + lto_destroy_simple_output_block (ob); +} + +/* Deserialize the ipa info for lto.
*/ + +static void +ipa_profile_read_summary (void) +{ + struct lto_file_decl_data ** file_data_vec + = lto_get_file_decl_data (); + struct lto_file_decl_data * file_data; + hash_table <histogram_hash> hashtable; + int j = 0; + + hashtable.create (10); + histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry), + 10); + + while ((file_data = file_data_vec[j++])) + { + const char *data; + size_t len; + struct lto_input_block *ib + = lto_create_simple_input_block (file_data, + LTO_section_ipa_profile, + &data, &len); + if (ib) + { + unsigned int num = streamer_read_uhwi (ib); + unsigned int n; + for (n = 0; n < num; n++) + { + gcov_type count = streamer_read_gcov_count (ib); + int time = streamer_read_uhwi (ib); + int size = streamer_read_uhwi (ib); + account_time_size (hashtable, histogram, + count, time, size); + } + lto_destroy_simple_input_block (file_data, + LTO_section_ipa_profile, + ib, data, len); + } + } + hashtable.dispose (); + histogram.qsort (cmp_counts); +} /* Simple ipa profile pass propagating frequencies across the callgraph.
*/ @@ -1051,6 +1252,75 @@ ipa_profile (void) int order_pos; bool something_changed = false; int i; + gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0; + + if (dump_file) + dump_histogram (dump_file, histogram); + for (i = 0; i < (int)histogram.length (); i++) + { + overall_time += histogram[i]->count * histogram[i]->time; + overall_size += histogram[i]->size; + } + if (overall_time) + { + gcov_type threshold; + + gcc_assert (overall_size); + if (dump_file) + { + gcov_type min, cumulated_time = 0, cumulated_size = 0; + + fprintf (dump_file, "Overall time: "HOST_WIDEST_INT_PRINT_DEC"\n", + (HOST_WIDEST_INT)overall_time); + min = get_hot_bb_threshold (); + for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min; + i++) + { + cumulated_time += histogram[i]->count * histogram[i]->time; + cumulated_size += histogram[i]->size; + } + fprintf (dump_file, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC + " Time:%3.2f%% Size:%3.2f%%\n", + (HOST_WIDEST_INT)min, + cumulated_time * 100.0 / overall_time, + cumulated_size * 100.0 / overall_size); + } + cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000; + threshold = 0; + for (i = 0; cumulated < cutoff; i++) + { + cumulated += histogram[i]->count * histogram[i]->time; + threshold = histogram[i]->count; + } + if (!threshold) + threshold = 1; + if (dump_file) + { + gcov_type cumulated_time = 0, cumulated_size = 0; + + for (i = 0; + i < (int)histogram.length () && histogram[i]->count >= threshold; + i++) + { + cumulated_time += histogram[i]->count * histogram[i]->time; + cumulated_size += histogram[i]->size; + } + fprintf (dump_file, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC + " Time:%3.2f%% Size:%3.2f%%\n", + (HOST_WIDEST_INT)threshold, + cumulated_time * 100.0 / overall_time, + cumulated_size * 100.0 / overall_size); + } + if (threshold > get_hot_bb_threshold () + || in_lto_p) + { + if (dump_file) + fprintf (dump_file, "Threshold updated.\n"); + 
set_hot_bb_threshold (threshold); + } + } + histogram.release(); + free_alloc_pool (histogram_pool); order_pos = ipa_reverse_postorder (order); for (i = order_pos - 1; i >= 0; i--) @@ -1112,9 +1382,9 @@ struct ipa_opt_pass_d pass_ipa_profile = 0, /* todo_flags_start */ 0 /* todo_flags_finish */ }, - NULL, /* generate_summary */ - NULL, /* write_summary */ - NULL, /* read_summary */ + ipa_profile_generate_summary, /* generate_summary */ + ipa_profile_write_summary, /* write_summary */ + ipa_profile_read_summary, /* read_summary */ NULL, /* write_optimization_summary */ NULL, /* read_optimization_summary */ NULL, /* stmt_fixup */ diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 6d639a9..c619197 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -604,11 +604,11 @@ output_profile_summary (struct lto_simple_output_block *ob) units. */ gcc_assert (profile_info->runs); streamer_write_uhwi_stream (ob->main_stream, profile_info->runs); - streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_max); + streamer_write_gcov_count_stream (ob->main_stream, profile_info->sum_max); /* sum_all is needed for computing the working set with the histogram. */ - streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_all); + streamer_write_gcov_count_stream (ob->main_stream, profile_info->sum_all); /* Create and output a bitpack of non-zero histogram entries indices. 
*/ bp = bitpack_create (ob->main_stream); @@ -620,13 +620,18 @@ output_profile_summary (struct lto_simple_output_block *ob) { if (!profile_info->histogram[h_ix].num_counters) continue; - streamer_write_uhwi_stream (ob->main_stream, + streamer_write_gcov_count_stream (ob->main_stream, profile_info->histogram[h_ix].num_counters); - streamer_write_uhwi_stream (ob->main_stream, + streamer_write_gcov_count_stream (ob->main_stream, profile_info->histogram[h_ix].min_value); - streamer_write_uhwi_stream (ob->main_stream, + streamer_write_gcov_count_stream (ob->main_stream, profile_info->histogram[h_ix].cum_value); - } + } + /* IPA-profile computes hot bb threshold based on cumulated + whole program profile. We need to stream it down to ltrans. */ + if (flag_wpa) + streamer_write_gcov_count_stream (ob->main_stream, + get_hot_bb_threshold ()); } else streamer_write_uhwi_stream (ob->main_stream, 0); @@ -1259,8 +1264,8 @@ input_profile_summary (struct lto_input_block *ib, if (runs) { file_data->profile_info.runs = runs; - file_data->profile_info.sum_max = streamer_read_uhwi (ib); - file_data->profile_info.sum_all = streamer_read_uhwi (ib); + file_data->profile_info.sum_max = streamer_read_gcov_count (ib); + file_data->profile_info.sum_all = streamer_read_gcov_count (ib); memset (file_data->profile_info.histogram, 0, sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE); @@ -1279,12 +1284,16 @@ input_profile_summary (struct lto_input_block *ib, continue; file_data->profile_info.histogram[h_ix].num_counters - = streamer_read_uhwi (ib); + = streamer_read_gcov_count (ib); file_data->profile_info.histogram[h_ix].min_value - = streamer_read_uhwi (ib); + = streamer_read_gcov_count (ib); file_data->profile_info.histogram[h_ix].cum_value - = streamer_read_uhwi (ib); + = streamer_read_gcov_count (ib); } + /* IPA-profile computes hot bb threshold based on cumulated + whole program profile. We need to stream it down to ltrans. 
*/ + if (flag_ltrans) + set_hot_bb_threshold (streamer_read_gcov_count (ib)); } } diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c index 3a86151..0ef421f 100644 --- a/gcc/lto-section-in.c +++ b/gcc/lto-section-in.c @@ -55,6 +55,7 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] = "jmpfuncs", "pureconst", "reference", + "profile", "symbol_nodes", "opts", "cgraphopt", diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index 919e304..95fe33a 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -243,6 +243,7 @@ enum lto_section_type LTO_section_jump_functions, LTO_section_ipa_pure_const, LTO_section_ipa_reference, + LTO_section_ipa_profile, LTO_section_symtab_nodes, LTO_section_opts, LTO_section_cgraph_opt_sum, diff --git a/gcc/predict.c b/gcc/predict.c index 57975d1..5394ef5 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -128,25 +128,42 @@ maybe_hot_frequency_p (struct function *fun, int freq) return true; } +static gcov_type min_count = -1; + +/* Determine the threshold for hot BB counts. */ + +gcov_type +get_hot_bb_threshold () +{ + gcov_working_set_t *ws; + if (min_count == -1) + { + ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE)); + gcc_assert (ws); + min_count = ws->min_counter; + } + return min_count; +} + +/* Set the threshold for hot BB counts. */ + +void +set_hot_bb_threshold (gcov_type min) +{ + min_count = min; +} + /* Return TRUE if frequency FREQ is considered to be hot. */ static inline bool maybe_hot_count_p (struct function *fun, gcov_type count) { - gcov_working_set_t *ws; - static gcov_type min_count = -1; if (fun && profile_status_for_function (fun) != PROFILE_READ) return true; /* Code executed at most once is not hot. 
*/ if (profile_info->runs >= count) return false; - if (min_count == -1) - { - ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE)); - gcc_assert (ws); - min_count = ws->min_counter; - } - return (count >= min_count); + return (count >= get_hot_bb_threshold ()); } /* Return true in case BB can be CPU intensive and should be optimized diff --git a/gcc/profile.h b/gcc/profile.h index e21250b..c7d5f1a 100644 --- a/gcc/profile.h +++ b/gcc/profile.h @@ -48,4 +48,8 @@ extern void del_node_map (void); extern void compute_working_sets (void); +/* In predict.c. */ +extern gcov_type get_hot_bb_threshold (void); +extern void set_hot_bb_threshold (gcov_type); + #endif /* PROFILE_H */ -- 2.7.4