* lto-cgraph.c (output_profile_summary, input_profile_summary): Use
author hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>
Fri, 29 Mar 2013 18:07:34 +0000 (18:07 +0000)
committer hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>
Fri, 29 Mar 2013 18:07:34 +0000 (18:07 +0000)
gcov streaming; stream hot bb threshold to ltrans.
* predict.c (get_hot_bb_threshold): Break out from ....
(maybe_hot_count_p): ... here.
(set_hot_bb_threshold): New function.
* lto-section-in.c (lto_section_name): Add profile.
* profile.h (get_hot_bb_threshold, set_hot_bb_threshold): Declare.
* ipa.c: Include hash-table.h, tree-inline.h, profile.h, lto-streamer.h
and data-streamer.h
(histogram_entry): New structure.
(histogram, histogram_pool): New global vars.
(histogram_hash): New structure.
(histogram_hash::hash): New method.
(histogram_hash::equal): Likewise.
(account_time_size): New function.
(cmp_counts): New function.
(dump_histogram): New function.
(ipa_profile_generate_summary): New function.
(ipa_profile_write_summary): New function.
(ipa_profile_read_summary): New function.
(ipa_profile): Decide on threshold.
(pass_ipa_profile): Add ipa_profile_write_summary and ipa_profile_read_summary.
* Makefile.in (ipa.o): Update dependencies.
* lto-streamer.h (LTO_section_ipa_profile): New section.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@197243 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/Makefile.in
gcc/ipa.c
gcc/lto-cgraph.c
gcc/lto-section-in.c
gcc/lto-streamer.h
gcc/predict.c
gcc/profile.h

index 0fd0f95..3c53513 100644 (file)
@@ -1,3 +1,30 @@
+2013-03-29  Jan Hubicka  <jh@suse.cz>
+
+       * lto-cgraph.c (output_profile_summary, input_profile_summary): Use
+       gcov streaming; stream hot bb threshold to ltrans.
+       * predict.c (get_hot_bb_threshold): Break out from ....
+       (maybe_hot_count_p): ... here.
+       (set_hot_bb_threshold): New function.
+       * lto-section-in.c (lto_section_name): Add profile.
+       * profile.h (get_hot_bb_threshold, set_hot_bb_threshold): Declare.
+       * ipa.c: Include hash-table.h, tree-inline.h, profile.h, lto-streamer.h
+       and data-streamer.h
+       (histogram_entry): New structure.
+       (histogram, histogram_pool): New global vars.
+       (histogram_hash): New structure.
+       (histogram_hash::hash): New method.
+       (histogram_hash::equal): Likewise.
+       (account_time_size): New function.
+       (cmp_counts): New function.
+       (dump_histogram): New function.
+       (ipa_profile_generate_summary): New function.
+       (ipa_profile_write_summary): New function.
+       (ipa_profile_read_summary): New function.
+       (ipa_profile): Decide on threshold.
+       (pass_ipa_profile): Add ipa_profile_write_summary and ipa_profile_read_summary.
+       * Makefile.in (ipa.o): Update dependencies.
+       * lto-streamer.h (LTO_section_ipa_profile): New section.
+
 2013-03-29  Gabriel Dos Reis  <gdr@integrable-solutions.net>
 
        * tree.h (VAR_P): New.
index 4453377..19377a9 100644 (file)
@@ -2903,7 +2903,8 @@ varpool.o : varpool.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(TREE_FLOW_H) 
 ipa.o : ipa.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(CGRAPH_H) \
    $(TREE_PASS_H) $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \
-   $(IPA_UTILS_H)
+   $(IPA_UTILS_H) tree-inline.h $(HASH_TABLE_H) profile.h $(PARAMS_H) \
+   $(LTO_STREAMER_H) $(DATA_STREAMER_H)
 ipa-prop.o : ipa-prop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    langhooks.h $(GGC_H) $(TARGET_H) $(CGRAPH_H) $(IPA_PROP_H) $(DIAGNOSTIC_H) \
    $(TREE_FLOW_H) $(TM_H) $(TREE_PASS_H) $(FLAGS_H) $(TREE_H) \
index a9b8fb4..0ea73dd 100644 (file)
--- a/gcc/ipa.c
+++ b/gcc/ipa.c
@@ -32,6 +32,12 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-utils.h"
 #include "pointer-set.h"
 #include "ipa-inline.h"
+#include "hash-table.h"
+#include "tree-inline.h"
+#include "profile.h"
+#include "params.h"
+#include "lto-streamer.h"
+#include "data-streamer.h"
 
 /* Look for all functions inlined to NODE and update their inlined_to pointers
    to INLINED_TO.  */
@@ -1040,6 +1046,201 @@ struct ipa_opt_pass_d pass_ipa_whole_program_visibility =
  NULL,                                 /* variable_transform */
 };
 
+/* Entry in the histogram.  */
+
+struct histogram_entry
+{
+  gcov_type count;     /* Execution count; the hash and equality key.  */
+  int time;            /* Time estimate accumulated for this count.  */
+  int size;            /* Size estimate accumulated for this count.  */
+};
+
+/* Histogram of profile values.
+   The histogram is represented as an ordered vector of entries allocated via
+   histogram_pool. During construction a separate hashtable is kept to lookup
+   duplicate entries.  */
+
+vec<histogram_entry *> histogram;
+static alloc_pool histogram_pool;
+
+/* Hashtable support for storing histogram entries hashed by their count.  */
+
+struct histogram_hash : typed_noop_remove <histogram_entry>
+{
+  typedef histogram_entry value_type;
+  typedef histogram_entry compare_type;
+  static inline hashval_t hash (const value_type *);
+  static inline int equal (const value_type *, const compare_type *);
+};
+
+inline hashval_t
+histogram_hash::hash (const histogram_entry *val)
+{
+  return val->count;  /* The count, converted to hashval_t, is the hash.  */
+}
+
+inline int
+histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
+{
+  return val->count == val2->count;  /* Same count means same entry.  */
+}
+
+/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
+   HASHTABLE is the side hash table, keyed by COUNT, kept to avoid duplicates.  */
+
+static void
+account_time_size (hash_table <histogram_hash> hashtable,
+                  vec<histogram_entry *> &histogram,
+                  gcov_type count, int time, int size)
+{
+  histogram_entry key = {count, 0, 0};
+  histogram_entry **val = hashtable.find_slot (&key, INSERT);
+  /* Empty slot: first entry with this COUNT, so allocate and record it.  */
+  if (!*val)
+    {
+      *val = (histogram_entry *) pool_alloc (histogram_pool);
+      **val = key;  /* Starts with zero time and size.  */
+      histogram.safe_push (*val);
+    }
+  (*val)->time += time;
+  (*val)->size += size;
+}
+
+int
+cmp_counts (const void *v1, const void *v2)
+{  /* Sort callback: V1 and V2 point to histogram_entry pointers.  */
+  const histogram_entry *h1 = *(const histogram_entry * const *)v1;
+  const histogram_entry *h2 = *(const histogram_entry * const *)v2;
+  if (h1->count < h2->count)
+    return 1;   /* Smaller counts sort later: the order is descending.  */
+  if (h1->count > h2->count)
+    return -1;
+  return 0;
+}
+
+/* Dump HISTOGRAM to FILE.  */
+
+static void
+dump_histogram (FILE *file, vec<histogram_entry *> histogram)
+{
+  unsigned int i;
+  gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
+  
+  fprintf (dump_file, "Histogram:\n");
+  for (i = 0; i < histogram.length (); i++)
+    {
+      overall_time += histogram[i]->count * histogram[i]->time;
+      overall_size += histogram[i]->size;
+    }
+  if (!overall_time)
+    overall_time = 1;
+  if (!overall_size)
+    overall_size = 1;
+  for (i = 0; i < histogram.length (); i++)
+    {
+      cumulated_time += histogram[i]->count * histogram[i]->time;
+      cumulated_size += histogram[i]->size;
+      fprintf (file, "  "HOST_WIDEST_INT_PRINT_DEC": time:%i (%2.2f) size:%i (%2.2f)\n",
+              (HOST_WIDEST_INT) histogram[i]->count,
+              histogram[i]->time,
+              cumulated_time * 100.0 / overall_time,
+              histogram[i]->size,
+              cumulated_size * 100.0 / overall_size);
+   }
+}
+
+/* Collect histogram from CFG profiles into the global HISTOGRAM vector.  */
+
+static void
+ipa_profile_generate_summary (void)
+{
+  struct cgraph_node *node;
+  gimple_stmt_iterator gsi;
+  hash_table <histogram_hash> hashtable;
+  basic_block bb;
+
+  hashtable.create (10);
+  histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
+                                     10);
+  /* Sum per-statement time and size of each BB and file them under its count.  */
+  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
+    FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->symbol.decl))
+      {
+       int time = 0;
+       int size = 0;
+        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+         {
+           time += estimate_num_insns (gsi_stmt (gsi), &eni_time_weights);
+           size += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
+         }
+       account_time_size (hashtable, histogram, bb->count, time, size);
+      }
+  hashtable.dispose ();  /* Used only to merge duplicate counts.  */
+  histogram.qsort (cmp_counts);  /* Order entries by decreasing count.  */
+}
+
+/* Serialize the histogram into the LTO_section_ipa_profile section.  */
+
+static void
+ipa_profile_write_summary (void)
+{
+  struct lto_simple_output_block *ob
+    = lto_create_simple_output_block (LTO_section_ipa_profile);
+  unsigned int i;
+  /* Layout: number of entries, then count, time and size of each entry.  */
+  streamer_write_uhwi_stream (ob->main_stream, histogram.length());
+  for (i = 0; i < histogram.length (); i++)
+    {
+      streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
+      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
+      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
+    }
+  lto_destroy_simple_output_block (ob);
+}
+
+/* Deserialize the histograms of all input files and merge them into HISTOGRAM.  */
+
+static void
+ipa_profile_read_summary (void)
+{
+  struct lto_file_decl_data ** file_data_vec
+    = lto_get_file_decl_data ();
+  struct lto_file_decl_data * file_data;
+  hash_table <histogram_hash> hashtable;
+  int j = 0;
+
+  hashtable.create (10);
+  histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
+                                     10);
+  /* FILE_DATA_VEC is walked until its terminating null entry.  */
+  while ((file_data = file_data_vec[j++]))
+    {
+      const char *data;
+      size_t len;
+      struct lto_input_block *ib
+       = lto_create_simple_input_block (file_data,
+                                        LTO_section_ipa_profile,
+                                        &data, &len);
+      if (ib)
+       {
+          unsigned int num = streamer_read_uhwi (ib);
+         unsigned int n;
+         for (n = 0; n < num; n++)
+           {
+             gcov_type count = streamer_read_gcov_count (ib);
+             int time = streamer_read_uhwi (ib);
+             int size = streamer_read_uhwi (ib);
+             account_time_size (hashtable, histogram,
+                                count, time, size);
+           }
+         lto_destroy_simple_input_block (file_data,
+                                         LTO_section_ipa_profile,
+                                         ib, data, len);
+       }
+    }
+  hashtable.dispose ();  /* Used only to merge equal counts across files.  */
+  histogram.qsort (cmp_counts);  /* Order entries by decreasing count.  */
+}
 
 /* Simple ipa profile pass propagating frequencies across the callgraph.  */
 
@@ -1051,6 +1252,75 @@ ipa_profile (void)
   int order_pos;
   bool something_changed = false;
   int i;
+  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
+
+  if (dump_file)
+    dump_histogram (dump_file, histogram);
+  for (i = 0; i < (int)histogram.length (); i++)
+    {
+      overall_time += histogram[i]->count * histogram[i]->time;
+      overall_size += histogram[i]->size;
+    }
+  if (overall_time)
+    {
+      gcov_type threshold;
+
+      gcc_assert (overall_size);
+      if (dump_file)
+       {
+         gcov_type min, cumulated_time = 0, cumulated_size = 0;
+
+         fprintf (dump_file, "Overall time: "HOST_WIDEST_INT_PRINT_DEC"\n", 
+                  (HOST_WIDEST_INT)overall_time);
+         min = get_hot_bb_threshold ();
+          for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
+              i++)
+           {
+             cumulated_time += histogram[i]->count * histogram[i]->time;
+             cumulated_size += histogram[i]->size;
+           }
+         fprintf (dump_file, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC
+                  " Time:%3.2f%% Size:%3.2f%%\n", 
+                  (HOST_WIDEST_INT)min,
+                  cumulated_time * 100.0 / overall_time,
+                  cumulated_size * 100.0 / overall_size);
+       }
+      cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
+      threshold = 0;
+      for (i = 0; cumulated < cutoff; i++)
+       {
+         cumulated += histogram[i]->count * histogram[i]->time;
+          threshold = histogram[i]->count;
+       }
+      if (!threshold)
+       threshold = 1;
+      if (dump_file)
+       {
+         gcov_type cumulated_time = 0, cumulated_size = 0;
+
+          for (i = 0;
+              i < (int)histogram.length () && histogram[i]->count >= threshold;
+              i++)
+           {
+             cumulated_time += histogram[i]->count * histogram[i]->time;
+             cumulated_size += histogram[i]->size;
+           }
+         fprintf (dump_file, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC
+                  " Time:%3.2f%% Size:%3.2f%%\n", 
+                  (HOST_WIDEST_INT)threshold,
+                  cumulated_time * 100.0 / overall_time,
+                  cumulated_size * 100.0 / overall_size);
+       }
+      if (threshold > get_hot_bb_threshold ()
+         || in_lto_p)
+       {
+         if (dump_file)
+           fprintf (dump_file, "Threshold updated.\n");
+          set_hot_bb_threshold (threshold);
+       }
+    }
+  histogram.release();
+  free_alloc_pool (histogram_pool);
 
   order_pos = ipa_reverse_postorder (order);
   for (i = order_pos - 1; i >= 0; i--)
@@ -1112,9 +1382,9 @@ struct ipa_opt_pass_d pass_ipa_profile =
   0,                                   /* todo_flags_start */
   0                                     /* todo_flags_finish */
  },
- NULL,                                 /* generate_summary */
- NULL,                                 /* write_summary */
- NULL,                                 /* read_summary */
+ ipa_profile_generate_summary,         /* generate_summary */
+ ipa_profile_write_summary,            /* write_summary */
+ ipa_profile_read_summary,             /* read_summary */
  NULL,                                 /* write_optimization_summary */
  NULL,                                 /* read_optimization_summary */
  NULL,                                 /* stmt_fixup */
index 6d639a9..c619197 100644 (file)
@@ -604,11 +604,11 @@ output_profile_summary (struct lto_simple_output_block *ob)
          units.  */
       gcc_assert (profile_info->runs);
       streamer_write_uhwi_stream (ob->main_stream, profile_info->runs);
-      streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_max);
+      streamer_write_gcov_count_stream (ob->main_stream, profile_info->sum_max);
 
       /* sum_all is needed for computing the working set with the
          histogram.  */
-      streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_all);
+      streamer_write_gcov_count_stream (ob->main_stream, profile_info->sum_all);
 
       /* Create and output a bitpack of non-zero histogram entries indices.  */
       bp = bitpack_create (ob->main_stream);
@@ -620,13 +620,18 @@ output_profile_summary (struct lto_simple_output_block *ob)
         {
           if (!profile_info->histogram[h_ix].num_counters)
             continue;
-          streamer_write_uhwi_stream (ob->main_stream,
+          streamer_write_gcov_count_stream (ob->main_stream,
                                       profile_info->histogram[h_ix].num_counters);
-          streamer_write_uhwi_stream (ob->main_stream,
+          streamer_write_gcov_count_stream (ob->main_stream,
                                       profile_info->histogram[h_ix].min_value);
-          streamer_write_uhwi_stream (ob->main_stream,
+          streamer_write_gcov_count_stream (ob->main_stream,
                                       profile_info->histogram[h_ix].cum_value);
-        }
+         }
+      /* IPA-profile computes hot bb threshold based on cumulated
+        whole program profile.  We need to stream it down to ltrans.  */
+       if (flag_wpa)
+         streamer_write_gcov_count_stream (ob->main_stream,
+                                          get_hot_bb_threshold ());
     }
   else
     streamer_write_uhwi_stream (ob->main_stream, 0);
@@ -1259,8 +1264,8 @@ input_profile_summary (struct lto_input_block *ib,
   if (runs)
     {
       file_data->profile_info.runs = runs;
-      file_data->profile_info.sum_max = streamer_read_uhwi (ib);
-      file_data->profile_info.sum_all = streamer_read_uhwi (ib);
+      file_data->profile_info.sum_max = streamer_read_gcov_count (ib);
+      file_data->profile_info.sum_all = streamer_read_gcov_count (ib);
 
       memset (file_data->profile_info.histogram, 0,
               sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
@@ -1279,12 +1284,16 @@ input_profile_summary (struct lto_input_block *ib,
             continue;
 
           file_data->profile_info.histogram[h_ix].num_counters
-              = streamer_read_uhwi (ib);
+              = streamer_read_gcov_count (ib);
           file_data->profile_info.histogram[h_ix].min_value
-              = streamer_read_uhwi (ib);
+              = streamer_read_gcov_count (ib);
           file_data->profile_info.histogram[h_ix].cum_value
-              = streamer_read_uhwi (ib);
+              = streamer_read_gcov_count (ib);
         }
+      /* IPA-profile computes hot bb threshold based on cumulated
+        whole program profile.  We need to stream it down to ltrans.  */
+      if (flag_ltrans)
+       set_hot_bb_threshold (streamer_read_gcov_count (ib));
     }
 
 }
index 3a86151..0ef421f 100644 (file)
@@ -55,6 +55,7 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] =
   "jmpfuncs",
   "pureconst",
   "reference",
+  "profile",
   "symbol_nodes",
   "opts",
   "cgraphopt",
index 919e304..95fe33a 100644 (file)
@@ -243,6 +243,7 @@ enum lto_section_type
   LTO_section_jump_functions,
   LTO_section_ipa_pure_const,
   LTO_section_ipa_reference,
+  LTO_section_ipa_profile,
   LTO_section_symtab_nodes,
   LTO_section_opts,
   LTO_section_cgraph_opt_sum,
index 57975d1..5394ef5 100644 (file)
@@ -128,25 +128,42 @@ maybe_hot_frequency_p (struct function *fun, int freq)
   return true;
 }
 
+static gcov_type min_count = -1;
+
+/* Return the threshold for hot BB counts, computing it on first use.  */
+
+gcov_type
+get_hot_bb_threshold ()
+{
+  gcov_working_set_t *ws;
+  if (min_count == -1)  /* Not computed or set yet.  */
+    {
+      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
+      gcc_assert (ws);
+      min_count = ws->min_counter;  /* Cached for later calls.  */
+    }
+  return min_count;
+}
+
+/* Set the threshold for hot BB counts to MIN.  */
+
+void
+set_hot_bb_threshold (gcov_type min)
+{
+  min_count = min;  /* get_hot_bb_threshold returns this unless it is -1.  */
+}
+
 /* Return TRUE if frequency FREQ is considered to be hot.  */
 
 static inline bool
 maybe_hot_count_p (struct function *fun, gcov_type count)
 {
-  gcov_working_set_t *ws;
-  static gcov_type min_count = -1;
   if (fun && profile_status_for_function (fun) != PROFILE_READ)
     return true;
   /* Code executed at most once is not hot.  */
   if (profile_info->runs >= count)
     return false;
-  if (min_count == -1)
-    {
-      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
-      gcc_assert (ws);
-      min_count = ws->min_counter;
-    }
-  return (count >= min_count);
+  return (count >= get_hot_bb_threshold ());
 }
 
 /* Return true in case BB can be CPU intensive and should be optimized
index e21250b..c7d5f1a 100644 (file)
@@ -48,4 +48,8 @@ extern void del_node_map (void);
 
 extern void compute_working_sets (void);
 
+/* In predict.c.  */
+extern gcov_type get_hot_bb_threshold (void);
+extern void set_hot_bb_threshold (gcov_type);
+
 #endif /* PROFILE_H */