1 /* Read and annotate call graph profile from the auto profile data file.
2 Copyright (C) 2014. Free Software Foundation, Inc.
3 Contributed by Dehao Chen (dehao@google.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
28 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "basic-block.h"
33 #include "diagnostic-core.h"
37 #include "langhooks.h"
39 #include "tree-pass.h"
41 #include "tree-ssa-alias.h"
43 #include "tree-cfgcleanup.h"
44 #include "tree-ssa-operands.h"
45 #include "tree-into-ssa.h"
46 #include "internal-fn.h"
48 #include "gimple-expr.h"
50 #include "gimple-iterator.h"
51 #include "gimple-ssa.h"
53 #include "value-prof.h"
56 #include "ipa-inline.h"
57 #include "tree-inline.h"
58 #include "stringpool.h"
59 #include "auto-profile.h"
62 /* The following routines implements AutoFDO optimization.
64 This optimization uses sampling profiles to annotate basic block counts
65 and uses heuristics to estimate branch probabilities.
67 There are three phases in AutoFDO:
69 Phase 1: Read profile from the profile data file.
70 The following info is read from the profile datafile:
71 * string_table: a map between function name and its index.
72 * autofdo_source_profile: a map from function_instance name to
73 function_instance. This is represented as a forest of
75 * WorkingSet: a histogram of how many instructions are covered for a
76 given percentage of total cycles. This is describing the binary
77 level information (not source level). This info is used to help
78 decide if we want aggressive optimizations that could increase
79 code footprint (e.g. loop unroll etc.)
80 A function instance is an instance of function that could either be a
81 standalone symbol, or a clone of a function that is inlined into another
84 Phase 2: Early inline + value profile transformation.
85 Early inline uses autofdo_source_profile to find if a callsite is:
86 * inlined in the profiled binary.
87 * callee body is hot in the profiling run.
88 If both condition satisfies, early inline will inline the callsite
89 regardless of the code growth.
90 Phase 2 is an iterative process. During each iteration, we also check
91 if an indirect callsite is promoted and inlined in the profiling run.
92 If yes, vpt will happen to force promote it and in the next iteration,
93 einline will inline the promoted callsite in the next iteration.
95 Phase 3: Annotate control flow graph.
96 AutoFDO uses a separate pass to:
97 * Annotate basic block count
98 * Estimate branch probability
100 After the above 3 phases, all profile is readily annotated on the GCC IR.
101 AutoFDO tries to reuse all FDO infrastructure as much as possible to make
102 use of the profile. E.g. it uses existing mechanism to calculate the basic
103 block/edge frequency, as well as the cgraph node/edge count.
106 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
107 #define AUTO_PROFILE_VERSION 1
112 /* Represent a source location: (function_decl, lineno). */
113 typedef std::pair<tree, unsigned> decl_lineno;
115 /* Represent an inline stack. vector[0] is the leaf node. */
116 typedef auto_vec<decl_lineno> inline_stack;
118 /* String array that stores function names. */
119 typedef auto_vec<char *> string_vector;
121 /* Map from function name's index in string_table to target's
123 typedef std::map<unsigned, gcov_type> icall_target_map;
125 /* Set of gimple stmts. Used to track if the stmt has already been promoted
127 typedef std::set<gimple> stmt_set;
129 /* Represent count info of an inline stack. */
132 /* Sampled count of the inline stack. */
135 /* Map from indirect call target to its sample count. */
136 icall_target_map targets;
138 /* Whether this inline stack is already used in annotation.
140 Each inline stack should only be used to annotate IR once.
141 This will be enforced when instruction-level discriminator
146 /* operator< for "const char *". */
147 struct string_compare
149 bool operator()(const char *a, const char *b) const
151 return strcmp (a, b) < 0;
155 /* Store a string array, indexed by string position in the array. */
164 /* For a given string, returns its index. */
165 int get_index (const char *name) const;
167 /* For a given decl, returns the index of the decl name. */
168 int get_index_by_decl (tree decl) const;
170 /* For a given index, returns the string. */
171 const char *get_name (int index) const;
173 /* Read profile, return TRUE on success. */
177 typedef std::map<const char *, unsigned, string_compare> string_index_map;
178 string_vector vector_;
179 string_index_map map_;
182 /* Profile of a function instance:
183 1. total_count of the function.
184 2. head_count (entry basic block count) of the function (only valid when
185 function is a top-level function_instance, i.e. it is the original copy
186 instead of the inlined copy).
187 3. map from source location (decl_lineno) to profile (count_info).
188 4. map from callsite to callee function_instance. */
189 class function_instance
192 typedef auto_vec<function_instance *> function_instance_stack;
194 /* Read the profile and return a function_instance with head count as
195 HEAD_COUNT. Recursively read callsites to create nested function_instances
196 too. STACK is used to track the recursive creation process. */
197 static function_instance *
198 read_function_instance (function_instance_stack *stack,
199 gcov_type head_count);
201 /* Recursively deallocate all callsites (nested function_instances). */
202 ~function_instance ();
221 /* Traverse callsites of the current function_instance to find one at the
222 location of LINENO and callee name represented in DECL. */
223 function_instance *get_function_instance_by_decl (unsigned lineno,
226 /* Store the profile info for LOC in INFO. Return TRUE if profile info
228 bool get_count_info (location_t loc, count_info *info) const;
230 /* Read the inlined indirect call target profile for STMT and store it in
231 MAP, return the total count for all inlined indirect calls. */
232 gcov_type find_icall_target_map (gimple stmt, icall_target_map *map) const;
234 /* Sum of counts that is used during annotation. */
235 gcov_type total_annotated_count () const;
237 /* Mark LOC as annotated. */
238 void mark_annotated (location_t loc);
241 /* Callsite, represented as (decl_lineno, callee_function_name_index). */
242 typedef std::pair<unsigned, unsigned> callsite;
244 /* Map from callsite to callee function_instance. */
245 typedef std::map<callsite, function_instance *> callsite_map;
247 function_instance (unsigned name, gcov_type head_count)
248 : name_ (name), total_count_ (0), head_count_ (head_count)
252 /* Map from source location (decl_lineno) to profile (count_info). */
253 typedef std::map<unsigned, count_info> position_count_map;
255 /* function_instance name index in the string_table. */
258 /* Total sample count. */
259 gcov_type total_count_;
261 /* Entry BB's sample count. */
262 gcov_type head_count_;
264 /* Map from callsite location to callee function_instance. */
265 callsite_map callsites;
267 /* Map from source location to count_info. */
268 position_count_map pos_counts;
271 /* Profile for all functions. */
272 class autofdo_source_profile
275 static autofdo_source_profile *
278 autofdo_source_profile *map = new autofdo_source_profile ();
286 ~autofdo_source_profile ();
288 /* For a given DECL, returns the top-level function_instance. */
289 function_instance *get_function_instance_by_decl (tree decl) const;
291 /* Find count_info for a given gimple STMT. If found, store the count_info
292 in INFO and return true; otherwise return false. */
293 bool get_count_info (gimple stmt, count_info *info) const;
295 /* Find total count of the callee of EDGE. */
296 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
298 /* Update value profile INFO for STMT from the inlined indirect callsite.
299 Return true if INFO is updated. */
300 bool update_inlined_ind_target (gimple stmt, count_info *info);
302 /* Mark LOC as annotated. */
303 void mark_annotated (location_t loc);
305 /* Check whether the profile is empty. */
312 /* Map from function_instance name index (in string_table) to
313 function_instance. */
314 typedef std::map<unsigned, function_instance *> name_function_instance_map;
316 autofdo_source_profile () {}
318 /* Read AutoFDO profile and returns TRUE on success. */
321 /* Return the function_instance in the profile that correspond to the
324 get_function_instance_by_inline_stack (const inline_stack &stack) const;
326 name_function_instance_map map_;
329 /* Store the strings read from the profile data file. */
330 static string_table *afdo_string_table;
332 /* Store the AutoFDO source profile. */
333 static autofdo_source_profile *afdo_source_profile;
335 /* gcov_ctr_summary structure to store the profile_info. */
336 static struct gcov_ctr_summary *afdo_profile_info;
338 /* Helper functions. */
340 /* Return the original name of NAME: strip the suffix that starts
341 with '.' Caller is responsible for freeing RET. */
344 get_original_name (const char *name)
346 char *ret = xstrdup (name);
347 char *find = strchr (ret, '.');
353 /* Return the combined location, which is a 32bit integer in which
354 higher 16 bits stores the line offset of LOC to the start lineno
355 of DECL, The lower 16 bits stores the discriminator. */
358 get_combined_location (location_t loc, tree decl)
360 /* TODO: allow more bits for line and less bits for discriminator. */
361 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
362 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
363 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
366 /* Return the function decl of a given lexical BLOCK. */
369 get_function_decl_from_block (tree block)
373 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
376 for (decl = BLOCK_ABSTRACT_ORIGIN (block);
377 decl && (TREE_CODE (decl) == BLOCK);
378 decl = BLOCK_ABSTRACT_ORIGIN (decl))
379 if (TREE_CODE (decl) == FUNCTION_DECL)
384 /* Store inline stack for STMT in STACK. */
387 get_inline_stack (location_t locus, inline_stack *stack)
389 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
392 tree block = LOCATION_BLOCK (locus);
393 if (block && TREE_CODE (block) == BLOCK)
396 for (block = BLOCK_SUPERCONTEXT (block);
397 block && (TREE_CODE (block) == BLOCK);
398 block = BLOCK_SUPERCONTEXT (block))
400 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
401 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
404 tree decl = get_function_decl_from_block (block);
406 std::make_pair (decl, get_combined_location (locus, decl)));
412 std::make_pair (current_function_decl,
413 get_combined_location (locus, current_function_decl)));
416 /* Return STMT's combined location, which is a 32bit integer in which
417 higher 16 bits stores the line offset of LOC to the start lineno
418 of DECL, The lower 16 bits stores the discriminator. */
421 get_relative_location_for_stmt (gimple stmt)
423 location_t locus = gimple_location (stmt);
424 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
425 return UNKNOWN_LOCATION;
427 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
428 block = BLOCK_SUPERCONTEXT (block))
429 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
430 return get_combined_location (locus,
431 get_function_decl_from_block (block));
432 return get_combined_location (locus, current_function_decl);
435 /* Return true if BB contains indirect call. */
438 has_indirect_call (basic_block bb)
440 gimple_stmt_iterator gsi;
442 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
444 gimple stmt = gsi_stmt (gsi);
445 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
446 && (gimple_call_fn (stmt) == NULL
447 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
453 /* Member functions for string_table. */
457 string_table::~string_table ()
459 for (unsigned i = 0; i < vector_.length (); i++)
464 /* Return the index of a given function NAME. Return -1 if NAME is not
465 found in string table. */
468 string_table::get_index (const char *name) const
472 string_index_map::const_iterator iter = map_.find (name);
473 if (iter == map_.end ())
479 /* Return the index of a given function DECL. Return -1 if DECL is not
480 found in string table. */
483 string_table::get_index_by_decl (tree decl) const
486 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
487 int ret = get_index (name);
491 ret = get_index (lang_hooks.dwarf_name (decl, 0));
494 if (DECL_ABSTRACT_ORIGIN (decl))
495 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
500 /* Return the function name of a given INDEX. */
503 string_table::get_name (int index) const
505 gcc_assert (index > 0 && index < (int)vector_.length ());
506 return vector_[index];
509 /* Read the string table. Return TRUE if reading is successful. */
512 string_table::read ()
514 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
516 /* Skip the length of the section. */
517 gcov_read_unsigned ();
518 /* Read in the file name table. */
519 unsigned string_num = gcov_read_unsigned ();
520 for (unsigned i = 0; i < string_num; i++)
522 vector_.safe_push (get_original_name (gcov_read_string ()));
523 map_[vector_.last ()] = i;
528 /* Member functions for function_instance. */
530 function_instance::~function_instance ()
532 for (callsite_map::iterator iter = callsites.begin ();
533 iter != callsites.end (); ++iter)
537 /* Traverse callsites of the current function_instance to find one at the
538 location of LINENO and callee name represented in DECL. */
541 function_instance::get_function_instance_by_decl (unsigned lineno,
544 int func_name_idx = afdo_string_table->get_index_by_decl (decl);
545 if (func_name_idx != -1)
547 callsite_map::const_iterator ret
548 = callsites.find (std::make_pair (lineno, func_name_idx));
549 if (ret != callsites.end ())
553 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
554 if (func_name_idx != -1)
556 callsite_map::const_iterator ret
557 = callsites.find (std::make_pair (lineno, func_name_idx));
558 if (ret != callsites.end ())
561 if (DECL_ABSTRACT_ORIGIN (decl))
562 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
567 /* Store the profile info for LOC in INFO. Return TRUE if profile info
571 function_instance::get_count_info (location_t loc, count_info *info) const
573 position_count_map::const_iterator iter = pos_counts.find (loc);
574 if (iter == pos_counts.end ())
576 *info = iter->second;
580 /* Mark LOC as annotated. */
583 function_instance::mark_annotated (location_t loc)
585 position_count_map::iterator iter = pos_counts.find (loc);
586 if (iter == pos_counts.end ())
588 iter->second.annotated = true;
591 /* Read the inlined indirect call target profile for STMT and store it in
592 MAP, return the total count for all inlined indirect calls. */
595 function_instance::find_icall_target_map (gimple stmt,
596 icall_target_map *map) const
599 unsigned stmt_offset = get_relative_location_for_stmt (stmt);
601 for (callsite_map::const_iterator iter = callsites.begin ();
602 iter != callsites.end (); ++iter)
604 unsigned callee = iter->second->name ();
605 /* Check if callsite location match the stmt. */
606 if (iter->first.first != stmt_offset)
608 struct cgraph_node *node = cgraph_node_for_asm (
609 get_identifier (afdo_string_table->get_name (callee)));
612 if (!check_ic_target (stmt, node))
614 (*map)[callee] = iter->second->total_count ();
615 ret += iter->second->total_count ();
620 /* Read the profile and create a function_instance with head count as
621 HEAD_COUNT. Recursively read callsites to create nested function_instances
622 too. STACK is used to track the recursive creation process. */
624 /* function instance profile format:
628 NUM_POS_COUNTS: 4 bytes
629 NUM_CALLSITES: 4 byte
631 POS_1_OFFSET: 4 bytes
635 VALUE_PROFILE_TYPE: 4 bytes
645 CALLSITE_1_OFFSET: 4 bytes
646 FUNCTION_INSTANCE_PROFILE (nested)
652 function_instance::read_function_instance (function_instance_stack *stack,
653 gcov_type head_count)
655 unsigned name = gcov_read_unsigned ();
656 unsigned num_pos_counts = gcov_read_unsigned ();
657 unsigned num_callsites = gcov_read_unsigned ();
658 function_instance *s = new function_instance (name, head_count);
659 stack->safe_push (s);
661 for (unsigned i = 0; i < num_pos_counts; i++)
663 unsigned offset = gcov_read_unsigned () & 0xffff0000;
664 unsigned num_targets = gcov_read_unsigned ();
665 gcov_type count = gcov_read_counter ();
666 s->pos_counts[offset].count = count;
667 for (unsigned j = 0; j < stack->length (); j++)
668 (*stack)[j]->total_count_ += count;
669 for (unsigned j = 0; j < num_targets; j++)
671 /* Only indirect call target histogram is supported now. */
672 gcov_read_unsigned ();
673 gcov_type target_idx = gcov_read_counter ();
674 s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
677 for (unsigned i = 0; i < num_callsites; i++)
679 unsigned offset = gcov_read_unsigned ();
680 function_instance *callee_function_instance
681 = read_function_instance (stack, 0);
682 s->callsites[std::make_pair (offset, callee_function_instance->name ())]
683 = callee_function_instance;
689 /* Sum of counts that is used during annotation. */
692 function_instance::total_annotated_count () const
695 for (callsite_map::const_iterator iter = callsites.begin ();
696 iter != callsites.end (); ++iter)
697 ret += iter->second->total_annotated_count ();
698 for (position_count_map::const_iterator iter = pos_counts.begin ();
699 iter != pos_counts.end (); ++iter)
700 if (iter->second.annotated)
701 ret += iter->second.count;
705 /* Member functions for autofdo_source_profile. */
707 autofdo_source_profile::~autofdo_source_profile ()
709 for (name_function_instance_map::const_iterator iter = map_.begin ();
710 iter != map_.end (); ++iter)
714 /* For a given DECL, returns the top-level function_instance. */
717 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
719 int index = afdo_string_table->get_index_by_decl (decl);
722 name_function_instance_map::const_iterator ret = map_.find (index);
723 return ret == map_.end () ? NULL : ret->second;
726 /* Find count_info for a given gimple STMT. If found, store the count_info
727 in INFO and return true; otherwise return false. */
730 autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
732 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
736 get_inline_stack (gimple_location (stmt), &stack);
737 if (stack.length () == 0)
739 function_instance *s = get_function_instance_by_inline_stack (stack);
742 return s->get_count_info (stack[0].second, info);
745 /* Mark LOC as annotated. */
748 autofdo_source_profile::mark_annotated (location_t loc)
751 get_inline_stack (loc, &stack);
752 if (stack.length () == 0)
754 function_instance *s = get_function_instance_by_inline_stack (stack);
757 s->mark_annotated (stack[0].second);
760 /* Update value profile INFO for STMT from the inlined indirect callsite.
761 Return true if INFO is updated. */
764 autofdo_source_profile::update_inlined_ind_target (gimple stmt,
767 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
771 get_count_info (stmt, &old_info);
773 for (icall_target_map::const_iterator iter = old_info.targets.begin ();
774 iter != old_info.targets.end (); ++iter)
775 total += iter->second;
777 /* Program behavior changed, original promoted (and inlined) target is not
778 hot any more. Will avoid promote the original target.
780 To check if original promoted target is still hot, we check the total
781 count of the unpromoted targets (stored in old_info). If it is no less
782 than half of the callsite count (stored in INFO), the original promoted
783 target is considered not hot any more. */
784 if (total >= info->count / 2)
788 get_inline_stack (gimple_location (stmt), &stack);
789 if (stack.length () == 0)
791 function_instance *s = get_function_instance_by_inline_stack (stack);
794 icall_target_map map;
795 if (s->find_icall_target_map (stmt, &map) == 0)
797 for (icall_target_map::const_iterator iter = map.begin ();
798 iter != map.end (); ++iter)
799 info->targets[iter->first] = iter->second;
803 /* Find total count of the callee of EDGE. */
806 autofdo_source_profile::get_callsite_total_count (
807 struct cgraph_edge *edge) const
810 stack.safe_push (std::make_pair (edge->callee->decl, 0));
811 get_inline_stack (gimple_location (edge->call_stmt), &stack);
813 function_instance *s = get_function_instance_by_inline_stack (stack);
815 || afdo_string_table->get_index (IDENTIFIER_POINTER (
816 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
819 return s->total_count ();
822 /* Read AutoFDO profile and returns TRUE on success. */
824 /* source profile format:
826 GCOV_TAG_AFDO_FUNCTION: 4 bytes
828 NUM_FUNCTIONS: 4 bytes
832 FUNCTION_INSTANCE_N. */
835 autofdo_source_profile::read ()
837 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
839 inform (0, "Not expected TAG.");
843 /* Skip the length of the section. */
844 gcov_read_unsigned ();
846 /* Read in the function/callsite profile, and store it in local
848 unsigned function_num = gcov_read_unsigned ();
849 for (unsigned i = 0; i < function_num; i++)
851 function_instance::function_instance_stack stack;
852 function_instance *s = function_instance::read_function_instance (
853 &stack, gcov_read_counter ());
854 afdo_profile_info->sum_all += s->total_count ();
855 map_[s->name ()] = s;
860 /* Return the function_instance in the profile that correspond to the
864 autofdo_source_profile::get_function_instance_by_inline_stack (
865 const inline_stack &stack) const
867 name_function_instance_map::const_iterator iter = map_.find (
868 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
869 if (iter == map_.end())
871 function_instance *s = iter->second;
872 for (unsigned i = stack.length() - 1; i > 0; i--)
874 s = s->get_function_instance_by_decl (
875 stack[i].second, stack[i - 1].first);
882 /* Module profile is only used by LIPO. Here we simply ignore it. */
885 fake_read_autofdo_module_profile ()
887 /* Read in the module info. */
888 gcov_read_unsigned ();
890 /* Skip the length of the section. */
891 gcov_read_unsigned ();
893 /* Read in the file name table. */
894 unsigned total_module_num = gcov_read_unsigned ();
895 gcc_assert (total_module_num == 0);
898 /* Read data from profile data file. */
903 if (gcov_open (auto_profile_file, 1) == 0)
904 error ("Cannot open profile file %s.", auto_profile_file);
906 if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
907 error ("AutoFDO profile magic number does not mathch.");
909 /* Skip the version number. */
910 unsigned version = gcov_read_unsigned ();
911 if (version != AUTO_PROFILE_VERSION)
912 error ("AutoFDO profile version %u does match %u.",
913 version, AUTO_PROFILE_VERSION);
915 /* Skip the empty integer. */
916 gcov_read_unsigned ();
919 afdo_string_table = new string_table ();
920 if (!afdo_string_table->read())
921 error ("Cannot read string table from %s.", auto_profile_file);
923 /* autofdo_source_profile. */
924 afdo_source_profile = autofdo_source_profile::create ();
925 if (afdo_source_profile == NULL)
926 error ("Cannot read function profile from %s.", auto_profile_file);
927 if (afdo_source_profile->empty ())
928 error ("The given profile %s is empty.", auto_profile_file);
930 /* autofdo_module_profile. */
931 fake_read_autofdo_module_profile ();
933 /* Read in the working set. */
934 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
935 error ("Cannot read working set from %s.", auto_profile_file);
937 /* Skip the length of the section. */
938 gcov_read_unsigned ();
939 gcov_working_set_t set[128];
940 for (unsigned i = 0; i < 128; i++)
942 set[i].num_counters = gcov_read_unsigned ();
943 set[i].min_counter = gcov_read_counter ();
945 add_working_set (set);
948 /* From AutoFDO profiles, find values inside STMT for that we want to measure
949 histograms for indirect-call optimization.
951 This function is actually served for 2 purposes:
952 * before annotation, we need to mark histogram, promote and inline
953 * after annotation, we just need to mark, and let follow-up logic to
954 decide if it needs to promote and inline. */
957 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
960 gimple stmt = gsi_stmt (*gsi);
963 if (map.size () == 0 || gimple_code (stmt) != GIMPLE_CALL
964 || gimple_call_fndecl (stmt) != NULL_TREE)
967 callee = gimple_call_fn (stmt);
969 histogram_value hist = gimple_alloc_histogram_value (
970 cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
971 hist->n_counters = 3;
972 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
973 gimple_add_histogram_value (cfun, stmt, hist);
976 icall_target_map::const_iterator max_iter = map.end ();
978 for (icall_target_map::const_iterator iter = map.begin ();
979 iter != map.end (); ++iter)
981 total += iter->second;
982 if (max_iter == map.end () || max_iter->second < iter->second)
986 hist->hvalue.counters[0]
987 = (unsigned long long)afdo_string_table->get_name (max_iter->first);
988 hist->hvalue.counters[1] = max_iter->second;
989 hist->hvalue.counters[2] = total;
994 struct cgraph_edge *indirect_edge
995 = cgraph_edge (cgraph_get_node (current_function_decl), stmt);
996 struct cgraph_node *direct_call = cgraph_node_for_asm (
997 get_identifier ((const char *) hist->hvalue.counters[0]));
999 if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1001 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1003 struct cgraph_edge *new_edge
1004 = cgraph_turn_edge_to_speculative(indirect_edge, direct_call, 0, 0);
1005 cgraph_redirect_edge_call_stmt_to_callee(new_edge);
1006 gimple_remove_histogram_value (cfun, stmt, hist);
1007 inline_call (new_edge, true, NULL, NULL, false);
1010 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1011 histograms and adds them to list VALUES. */
1014 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1017 afdo_indirect_call (gsi, map, transform);
1020 typedef std::set<basic_block> bb_set;
1021 typedef std::set<edge> edge_set;
1024 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1026 return annotated.find (bb) != annotated.end ();
1030 set_bb_annotated (basic_block bb, bb_set *annotated)
1032 annotated->insert (bb);
1036 is_edge_annotated (const edge e, const edge_set &annotated)
1038 return annotated.find (e) != annotated.end ();
1042 set_edge_annotated (edge e, edge_set *annotated)
1044 annotated->insert (e);
1047 /* For a given BB, set its execution count. Attach value profile if a stmt
1048 is not in PROMOTED, because we only want to promote an indirect call once.
1049 Return TRUE if BB is annotated. */
1052 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1054 gimple_stmt_iterator gsi;
1057 gcov_type max_count = 0;
1058 bool has_annotated = false;
1060 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1063 gimple stmt = gsi_stmt (gsi);
1064 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1066 if (afdo_source_profile->get_count_info (stmt, &info))
1068 if (info.count > max_count)
1069 max_count = info.count;
1070 has_annotated = true;
1071 if (info.targets.size () > 0
1072 && promoted.find (stmt) == promoted.end ())
1073 afdo_vpt (&gsi, info.targets, false);
1080 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1081 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1082 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1084 gimple phi = gsi_stmt (gsi);
1086 for (i = 0; i < gimple_phi_num_args (phi); i++)
1087 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1089 FOR_EACH_EDGE (e, ei, bb->succs)
1090 afdo_source_profile->mark_annotated (e->goto_locus);
1092 bb->count = max_count;
1096 /* BB1 and BB2 are in an equivalent class iff:
1097 1. BB1 dominates BB2.
1098 2. BB2 post-dominates BB1.
1099 3. BB1 and BB2 are in the same loop nest.
1100 This function finds the equivalent class for each basic block, and
1101 stores a pointer to the first BB in its equivalent class. Meanwhile,
1102 set bb counts for the same equivalent class to be idenical. Update
1103 ANNOTATED_BB for the first BB in its equivalent class. */
1106 afdo_find_equiv_class (bb_set *annotated_bb)
1110 FOR_ALL_BB_FN (bb, cfun)
1113 FOR_ALL_BB_FN (bb, cfun)
1115 vec<basic_block> dom_bbs;
1119 if (bb->aux != NULL)
1122 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1123 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1124 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1125 && bb1->loop_father == bb->loop_father)
1128 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1130 bb->count = bb1->count;
1131 set_bb_annotated (bb, annotated_bb);
1134 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1135 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1136 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1137 && bb1->loop_father == bb->loop_father)
1140 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1142 bb->count = bb1->count;
1143 set_bb_annotated (bb, annotated_bb);
1149 /* If a basic block's count is known, and only one of its in/out edges' count
1150 is unknown, its count can be calculated. Meanwhile, if all of the in/out
1151 edges' counts are known, then the basic block's unknown count can also be
1153 IS_SUCC is true if out edges of a basic blocks are examined.
1154 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1155 Return TRUE if any basic block/edge count is changed. */
1158 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1159 edge_set *annotated_edge)
1162 bool changed = false;
1164 FOR_EACH_BB_FN (bb, cfun)
1166 edge e, unknown_edge = NULL;
1168 int num_unknown_edge = 0;
1169 gcov_type total_known_count = 0;
1171 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1172 if (!is_edge_annotated (e, *annotated_edge))
1173 num_unknown_edge++, unknown_edge = e;
1175 total_known_count += e->count;
1177 if (num_unknown_edge == 0)
1179 if (total_known_count > bb->count)
1181 bb->count = total_known_count;
1184 if (!is_bb_annotated (bb, *annotated_bb))
1186 set_bb_annotated (bb, annotated_bb);
1190 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1192 if (bb->count >= total_known_count)
1193 unknown_edge->count = bb->count - total_known_count;
1195 unknown_edge->count = 0;
1196 set_edge_annotated (unknown_edge, annotated_edge);
1203 /* Special propagation for circuit expressions. Because GCC translates
1204 control flow into data flow for circuit expressions. E.g.
1211 will be translated into:
1226 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1232 In this case, we need to propagate through PHI to determine the edge
1233 count of BB1->BB.t1, BB.t1->BB.t2.
1234 Update ANNOTATED_EDGE accordingly. */
1237 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1240 FOR_ALL_BB_FN (bb, cfun)
1243 tree cmp_rhs, cmp_lhs;
1244 gimple cmp_stmt = last_stmt (bb);
1248 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1250 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1251 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1252 if (!TREE_CONSTANT (cmp_rhs)
1253 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1255 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1257 if (!is_bb_annotated (bb, annotated_bb))
1259 phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1260 while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN
1261 && gimple_assign_single_p (phi_stmt)
1262 && TREE_CODE (gimple_assign_rhs1 (phi_stmt)) == SSA_NAME)
1263 phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt));
1264 if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1266 FOR_EACH_EDGE (e, ei, bb->succs)
1268 unsigned i, total = 0;
1270 bool check_value_one = (((integer_onep (cmp_rhs))
1271 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1272 ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1273 if (!is_edge_annotated (e, *annotated_edge))
1275 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1277 tree val = gimple_phi_arg_def (phi_stmt, i);
1278 edge ep = gimple_phi_arg_edge (phi_stmt, i);
1280 if (!TREE_CONSTANT (val)
1281 || !(integer_zerop (val) || integer_onep (val)))
1283 if (check_value_one ^ integer_onep (val))
1287 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1289 ep->probability = 0;
1291 set_edge_annotated (ep, annotated_edge);
1294 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1296 only_one->probability = e->probability;
1297 only_one->count = e->count;
1298 set_edge_annotated (only_one, annotated_edge);
1304 /* Propagate the basic block count and edge count on the control flow
1305 graph. We do the propagation iteratively until stablize. */
1308 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1311 bool changed = true;
1314 FOR_ALL_BB_FN (bb, cfun)
1316 bb->count = ((basic_block)bb->aux)->count;
1317 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1318 set_bb_annotated (bb, annotated_bb);
1321 while (changed && i++ < 10)
1325 if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1327 if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1329 afdo_propagate_circuit (*annotated_bb, annotated_edge);
1333 /* Propagate counts on control flow graph and calculate branch
1337 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1340 bool has_sample = false;
1342 FOR_EACH_BB_FN (bb, cfun)
1349 calculate_dominance_info (CDI_POST_DOMINATORS);
1350 calculate_dominance_info (CDI_DOMINATORS);
1351 loop_optimizer_init (0);
1353 afdo_find_equiv_class (annotated_bb);
1354 afdo_propagate (annotated_bb, annotated_edge);
1356 FOR_EACH_BB_FN (bb, cfun)
1360 int num_unknown_succ = 0;
1361 gcov_type total_count = 0;
1363 FOR_EACH_EDGE (e, ei, bb->succs)
1365 if (!is_edge_annotated (e, *annotated_edge))
1368 total_count += e->count;
1370 if (num_unknown_succ == 0 && total_count > 0)
1372 FOR_EACH_EDGE (e, ei, bb->succs)
1373 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1376 FOR_ALL_BB_FN (bb, cfun)
1381 FOR_EACH_EDGE (e, ei, bb->succs)
1382 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1386 loop_optimizer_finalize ();
1387 free_dominance_info (CDI_DOMINATORS);
1388 free_dominance_info (CDI_POST_DOMINATORS);
1391 /* Perform value profile transformation using AutoFDO profile. Add the
1392 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1393 indirect call promoted. */
1396 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1399 if (afdo_source_profile->get_function_instance_by_decl (
1400 current_function_decl) == NULL)
1403 compute_inline_parameters (cgraph_get_node (current_function_decl), true);
1405 bool has_vpt = false;
1406 FOR_EACH_BB_FN (bb, cfun)
1408 if (!has_indirect_call (bb))
1410 gimple_stmt_iterator gsi;
1412 gcov_type bb_count = 0;
1413 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1416 gimple stmt = gsi_stmt (gsi);
1417 if (afdo_source_profile->get_count_info (stmt, &info))
1418 bb_count = MAX (bb_count, info.count);
1421 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1423 gimple stmt = gsi_stmt (gsi);
1424 /* IC_promotion and early_inline_2 is done in multiple iterations.
1425 No need to promoted the stmt if its in promoted_stmts (means
1426 it is already been promoted in the previous iterations). */
1427 if (gimple_code (stmt) != GIMPLE_CALL || gimple_call_fn (stmt) == NULL
1428 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1429 || promoted_stmts->find (stmt) != promoted_stmts->end ())
1433 afdo_source_profile->get_count_info (stmt, &info);
1434 info.count = bb_count;
1435 if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1437 /* Promote the indirect call and update the promoted_stmts. */
1438 promoted_stmts->insert (stmt);
1439 afdo_vpt (&gsi, info.targets, true);
1447 optimize_inline_calls (current_function_decl);
1454 /* Annotate auto profile to the control flow graph. Do not annotate value
1455 profile for stmts in PROMOTED_STMTS. */
1458 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1461 bb_set annotated_bb;
1462 edge_set annotated_edge;
1463 const function_instance *s
1464 = afdo_source_profile->get_function_instance_by_decl (
1465 current_function_decl);
1469 cgraph_get_node (current_function_decl)->count = s->head_count ();
1470 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1471 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1473 FOR_EACH_BB_FN (bb, cfun)
1479 FOR_EACH_EDGE (e, ei, bb->succs)
1482 if (afdo_set_bb_count (bb, promoted_stmts))
1483 set_bb_annotated (bb, &annotated_bb);
1484 if (bb->count > max_count)
1485 max_count = bb->count;
1487 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1488 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1490 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1491 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1492 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1494 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1495 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1497 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1498 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1499 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1501 afdo_source_profile->mark_annotated (
1502 DECL_SOURCE_LOCATION (current_function_decl));
1503 afdo_source_profile->mark_annotated (cfun->function_start_locus);
1504 afdo_source_profile->mark_annotated (cfun->function_end_locus);
1507 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1509 profile_status_for_fn (cfun) = PROFILE_READ;
1511 if (flag_value_profile_transformations)
1513 gimple_value_profile_transformations ();
1514 free_dominance_info (CDI_DOMINATORS);
1515 free_dominance_info (CDI_POST_DOMINATORS);
1516 update_ssa (TODO_update_ssa);
1520 /* Wrapper function to invoke early inliner. */
1525 compute_inline_parameters (cgraph_get_node (current_function_decl), true);
1526 unsigned todo = early_inliner ();
1527 if (todo & TODO_update_ssa_any)
1528 update_ssa (TODO_update_ssa);
1531 /* Use AutoFDO profile to annoate the control flow graph.
1532 Return the todo flag. */
1537 struct cgraph_node *node;
1539 if (cgraph_state == CGRAPH_STATE_FINISHED)
1542 init_node_map (true);
1543 profile_info = autofdo::afdo_profile_info;
1545 FOR_EACH_FUNCTION (node)
1547 if (!gimple_has_body_p (node->decl))
1550 /* Don't profile functions produced for builtin stuff. */
1551 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1554 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1556 /* First do indirect call promotion and early inline to make the
1557 IR match the profiled binary before actual annotation.
1559 This is needed because an indirect call might have been promoted
1560 and inlined in the profiled binary. If we do not promote and
1561 inline these indirect calls before annotation, the profile for
1562 these promoted functions will be lost.
1564 e.g. foo() --indirect_call--> bar()
1565 In profiled binary, the callsite is promoted and inlined, making
1566 the profile look like:
1576 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1577 If we perform annotation on it, the profile inside bar@loc_foo2
1580 To avoid this, we promote loc_foo_2 and inline the promoted bar
1581 function before annotation, so the profile inside bar@loc_foo2
1583 autofdo::stmt_set promoted_stmts;
1584 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1586 if (!flag_value_profile_transformations
1587 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1593 autofdo::afdo_annotate_cfg (promoted_stmts);
1594 compute_function_frequency ();
1596 /* Local pure-const may imply need to fixup the cfg. */
1597 if (execute_fixup_cfg () & TODO_cleanup_cfg)
1598 cleanup_tree_cfg ();
1600 free_dominance_info (CDI_DOMINATORS);
1601 free_dominance_info (CDI_POST_DOMINATORS);
1602 rebuild_cgraph_edges ();
1603 compute_inline_parameters (cgraph_get_node (current_function_decl), true);
1607 return TODO_rebuild_cgraph_edges;
1609 } /* namespace autofdo. */
1611 /* Read the profile from the profile data file. */
1614 read_autofdo_file (void)
1616 if (auto_profile_file == NULL)
1617 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1619 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1620 1, sizeof (struct gcov_ctr_summary));
1621 autofdo::afdo_profile_info->runs = 1;
1622 autofdo::afdo_profile_info->sum_max = 0;
1623 autofdo::afdo_profile_info->sum_all = 0;
1625 /* Read the profile from the profile file. */
1626 autofdo::read_profile ();
1629 /* Free the resources. */
1632 end_auto_profile (void)
1634 delete autofdo::afdo_source_profile;
1635 delete autofdo::afdo_string_table;
1636 profile_info = NULL;
1639 /* Returns TRUE if EDGE is hot enough to be inlined early. */
1642 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1645 = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1650 const struct gcov_ctr_summary *saved_profile_info = profile_info;
1651 /* At early inline stage, profile_info is not set yet. We need to
1652 temporarily set it to afdo_profile_info to calculate hotness. */
1653 profile_info = autofdo::afdo_profile_info;
1654 is_hot = maybe_hot_count_p (NULL, count);
1655 profile_info = saved_profile_info;
1665 const pass_data pass_data_ipa_auto_profile =
1669 OPTGROUP_NONE, /* optinfo_flags */
1670 true, /* has_gate */
1671 true, /* has_execute */
1672 TV_IPA_AUTOFDO, /* tv_id */
1673 0, /* properties_required */
1674 0, /* properties_provided */
1675 0, /* properties_destroyed */
1676 0, /* todo_flags_start */
1677 0, /* todo_flags_finish */
1680 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1683 pass_ipa_auto_profile (gcc::context *ctxt)
1684 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1688 /* opt_pass methods: */
1692 return flag_auto_profile;
1694 virtual unsigned int
1695 execute (function *)
1697 return autofdo::auto_profile ();
1699 }; // class pass_ipa_auto_profile
1703 simple_ipa_opt_pass *
1704 make_pass_ipa_auto_profile (gcc::context *ctxt)
1706 return new pass_ipa_auto_profile (ctxt);