From f20a6c57f0f26d9c60d6d6182f1e2181f727c834 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Mon, 26 Oct 2020 18:19:48 +0100 Subject: [PATCH] Implement three-level optimize_for_size predicates This patch implements three-state optimize_for_size predicates, so with -Os and with profile feedback for never executed code it returns OPTIMIZE_SIZE_MAX, while in cases where we decide to optimize for size based on branch prediction logic it returns OPTIMIZE_SIZE_BALANCED. The idea is that for places where we guess that code is unlikely we do not want to do extreme optimizations for size that lead to manyfold slowdowns (using idiv rather than a few shifts or using rep based inlined stringops). I will update RTL handling code to also support this with BB granularity (which we don't currently). LLVM has -Os and -Oz levels where -Oz is our -Os and LLVM's -Os would correspond to OPTIMIZE_SIZE_BALANCED. I wonder if we want to export this to command line somehow? For me it would be definitely useful to test things, but I am not sure how much a "weaker" -Os is desired in practice. gcc/ChangeLog: * cgraph.h (cgraph_node::optimize_for_size_p): Return optimize_size_level. (cgraph_node::optimize_for_size_p): Update. * coretypes.h (enum optimize_size_level): New enum. * predict.c (unlikely_executed_edge_p): Microoptimize. (optimize_function_for_size_p): Return optimize_size_level. (optimize_bb_for_size_p): Likewise. (optimize_edge_for_size_p): Likewise. (optimize_insn_for_size_p): Likewise. (optimize_loop_nest_for_size_p): Likewise. * predict.h (optimize_function_for_size_p): Update declaration. (optimize_bb_for_size_p): Update declaration. (optimize_edge_for_size_p): Update declaration. (optimize_insn_for_size_p): Update declaration. (optimize_loop_for_size_p): Update declaration. (optimize_loop_nest_for_size_p): Update declaration. 
--- gcc/cgraph.h | 12 ++++++----- gcc/coretypes.h | 12 +++++++++++ gcc/predict.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------------- gcc/predict.h | 12 +++++------ 4 files changed, 76 insertions(+), 25 deletions(-) diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 65e4646..fb3ad95 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1279,7 +1279,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node bool check_calls_comdat_local_p (); /* Return true if function should be optimized for size. */ - bool optimize_for_size_p (void); + enum optimize_size_level optimize_for_size_p (void); /* Dump the callgraph to file F. */ static void dump_cgraph (FILE *f); @@ -3315,15 +3315,17 @@ cgraph_node::mark_force_output (void) /* Return true if function should be optimized for size. */ -inline bool +inline enum optimize_size_level cgraph_node::optimize_for_size_p (void) { if (opt_for_fn (decl, optimize_size)) - return true; + return OPTIMIZE_SIZE_MAX; + if (count == profile_count::zero ()) + return OPTIMIZE_SIZE_MAX; if (frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) - return true; + return OPTIMIZE_SIZE_BALANCED; else - return false; + return OPTIMIZE_SIZE_NO; } /* Return symtab_node for NODE or create one if it is not present diff --git a/gcc/coretypes.h b/gcc/coretypes.h index 81a1b59..da178b6 100644 --- a/gcc/coretypes.h +++ b/gcc/coretypes.h @@ -444,6 +444,18 @@ enum excess_precision_type EXCESS_PRECISION_TYPE_FAST }; +/* Level of size optimization. */ + +enum optimize_size_level +{ + /* Do not optimize for size. */ + OPTIMIZE_SIZE_NO, + /* Optimize for size but not at extreme performance costs. */ + OPTIMIZE_SIZE_BALANCED, + /* Optimize for size as much as possible. */ + OPTIMIZE_SIZE_MAX +}; + /* Support for user-provided GGC and PCH markers. The first parameter is a pointer to a pointer, the second a cookie. 
*/ typedef void (*gt_pointer_operator) (void *, void *); diff --git a/gcc/predict.c b/gcc/predict.c index 5983889..361c401 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -243,7 +243,7 @@ probably_never_executed_bb_p (struct function *fun, const_basic_block bb) static bool unlikely_executed_edge_p (edge e) { - return (e->count () == profile_count::zero () + return (e->src->count == profile_count::zero () || e->probability == profile_probability::never ()) || (e->flags & (EDGE_EH | EDGE_FAKE)); } @@ -260,13 +260,15 @@ probably_never_executed_edge_p (struct function *fun, edge e) /* Return true if function FUN should always be optimized for size. */ -bool +optimize_size_level optimize_function_for_size_p (struct function *fun) { if (!fun || !fun->decl) - return optimize_size; + return optimize_size ? OPTIMIZE_SIZE_MAX : OPTIMIZE_SIZE_NO; cgraph_node *n = cgraph_node::get (fun->decl); - return n && n->optimize_for_size_p (); + if (n) + return n->optimize_for_size_p (); + return OPTIMIZE_SIZE_NO; } /* Return true if function FUN should always be optimized for speed. */ @@ -289,11 +291,16 @@ function_optimization_type (struct function *fun) /* Return TRUE if basic block BB should be optimized for size. */ -bool +optimize_size_level optimize_bb_for_size_p (const_basic_block bb) { - return (optimize_function_for_size_p (cfun) - || (bb && !maybe_hot_bb_p (cfun, bb))); + enum optimize_size_level ret = optimize_function_for_size_p (cfun); + + if (bb && ret < OPTIMIZE_SIZE_MAX && bb->count == profile_count::zero ()) + ret = OPTIMIZE_SIZE_MAX; + if (bb && ret < OPTIMIZE_SIZE_BALANCED && !maybe_hot_bb_p (cfun, bb)) + ret = OPTIMIZE_SIZE_BALANCED; + return ret; } /* Return TRUE if basic block BB should be optimized for speed. */ @@ -316,10 +323,16 @@ bb_optimization_type (const_basic_block bb) /* Return TRUE if edge E should be optimized for size. 
*/ -bool +optimize_size_level optimize_edge_for_size_p (edge e) { - return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e); + enum optimize_size_level ret = optimize_function_for_size_p (cfun); + + if (ret < OPTIMIZE_SIZE_MAX && unlikely_executed_edge_p (e)) + ret = OPTIMIZE_SIZE_MAX; + if (ret < OPTIMIZE_SIZE_BALANCED && !maybe_hot_edge_p (e)) + ret = OPTIMIZE_SIZE_BALANCED; + return ret; } /* Return TRUE if edge E should be optimized for speed. */ @@ -332,10 +345,13 @@ optimize_edge_for_speed_p (edge e) /* Return TRUE if the current function is optimized for size. */ -bool +optimize_size_level optimize_insn_for_size_p (void) { - return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p; + enum optimize_size_level ret = optimize_function_for_size_p (cfun); + if (ret < OPTIMIZE_SIZE_BALANCED && !crtl->maybe_hot_insn_p) + ret = OPTIMIZE_SIZE_BALANCED; + return ret; } /* Return TRUE if the current function is optimized for speed. */ @@ -348,7 +364,7 @@ optimize_insn_for_speed_p (void) /* Return TRUE if LOOP should be optimized for size. */ -bool +optimize_size_level optimize_loop_for_size_p (class loop *loop) { return optimize_bb_for_size_p (loop->header); @@ -392,10 +408,31 @@ optimize_loop_nest_for_speed_p (class loop *loop) /* Return TRUE if nest rooted at LOOP should be optimized for size. 
*/ -bool +optimize_size_level optimize_loop_nest_for_size_p (class loop *loop) { - return !optimize_loop_nest_for_speed_p (loop); + enum optimize_size_level ret = optimize_loop_for_size_p (loop); + class loop *l = loop; + + l = loop->inner; + while (l && l != loop) + { + if (ret == OPTIMIZE_SIZE_NO) + break; + ret = MIN (optimize_loop_for_size_p (l), ret); + if (l->inner) + l = l->inner; + else if (l->next) + l = l->next; + else + { + while (l != loop && !l->next) + l = loop_outer (l); + if (l != loop) + l = l->next; + } + } + return ret; } /* Return true if edge E is likely to be well predictable by branch diff --git a/gcc/predict.h b/gcc/predict.h index 274597e..b64d209 100644 --- a/gcc/predict.h +++ b/gcc/predict.h @@ -58,20 +58,20 @@ extern bool maybe_hot_bb_p (struct function *, const_basic_block); extern bool maybe_hot_edge_p (edge); extern bool probably_never_executed_bb_p (struct function *, const_basic_block); extern bool probably_never_executed_edge_p (struct function *, edge); -extern bool optimize_function_for_size_p (struct function *); +extern enum optimize_size_level optimize_function_for_size_p (struct function *); extern bool optimize_function_for_speed_p (struct function *); extern optimization_type function_optimization_type (struct function *); -extern bool optimize_bb_for_size_p (const_basic_block); +extern enum optimize_size_level optimize_bb_for_size_p (const_basic_block); extern bool optimize_bb_for_speed_p (const_basic_block); extern optimization_type bb_optimization_type (const_basic_block); -extern bool optimize_edge_for_size_p (edge); +extern enum optimize_size_level optimize_edge_for_size_p (edge); extern bool optimize_edge_for_speed_p (edge); -extern bool optimize_insn_for_size_p (void); +extern enum optimize_size_level optimize_insn_for_size_p (void); extern bool optimize_insn_for_speed_p (void); -extern bool optimize_loop_for_size_p (class loop *); +extern enum optimize_size_level optimize_loop_for_size_p (class loop *); extern bool 
optimize_loop_for_speed_p (class loop *); extern bool optimize_loop_nest_for_speed_p (class loop *); -extern bool optimize_loop_nest_for_size_p (class loop *); +extern enum optimize_size_level optimize_loop_nest_for_size_p (class loop *); extern bool predictable_edge_p (edge); extern void rtl_profile_for_bb (basic_block); extern void rtl_profile_for_edge (edge); -- 2.7.4