* optabs.c (expand_abs_nojump): Update BRANCH_COST call.
* fold-const.c (LOGICAL_OP_NON_SHORT_CIRCUIT, fold_truthop): Likewise.
* dojump.c (do_jump): Likewise.
* ifcvt.c (MAX_CONDITIONAL_EXECUTE): Likewise.
(struct noce_if_info): Add branch_cost field.
(noce_try_store_flag_constants, noce_try_addcc, noce_try_store_flag_mask,
noce_try_cmove_arith, noce_find_if_block, find_if_case_1,
find_if_case_2): Use computed branch cost.
* expr.h (BRANCH_COST): Update default.
* predict.c (predictable_edge_p): New function.
* expmed.c (expand_smod_pow2, expand_sdiv_pow2, emit_store_flag):
Update BRANCH_COST call.
* basic-block.h (predictable_edge_p): Declare.
* config/alpha/alpha.h (BRANCH_COST): Update.
* config/frv/frv.h (BRANCH_COST): Update.
* config/s390/s390.h (BRANCH_COST): Update.
* config/spu/spu.h (BRANCH_COST): Update.
* config/sparc/sparc.h (BRANCH_COST): Update.
* config/m32r/m32r.h (BRANCH_COST): Update.
* config/i386/i386.h (BRANCH_COST): Update.
* config/i386/i386.c (ix86_expand_int_movcc): Update use of BRANCH_COST.
* config/sh/sh.h (BRANCH_COST): Update.
* config/pdp11/pdp11.h (BRANCH_COST): Update.
* config/avr/avr.h (BRANCH_COST): Update.
* config/crx/crx.h (BRANCH_COST): Update.
* config/xtensa/xtensa.h (BRANCH_COST): Update.
* config/stormy16/stormy16.h (BRANCH_COST): Update.
* config/m68hc11/m68hc11.h (BRANCH_COST): Update.
* config/iq2000/iq2000.h (BRANCH_COST): Update.
* config/ia64/ia64.h (BRANCH_COST): Update.
* config/rs6000/rs6000.h (BRANCH_COST): Update.
* config/arc/arc.h (BRANCH_COST): Update.
* config/score/score.h (BRANCH_COST): Update.
* config/arm/arm.h (BRANCH_COST): Update.
* config/pa/pa.h (BRANCH_COST): Update.
* config/mips/mips.h (BRANCH_COST): Update.
* config/vax/vax.h (BRANCH_COST): Update.
* config/h8300/h8300.h (BRANCH_COST): Update.
* params.def (PARAM_PREDICTABLE_BRANCH_OUTCOME): New.
* doc/invoke.texi (predictable-branch-outcome): Document.
* doc/tm.texi (BRANCH_COST): Update.
From-SVN: r139804
+2008-08-30  Jan Hubicka  <jh@suse.cz>
+
+ * optabs.c (expand_abs_nojump): Update BRANCH_COST call.
+ * fold-const.c (LOGICAL_OP_NON_SHORT_CIRCUIT, fold_truthop): Likewise.
+ * dojump.c (do_jump): Likewise.
+ * ifcvt.c (MAX_CONDITIONAL_EXECUTE): Likewise.
+ (struct noce_if_info): Add branch_cost field.
+ (noce_try_store_flag_constants, noce_try_addcc, noce_try_store_flag_mask,
+ noce_try_cmove_arith, noce_find_if_block, find_if_case_1,
+ find_if_case_2): Use computed branch cost.
+ * expr.h (BRANCH_COST): Update default.
+ * predict.c (predictable_edge_p): New function.
+ * expmed.c (expand_smod_pow2, expand_sdiv_pow2, emit_store_flag):
+ Update BRANCH_COST call.
+ * basic-block.h (predictable_edge_p): Declare.
+ * config/alpha/alpha.h (BRANCH_COST): Update.
+ * config/frv/frv.h (BRANCH_COST): Update.
+ * config/s390/s390.h (BRANCH_COST): Update.
+ * config/spu/spu.h (BRANCH_COST): Update.
+ * config/sparc/sparc.h (BRANCH_COST): Update.
+ * config/m32r/m32r.h (BRANCH_COST): Update.
+ * config/i386/i386.h (BRANCH_COST): Update.
+ * config/i386/i386.c (ix86_expand_int_movcc): Update use of BRANCH_COST.
+ * config/sh/sh.h (BRANCH_COST): Update.
+ * config/pdp11/pdp11.h (BRANCH_COST): Update.
+ * config/avr/avr.h (BRANCH_COST): Update.
+ * config/crx/crx.h (BRANCH_COST): Update.
+ * config/xtensa/xtensa.h (BRANCH_COST): Update.
+ * config/stormy16/stormy16.h (BRANCH_COST): Update.
+ * config/m68hc11/m68hc11.h (BRANCH_COST): Update.
+ * config/iq2000/iq2000.h (BRANCH_COST): Update.
+ * config/ia64/ia64.h (BRANCH_COST): Update.
+ * config/rs6000/rs6000.h (BRANCH_COST): Update.
+ * config/arc/arc.h (BRANCH_COST): Update.
+ * config/score/score.h (BRANCH_COST): Update.
+ * config/arm/arm.h (BRANCH_COST): Update.
+ * config/pa/pa.h (BRANCH_COST): Update.
+ * config/mips/mips.h (BRANCH_COST): Update.
+ * config/vax/vax.h (BRANCH_COST): Update.
+ * config/h8300/h8300.h (BRANCH_COST): Update.
+ * params.def (PARAM_PREDICTABLE_BRANCH_OUTCOME): New.
+ * doc/invoke.texi (predictable-branch-outcome): Document.
+ * doc/tm.texi (BRANCH_COST): Update.
+
2008-08-30 Samuel Tardieu <sam@rfc1149.net>
PR target/37283
extern void remove_predictions_associated_with_edge (edge);
extern bool edge_probability_reliable_p (const_edge);
extern bool br_prob_note_reliable_p (const_rtx);
+extern bool predictable_edge_p (edge);
/* In cfg.c */
extern void dump_regset (regset, FILE *);
#define MEMORY_MOVE_COST(MODE,CLASS,IN) (2*alpha_memory_latency)
/* Provide the cost of a branch. Exact meaning under development. */
-#define BRANCH_COST 5
+#define BRANCH_COST(speed_p, predictable_p) 5
\f
/* Stack layout; function entry, exit and calling. */
/* The cost of a branch insn. */
/* ??? What's the right value here? Branches are certainly more
expensive than reg->reg moves. */
-#define BRANCH_COST 2
+#define BRANCH_COST(speed_p, predictable_p) 2
/* Nonzero if access to memory by bytes is slow and undesirable.
For RISC chips, it means that access to memory by bytes is no
/* Try to generate sequences that don't involve branches, we can then use
conditional instructions */
-#define BRANCH_COST \
+#define BRANCH_COST(speed_p, predictable_p) \
(TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
\f
/* Position Independent Code. */
(MODE)==SImode ? 8 : \
(MODE)==SFmode ? 8 : 16)
-#define BRANCH_COST 0
+#define BRANCH_COST(speed_p, predictable_p) 0
#define SLOW_BYTE_ACCESS 0
/* Moving to processor register flushes pipeline - thus asymmetric */
#define REGISTER_MOVE_COST(MODE, FROM, TO) ((TO != GENERAL_REGS) ? 8 : 2)
/* Assume best case (branch predicted) */
-#define BRANCH_COST 2
+#define BRANCH_COST(speed_p, predictable_p) 2
#define SLOW_BYTE_ACCESS 1
/* A C expression for the cost of a branch instruction. A value of 1 is the
default; other values are interpreted relative to that. */
-#define BRANCH_COST frv_branch_cost_int
+#define BRANCH_COST(speed_p, predictable_p) frv_branch_cost_int
/* Define this macro as a C expression which is nonzero if accessing less than
a word of memory (i.e. a `char' or a `short') is no faster than accessing a
#define DELAY_SLOT_LENGTH(JUMP) \
(NEXT_INSN (PREV_INSN (JUMP)) == JUMP ? 0 : 2)
-#define BRANCH_COST 0
+#define BRANCH_COST(speed_p, predictable_p) 0
/* Tell final.c how to eliminate redundant test instructions. */
*/
if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
- && BRANCH_COST >= 2)
+ && BRANCH_COST (optimize_insn_for_speed_p (),
+ false) >= 2)
{
if (cf == 0)
{
optab op;
rtx var, orig_out, out, tmp;
- if (BRANCH_COST <= 2)
+ if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
return 0; /* FAIL */
/* If one of the two operands is an interesting constant, load a
/* A C expression for the cost of a branch instruction. A value of 1
is the default; other values are interpreted relative to that. */
-#define BRANCH_COST ix86_branch_cost
+#define BRANCH_COST(speed_p, predictable_p) \
+ (!(speed_p) ? 2 : (predictable_p) ? 0 : ix86_branch_cost)
/* Define this macro as a C expression which is nonzero if accessing
less than a word of memory (i.e. a `char' or a `short') is no
many additional insn groups we run into, vs how good the dynamic
branch predictor is. */
-#define BRANCH_COST 6
+#define BRANCH_COST(speed_p, predictable_p) 6
/* Define this macro as a C expression which is nonzero if accessing less than
a word of memory (i.e. a `char' or a `short') is no faster than accessing a
#define MEMORY_MOVE_COST(MODE,CLASS,TO_P) \
(TO_P ? 2 : 16)
-#define BRANCH_COST 2
+#define BRANCH_COST(speed_p, predictable_p) 2
#define SLOW_BYTE_ACCESS 1
/* A value of 2 here causes GCC to avoid using branches in comparisons like
while (a < N && a). Branches aren't that expensive on the M32R so
we define this as 1. Defining it as 2 had a heavy hit in fp-bit.c. */
-#define BRANCH_COST ((TARGET_BRANCH_COST) ? 2 : 1)
+#define BRANCH_COST(speed_p, predictable_p) ((TARGET_BRANCH_COST) ? 2 : 1)
/* Nonzero if access to memory by bytes is slow and undesirable.
For RISC chips, it means that access to memory by bytes is no
Pretend branches are cheap because GCC generates sub-optimal code
for the default value. */
-#define BRANCH_COST 0
+#define BRANCH_COST(speed_p, predictable_p) 0
/* Nonzero if access to memory by bytes is slow and undesirable. */
#define SLOW_BYTE_ACCESS 0
/* A C expression for the cost of a branch instruction. A value of
1 is the default; other values are interpreted relative to that. */
-#define BRANCH_COST mips_branch_cost
+#define BRANCH_COST(speed_p, predictable_p) mips_branch_cost
#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
/* If defined, modifies the length assigned to instruction INSN as a
: 2)
/* Adjust the cost of branches. */
-#define BRANCH_COST (pa_cpu == PROCESSOR_8000 ? 2 : 1)
+#define BRANCH_COST(speed_p, predictable_p) (pa_cpu == PROCESSOR_8000 ? 2 : 1)
/* Handling the special cases is going to get too complicated for a macro,
just call `pa_adjust_insn_length' to do the real work. */
/* there is no point in avoiding branches on a pdp,
since branches are really cheap - I just want to find out
how much difference the BRANCH_COST macro makes in code */
-#define BRANCH_COST (TARGET_BRANCH_CHEAP ? 0 : 1)
+#define BRANCH_COST(speed_p, predictable_p) (TARGET_BRANCH_CHEAP ? 0 : 1)
#define COMPARE_FLAG_MODE HImode
Set this to 3 on the RS/6000 since that is roughly the average cost of an
unscheduled conditional branch. */
-#define BRANCH_COST 3
+#define BRANCH_COST(speed_p, predictable_p) 3
/* Override BRANCH_COST heuristic which empirically produces worse
performance for removing short circuiting from the logical ops. */
/* A C expression for the cost of a branch instruction. A value of 1
is the default; other values are interpreted relative to that. */
-#define BRANCH_COST 1
+#define BRANCH_COST(speed_p, predictable_p) 1
/* Nonzero if access to memory by bytes is slow and undesirable. */
#define SLOW_BYTE_ACCESS 1
(4 + memory_move_secondary_cost ((MODE), (CLASS), (TO_P)))
/* Try to generate sequences that don't involve branches. */
-#define BRANCH_COST 2
+#define BRANCH_COST(speed_p, predictable_p) 2
/* Nonzero if access to memory by bytes is slow and undesirable. */
#define SLOW_BYTE_ACCESS 1
The SH1 does not have delay slots, hence we get a pipeline stall
at every branch. The SH4 is superscalar, so the single delay slot
is not sufficient to keep both pipelines filled. */
-#define BRANCH_COST (TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1)
+#define BRANCH_COST(speed_p, predictable_p) \
+ (TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1)
\f
/* Assembler output control. */
On Niagara-2, a not-taken branch costs 1 cycle whereas a taken
branch costs 6 cycles. */
-#define BRANCH_COST \
+#define BRANCH_COST(speed_p, predictable_p) \
((sparc_cpu == PROCESSOR_V9 \
|| sparc_cpu == PROCESSOR_ULTRASPARC) \
? 7 \
\f
/* Costs */
-#define BRANCH_COST spu_branch_cost
+#define BRANCH_COST(speed_p, predictable_p) spu_branch_cost
#define SLOW_BYTE_ACCESS 0
#define MEMORY_MOVE_COST(M,C,I) (5 + memory_move_secondary_cost (M, C, I))
-#define BRANCH_COST 5
+#define BRANCH_COST(speed_p, predictable_p) 5
#define SLOW_BYTE_ACCESS 0
Branches are extremely cheap on the VAX while the shift insns often
used to replace branches can be expensive. */
-#define BRANCH_COST 0
+#define BRANCH_COST(speed_p, predictable_p) 0
\f
/* Tell final.c how to eliminate redundant test instructions. */
#define MEMORY_MOVE_COST(MODE, CLASS, IN) 4
-#define BRANCH_COST 3
+#define BRANCH_COST(speed_p, predictable_p) 3
/* How to refer to registers in assembler output.
This sequence is indexed by compiler's hard-register-number (see above). */
parameter, then structure reorganization is not applied to this structure.
The default is 10.
+@item predictable-branch-outcome
+When a branch is predicted to be taken with a probability lower than this
+threshold (in percent), it is considered well predictable. The default is 2.
+
@item max-crossjump-edges
The maximum number of incoming edges to consider for crossjumping.
The algorithm used by @option{-fcrossjumping} is @math{O(N^2)} in
are the same as to this macro.
@end defmac
-@defmac BRANCH_COST
-A C expression for the cost of a branch instruction. A value of 1 is
-the default; other values are interpreted relative to that.
+@defmac BRANCH_COST (@var{speed_p}, @var{predictable_p})
+A C expression for the cost of a branch instruction. A value of 1 is the
+default; other values are interpreted relative to that. The parameter
+@var{speed_p} is true when the branch in question should be optimized for
+speed. When it is false, @code{BRANCH_COST} should return a value optimal
+for code size rather than for performance. @var{predictable_p} is true for
+well-predictable branches. On many architectures the @code{BRANCH_COST}
+can be reduced then.
@end defmac
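As an illustrative sketch only (not part of the patch, and not taken from
any particular port), a target whose correctly predicted branches are cheap
might define the macro along these lines; the cost values are made up:

/* Hypothetical example: favor branch-free sequences only when optimizing
   for speed and the branch outcome is hard to predict.  */
#define BRANCH_COST(speed_p, predictable_p) \
  (!(speed_p) ? 1 : (predictable_p) ? 1 : 4)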
Here are additional macros which do not specify precise relative costs,
/* High branch cost, expand as the bitwise AND of the conditions.
Do the same if the RHS has side effects, because we're effectively
turning a TRUTH_AND_EXPR into a TRUTH_ANDIF_EXPR. */
- if (BRANCH_COST >= 4 || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
+ if (BRANCH_COST (optimize_insn_for_speed_p (),
+ false) >= 4
+ || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
goto normal;
case TRUTH_ANDIF_EXPR:
/* High branch cost, expand as the bitwise OR of the conditions.
Do the same if the RHS has side effects, because we're effectively
turning a TRUTH_OR_EXPR into a TRUTH_ORIF_EXPR. */
- if (BRANCH_COST >= 4 || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
+ if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 4
+ || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
goto normal;
case TRUTH_ORIF_EXPR:
result = gen_reg_rtx (mode);
/* Avoid conditional branches when they're expensive. */
- if (BRANCH_COST >= 2
+ if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
&& optimize_insn_for_speed_p ())
{
rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
logd = floor_log2 (d);
shift = build_int_cst (NULL_TREE, logd);
- if (d == 2 && BRANCH_COST >= 1)
+ if (d == 2
+ && BRANCH_COST (optimize_insn_for_speed_p (),
+ false) >= 1)
{
temp = gen_reg_rtx (mode);
temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
}
#ifdef HAVE_conditional_move
- if (BRANCH_COST >= 2)
+ if (BRANCH_COST (optimize_insn_for_speed_p (), false)
+ >= 2)
{
rtx temp2;
}
#endif
- if (BRANCH_COST >= 2)
+ if (BRANCH_COST (optimize_insn_for_speed_p (),
+ false) >= 2)
{
int ushift = GET_MODE_BITSIZE (mode) - logd;
comparison with zero. Don't do any of these cases if branches are
very cheap. */
- if (BRANCH_COST > 0
+ if (BRANCH_COST (optimize_insn_for_speed_p (),
+ false) > 0
&& GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
&& op1 != const0_rtx)
{
do LE and GT if branches are expensive since they are expensive on
2-operand machines. */
- if (BRANCH_COST == 0
+ if (BRANCH_COST (optimize_insn_for_speed_p (),
+ false) == 0
|| GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
|| (code != EQ && code != NE
- && (BRANCH_COST <= 1 || (code != LE && code != GT))))
+ && (BRANCH_COST (optimize_insn_for_speed_p (),
+ false) <= 1 || (code != LE && code != GT))))
return 0;
/* See what we need to return. We can only return a 1, -1, or the
that "or", which is an extra insn, so we only handle EQ if branches
are expensive. */
- if (tem == 0 && (code == NE || BRANCH_COST > 1))
+ if (tem == 0
+ && (code == NE
+ || BRANCH_COST (optimize_insn_for_speed_p (),
+ false) > 1))
{
if (rtx_equal_p (subtarget, op0))
subtarget = 0;
/* The default branch cost is 1. */
#ifndef BRANCH_COST
-#define BRANCH_COST 1
+#define BRANCH_COST(speed_p, predictable_p) 1
#endif
/* This is the 4th arg to `expand_expr'.
\f
#ifndef LOGICAL_OP_NON_SHORT_CIRCUIT
-#define LOGICAL_OP_NON_SHORT_CIRCUIT (BRANCH_COST >= 2)
+#define LOGICAL_OP_NON_SHORT_CIRCUIT \
+ (BRANCH_COST (!cfun || optimize_function_for_speed_p (cfun), \
+ false) >= 2)
#endif
/* EXP is some logical combination of boolean tests. See if we can
that can be merged. Avoid doing this if the RHS is a floating-point
comparison since those can trap. */
- if (BRANCH_COST >= 2
+ if (BRANCH_COST (!cfun || optimize_function_for_speed_p (cfun),
+ false) >= 2
&& ! FLOAT_TYPE_P (TREE_TYPE (rl_arg))
&& simple_operand_p (rl_arg)
&& simple_operand_p (rr_arg))
#endif
#ifndef MAX_CONDITIONAL_EXECUTE
-#define MAX_CONDITIONAL_EXECUTE (BRANCH_COST + 1)
+#define MAX_CONDITIONAL_EXECUTE \
+ (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
+ + 1)
#endif
#define IFCVT_MULTIPLE_DUMPS 1
from TEST_BB. For the noce transformations, we allow the symmetric
form as well. */
bool then_else_reversed;
+
+ /* Estimated cost of the particular branch instruction. */
+ int branch_cost;
};
static rtx noce_emit_store_flag (struct noce_if_info *, rtx, int, int);
normalize = 0;
else if (ifalse == 0 && exact_log2 (itrue) >= 0
&& (STORE_FLAG_VALUE == 1
- || BRANCH_COST >= 2))
+ || if_info->branch_cost >= 2))
normalize = 1;
else if (itrue == 0 && exact_log2 (ifalse) >= 0 && can_reverse
- && (STORE_FLAG_VALUE == 1 || BRANCH_COST >= 2))
+ && (STORE_FLAG_VALUE == 1 || if_info->branch_cost >= 2))
normalize = 1, reversep = 1;
else if (itrue == -1
&& (STORE_FLAG_VALUE == -1
- || BRANCH_COST >= 2))
+ || if_info->branch_cost >= 2))
normalize = -1;
else if (ifalse == -1 && can_reverse
- && (STORE_FLAG_VALUE == -1 || BRANCH_COST >= 2))
+ && (STORE_FLAG_VALUE == -1 || if_info->branch_cost >= 2))
normalize = -1, reversep = 1;
- else if ((BRANCH_COST >= 2 && STORE_FLAG_VALUE == -1)
- || BRANCH_COST >= 3)
+ else if ((if_info->branch_cost >= 2 && STORE_FLAG_VALUE == -1)
+ || if_info->branch_cost >= 3)
normalize = -1;
else
return FALSE;
/* If that fails, construct conditional increment or decrement using
setcc. */
- if (BRANCH_COST >= 2
+ if (if_info->branch_cost >= 2
&& (XEXP (if_info->a, 1) == const1_rtx
|| XEXP (if_info->a, 1) == constm1_rtx))
{
int reversep;
reversep = 0;
- if ((BRANCH_COST >= 2
+ if ((if_info->branch_cost >= 2
|| STORE_FLAG_VALUE == -1)
&& ((if_info->a == const0_rtx
&& rtx_equal_p (if_info->b, if_info->x))
/* ??? FIXME: Magic number 5. */
if (cse_not_expected
&& MEM_P (a) && MEM_P (b)
- && BRANCH_COST >= 5)
+ && if_info->branch_cost >= 5)
{
a = XEXP (a, 0);
b = XEXP (b, 0);
if (insn_a)
{
insn_cost = insn_rtx_cost (PATTERN (insn_a));
- if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (BRANCH_COST))
+ if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
return FALSE;
}
else
if (insn_b)
{
insn_cost += insn_rtx_cost (PATTERN (insn_b));
- if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (BRANCH_COST))
+ if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
return FALSE;
}
if_info.cond_earliest = cond_earliest;
if_info.jump = jump;
if_info.then_else_reversed = then_else_reversed;
+ if_info.branch_cost = BRANCH_COST (optimize_bb_for_speed_p (test_bb),
+ predictable_edge_p (then_edge));
/* Do the real work. */
test_bb->index, then_bb->index);
/* THEN is small. */
- if (! cheap_bb_rtx_cost_p (then_bb, COSTS_N_INSNS (BRANCH_COST)))
+ if (! cheap_bb_rtx_cost_p (then_bb,
+ COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (then_edge->src),
+ predictable_edge_p (then_edge)))))
return FALSE;
/* Registers set are dead, or are predicable. */
test_bb->index, else_bb->index);
/* ELSE is small. */
- if (! cheap_bb_rtx_cost_p (else_bb, COSTS_N_INSNS (BRANCH_COST)))
+ if (! cheap_bb_rtx_cost_p (else_bb,
+ COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (else_edge->src),
+ predictable_edge_p (else_edge)))))
return FALSE;
/* Registers set are dead, or are predicable. */
value of X as (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)),
where W is the width of MODE. */
- if (GET_MODE_CLASS (mode) == MODE_INT && BRANCH_COST >= 2)
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && BRANCH_COST (optimize_insn_for_speed_p (),
+ false) >= 2)
{
rtx extended = expand_shift (RSHIFT_EXPR, mode, op0,
size_int (GET_MODE_BITSIZE (mode) - 1),
"The threshold ratio between current and hottest structure counts",
10, 0, 100)
+/* When a branch is predicted to be taken with probability lower than this
+ threshold (in percent), it is considered well predictable. */
+DEFPARAM (PARAM_PREDICTABLE_BRANCH_OUTCOME,
+ "predictable-branch-outcome",
+ "Maximal esitmated outcome of branch considered predictable",
+ 2, 0, 50)
+
/* The single function inlining limit. This is the maximum size
of a function counted in internal gcc instructions (not in
real machine instructions) that is eligible for inlining
return !optimize_loop_nest_for_speed_p (loop);
}
+/* Return true when edge E is likely to be well predictable by the
+ branch predictor. */
+
+bool
+predictable_edge_p (edge e)
+{
+ if (profile_status == PROFILE_ABSENT)
+ return false;
+ if ((e->probability
+ <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
+ || (REG_BR_PROB_BASE - e->probability
+ <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
+ return true;
+ return false;
+}
+
+
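A minimal standalone sketch of the same threshold arithmetic as
predictable_edge_p above, assuming REG_BR_PROB_BASE is 10000 and the default
predictable-branch-outcome value of 2 percent; the EXAMPLE_* names are
hypothetical and used only for illustration:

#define EXAMPLE_PROB_BASE 10000          /* stands in for REG_BR_PROB_BASE */
#define EXAMPLE_THRESHOLD_PERCENT 2      /* default predictable-branch-outcome */

/* An edge taken almost never (probability <= 200 out of 10000) or almost
   always (probability >= 9800 out of 10000) counts as well predictable.  */
static int
example_predictable_p (int probability)
{
  int cutoff = EXAMPLE_THRESHOLD_PERCENT * EXAMPLE_PROB_BASE / 100;
  return (probability <= cutoff
          || EXAMPLE_PROB_BASE - probability <= cutoff);
}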
/* Set RTL expansion for BB profile. */
void