gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "alias.h"
  35 #include "symtab.h"
  36 #include "tree.h"
  37 #include "fold-const.h"
  38 #include "calls.h"
  39 #include "rtl.h"
  40 #include "tm_p.h"
  41 #include "hard-reg-set.h"
  42 #include "predict.h"
  43 #include "function.h"
  44 #include "dominance.h"
  45 #include "cfg.h"
  46 #include "cfganal.h"
  47 #include "basic-block.h"
  48 #include "insn-config.h"
  49 #include "regs.h"
  50 #include "flags.h"
  51 #include "profile.h"
  52 #include "except.h"
  53 #include "diagnostic-core.h"
  54 #include "recog.h"
  55 #include "expmed.h"
  56 #include "dojump.h"
  57 #include "explow.h"
  58 #include "emit-rtl.h"
  59 #include "varasm.h"
  60 #include "stmt.h"
  61 #include "expr.h"
  62 #include "coverage.h"
  63 #include "sreal.h"
  64 #include "params.h"
  65 #include "target.h"
  66 #include "cfgloop.h"
  67 #include "tree-ssa-alias.h"
  68 #include "internal-fn.h"
  69 #include "gimple-expr.h"
  70 #include "gimple.h"
  71 #include "gimple-iterator.h"
  72 #include "gimple-ssa.h"
  73 #include "plugin-api.h"
  74 #include "ipa-ref.h"
  75 #include "cgraph.h"
  76 #include "tree-cfg.h"
  77 #include "tree-phinodes.h"
  78 #include "ssa-iterators.h"
  79 #include "tree-ssa-loop-niter.h"
  80 #include "tree-ssa-loop.h"
  81 #include "tree-pass.h"
  82 #include "tree-scalar-evolution.h"
  83
  84 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  85                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  86 static sreal real_almost_one, real_br_prob_base,
  87              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  88
  89 static void combine_predictions_for_insn (rtx_insn *, basic_block);
  90 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  91 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  92 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  93 static bool can_predict_insn_p (const rtx_insn *);
  94
  95 /* Information we hold about each branch predictor.
  96    Filled using information from predict.def.  */
  97
  98 struct predictor_info
  99 {
 100   const char *const name;       /* Name used in the debugging dumps.  */
 101   const int hitrate;            /* Expected hitrate used by
 102                                    predict_insn_def call.  */
 103   const int flags;
 104 };
 105
 106 /* Use given predictor without Dempster-Shaffer theory if it matches
 107    using first_match heuristics.  */
 108 #define PRED_FLAG_FIRST_MATCH 1
 109
 110 /* Recompute hitrate in percent to our representation.  */
 111
 112 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
 113
 114 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
 115 static const struct predictor_info predictor_info[]= {
 116 #include "predict.def"
 117
 118   /* Upper bound on predictors.  */
 119   {NULL, 0, 0}
 120 };
 121 #undef DEF_PREDICTOR
 122
 123 /* Return TRUE if frequency FREQ is considered to be hot.  */
 124
 125 static inline bool
 126 maybe_hot_frequency_p (struct function *fun, int freq)
 127 {
 128   struct cgraph_node *node = cgraph_node::get (fun->decl);
 129   if (!profile_info
 130       || !opt_for_fn (fun->decl, flag_branch_probabilities))
 131     {
 132       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 133         return false;
 134       if (node->frequency == NODE_FREQUENCY_HOT)
 135         return true;
 136     }
 137   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 138     return true;
 139   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 140       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
 141     return false;
 142   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 143     return false;
 144   if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
 145               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 146     return false;
 147   return true;
 148 }
 149
 150 static gcov_type min_count = -1;
 151
 152 /* Determine the threshold for hot BB counts.  */
 153
 154 gcov_type
 155 get_hot_bb_threshold ()
 156 {
 157   gcov_working_set_t *ws;
 158   if (min_count == -1)
 159     {
 160       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 161       gcc_assert (ws);
 162       min_count = ws->min_counter;
 163     }
 164   return min_count;
 165 }
 166
 167 /* Set the threshold for hot BB counts.  */
 168
 169 void
 170 set_hot_bb_threshold (gcov_type min)
 171 {
 172   min_count = min;
 173 }
 174
 175 /* Return TRUE if frequency FREQ is considered to be hot.  */
 176
 177 bool
 178 maybe_hot_count_p (struct function *fun, gcov_type count)
 179 {
 180   if (fun && profile_status_for_fn (fun) != PROFILE_READ)
 181     return true;
 182   /* Code executed at most once is not hot.  */
 183   if (profile_info->runs >= count)
 184     return false;
 185   return (count >= get_hot_bb_threshold ());
 186 }
 187
 188 /* Return true in case BB can be CPU intensive and should be optimized
 189    for maximal performance.  */
 190
 191 bool
 192 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 193 {
 194   gcc_checking_assert (fun);
 195   if (profile_status_for_fn (fun) == PROFILE_READ)
 196     return maybe_hot_count_p (fun, bb->count);
 197   return maybe_hot_frequency_p (fun, bb->frequency);
 198 }
 199
 200 /* Return true in case BB can be CPU intensive and should be optimized
 201    for maximal performance.  */
 202
 203 bool
 204 maybe_hot_edge_p (edge e)
 205 {
 206   if (profile_status_for_fn (cfun) == PROFILE_READ)
 207     return maybe_hot_count_p (cfun, e->count);
 208   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 209 }
 210
 211 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 212    node frequency reflects never being executed.  */
 213
 214 static bool
 215 probably_never_executed (struct function *fun,
 216                          gcov_type count, int frequency)
 217 {
 218   gcc_checking_assert (fun);
 219   if (profile_status_for_fn (fun) == PROFILE_READ)
 220     {
 221       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 222       if (count * unlikely_count_fraction >= profile_info->runs)
 223         return false;
 224       if (!frequency)
 225         return true;
 226       if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
 227         return false;
 228       if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
 229         {
 230           gcov_type computed_count;
 231           /* Check for possibility of overflow, in which case entry bb count
 232              is large enough to do the division first without losing much
 233              precision.  */
 234           if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
 235               REG_BR_PROB_BASE)
 236             {
 237               gcov_type scaled_count
 238                   = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
 239              unlikely_count_fraction;
 240               computed_count = RDIV (scaled_count,
 241                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 242             }
 243           else
 244             {
 245               computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
 246                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 247               computed_count *= frequency * unlikely_count_fraction;
 248             }
 249           if (computed_count >= profile_info->runs)
 250             return false;
 251         }
 252       return true;
 253     }
 254   if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
 255       && (cgraph_node::get (fun->decl)->frequency
 256           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 257     return true;
 258   return false;
 259 }
 260
 261
 262 /* Return true in case BB is probably never executed.  */
 263
 264 bool
 265 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 266 {
 267   return probably_never_executed (fun, bb->count, bb->frequency);
 268 }
 269
 270
 271 /* Return true in case edge E is probably never executed.  */
 272
 273 bool
 274 probably_never_executed_edge_p (struct function *fun, edge e)
 275 {
 276   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 277 }
 278
 279 /* Return true when current function should always be optimized for size.  */
 280
 281 bool
 282 optimize_function_for_size_p (struct function *fun)
 283 {
 284   if (!fun || !fun->decl)
 285     return optimize_size;
 286   cgraph_node *n = cgraph_node::get (fun->decl);
 287   return n && n->optimize_for_size_p ();
 288 }
 289
 290 /* Return true when current function should always be optimized for speed.  */
 291
 292 bool
 293 optimize_function_for_speed_p (struct function *fun)
 294 {
 295   return !optimize_function_for_size_p (fun);
 296 }
 297
 298 /* Return TRUE when BB should be optimized for size.  */
 299
 300 bool
 301 optimize_bb_for_size_p (const_basic_block bb)
 302 {
 303   return (optimize_function_for_size_p (cfun)
 304           || (bb && !maybe_hot_bb_p (cfun, bb)));
 305 }
 306
 307 /* Return TRUE when BB should be optimized for speed.  */
 308
 309 bool
 310 optimize_bb_for_speed_p (const_basic_block bb)
 311 {
 312   return !optimize_bb_for_size_p (bb);
 313 }
 314
 315 /* Return TRUE when BB should be optimized for size.  */
 316
 317 bool
 318 optimize_edge_for_size_p (edge e)
 319 {
 320   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 321 }
 322
 323 /* Return TRUE when BB should be optimized for speed.  */
 324
 325 bool
 326 optimize_edge_for_speed_p (edge e)
 327 {
 328   return !optimize_edge_for_size_p (e);
 329 }
 330
 331 /* Return TRUE when BB should be optimized for size.  */
 332
 333 bool
 334 optimize_insn_for_size_p (void)
 335 {
 336   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 337 }
 338
 339 /* Return TRUE when BB should be optimized for speed.  */
 340
 341 bool
 342 optimize_insn_for_speed_p (void)
 343 {
 344   return !optimize_insn_for_size_p ();
 345 }
 346
 347 /* Return TRUE when LOOP should be optimized for size.  */
 348
 349 bool
 350 optimize_loop_for_size_p (struct loop *loop)
 351 {
 352   return optimize_bb_for_size_p (loop->header);
 353 }
 354
 355 /* Return TRUE when LOOP should be optimized for speed.  */
 356
 357 bool
 358 optimize_loop_for_speed_p (struct loop *loop)
 359 {
 360   return optimize_bb_for_speed_p (loop->header);
 361 }
 362
 363 /* Return TRUE when LOOP nest should be optimized for speed.  */
 364
 365 bool
 366 optimize_loop_nest_for_speed_p (struct loop *loop)
 367 {
 368   struct loop *l = loop;
 369   if (optimize_loop_for_speed_p (loop))
 370     return true;
 371   l = loop->inner;
 372   while (l && l != loop)
 373     {
 374       if (optimize_loop_for_speed_p (l))
 375         return true;
 376       if (l->inner)
 377         l = l->inner;
 378       else if (l->next)
 379         l = l->next;
 380       else
 381         {
 382           while (l != loop && !l->next)
 383             l = loop_outer (l);
 384           if (l != loop)
 385             l = l->next;
 386         }
 387     }
 388   return false;
 389 }
 390
 391 /* Return TRUE when LOOP nest should be optimized for size.  */
 392
 393 bool
 394 optimize_loop_nest_for_size_p (struct loop *loop)
 395 {
 396   return !optimize_loop_nest_for_speed_p (loop);
 397 }
 398
 399 /* Return true when edge E is likely to be well predictable by branch
 400    predictor.  */
 401
 402 bool
 403 predictable_edge_p (edge e)
 404 {
 405   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 406     return false;
 407   if ((e->probability
 408        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 409       || (REG_BR_PROB_BASE - e->probability
 410           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 411     return true;
 412   return false;
 413 }
 414
 415
 416 /* Set RTL expansion for BB profile.  */
 417
 418 void
 419 rtl_profile_for_bb (basic_block bb)
 420 {
 421   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 422 }
 423
 424 /* Set RTL expansion for edge profile.  */
 425
 426 void
 427 rtl_profile_for_edge (edge e)
 428 {
 429   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 430 }
 431
 432 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 433 void
 434 default_rtl_profile (void)
 435 {
 436   crtl->maybe_hot_insn_p = true;
 437 }
 438
 439 /* Return true if the one of outgoing edges is already predicted by
 440    PREDICTOR.  */
 441
 442 bool
 443 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 444 {
 445   rtx note;
 446   if (!INSN_P (BB_END (bb)))
 447     return false;
 448   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 449     if (REG_NOTE_KIND (note) == REG_BR_PRED
 450         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 451       return true;
 452   return false;
 453 }
 454
 455 /*  Structure representing predictions in tree level. */
 456
 457 struct edge_prediction {
 458     struct edge_prediction *ep_next;
 459     edge ep_edge;
 460     enum br_predictor ep_predictor;
 461     int ep_probability;
 462 };
 463
 464 /* This map contains for a basic block the list of predictions for the
 465    outgoing edges.  */
 466
 467 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
 468
 469 /* Return true if the one of outgoing edges is already predicted by
 470    PREDICTOR.  */
 471
 472 bool
 473 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 474 {
 475   struct edge_prediction *i;
 476   edge_prediction **preds = bb_predictions->get (bb);
 477
 478   if (!preds)
 479     return false;
 480
 481   for (i = *preds; i; i = i->ep_next)
 482     if (i->ep_predictor == predictor)
 483       return true;
 484   return false;
 485 }
 486
 487 /* Return true when the probability of edge is reliable.
 488
 489    The profile guessing code is good at predicting branch outcome (ie.
 490    taken/not taken), that is predicted right slightly over 75% of time.
 491    It is however notoriously poor on predicting the probability itself.
 492    In general the profile appear a lot flatter (with probabilities closer
 493    to 50%) than the reality so it is bad idea to use it to drive optimization
 494    such as those disabling dynamic branch prediction for well predictable
 495    branches.
 496
 497    There are two exceptions - edges leading to noreturn edges and edges
 498    predicted by number of iterations heuristics are predicted well.  This macro
 499    should be able to distinguish those, but at the moment it simply check for
 500    noreturn heuristic that is only one giving probability over 99% or bellow
 501    1%.  In future we might want to propagate reliability information across the
 502    CFG if we find this information useful on multiple places.   */
 503 static bool
 504 probability_reliable_p (int prob)
 505 {
 506   return (profile_status_for_fn (cfun) == PROFILE_READ
 507           || (profile_status_for_fn (cfun) == PROFILE_GUESSED
 508               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 509 }
 510
 511 /* Same predicate as above, working on edges.  */
 512 bool
 513 edge_probability_reliable_p (const_edge e)
 514 {
 515   return probability_reliable_p (e->probability);
 516 }
 517
 518 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 519 bool
 520 br_prob_note_reliable_p (const_rtx note)
 521 {
 522   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 523   return probability_reliable_p (XINT (note, 0));
 524 }
 525
 526 static void
 527 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
 528 {
 529   gcc_assert (any_condjump_p (insn));
 530   if (!flag_guess_branch_prob)
 531     return;
 532
 533   add_reg_note (insn, REG_BR_PRED,
 534                 gen_rtx_CONCAT (VOIDmode,
 535                                 GEN_INT ((int) predictor),
 536                                 GEN_INT ((int) probability)));
 537 }
 538
 539 /* Predict insn by given predictor.  */
 540
 541 void
 542 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
 543                   enum prediction taken)
 544 {
 545    int probability = predictor_info[(int) predictor].hitrate;
 546
 547    if (taken != TAKEN)
 548      probability = REG_BR_PROB_BASE - probability;
 549
 550    predict_insn (insn, predictor, probability);
 551 }
 552
 553 /* Predict edge E with given probability if possible.  */
 554
 555 void
 556 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 557 {
 558   rtx_insn *last_insn;
 559   last_insn = BB_END (e->src);
 560
 561   /* We can store the branch prediction information only about
 562      conditional jumps.  */
 563   if (!any_condjump_p (last_insn))
 564     return;
 565
 566   /* We always store probability of branching.  */
 567   if (e->flags & EDGE_FALLTHRU)
 568     probability = REG_BR_PROB_BASE - probability;
 569
 570   predict_insn (last_insn, predictor, probability);
 571 }
 572
 573 /* Predict edge E with the given PROBABILITY.  */
 574 void
 575 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 576 {
 577   gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
 578   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
 579        1)
 580       && flag_guess_branch_prob && optimize)
 581     {
 582       struct edge_prediction *i = XNEW (struct edge_prediction);
 583       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
 584
 585       i->ep_next = preds;
 586       preds = i;
 587       i->ep_probability = probability;
 588       i->ep_predictor = predictor;
 589       i->ep_edge = e;
 590     }
 591 }
 592
 593 /* Remove all predictions on given basic block that are attached
 594    to edge E.  */
 595 void
 596 remove_predictions_associated_with_edge (edge e)
 597 {
 598   if (!bb_predictions)
 599     return;
 600
 601   edge_prediction **preds = bb_predictions->get (e->src);
 602
 603   if (preds)
 604     {
 605       struct edge_prediction **prediction = preds;
 606       struct edge_prediction *next;
 607
 608       while (*prediction)
 609         {
 610           if ((*prediction)->ep_edge == e)
 611             {
 612               next = (*prediction)->ep_next;
 613               free (*prediction);
 614               *prediction = next;
 615             }
 616           else
 617             prediction = &((*prediction)->ep_next);
 618         }
 619     }
 620 }
 621
 622 /* Clears the list of predictions stored for BB.  */
 623
 624 static void
 625 clear_bb_predictions (basic_block bb)
 626 {
 627   edge_prediction **preds = bb_predictions->get (bb);
 628   struct edge_prediction *pred, *next;
 629
 630   if (!preds)
 631     return;
 632
 633   for (pred = *preds; pred; pred = next)
 634     {
 635       next = pred->ep_next;
 636       free (pred);
 637     }
 638   *preds = NULL;
 639 }
 640
 641 /* Return true when we can store prediction on insn INSN.
 642    At the moment we represent predictions only on conditional
 643    jumps, not at computed jump or other complicated cases.  */
 644 static bool
 645 can_predict_insn_p (const rtx_insn *insn)
 646 {
 647   return (JUMP_P (insn)
 648           && any_condjump_p (insn)
 649           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 650 }
 651
 652 /* Predict edge E by given predictor if possible.  */
 653
 654 void
 655 predict_edge_def (edge e, enum br_predictor predictor,
 656                   enum prediction taken)
 657 {
 658    int probability = predictor_info[(int) predictor].hitrate;
 659
 660    if (taken != TAKEN)
 661      probability = REG_BR_PROB_BASE - probability;
 662
 663    predict_edge (e, predictor, probability);
 664 }
 665
 666 /* Invert all branch predictions or probability notes in the INSN.  This needs
 667    to be done each time we invert the condition used by the jump.  */
 668
 669 void
 670 invert_br_probabilities (rtx insn)
 671 {
 672   rtx note;
 673
 674   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 675     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 676       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 677     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 678       XEXP (XEXP (note, 0), 1)
 679         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 680 }
 681
 682 /* Dump information about the branch prediction to the output file.  */
 683
 684 static void
 685 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 686                  basic_block bb, int used)
 687 {
 688   edge e;
 689   edge_iterator ei;
 690
 691   if (!file)
 692     return;
 693
 694   FOR_EACH_EDGE (e, ei, bb->succs)
 695     if (! (e->flags & EDGE_FALLTHRU))
 696       break;
 697
 698   fprintf (file, "  %s heuristics%s: %.1f%%",
 699            predictor_info[predictor].name,
 700            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 701
 702   if (bb->count)
 703     {
 704       fprintf (file, "  exec %" PRId64, bb->count);
 705       if (e)
 706         {
 707           fprintf (file, " hit %" PRId64, e->count);
 708           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 709         }
 710     }
 711
 712   fprintf (file, "\n");
 713 }
 714
 715 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 716    evenly and hope for the best.  */
 717 static void
 718 set_even_probabilities (basic_block bb)
 719 {
 720   int nedges = 0;
 721   edge e;
 722   edge_iterator ei;
 723
 724   FOR_EACH_EDGE (e, ei, bb->succs)
 725     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 726       nedges ++;
 727   FOR_EACH_EDGE (e, ei, bb->succs)
 728     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 729       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 730     else
 731       e->probability = 0;
 732 }
 733
 734 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 735    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 736
 737 static void
 738 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
 739 {
 740   rtx prob_note;
 741   rtx *pnote;
 742   rtx note;
 743   int best_probability = PROB_EVEN;
 744   enum br_predictor best_predictor = END_PREDICTORS;
 745   int combined_probability = REG_BR_PROB_BASE / 2;
 746   int d;
 747   bool first_match = false;
 748   bool found = false;
 749
 750   if (!can_predict_insn_p (insn))
 751     {
 752       set_even_probabilities (bb);
 753       return;
 754     }
 755
 756   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 757   pnote = &REG_NOTES (insn);
 758   if (dump_file)
 759     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 760              bb->index);
 761
 762   /* We implement "first match" heuristics and use probability guessed
 763      by predictor with smallest index.  */
 764   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 765     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 766       {
 767         enum br_predictor predictor = ((enum br_predictor)
 768                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 769         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 770
 771         found = true;
 772         if (best_predictor > predictor)
 773           best_probability = probability, best_predictor = predictor;
 774
 775         d = (combined_probability * probability
 776              + (REG_BR_PROB_BASE - combined_probability)
 777              * (REG_BR_PROB_BASE - probability));
 778
 779         /* Use FP math to avoid overflows of 32bit integers.  */
 780         if (d == 0)
 781           /* If one probability is 0% and one 100%, avoid division by zero.  */
 782           combined_probability = REG_BR_PROB_BASE / 2;
 783         else
 784           combined_probability = (((double) combined_probability) * probability
 785                                   * REG_BR_PROB_BASE / d + 0.5);
 786       }
 787
 788   /* Decide which heuristic to use.  In case we didn't match anything,
 789      use no_prediction heuristic, in case we did match, use either
 790      first match or Dempster-Shaffer theory depending on the flags.  */
 791
 792   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 793     first_match = true;
 794
 795   if (!found)
 796     dump_prediction (dump_file, PRED_NO_PREDICTION,
 797                      combined_probability, bb, true);
 798   else
 799     {
 800       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 801                        bb, !first_match);
 802       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 803                        bb, first_match);
 804     }
 805
 806   if (first_match)
 807     combined_probability = best_probability;
 808   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 809
 810   while (*pnote)
 811     {
 812       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 813         {
 814           enum br_predictor predictor = ((enum br_predictor)
 815                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 816           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 817
 818           dump_prediction (dump_file, predictor, probability, bb,
 819                            !first_match || best_predictor == predictor);
 820           *pnote = XEXP (*pnote, 1);
 821         }
 822       else
 823         pnote = &XEXP (*pnote, 1);
 824     }
 825
 826   if (!prob_note)
 827     {
 828       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 829
 830       /* Save the prediction into CFG in case we are seeing non-degenerated
 831          conditional jump.  */
 832       if (!single_succ_p (bb))
 833         {
 834           BRANCH_EDGE (bb)->probability = combined_probability;
 835           FALLTHRU_EDGE (bb)->probability
 836             = REG_BR_PROB_BASE - combined_probability;
 837         }
 838     }
 839   else if (!single_succ_p (bb))
 840     {
 841       int prob = XINT (prob_note, 0);
 842
 843       BRANCH_EDGE (bb)->probability = prob;
 844       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 845     }
 846   else
 847     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 848 }
 849
 850 /* Combine predictions into single probability and store them into CFG.
 851    Remove now useless prediction entries.  */
 852
 853 static void
 854 combine_predictions_for_bb (basic_block bb)
 855 {
 856   int best_probability = PROB_EVEN;
 857   enum br_predictor best_predictor = END_PREDICTORS;
 858   int combined_probability = REG_BR_PROB_BASE / 2;
 859   int d;
 860   bool first_match = false;
 861   bool found = false;
 862   struct edge_prediction *pred;
 863   int nedges = 0;
 864   edge e, first = NULL, second = NULL;
 865   edge_iterator ei;
 866
 867   FOR_EACH_EDGE (e, ei, bb->succs)
 868     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 869       {
 870         nedges ++;
 871         if (first && !second)
 872           second = e;
 873         if (!first)
 874           first = e;
 875       }
 876
 877   /* When there is no successor or only one choice, prediction is easy.
 878
 879      We are lazy for now and predict only basic blocks with two outgoing
 880      edges.  It is possible to predict generic case too, but we have to
 881      ignore first match heuristics and do more involved combining.  Implement
 882      this later.  */
 883   if (nedges != 2)
 884     {
 885       if (!bb->count)
 886         set_even_probabilities (bb);
 887       clear_bb_predictions (bb);
 888       if (dump_file)
 889         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 890                  nedges, bb->index);
 891       return;
 892     }
 893
 894   if (dump_file)
 895     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 896
 897   edge_prediction **preds = bb_predictions->get (bb);
 898   if (preds)
 899     {
 900       /* We implement "first match" heuristics and use probability guessed
 901          by predictor with smallest index.  */
 902       for (pred = *preds; pred; pred = pred->ep_next)
 903         {
 904           enum br_predictor predictor = pred->ep_predictor;
 905           int probability = pred->ep_probability;
 906
 907           if (pred->ep_edge != first)
 908             probability = REG_BR_PROB_BASE - probability;
 909
 910           found = true;
 911           /* First match heuristics would be widly confused if we predicted
 912              both directions.  */
 913           if (best_predictor > predictor)
 914             {
 915               struct edge_prediction *pred2;
 916               int prob = probability;
 917
 918               for (pred2 = (struct edge_prediction *) *preds;
 919                    pred2; pred2 = pred2->ep_next)
 920                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 921                  {
 922                    int probability2 = pred->ep_probability;
 923
 924                    if (pred2->ep_edge != first)
 925                      probability2 = REG_BR_PROB_BASE - probability2;
 926
 927                    if ((probability < REG_BR_PROB_BASE / 2) !=
 928                        (probability2 < REG_BR_PROB_BASE / 2))
 929                      break;
 930
 931                    /* If the same predictor later gave better result, go for it! */
 932                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 933                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 934                      prob = probability2;
 935                  }
 936               if (!pred2)
 937                 best_probability = prob, best_predictor = predictor;
 938             }
 939
 940           d = (combined_probability * probability
 941                + (REG_BR_PROB_BASE - combined_probability)
 942                * (REG_BR_PROB_BASE - probability));
 943
 944           /* Use FP math to avoid overflows of 32bit integers.  */
 945           if (d == 0)
 946             /* If one probability is 0% and one 100%, avoid division by zero.  */
 947             combined_probability = REG_BR_PROB_BASE / 2;
 948           else
 949             combined_probability = (((double) combined_probability)
 950                                     * probability
 951                                     * REG_BR_PROB_BASE / d + 0.5);
 952         }
 953     }
 954
 955   /* Decide which heuristic to use.  In case we didn't match anything,
 956      use no_prediction heuristic, in case we did match, use either
 957      first match or Dempster-Shaffer theory depending on the flags.  */
 958
 959   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 960     first_match = true;
 961
 962   if (!found)
 963     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 964   else
 965     {
 966       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 967                        !first_match);
 968       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 969                        first_match);
 970     }
 971
 972   if (first_match)
 973     combined_probability = best_probability;
 974   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 975
 976   if (preds)
 977     {
 978       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 979         {
 980           enum br_predictor predictor = pred->ep_predictor;
 981           int probability = pred->ep_probability;
 982
 983           if (pred->ep_edge != EDGE_SUCC (bb, 0))
 984             probability = REG_BR_PROB_BASE - probability;
 985           dump_prediction (dump_file, predictor, probability, bb,
 986                            !first_match || best_predictor == predictor);
 987         }
 988     }
 989   clear_bb_predictions (bb);
 990
 991   if (!bb->count)
 992     {
 993       first->probability = combined_probability;
 994       second->probability = REG_BR_PROB_BASE - combined_probability;
 995     }
 996 }
 997
 998 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
 999    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1000
1001    T1 and T2 should be one of the following cases:
1002      1. T1 is SSA_NAME, T2 is NULL
1003      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1004      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1005
1006 static tree
1007 strips_small_constant (tree t1, tree t2)
1008 {
1009   tree ret = NULL;
1010   int value = 0;
1011
1012   if (!t1)
1013     return NULL;
1014   else if (TREE_CODE (t1) == SSA_NAME)
1015     ret = t1;
1016   else if (tree_fits_shwi_p (t1))
1017     value = tree_to_shwi (t1);
1018   else
1019     return NULL;
1020
1021   if (!t2)
1022     return ret;
1023   else if (tree_fits_shwi_p (t2))
1024     value = tree_to_shwi (t2);
1025   else if (TREE_CODE (t2) == SSA_NAME)
1026     {
1027       if (ret)
1028         return NULL;
1029       else
1030         ret = t2;
1031     }
1032
1033   if (value <= 4 && value >= -4)
1034     return ret;
1035   else
1036     return NULL;
1037 }
1038
1039 /* Return the SSA_NAME in T or T's operands.
1040    Return NULL if SSA_NAME cannot be found.  */
1041
1042 static tree
1043 get_base_value (tree t)
1044 {
1045   if (TREE_CODE (t) == SSA_NAME)
1046     return t;
1047
1048   if (!BINARY_CLASS_P (t))
1049     return NULL;
1050
1051   switch (TREE_OPERAND_LENGTH (t))
1052     {
1053     case 1:
1054       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1055     case 2:
1056       return strips_small_constant (TREE_OPERAND (t, 0),
1057                                     TREE_OPERAND (t, 1));
1058     default:
1059       return NULL;
1060     }
1061 }
1062
1063 /* Check the compare STMT in LOOP. If it compares an induction
1064    variable to a loop invariant, return true, and save
1065    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1066    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1067
1068 static bool
1069 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1070                                      tree *loop_invariant,
1071                                      enum tree_code *compare_code,
1072                                      tree *loop_step,
1073                                      tree *loop_iv_base)
1074 {
1075   tree op0, op1, bound, base;
1076   affine_iv iv0, iv1;
1077   enum tree_code code;
1078   tree step;
1079
1080   code = gimple_cond_code (stmt);
1081   *loop_invariant = NULL;
1082
1083   switch (code)
1084     {
1085     case GT_EXPR:
1086     case GE_EXPR:
1087     case NE_EXPR:
1088     case LT_EXPR:
1089     case LE_EXPR:
1090     case EQ_EXPR:
1091       break;
1092
1093     default:
1094       return false;
1095     }
1096
1097   op0 = gimple_cond_lhs (stmt);
1098   op1 = gimple_cond_rhs (stmt);
1099
1100   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1101        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1102     return false;
1103   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1104     return false;
1105   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1106     return false;
1107   if (TREE_CODE (iv0.step) != INTEGER_CST
1108       || TREE_CODE (iv1.step) != INTEGER_CST)
1109     return false;
1110   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1111       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1112     return false;
1113
1114   if (integer_zerop (iv0.step))
1115     {
1116       if (code != NE_EXPR && code != EQ_EXPR)
1117         code = invert_tree_comparison (code, false);
1118       bound = iv0.base;
1119       base = iv1.base;
1120       if (tree_fits_shwi_p (iv1.step))
1121         step = iv1.step;
1122       else
1123         return false;
1124     }
1125   else
1126     {
1127       bound = iv1.base;
1128       base = iv0.base;
1129       if (tree_fits_shwi_p (iv0.step))
1130         step = iv0.step;
1131       else
1132         return false;
1133     }
1134
1135   if (TREE_CODE (bound) != INTEGER_CST)
1136     bound = get_base_value (bound);
1137   if (!bound)
1138     return false;
1139   if (TREE_CODE (base) != INTEGER_CST)
1140     base = get_base_value (base);
1141   if (!base)
1142     return false;
1143
1144   *loop_invariant = bound;
1145   *compare_code = code;
1146   *loop_step = step;
1147   *loop_iv_base = base;
1148   return true;
1149 }
1150
1151 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1152
1153 static bool
1154 expr_coherent_p (tree t1, tree t2)
1155 {
1156   gimple stmt;
1157   tree ssa_name_1 = NULL;
1158   tree ssa_name_2 = NULL;
1159
1160   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1161   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1162
1163   if (t1 == t2)
1164     return true;
1165
1166   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1167     return true;
1168   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1169     return false;
1170
1171   /* Check to see if t1 is expressed/defined with t2.  */
1172   stmt = SSA_NAME_DEF_STMT (t1);
1173   gcc_assert (stmt != NULL);
1174   if (is_gimple_assign (stmt))
1175     {
1176       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1177       if (ssa_name_1 && ssa_name_1 == t2)
1178         return true;
1179     }
1180
1181   /* Check to see if t2 is expressed/defined with t1.  */
1182   stmt = SSA_NAME_DEF_STMT (t2);
1183   gcc_assert (stmt != NULL);
1184   if (is_gimple_assign (stmt))
1185     {
1186       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1187       if (ssa_name_2 && ssa_name_2 == t1)
1188         return true;
1189     }
1190
1191   /* Compare if t1 and t2's def_stmts are identical.  */
1192   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1193     return true;
1194   else
1195     return false;
1196 }
1197
1198 /* Predict branch probability of BB when BB contains a branch that compares
1199    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1200    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1201
1202    E.g.
1203      for (int i = 0; i < bound; i++) {
1204        if (i < bound - 2)
1205          computation_1();
1206        else
1207          computation_2();
1208      }
1209
1210   In this loop, we will predict the branch inside the loop to be taken.  */
1211
1212 static void
1213 predict_iv_comparison (struct loop *loop, basic_block bb,
1214                        tree loop_bound_var,
1215                        tree loop_iv_base_var,
1216                        enum tree_code loop_bound_code,
1217                        int loop_bound_step)
1218 {
1219   gimple stmt;
1220   tree compare_var, compare_base;
1221   enum tree_code compare_code;
1222   tree compare_step_var;
1223   edge then_edge;
1224   edge_iterator ei;
1225
1226   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1227       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1228       || predicted_by_p (bb, PRED_LOOP_EXIT))
1229     return;
1230
1231   stmt = last_stmt (bb);
1232   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1233     return;
1234   if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1235                                             loop, &compare_var,
1236                                             &compare_code,
1237                                             &compare_step_var,
1238                                             &compare_base))
1239     return;
1240
1241   /* Find the taken edge.  */
1242   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1243     if (then_edge->flags & EDGE_TRUE_VALUE)
1244       break;
1245
1246   /* When comparing an IV to a loop invariant, NE is more likely to be
1247      taken while EQ is more likely to be not-taken.  */
1248   if (compare_code == NE_EXPR)
1249     {
1250       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1251       return;
1252     }
1253   else if (compare_code == EQ_EXPR)
1254     {
1255       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1256       return;
1257     }
1258
1259   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1260     return;
1261
1262   /* If loop bound, base and compare bound are all constants, we can
1263      calculate the probability directly.  */
1264   if (tree_fits_shwi_p (loop_bound_var)
1265       && tree_fits_shwi_p (compare_var)
1266       && tree_fits_shwi_p (compare_base))
1267     {
1268       int probability;
1269       bool overflow, overall_overflow = false;
1270       widest_int compare_count, tem;
1271
1272       /* (loop_bound - base) / compare_step */
1273       tem = wi::sub (wi::to_widest (loop_bound_var),
1274                      wi::to_widest (compare_base), SIGNED, &overflow);
1275       overall_overflow |= overflow;
1276       widest_int loop_count = wi::div_trunc (tem,
1277                                              wi::to_widest (compare_step_var),
1278                                              SIGNED, &overflow);
1279       overall_overflow |= overflow;
1280
1281       if (!wi::neg_p (wi::to_widest (compare_step_var))
1282           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1283         {
1284           /* (loop_bound - compare_bound) / compare_step */
1285           tem = wi::sub (wi::to_widest (loop_bound_var),
1286                          wi::to_widest (compare_var), SIGNED, &overflow);
1287           overall_overflow |= overflow;
1288           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1289                                          SIGNED, &overflow);
1290           overall_overflow |= overflow;
1291         }
1292       else
1293         {
1294           /* (compare_bound - base) / compare_step */
1295           tem = wi::sub (wi::to_widest (compare_var),
1296                          wi::to_widest (compare_base), SIGNED, &overflow);
1297           overall_overflow |= overflow;
1298           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1299                                          SIGNED, &overflow);
1300           overall_overflow |= overflow;
1301         }
1302       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1303         ++compare_count;
1304       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1305         ++loop_count;
1306       if (wi::neg_p (compare_count))
1307         compare_count = 0;
1308       if (wi::neg_p (loop_count))
1309         loop_count = 0;
1310       if (loop_count == 0)
1311         probability = 0;
1312       else if (wi::cmps (compare_count, loop_count) == 1)
1313         probability = REG_BR_PROB_BASE;
1314       else
1315         {
1316           tem = compare_count * REG_BR_PROB_BASE;
1317           tem = wi::udiv_trunc (tem, loop_count);
1318           probability = tem.to_uhwi ();
1319         }
1320
1321       if (!overall_overflow)
1322         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1323
1324       return;
1325     }
1326
1327   if (expr_coherent_p (loop_bound_var, compare_var))
1328     {
1329       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1330           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1331         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1332       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1333                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1334         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1335       else if (loop_bound_code == NE_EXPR)
1336         {
1337           /* If the loop backedge condition is "(i != bound)", we do
1338              the comparison based on the step of IV:
1339              * step < 0 : backedge condition is like (i > bound)
1340              * step > 0 : backedge condition is like (i < bound)  */
1341           gcc_assert (loop_bound_step != 0);
1342           if (loop_bound_step > 0
1343               && (compare_code == LT_EXPR
1344                   || compare_code == LE_EXPR))
1345             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1346           else if (loop_bound_step < 0
1347                    && (compare_code == GT_EXPR
1348                        || compare_code == GE_EXPR))
1349             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1350           else
1351             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1352         }
1353       else
1354         /* The branch is predicted not-taken if loop_bound_code is
1355            opposite with compare_code.  */
1356         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1357     }
1358   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1359     {
1360       /* For cases like:
1361            for (i = s; i < h; i++)
1362              if (i > s + 2) ....
1363          The branch should be predicted taken.  */
1364       if (loop_bound_step > 0
1365           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1366         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1367       else if (loop_bound_step < 0
1368                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1369         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1370       else
1371         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1372     }
1373 }
1374
1375 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1376    exits are resulted from short-circuit conditions that will generate an
1377    if_tmp. E.g.:
1378
1379    if (foo() || global > 10)
1380      break;
1381
1382    This will be translated into:
1383
1384    BB3:
1385      loop header...
1386    BB4:
1387      if foo() goto BB6 else goto BB5
1388    BB5:
1389      if global > 10 goto BB6 else goto BB7
1390    BB6:
1391      goto BB7
1392    BB7:
1393      iftmp = (PHI 0(BB5), 1(BB6))
1394      if iftmp == 1 goto BB8 else goto BB3
1395    BB8:
1396      outside of the loop...
1397
1398    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1399    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1400    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1401    exits to predict them using PRED_LOOP_EXIT.  */
1402
1403 static void
1404 predict_extra_loop_exits (edge exit_edge)
1405 {
1406   unsigned i;
1407   bool check_value_one;
1408   gimple lhs_def_stmt;
1409   gphi *phi_stmt;
1410   tree cmp_rhs, cmp_lhs;
1411   gimple last;
1412   gcond *cmp_stmt;
1413
1414   last = last_stmt (exit_edge->src);
1415   if (!last)
1416     return;
1417   cmp_stmt = dyn_cast <gcond *> (last);
1418   if (!cmp_stmt)
1419     return;
1420
1421   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1422   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1423   if (!TREE_CONSTANT (cmp_rhs)
1424       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1425     return;
1426   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1427     return;
1428
1429   /* If check_value_one is true, only the phi_args with value '1' will lead
1430      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1431      loop exit.  */
1432   check_value_one = (((integer_onep (cmp_rhs))
1433                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1434                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1435
1436   lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1437   if (!lhs_def_stmt)
1438     return;
1439
1440   phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1441   if (!phi_stmt)
1442     return;
1443
1444   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1445     {
1446       edge e1;
1447       edge_iterator ei;
1448       tree val = gimple_phi_arg_def (phi_stmt, i);
1449       edge e = gimple_phi_arg_edge (phi_stmt, i);
1450
1451       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1452         continue;
1453       if ((check_value_one ^ integer_onep (val)) == 1)
1454         continue;
1455       if (EDGE_COUNT (e->src->succs) != 1)
1456         {
1457           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1458           continue;
1459         }
1460
1461       FOR_EACH_EDGE (e1, ei, e->src->preds)
1462         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1463     }
1464 }
1465
1466 /* Predict edge probabilities by exploiting loop structure.  */
1467
1468 static void
1469 predict_loops (void)
1470 {
1471   struct loop *loop;
1472
1473   /* Try to predict out blocks in a loop that are not part of a
1474      natural loop.  */
1475   FOR_EACH_LOOP (loop, 0)
1476     {
1477       basic_block bb, *bbs;
1478       unsigned j, n_exits;
1479       vec<edge> exits;
1480       struct tree_niter_desc niter_desc;
1481       edge ex;
1482       struct nb_iter_bound *nb_iter;
1483       enum tree_code loop_bound_code = ERROR_MARK;
1484       tree loop_bound_step = NULL;
1485       tree loop_bound_var = NULL;
1486       tree loop_iv_base = NULL;
1487       gcond *stmt = NULL;
1488
1489       exits = get_loop_exit_edges (loop);
1490       n_exits = exits.length ();
1491       if (!n_exits)
1492         {
1493           exits.release ();
1494           continue;
1495         }
1496
1497       FOR_EACH_VEC_ELT (exits, j, ex)
1498         {
1499           tree niter = NULL;
1500           HOST_WIDE_INT nitercst;
1501           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1502           int probability;
1503           enum br_predictor predictor;
1504
1505           predict_extra_loop_exits (ex);
1506
1507           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1508             niter = niter_desc.niter;
1509           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1510             niter = loop_niter_by_eval (loop, ex);
1511
1512           if (TREE_CODE (niter) == INTEGER_CST)
1513             {
1514               if (tree_fits_uhwi_p (niter)
1515                   && max
1516                   && compare_tree_int (niter, max - 1) == -1)
1517                 nitercst = tree_to_uhwi (niter) + 1;
1518               else
1519                 nitercst = max;
1520               predictor = PRED_LOOP_ITERATIONS;
1521             }
1522           /* If we have just one exit and we can derive some information about
1523              the number of iterations of the loop from the statements inside
1524              the loop, use it to predict this exit.  */
1525           else if (n_exits == 1)
1526             {
1527               nitercst = estimated_stmt_executions_int (loop);
1528               if (nitercst < 0)
1529                 continue;
1530               if (nitercst > max)
1531                 nitercst = max;
1532
1533               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1534             }
1535           else
1536             continue;
1537
1538           /* If the prediction for number of iterations is zero, do not
1539              predict the exit edges.  */
1540           if (nitercst == 0)
1541             continue;
1542
1543           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1544           predict_edge (ex, predictor, probability);
1545         }
1546       exits.release ();
1547
1548       /* Find information about loop bound variables.  */
1549       for (nb_iter = loop->bounds; nb_iter;
1550            nb_iter = nb_iter->next)
1551         if (nb_iter->stmt
1552             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1553           {
1554             stmt = as_a <gcond *> (nb_iter->stmt);
1555             break;
1556           }
1557       if (!stmt && last_stmt (loop->header)
1558           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1559         stmt = as_a <gcond *> (last_stmt (loop->header));
1560       if (stmt)
1561         is_comparison_with_loop_invariant_p (stmt, loop,
1562                                              &loop_bound_var,
1563                                              &loop_bound_code,
1564                                              &loop_bound_step,
1565                                              &loop_iv_base);
1566
1567       bbs = get_loop_body (loop);
1568
1569       for (j = 0; j < loop->num_nodes; j++)
1570         {
1571           int header_found = 0;
1572           edge e;
1573           edge_iterator ei;
1574
1575           bb = bbs[j];
1576
1577           /* Bypass loop heuristics on continue statement.  These
1578              statements construct loops via "non-loop" constructs
1579              in the source language and are better to be handled
1580              separately.  */
1581           if (predicted_by_p (bb, PRED_CONTINUE))
1582             continue;
1583
1584           /* Loop branch heuristics - predict an edge back to a
1585              loop's head as taken.  */
1586           if (bb == loop->latch)
1587             {
1588               e = find_edge (loop->latch, loop->header);
1589               if (e)
1590                 {
1591                   header_found = 1;
1592                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1593                 }
1594             }
1595
1596           /* Loop exit heuristics - predict an edge exiting the loop if the
1597              conditional has no loop header successors as not taken.  */
1598           if (!header_found
1599               /* If we already used more reliable loop exit predictors, do not
1600                  bother with PRED_LOOP_EXIT.  */
1601               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1602               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1603             {
1604               /* For loop with many exits we don't want to predict all exits
1605                  with the pretty large probability, because if all exits are
1606                  considered in row, the loop would be predicted to iterate
1607                  almost never.  The code to divide probability by number of
1608                  exits is very rough.  It should compute the number of exits
1609                  taken in each patch through function (not the overall number
1610                  of exits that might be a lot higher for loops with wide switch
1611                  statements in them) and compute n-th square root.
1612
1613                  We limit the minimal probability by 2% to avoid
1614                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1615                  as this was causing regression in perl benchmark containing such
1616                  a wide loop.  */
1617
1618               int probability = ((REG_BR_PROB_BASE
1619                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1620                                  / n_exits);
1621               if (probability < HITRATE (2))
1622                 probability = HITRATE (2);
1623               FOR_EACH_EDGE (e, ei, bb->succs)
1624                 if (e->dest->index < NUM_FIXED_BLOCKS
1625                     || !flow_bb_inside_loop_p (loop, e->dest))
1626                   predict_edge (e, PRED_LOOP_EXIT, probability);
1627             }
1628           if (loop_bound_var)
1629             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1630                                    loop_bound_code,
1631                                    tree_to_shwi (loop_bound_step));
1632         }
1633
1634       /* Free basic blocks from get_loop_body.  */
1635       free (bbs);
1636     }
1637 }
1638
1639 /* Attempt to predict probabilities of BB outgoing edges using local
1640    properties.  */
1641 static void
1642 bb_estimate_probability_locally (basic_block bb)
1643 {
1644   rtx_insn *last_insn = BB_END (bb);
1645   rtx cond;
1646
1647   if (! can_predict_insn_p (last_insn))
1648     return;
1649   cond = get_condition (last_insn, NULL, false, false);
1650   if (! cond)
1651     return;
1652
1653   /* Try "pointer heuristic."
1654      A comparison ptr == 0 is predicted as false.
1655      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1656   if (COMPARISON_P (cond)
1657       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1658           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1659     {
1660       if (GET_CODE (cond) == EQ)
1661         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1662       else if (GET_CODE (cond) == NE)
1663         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1664     }
1665   else
1666
1667   /* Try "opcode heuristic."
1668      EQ tests are usually false and NE tests are usually true. Also,
1669      most quantities are positive, so we can make the appropriate guesses
1670      about signed comparisons against zero.  */
1671     switch (GET_CODE (cond))
1672       {
1673       case CONST_INT:
1674         /* Unconditional branch.  */
1675         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1676                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1677         break;
1678
1679       case EQ:
1680       case UNEQ:
1681         /* Floating point comparisons appears to behave in a very
1682            unpredictable way because of special role of = tests in
1683            FP code.  */
1684         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1685           ;
1686         /* Comparisons with 0 are often used for booleans and there is
1687            nothing useful to predict about them.  */
1688         else if (XEXP (cond, 1) == const0_rtx
1689                  || XEXP (cond, 0) == const0_rtx)
1690           ;
1691         else
1692           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1693         break;
1694
1695       case NE:
1696       case LTGT:
1697         /* Floating point comparisons appears to behave in a very
1698            unpredictable way because of special role of = tests in
1699            FP code.  */
1700         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1701           ;
1702         /* Comparisons with 0 are often used for booleans and there is
1703            nothing useful to predict about them.  */
1704         else if (XEXP (cond, 1) == const0_rtx
1705                  || XEXP (cond, 0) == const0_rtx)
1706           ;
1707         else
1708           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1709         break;
1710
1711       case ORDERED:
1712         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1713         break;
1714
1715       case UNORDERED:
1716         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1717         break;
1718
1719       case LE:
1720       case LT:
1721         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1722             || XEXP (cond, 1) == constm1_rtx)
1723           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1724         break;
1725
1726       case GE:
1727       case GT:
1728         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1729             || XEXP (cond, 1) == constm1_rtx)
1730           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1731         break;
1732
1733       default:
1734         break;
1735       }
1736 }
1737
1738 /* Set edge->probability for each successor edge of BB.  */
1739 void
1740 guess_outgoing_edge_probabilities (basic_block bb)
1741 {
1742   bb_estimate_probability_locally (bb);
1743   combine_predictions_for_insn (BB_END (bb), bb);
1744 }
1745 \f
1746 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1747
1748 /* Helper function for expr_expected_value.  */
1749
1750 static tree
1751 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1752                        tree op1, bitmap visited, enum br_predictor *predictor)
1753 {
1754   gimple def;
1755
1756   if (predictor)
1757     *predictor = PRED_UNCONDITIONAL;
1758
1759   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1760     {
1761       if (TREE_CONSTANT (op0))
1762         return op0;
1763
1764       if (code != SSA_NAME)
1765         return NULL_TREE;
1766
1767       def = SSA_NAME_DEF_STMT (op0);
1768
1769       /* If we were already here, break the infinite cycle.  */
1770       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1771         return NULL;
1772
1773       if (gimple_code (def) == GIMPLE_PHI)
1774         {
1775           /* All the arguments of the PHI node must have the same constant
1776              length.  */
1777           int i, n = gimple_phi_num_args (def);
1778           tree val = NULL, new_val;
1779
1780           for (i = 0; i < n; i++)
1781             {
1782               tree arg = PHI_ARG_DEF (def, i);
1783               enum br_predictor predictor2;
1784
1785               /* If this PHI has itself as an argument, we cannot
1786                  determine the string length of this argument.  However,
1787                  if we can find an expected constant value for the other
1788                  PHI args then we can still be sure that this is
1789                  likely a constant.  So be optimistic and just
1790                  continue with the next argument.  */
1791               if (arg == PHI_RESULT (def))
1792                 continue;
1793
1794               new_val = expr_expected_value (arg, visited, &predictor2);
1795
1796               /* It is difficult to combine value predictors.  Simply assume
1797                  that later predictor is weaker and take its prediction.  */
1798               if (predictor && *predictor < predictor2)
1799                 *predictor = predictor2;
1800               if (!new_val)
1801                 return NULL;
1802               if (!val)
1803                 val = new_val;
1804               else if (!operand_equal_p (val, new_val, false))
1805                 return NULL;
1806             }
1807           return val;
1808         }
1809       if (is_gimple_assign (def))
1810         {
1811           if (gimple_assign_lhs (def) != op0)
1812             return NULL;
1813
1814           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1815                                         gimple_assign_rhs1 (def),
1816                                         gimple_assign_rhs_code (def),
1817                                         gimple_assign_rhs2 (def),
1818                                         visited, predictor);
1819         }
1820
1821       if (is_gimple_call (def))
1822         {
1823           tree decl = gimple_call_fndecl (def);
1824           if (!decl)
1825             {
1826               if (gimple_call_internal_p (def)
1827                   && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1828                 {
1829                   gcc_assert (gimple_call_num_args (def) == 3);
1830                   tree val = gimple_call_arg (def, 0);
1831                   if (TREE_CONSTANT (val))
1832                     return val;
1833                   if (predictor)
1834                     {
1835                       tree val2 = gimple_call_arg (def, 2);
1836                       gcc_assert (TREE_CODE (val2) == INTEGER_CST
1837                                   && tree_fits_uhwi_p (val2)
1838                                   && tree_to_uhwi (val2) < END_PREDICTORS);
1839                       *predictor = (enum br_predictor) tree_to_uhwi (val2);
1840                     }
1841                   return gimple_call_arg (def, 1);
1842                 }
1843               return NULL;
1844             }
1845           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1846             switch (DECL_FUNCTION_CODE (decl))
1847               {
1848               case BUILT_IN_EXPECT:
1849                 {
1850                   tree val;
1851                   if (gimple_call_num_args (def) != 2)
1852                     return NULL;
1853                   val = gimple_call_arg (def, 0);
1854                   if (TREE_CONSTANT (val))
1855                     return val;
1856                   if (predictor)
1857                     *predictor = PRED_BUILTIN_EXPECT;
1858                   return gimple_call_arg (def, 1);
1859                 }
1860
1861               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1862               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1863               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1864               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1865               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1866               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1867               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1868               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1869               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1870               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1871               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1872               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1873               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1874                 /* Assume that any given atomic operation has low contention,
1875                    and thus the compare-and-swap operation succeeds.  */
1876                 if (predictor)
1877                   *predictor = PRED_COMPARE_AND_SWAP;
1878                 return boolean_true_node;
1879               default:
1880                 break;
1881             }
1882         }
1883
1884       return NULL;
1885     }
1886
1887   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1888     {
1889       tree res;
1890       enum br_predictor predictor2;
1891       op0 = expr_expected_value (op0, visited, predictor);
1892       if (!op0)
1893         return NULL;
1894       op1 = expr_expected_value (op1, visited, &predictor2);
1895       if (predictor && *predictor < predictor2)
1896         *predictor = predictor2;
1897       if (!op1)
1898         return NULL;
1899       res = fold_build2 (code, type, op0, op1);
1900       if (TREE_CONSTANT (res))
1901         return res;
1902       return NULL;
1903     }
1904   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1905     {
1906       tree res;
1907       op0 = expr_expected_value (op0, visited, predictor);
1908       if (!op0)
1909         return NULL;
1910       res = fold_build1 (code, type, op0);
1911       if (TREE_CONSTANT (res))
1912         return res;
1913       return NULL;
1914     }
1915   return NULL;
1916 }
1917
1918 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1919    The function is used by builtin_expect branch predictor so the evidence
1920    must come from this construct and additional possible constant folding.
1921
1922    We may want to implement more involved value guess (such as value range
1923    propagation based prediction), but such tricks shall go to new
1924    implementation.  */
1925
1926 static tree
1927 expr_expected_value (tree expr, bitmap visited,
1928                      enum br_predictor *predictor)
1929 {
1930   enum tree_code code;
1931   tree op0, op1;
1932
1933   if (TREE_CONSTANT (expr))
1934     {
1935       if (predictor)
1936         *predictor = PRED_UNCONDITIONAL;
1937       return expr;
1938     }
1939
1940   extract_ops_from_tree (expr, &code, &op0, &op1);
1941   return expr_expected_value_1 (TREE_TYPE (expr),
1942                                 op0, code, op1, visited, predictor);
1943 }
1944 \f
1945 /* Predict using opcode of the last statement in basic block.  */
1946 static void
1947 tree_predict_by_opcode (basic_block bb)
1948 {
1949   gimple stmt = last_stmt (bb);
1950   edge then_edge;
1951   tree op0, op1;
1952   tree type;
1953   tree val;
1954   enum tree_code cmp;
1955   bitmap visited;
1956   edge_iterator ei;
1957   enum br_predictor predictor;
1958
1959   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1960     return;
1961   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1962     if (then_edge->flags & EDGE_TRUE_VALUE)
1963       break;
1964   op0 = gimple_cond_lhs (stmt);
1965   op1 = gimple_cond_rhs (stmt);
1966   cmp = gimple_cond_code (stmt);
1967   type = TREE_TYPE (op0);
1968   visited = BITMAP_ALLOC (NULL);
1969   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1970                                &predictor);
1971   BITMAP_FREE (visited);
1972   if (val && TREE_CODE (val) == INTEGER_CST)
1973     {
1974       if (predictor == PRED_BUILTIN_EXPECT)
1975         {
1976           int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1977
1978           gcc_assert (percent >= 0 && percent <= 100);
1979           if (integer_zerop (val))
1980             percent = 100 - percent;
1981           predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1982         }
1983       else
1984         predict_edge (then_edge, predictor,
1985                       integer_zerop (val) ? NOT_TAKEN : TAKEN);
1986     }
1987   /* Try "pointer heuristic."
1988      A comparison ptr == 0 is predicted as false.
1989      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1990   if (POINTER_TYPE_P (type))
1991     {
1992       if (cmp == EQ_EXPR)
1993         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1994       else if (cmp == NE_EXPR)
1995         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1996     }
1997   else
1998
1999   /* Try "opcode heuristic."
2000      EQ tests are usually false and NE tests are usually true. Also,
2001      most quantities are positive, so we can make the appropriate guesses
2002      about signed comparisons against zero.  */
2003     switch (cmp)
2004       {
2005       case EQ_EXPR:
2006       case UNEQ_EXPR:
2007         /* Floating point comparisons appears to behave in a very
2008            unpredictable way because of special role of = tests in
2009            FP code.  */
2010         if (FLOAT_TYPE_P (type))
2011           ;
2012         /* Comparisons with 0 are often used for booleans and there is
2013            nothing useful to predict about them.  */
2014         else if (integer_zerop (op0) || integer_zerop (op1))
2015           ;
2016         else
2017           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2018         break;
2019
2020       case NE_EXPR:
2021       case LTGT_EXPR:
2022         /* Floating point comparisons appears to behave in a very
2023            unpredictable way because of special role of = tests in
2024            FP code.  */
2025         if (FLOAT_TYPE_P (type))
2026           ;
2027         /* Comparisons with 0 are often used for booleans and there is
2028            nothing useful to predict about them.  */
2029         else if (integer_zerop (op0)
2030                  || integer_zerop (op1))
2031           ;
2032         else
2033           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2034         break;
2035
2036       case ORDERED_EXPR:
2037         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2038         break;
2039
2040       case UNORDERED_EXPR:
2041         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2042         break;
2043
2044       case LE_EXPR:
2045       case LT_EXPR:
2046         if (integer_zerop (op1)
2047             || integer_onep (op1)
2048             || integer_all_onesp (op1)
2049             || real_zerop (op1)
2050             || real_onep (op1)
2051             || real_minus_onep (op1))
2052           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2053         break;
2054
2055       case GE_EXPR:
2056       case GT_EXPR:
2057         if (integer_zerop (op1)
2058             || integer_onep (op1)
2059             || integer_all_onesp (op1)
2060             || real_zerop (op1)
2061             || real_onep (op1)
2062             || real_minus_onep (op1))
2063           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2064         break;
2065
2066       default:
2067         break;
2068       }
2069 }
2070
2071 /* Try to guess whether the value of return means error code.  */
2072
2073 static enum br_predictor
2074 return_prediction (tree val, enum prediction *prediction)
2075 {
2076   /* VOID.  */
2077   if (!val)
2078     return PRED_NO_PREDICTION;
2079   /* Different heuristics for pointers and scalars.  */
2080   if (POINTER_TYPE_P (TREE_TYPE (val)))
2081     {
2082       /* NULL is usually not returned.  */
2083       if (integer_zerop (val))
2084         {
2085           *prediction = NOT_TAKEN;
2086           return PRED_NULL_RETURN;
2087         }
2088     }
2089   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2090     {
2091       /* Negative return values are often used to indicate
2092          errors.  */
2093       if (TREE_CODE (val) == INTEGER_CST
2094           && tree_int_cst_sgn (val) < 0)
2095         {
2096           *prediction = NOT_TAKEN;
2097           return PRED_NEGATIVE_RETURN;
2098         }
2099       /* Constant return values seems to be commonly taken.
2100          Zero/one often represent booleans so exclude them from the
2101          heuristics.  */
2102       if (TREE_CONSTANT (val)
2103           && (!integer_zerop (val) && !integer_onep (val)))
2104         {
2105           *prediction = TAKEN;
2106           return PRED_CONST_RETURN;
2107         }
2108     }
2109   return PRED_NO_PREDICTION;
2110 }
2111
2112 /* Find the basic block with return expression and look up for possible
2113    return value trying to apply RETURN_PREDICTION heuristics.  */
2114 static void
2115 apply_return_prediction (void)
2116 {
2117   greturn *return_stmt = NULL;
2118   tree return_val;
2119   edge e;
2120   gphi *phi;
2121   int phi_num_args, i;
2122   enum br_predictor pred;
2123   enum prediction direction;
2124   edge_iterator ei;
2125
2126   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2127     {
2128       gimple last = last_stmt (e->src);
2129       if (last
2130           && gimple_code (last) == GIMPLE_RETURN)
2131         {
2132           return_stmt = as_a <greturn *> (last);
2133           break;
2134         }
2135     }
2136   if (!e)
2137     return;
2138   return_val = gimple_return_retval (return_stmt);
2139   if (!return_val)
2140     return;
2141   if (TREE_CODE (return_val) != SSA_NAME
2142       || !SSA_NAME_DEF_STMT (return_val)
2143       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2144     return;
2145   phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2146   phi_num_args = gimple_phi_num_args (phi);
2147   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2148
2149   /* Avoid the degenerate case where all return values form the function
2150      belongs to same category (ie they are all positive constants)
2151      so we can hardly say something about them.  */
2152   for (i = 1; i < phi_num_args; i++)
2153     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2154       break;
2155   if (i != phi_num_args)
2156     for (i = 0; i < phi_num_args; i++)
2157       {
2158         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2159         if (pred != PRED_NO_PREDICTION)
2160           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2161                                          direction);
2162       }
2163 }
2164
2165 /* Look for basic block that contains unlikely to happen events
2166    (such as noreturn calls) and mark all paths leading to execution
2167    of this basic blocks as unlikely.  */
2168
2169 static void
2170 tree_bb_level_predictions (void)
2171 {
2172   basic_block bb;
2173   bool has_return_edges = false;
2174   edge e;
2175   edge_iterator ei;
2176
2177   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2178     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2179       {
2180         has_return_edges = true;
2181         break;
2182       }
2183
2184   apply_return_prediction ();
2185
2186   FOR_EACH_BB_FN (bb, cfun)
2187     {
2188       gimple_stmt_iterator gsi;
2189
2190       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2191         {
2192           gimple stmt = gsi_stmt (gsi);
2193           tree decl;
2194
2195           if (is_gimple_call (stmt))
2196             {
2197               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2198                   && has_return_edges)
2199                 predict_paths_leading_to (bb, PRED_NORETURN,
2200                                           NOT_TAKEN);
2201               decl = gimple_call_fndecl (stmt);
2202               if (decl
2203                   && lookup_attribute ("cold",
2204                                        DECL_ATTRIBUTES (decl)))
2205                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2206                                           NOT_TAKEN);
2207             }
2208           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2209             {
2210               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2211                                         gimple_predict_outcome (stmt));
2212               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2213                  hints to callers.  */
2214             }
2215         }
2216     }
2217 }
2218
2219 #ifdef ENABLE_CHECKING
2220
2221 /* Callback for hash_map::traverse, asserts that the pointer map is
2222    empty.  */
2223
2224 bool
2225 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2226                  void *)
2227 {
2228   gcc_assert (!value);
2229   return false;
2230 }
2231 #endif
2232
2233 /* Predict branch probabilities and estimate profile for basic block BB.  */
2234
2235 static void
2236 tree_estimate_probability_bb (basic_block bb)
2237 {
2238   edge e;
2239   edge_iterator ei;
2240   gimple last;
2241
2242   FOR_EACH_EDGE (e, ei, bb->succs)
2243     {
2244       /* Predict edges to user labels with attributes.  */
2245       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2246         {
2247           gimple_stmt_iterator gi;
2248           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2249             {
2250               glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2251               tree decl;
2252
2253               if (!label_stmt)
2254                 break;
2255               decl = gimple_label_label (label_stmt);
2256               if (DECL_ARTIFICIAL (decl))
2257                 continue;
2258
2259               /* Finally, we have a user-defined label.  */
2260               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2261                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2262               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2263                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2264             }
2265         }
2266
2267       /* Predict early returns to be probable, as we've already taken
2268          care for error returns and other cases are often used for
2269          fast paths through function.
2270
2271          Since we've already removed the return statements, we are
2272          looking for CFG like:
2273
2274          if (conditional)
2275          {
2276          ..
2277          goto return_block
2278          }
2279          some other blocks
2280          return_block:
2281          return_stmt.  */
2282       if (e->dest != bb->next_bb
2283           && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2284           && single_succ_p (e->dest)
2285           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2286           && (last = last_stmt (e->dest)) != NULL
2287           && gimple_code (last) == GIMPLE_RETURN)
2288         {
2289           edge e1;
2290           edge_iterator ei1;
2291
2292           if (single_succ_p (bb))
2293             {
2294               FOR_EACH_EDGE (e1, ei1, bb->preds)
2295                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2296                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2297                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2298                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2299             }
2300           else
2301             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2302                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2303                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2304               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2305         }
2306
2307       /* Look for block we are guarding (ie we dominate it,
2308          but it doesn't postdominate us).  */
2309       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2310           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2311           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2312         {
2313           gimple_stmt_iterator bi;
2314
2315           /* The call heuristic claims that a guarded function call
2316              is improbable.  This is because such calls are often used
2317              to signal exceptional situations such as printing error
2318              messages.  */
2319           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2320                gsi_next (&bi))
2321             {
2322               gimple stmt = gsi_stmt (bi);
2323               if (is_gimple_call (stmt)
2324                   /* Constant and pure calls are hardly used to signalize
2325                      something exceptional.  */
2326                   && gimple_has_side_effects (stmt))
2327                 {
2328                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2329                   break;
2330                 }
2331             }
2332         }
2333     }
2334   tree_predict_by_opcode (bb);
2335 }
2336
2337 /* Predict branch probabilities and estimate profile of the tree CFG.
2338    This function can be called from the loop optimizers to recompute
2339    the profile information.  */
2340
2341 void
2342 tree_estimate_probability (void)
2343 {
2344   basic_block bb;
2345
2346   add_noreturn_fake_exit_edges ();
2347   connect_infinite_loops_to_exit ();
2348   /* We use loop_niter_by_eval, which requires that the loops have
2349      preheaders.  */
2350   create_preheaders (CP_SIMPLE_PREHEADERS);
2351   calculate_dominance_info (CDI_POST_DOMINATORS);
2352
2353   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2354   tree_bb_level_predictions ();
2355   record_loop_exits ();
2356
2357   if (number_of_loops (cfun) > 1)
2358     predict_loops ();
2359
2360   FOR_EACH_BB_FN (bb, cfun)
2361     tree_estimate_probability_bb (bb);
2362
2363   FOR_EACH_BB_FN (bb, cfun)
2364     combine_predictions_for_bb (bb);
2365
2366 #ifdef ENABLE_CHECKING
2367   bb_predictions->traverse<void *, assert_is_empty> (NULL);
2368 #endif
2369   delete bb_predictions;
2370   bb_predictions = NULL;
2371
2372   estimate_bb_frequencies (false);
2373   free_dominance_info (CDI_POST_DOMINATORS);
2374   remove_fake_exit_edges ();
2375 }
2376 \f
2377 /* Predict edges to successors of CUR whose sources are not postdominated by
2378    BB by PRED and recurse to all postdominators.  */
2379
2380 static void
2381 predict_paths_for_bb (basic_block cur, basic_block bb,
2382                       enum br_predictor pred,
2383                       enum prediction taken,
2384                       bitmap visited)
2385 {
2386   edge e;
2387   edge_iterator ei;
2388   basic_block son;
2389
2390   /* We are looking for all edges forming edge cut induced by
2391      set of all blocks postdominated by BB.  */
2392   FOR_EACH_EDGE (e, ei, cur->preds)
2393     if (e->src->index >= NUM_FIXED_BLOCKS
2394         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2395     {
2396       edge e2;
2397       edge_iterator ei2;
2398       bool found = false;
2399
2400       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2401       if (e->flags & (EDGE_EH | EDGE_FAKE))
2402         continue;
2403       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2404
2405       /* See if there is an edge from e->src that is not abnormal
2406          and does not lead to BB.  */
2407       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2408         if (e2 != e
2409             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2410             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2411           {
2412             found = true;
2413             break;
2414           }
2415
2416       /* If there is non-abnormal path leaving e->src, predict edge
2417          using predictor.  Otherwise we need to look for paths
2418          leading to e->src.
2419
2420          The second may lead to infinite loop in the case we are predicitng
2421          regions that are only reachable by abnormal edges.  We simply
2422          prevent visiting given BB twice.  */
2423       if (found)
2424         predict_edge_def (e, pred, taken);
2425       else if (bitmap_set_bit (visited, e->src->index))
2426         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2427     }
2428   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2429        son;
2430        son = next_dom_son (CDI_POST_DOMINATORS, son))
2431     predict_paths_for_bb (son, bb, pred, taken, visited);
2432 }
2433
2434 /* Sets branch probabilities according to PREDiction and
2435    FLAGS.  */
2436
2437 static void
2438 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2439                           enum prediction taken)
2440 {
2441   bitmap visited = BITMAP_ALLOC (NULL);
2442   predict_paths_for_bb (bb, bb, pred, taken, visited);
2443   BITMAP_FREE (visited);
2444 }
2445
2446 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2447
2448 static void
2449 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2450                                enum prediction taken)
2451 {
2452   bool has_nonloop_edge = false;
2453   edge_iterator ei;
2454   edge e2;
2455
2456   basic_block bb = e->src;
2457   FOR_EACH_EDGE (e2, ei, bb->succs)
2458     if (e2->dest != e->src && e2->dest != e->dest
2459         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2460         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2461       {
2462         has_nonloop_edge = true;
2463         break;
2464       }
2465   if (!has_nonloop_edge)
2466     {
2467       bitmap visited = BITMAP_ALLOC (NULL);
2468       predict_paths_for_bb (bb, bb, pred, taken, visited);
2469       BITMAP_FREE (visited);
2470     }
2471   else
2472     predict_edge_def (e, pred, taken);
2473 }
2474 \f
2475 /* This is used to carry information about basic blocks.  It is
2476    attached to the AUX field of the standard CFG block.  */
2477
2478 struct block_info
2479 {
2480   /* Estimated frequency of execution of basic_block.  */
2481   sreal frequency;
2482
2483   /* To keep queue of basic blocks to process.  */
2484   basic_block next;
2485
2486   /* Number of predecessors we need to visit first.  */
2487   int npredecessors;
2488 };
2489
2490 /* Similar information for edges.  */
2491 struct edge_prob_info
2492 {
2493   /* In case edge is a loopback edge, the probability edge will be reached
2494      in case header is.  Estimated number of iterations of the loop can be
2495      then computed as 1 / (1 - back_edge_prob).  */
2496   sreal back_edge_prob;
2497   /* True if the edge is a loopback edge in the natural loop.  */
2498   unsigned int back_edge:1;
2499 };
2500
2501 #define BLOCK_INFO(B)   ((block_info *) (B)->aux)
2502 #undef EDGE_INFO
2503 #define EDGE_INFO(E)    ((edge_prob_info *) (E)->aux)
2504
2505 /* Helper function for estimate_bb_frequencies.
2506    Propagate the frequencies in blocks marked in
2507    TOVISIT, starting in HEAD.  */
2508
2509 static void
2510 propagate_freq (basic_block head, bitmap tovisit)
2511 {
2512   basic_block bb;
2513   basic_block last;
2514   unsigned i;
2515   edge e;
2516   basic_block nextbb;
2517   bitmap_iterator bi;
2518
2519   /* For each basic block we need to visit count number of his predecessors
2520      we need to visit first.  */
2521   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2522     {
2523       edge_iterator ei;
2524       int count = 0;
2525
2526       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2527
2528       FOR_EACH_EDGE (e, ei, bb->preds)
2529         {
2530           bool visit = bitmap_bit_p (tovisit, e->src->index);
2531
2532           if (visit && !(e->flags & EDGE_DFS_BACK))
2533             count++;
2534           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2535             fprintf (dump_file,
2536                      "Irreducible region hit, ignoring edge to %i->%i\n",
2537                      e->src->index, bb->index);
2538         }
2539       BLOCK_INFO (bb)->npredecessors = count;
2540       /* When function never returns, we will never process exit block.  */
2541       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2542         bb->count = bb->frequency = 0;
2543     }
2544
2545   BLOCK_INFO (head)->frequency = 1;
2546   last = head;
2547   for (bb = head; bb; bb = nextbb)
2548     {
2549       edge_iterator ei;
2550       sreal cyclic_probability = 0;
2551       sreal frequency = 0;
2552
2553       nextbb = BLOCK_INFO (bb)->next;
2554       BLOCK_INFO (bb)->next = NULL;
2555
2556       /* Compute frequency of basic block.  */
2557       if (bb != head)
2558         {
2559 #ifdef ENABLE_CHECKING
2560           FOR_EACH_EDGE (e, ei, bb->preds)
2561             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2562                         || (e->flags & EDGE_DFS_BACK));
2563 #endif
2564
2565           FOR_EACH_EDGE (e, ei, bb->preds)
2566             if (EDGE_INFO (e)->back_edge)
2567               {
2568                 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2569               }
2570             else if (!(e->flags & EDGE_DFS_BACK))
2571               {
2572                 /*  frequency += (e->probability
2573                                   * BLOCK_INFO (e->src)->frequency /
2574                                   REG_BR_PROB_BASE);  */
2575
2576                 sreal tmp = e->probability;
2577                 tmp *= BLOCK_INFO (e->src)->frequency;
2578                 tmp *= real_inv_br_prob_base;
2579                 frequency += tmp;
2580               }
2581
2582           if (cyclic_probability == 0)
2583             {
2584               BLOCK_INFO (bb)->frequency = frequency;
2585             }
2586           else
2587             {
2588               if (cyclic_probability > real_almost_one)
2589                 cyclic_probability = real_almost_one;
2590
2591               /* BLOCK_INFO (bb)->frequency = frequency
2592                                               / (1 - cyclic_probability) */
2593
2594               cyclic_probability = sreal (1) - cyclic_probability;
2595               BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2596             }
2597         }
2598
2599       bitmap_clear_bit (tovisit, bb->index);
2600
2601       e = find_edge (bb, head);
2602       if (e)
2603         {
2604           /* EDGE_INFO (e)->back_edge_prob
2605              = ((e->probability * BLOCK_INFO (bb)->frequency)
2606              / REG_BR_PROB_BASE); */
2607
2608           sreal tmp = e->probability;
2609           tmp *= BLOCK_INFO (bb)->frequency;
2610           EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2611         }
2612
2613       /* Propagate to successor blocks.  */
2614       FOR_EACH_EDGE (e, ei, bb->succs)
2615         if (!(e->flags & EDGE_DFS_BACK)
2616             && BLOCK_INFO (e->dest)->npredecessors)
2617           {
2618             BLOCK_INFO (e->dest)->npredecessors--;
2619             if (!BLOCK_INFO (e->dest)->npredecessors)
2620               {
2621                 if (!nextbb)
2622                   nextbb = e->dest;
2623                 else
2624                   BLOCK_INFO (last)->next = e->dest;
2625
2626                 last = e->dest;
2627               }
2628           }
2629     }
2630 }
2631
2632 /* Estimate frequencies in loops at same nest level.  */
2633
2634 static void
2635 estimate_loops_at_level (struct loop *first_loop)
2636 {
2637   struct loop *loop;
2638
2639   for (loop = first_loop; loop; loop = loop->next)
2640     {
2641       edge e;
2642       basic_block *bbs;
2643       unsigned i;
2644       bitmap tovisit = BITMAP_ALLOC (NULL);
2645
2646       estimate_loops_at_level (loop->inner);
2647
2648       /* Find current loop back edge and mark it.  */
2649       e = loop_latch_edge (loop);
2650       EDGE_INFO (e)->back_edge = 1;
2651
2652       bbs = get_loop_body (loop);
2653       for (i = 0; i < loop->num_nodes; i++)
2654         bitmap_set_bit (tovisit, bbs[i]->index);
2655       free (bbs);
2656       propagate_freq (loop->header, tovisit);
2657       BITMAP_FREE (tovisit);
2658     }
2659 }
2660
2661 /* Propagates frequencies through structure of loops.  */
2662
2663 static void
2664 estimate_loops (void)
2665 {
2666   bitmap tovisit = BITMAP_ALLOC (NULL);
2667   basic_block bb;
2668
2669   /* Start by estimating the frequencies in the loops.  */
2670   if (number_of_loops (cfun) > 1)
2671     estimate_loops_at_level (current_loops->tree_root->inner);
2672
2673   /* Now propagate the frequencies through all the blocks.  */
2674   FOR_ALL_BB_FN (bb, cfun)
2675     {
2676       bitmap_set_bit (tovisit, bb->index);
2677     }
2678   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2679   BITMAP_FREE (tovisit);
2680 }
2681
2682 /* Drop the profile for NODE to guessed, and update its frequency based on
2683    whether it is expected to be hot given the CALL_COUNT.  */
2684
2685 static void
2686 drop_profile (struct cgraph_node *node, gcov_type call_count)
2687 {
2688   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2689   /* In the case where this was called by another function with a
2690      dropped profile, call_count will be 0. Since there are no
2691      non-zero call counts to this function, we don't know for sure
2692      whether it is hot, and therefore it will be marked normal below.  */
2693   bool hot = maybe_hot_count_p (NULL, call_count);
2694
2695   if (dump_file)
2696     fprintf (dump_file,
2697              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2698              node->name (), node->order,
2699              hot ? "Function is hot" : "Function is normal");
2700   /* We only expect to miss profiles for functions that are reached
2701      via non-zero call edges in cases where the function may have
2702      been linked from another module or library (COMDATs and extern
2703      templates). See the comments below for handle_missing_profiles.
2704      Also, only warn in cases where the missing counts exceed the
2705      number of training runs. In certain cases with an execv followed
2706      by a no-return call the profile for the no-return call is not
2707      dumped and there can be a mismatch.  */
2708   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2709       && call_count > profile_info->runs)
2710     {
2711       if (flag_profile_correction)
2712         {
2713           if (dump_file)
2714             fprintf (dump_file,
2715                      "Missing counts for called function %s/%i\n",
2716                      node->name (), node->order);
2717         }
2718       else
2719         warning (0, "Missing counts for called function %s/%i",
2720                  node->name (), node->order);
2721     }
2722
2723   profile_status_for_fn (fn)
2724       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2725   node->frequency
2726       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2727 }
2728
2729 /* In the case of COMDAT routines, multiple object files will contain the same
2730    function and the linker will select one for the binary. In that case
2731    all the other copies from the profile instrument binary will be missing
2732    profile counts. Look for cases where this happened, due to non-zero
2733    call counts going to 0-count functions, and drop the profile to guessed
2734    so that we can use the estimated probabilities and avoid optimizing only
2735    for size.
2736
2737    The other case where the profile may be missing is when the routine
2738    is not going to be emitted to the object file, e.g. for "extern template"
2739    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2740    all other cases of non-zero calls to 0-count functions.  */
2741
2742 void
2743 handle_missing_profiles (void)
2744 {
2745   struct cgraph_node *node;
2746   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2747   vec<struct cgraph_node *> worklist;
2748   worklist.create (64);
2749
2750   /* See if 0 count function has non-0 count callers.  In this case we
2751      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2752   FOR_EACH_DEFINED_FUNCTION (node)
2753     {
2754       struct cgraph_edge *e;
2755       gcov_type call_count = 0;
2756       gcov_type max_tp_first_run = 0;
2757       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2758
2759       if (node->count)
2760         continue;
2761       for (e = node->callers; e; e = e->next_caller)
2762       {
2763         call_count += e->count;
2764
2765         if (e->caller->tp_first_run > max_tp_first_run)
2766           max_tp_first_run = e->caller->tp_first_run;
2767       }
2768
2769       /* If time profile is missing, let assign the maximum that comes from
2770          caller functions.  */
2771       if (!node->tp_first_run && max_tp_first_run)
2772         node->tp_first_run = max_tp_first_run + 1;
2773
2774       if (call_count
2775           && fn && fn->cfg
2776           && (call_count * unlikely_count_fraction >= profile_info->runs))
2777         {
2778           drop_profile (node, call_count);
2779           worklist.safe_push (node);
2780         }
2781     }
2782
2783   /* Propagate the profile dropping to other 0-count COMDATs that are
2784      potentially called by COMDATs we already dropped the profile on.  */
2785   while (worklist.length () > 0)
2786     {
2787       struct cgraph_edge *e;
2788
2789       node = worklist.pop ();
2790       for (e = node->callees; e; e = e->next_caller)
2791         {
2792           struct cgraph_node *callee = e->callee;
2793           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2794
2795           if (callee->count > 0)
2796             continue;
2797           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2798               && profile_status_for_fn (fn) == PROFILE_READ)
2799             {
2800               drop_profile (node, 0);
2801               worklist.safe_push (callee);
2802             }
2803         }
2804     }
2805   worklist.release ();
2806 }
2807
2808 /* Convert counts measured by profile driven feedback to frequencies.
2809    Return nonzero iff there was any nonzero execution count.  */
2810
2811 int
2812 counts_to_freqs (void)
2813 {
2814   gcov_type count_max, true_count_max = 0;
2815   basic_block bb;
2816
2817   /* Don't overwrite the estimated frequencies when the profile for
2818      the function is missing.  We may drop this function PROFILE_GUESSED
2819      later in drop_profile ().  */
2820   if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2821     return 0;
2822
2823   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2824     true_count_max = MAX (bb->count, true_count_max);
2825
2826   count_max = MAX (true_count_max, 1);
2827   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2828     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2829
2830   return true_count_max;
2831 }
2832
2833 /* Return true if function is likely to be expensive, so there is no point to
2834    optimize performance of prologue, epilogue or do inlining at the expense
2835    of code size growth.  THRESHOLD is the limit of number of instructions
2836    function can execute at average to be still considered not expensive.  */
2837
2838 bool
2839 expensive_function_p (int threshold)
2840 {
2841   unsigned int sum = 0;
2842   basic_block bb;
2843   unsigned int limit;
2844
2845   /* We can not compute accurately for large thresholds due to scaled
2846      frequencies.  */
2847   gcc_assert (threshold <= BB_FREQ_MAX);
2848
2849   /* Frequencies are out of range.  This either means that function contains
2850      internal loop executing more than BB_FREQ_MAX times or profile feedback
2851      is available and function has not been executed at all.  */
2852   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2853     return true;
2854
2855   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2856   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2857   FOR_EACH_BB_FN (bb, cfun)
2858     {
2859       rtx_insn *insn;
2860
2861       FOR_BB_INSNS (bb, insn)
2862         if (active_insn_p (insn))
2863           {
2864             sum += bb->frequency;
2865             if (sum > limit)
2866               return true;
2867         }
2868     }
2869
2870   return false;
2871 }
2872
2873 /* Estimate and propagate basic block frequencies using the given branch
2874    probabilities.  If FORCE is true, the frequencies are used to estimate
2875    the counts even when there are already non-zero profile counts.  */
2876
2877 void
2878 estimate_bb_frequencies (bool force)
2879 {
2880   basic_block bb;
2881   sreal freq_max;
2882
2883   if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2884     {
2885       static int real_values_initialized = 0;
2886
2887       if (!real_values_initialized)
2888         {
2889           real_values_initialized = 1;
2890           real_br_prob_base = REG_BR_PROB_BASE;
2891           real_bb_freq_max = BB_FREQ_MAX;
2892           real_one_half = sreal (1, -1);
2893           real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2894           real_almost_one = sreal (1) - real_inv_br_prob_base;
2895         }
2896
2897       mark_dfs_back_edges ();
2898
2899       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2900          REG_BR_PROB_BASE;
2901
2902       /* Set up block info for each basic block.  */
2903       alloc_aux_for_blocks (sizeof (block_info));
2904       alloc_aux_for_edges (sizeof (edge_prob_info));
2905       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2906         {
2907           edge e;
2908           edge_iterator ei;
2909
2910           FOR_EACH_EDGE (e, ei, bb->succs)
2911             {
2912               EDGE_INFO (e)->back_edge_prob = e->probability;
2913               EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2914             }
2915         }
2916
2917       /* First compute frequencies locally for each loop from innermost
2918          to outermost to examine frequencies for back edges.  */
2919       estimate_loops ();
2920
2921       freq_max = 0;
2922       FOR_EACH_BB_FN (bb, cfun)
2923         if (freq_max < BLOCK_INFO (bb)->frequency)
2924           freq_max = BLOCK_INFO (bb)->frequency;
2925
2926       freq_max = real_bb_freq_max / freq_max;
2927       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2928         {
2929           sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2930           bb->frequency = tmp.to_int ();
2931         }
2932
2933       free_aux_for_blocks ();
2934       free_aux_for_edges ();
2935     }
2936   compute_function_frequency ();
2937 }
2938
2939 /* Decide whether function is hot, cold or unlikely executed.  */
2940 void
2941 compute_function_frequency (void)
2942 {
2943   basic_block bb;
2944   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2945
2946   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2947       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2948     node->only_called_at_startup = true;
2949   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2950     node->only_called_at_exit = true;
2951
2952   if (profile_status_for_fn (cfun) != PROFILE_READ)
2953     {
2954       int flags = flags_from_decl_or_type (current_function_decl);
2955       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2956           != NULL)
2957         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2958       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2959                != NULL)
2960         node->frequency = NODE_FREQUENCY_HOT;
2961       else if (flags & ECF_NORETURN)
2962         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2963       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2964         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2965       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2966                || DECL_STATIC_DESTRUCTOR (current_function_decl))
2967         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2968       return;
2969     }
2970
2971   /* Only first time try to drop function into unlikely executed.
2972      After inlining the roundoff errors may confuse us.
2973      Ipa-profile pass will drop functions only called from unlikely
2974      functions to unlikely and that is most of what we care about.  */
2975   if (!cfun->after_inlining)
2976     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2977   FOR_EACH_BB_FN (bb, cfun)
2978     {
2979       if (maybe_hot_bb_p (cfun, bb))
2980         {
2981           node->frequency = NODE_FREQUENCY_HOT;
2982           return;
2983         }
2984       if (!probably_never_executed_bb_p (cfun, bb))
2985         node->frequency = NODE_FREQUENCY_NORMAL;
2986     }
2987 }
2988
2989 /* Build PREDICT_EXPR.  */
2990 tree
2991 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2992 {
2993   tree t = build1 (PREDICT_EXPR, void_type_node,
2994                    build_int_cst (integer_type_node, predictor));
2995   SET_PREDICT_EXPR_OUTCOME (t, taken);
2996   return t;
2997 }
2998
2999 const char *
3000 predictor_name (enum br_predictor predictor)
3001 {
3002   return predictor_info[predictor].name;
3003 }
3004
3005 /* Predict branch probabilities and estimate profile of the tree CFG. */
3006
3007 namespace {
3008
3009 const pass_data pass_data_profile =
3010 {
3011   GIMPLE_PASS, /* type */
3012   "profile_estimate", /* name */
3013   OPTGROUP_NONE, /* optinfo_flags */
3014   TV_BRANCH_PROB, /* tv_id */
3015   PROP_cfg, /* properties_required */
3016   0, /* properties_provided */
3017   0, /* properties_destroyed */
3018   0, /* todo_flags_start */
3019   0, /* todo_flags_finish */
3020 };
3021
3022 class pass_profile : public gimple_opt_pass
3023 {
3024 public:
3025   pass_profile (gcc::context *ctxt)
3026     : gimple_opt_pass (pass_data_profile, ctxt)
3027   {}
3028
3029   /* opt_pass methods: */
3030   virtual bool gate (function *) { return flag_guess_branch_prob; }
3031   virtual unsigned int execute (function *);
3032
3033 }; // class pass_profile
3034
3035 unsigned int
3036 pass_profile::execute (function *fun)
3037 {
3038   unsigned nb_loops;
3039
3040   if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3041     return 0;
3042
3043   loop_optimizer_init (LOOPS_NORMAL);
3044   if (dump_file && (dump_flags & TDF_DETAILS))
3045     flow_loops_dump (dump_file, NULL, 0);
3046
3047   mark_irreducible_loops ();
3048
3049   nb_loops = number_of_loops (fun);
3050   if (nb_loops > 1)
3051     scev_initialize ();
3052
3053   tree_estimate_probability ();
3054
3055   if (nb_loops > 1)
3056     scev_finalize ();
3057
3058   loop_optimizer_finalize ();
3059   if (dump_file && (dump_flags & TDF_DETAILS))
3060     gimple_dump_cfg (dump_file, dump_flags);
3061  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3062     profile_status_for_fn (fun) = PROFILE_GUESSED;
3063   return 0;
3064 }
3065
3066 } // anon namespace
3067
3068 gimple_opt_pass *
3069 make_pass_profile (gcc::context *ctxt)
3070 {
3071   return new pass_profile (ctxt);
3072 }
3073
3074 namespace {
3075
3076 const pass_data pass_data_strip_predict_hints =
3077 {
3078   GIMPLE_PASS, /* type */
3079   "*strip_predict_hints", /* name */
3080   OPTGROUP_NONE, /* optinfo_flags */
3081   TV_BRANCH_PROB, /* tv_id */
3082   PROP_cfg, /* properties_required */
3083   0, /* properties_provided */
3084   0, /* properties_destroyed */
3085   0, /* todo_flags_start */
3086   0, /* todo_flags_finish */
3087 };
3088
3089 class pass_strip_predict_hints : public gimple_opt_pass
3090 {
3091 public:
3092   pass_strip_predict_hints (gcc::context *ctxt)
3093     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3094   {}
3095
3096   /* opt_pass methods: */
3097   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3098   virtual unsigned int execute (function *);
3099
3100 }; // class pass_strip_predict_hints
3101
3102 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3103    we no longer need.  */
3104 unsigned int
3105 pass_strip_predict_hints::execute (function *fun)
3106 {
3107   basic_block bb;
3108   gimple ass_stmt;
3109   tree var;
3110
3111   FOR_EACH_BB_FN (bb, fun)
3112     {
3113       gimple_stmt_iterator bi;
3114       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3115         {
3116           gimple stmt = gsi_stmt (bi);
3117
3118           if (gimple_code (stmt) == GIMPLE_PREDICT)
3119             {
3120               gsi_remove (&bi, true);
3121               continue;
3122             }
3123           else if (is_gimple_call (stmt))
3124             {
3125               tree fndecl = gimple_call_fndecl (stmt);
3126
3127               if ((fndecl
3128                    && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3129                    && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3130                    && gimple_call_num_args (stmt) == 2)
3131                   || (gimple_call_internal_p (stmt)
3132                       && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3133                 {
3134                   var = gimple_call_lhs (stmt);
3135                   if (var)
3136                     {
3137                       ass_stmt
3138                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3139                       gsi_replace (&bi, ass_stmt, true);
3140                     }
3141                   else
3142                     {
3143                       gsi_remove (&bi, true);
3144                       continue;
3145                     }
3146                 }
3147             }
3148           gsi_next (&bi);
3149         }
3150     }
3151   return 0;
3152 }
3153
3154 } // anon namespace
3155
3156 gimple_opt_pass *
3157 make_pass_strip_predict_hints (gcc::context *ctxt)
3158 {
3159   return new pass_strip_predict_hints (ctxt);
3160 }
3161
3162 /* Rebuild function frequencies.  Passes are in general expected to
3163    maintain profile by hand, however in some cases this is not possible:
3164    for example when inlining several functions with loops freuqencies might run
3165    out of scale and thus needs to be recomputed.  */
3166
3167 void
3168 rebuild_frequencies (void)
3169 {
3170   timevar_push (TV_REBUILD_FREQUENCIES);
3171
3172   /* When the max bb count in the function is small, there is a higher
3173      chance that there were truncation errors in the integer scaling
3174      of counts by inlining and other optimizations. This could lead
3175      to incorrect classification of code as being cold when it isn't.
3176      In that case, force the estimation of bb counts/frequencies from the
3177      branch probabilities, rather than computing frequencies from counts,
3178      which may also lead to frequencies incorrectly reduced to 0. There
3179      is less precision in the probabilities, so we only do this for small
3180      max counts.  */
3181   gcov_type count_max = 0;
3182   basic_block bb;
3183   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3184     count_max = MAX (bb->count, count_max);
3185
3186   if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3187       || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3188           && count_max < REG_BR_PROB_BASE/10))
3189     {
3190       loop_optimizer_init (0);
3191       add_noreturn_fake_exit_edges ();
3192       mark_irreducible_loops ();
3193       connect_infinite_loops_to_exit ();
3194       estimate_bb_frequencies (true);
3195       remove_fake_exit_edges ();
3196       loop_optimizer_finalize ();
3197     }
3198   else if (profile_status_for_fn (cfun) == PROFILE_READ)
3199     counts_to_freqs ();
3200   else
3201     gcc_unreachable ();
3202   timevar_pop (TV_REBUILD_FREQUENCIES);
3203 }