1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
21 /* References:
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
32 #include "coretypes.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
45 #include "diagnostic-core.h"
55 #include "tree-ssa-alias.h"
56 #include "internal-fn.h"
57 #include "gimple-expr.h"
60 #include "gimple-iterator.h"
61 #include "gimple-ssa.h"
64 #include "tree-phinodes.h"
65 #include "ssa-iterators.h"
66 #include "tree-ssa-loop-niter.h"
67 #include "tree-ssa-loop.h"
68 #include "tree-pass.h"
69 #include "tree-scalar-evolution.h"
72 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
73 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
74 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
75 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
77 static void combine_predictions_for_insn (rtx, basic_block);
78 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
79 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
80 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
81 static bool can_predict_insn_p (const_rtx);
83 /* Information we hold about each branch predictor.
84 Filled using information from predict.def. */
88 const char *const name; /* Name used in the debugging dumps. */
89 const int hitrate; /* Expected hitrate used by
90 predict_insn_def call. */
94 /* Use given predictor without Dempster-Shafer theory if it matches
95 using first_match heuristics. */
96 #define PRED_FLAG_FIRST_MATCH 1
98 /* Convert a hit rate given in percent to our fixed-point representation. */
100 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
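/* A worked example, assuming REG_BR_PROB_BASE == 10000 as elsewhere in GCC:
   the macro converts a percentage to fixed-point with rounding, so

     HITRATE (50) == (50 * 10000 + 50) / 100 == 5000
     HITRATE (99) == (99 * 10000 + 50) / 100 == 9900  */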
102 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
103 static const struct predictor_info predictor_info[]= {
104 #include "predict.def"
106 /* Upper bound on predictors. */
111 /* Return TRUE if frequency FREQ is considered to be hot. */
114 maybe_hot_frequency_p (struct function *fun, int freq)
116 struct cgraph_node *node = cgraph_node::get (fun->decl);
117 if (!profile_info || !flag_branch_probabilities)
119 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
121 if (node->frequency == NODE_FREQUENCY_HOT)
124 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
126 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
127 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
129 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
131 if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
132 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
137 static gcov_type min_count = -1;
139 /* Determine the threshold for hot BB counts. */
142 get_hot_bb_threshold ()
144 gcov_working_set_t *ws;
147 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
149 min_count = ws->min_counter;
154 /* Set the threshold for hot BB counts. */
157 set_hot_bb_threshold (gcov_type min)
162 /* Return TRUE if profile count COUNT is considered to be hot. */
165 maybe_hot_count_p (struct function *fun, gcov_type count)
167 if (fun && profile_status_for_fn (fun) != PROFILE_READ)
169 /* Code executed at most once is not hot. */
170 if (profile_info->runs >= count)
172 return (count >= get_hot_bb_threshold ());
175 /* Return true in case BB can be CPU intensive and should be optimized
176 for maximal performance. */
179 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
181 gcc_checking_assert (fun);
182 if (profile_status_for_fn (fun) == PROFILE_READ)
183 return maybe_hot_count_p (fun, bb->count);
184 return maybe_hot_frequency_p (fun, bb->frequency);
187 /* Return true if the call can be hot. */
190 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
192 if (profile_info && flag_branch_probabilities
193 && !maybe_hot_count_p (NULL,
196 if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
198 && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
200 if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
202 && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
206 if (edge->caller->frequency == NODE_FREQUENCY_HOT)
208 if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
209 && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
211 if (flag_guess_branch_prob)
213 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
214 || edge->frequency <= (CGRAPH_FREQ_BASE
215 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
221 /* Return true in case edge E can be CPU intensive and should be optimized
222 for maximal performance. */
225 maybe_hot_edge_p (edge e)
227 if (profile_status_for_fn (cfun) == PROFILE_READ)
228 return maybe_hot_count_p (cfun, e->count);
229 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
234 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
235 node frequency, reflect never being executed. */
238 probably_never_executed (struct function *fun,
239 gcov_type count, int frequency)
241 gcc_checking_assert (fun);
242 if (profile_status_for_fn (cfun) == PROFILE_READ)
244 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
245 if (count * unlikely_count_fraction >= profile_info->runs)
249 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
251 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
253 gcov_type computed_count;
254 /* Check for possibility of overflow, in which case entry bb count
255 is large enough to do the division first without losing much precision. */
257 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
260 gcov_type scaled_count
261 = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
262 unlikely_count_fraction;
263 computed_count = RDIV (scaled_count,
264 ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
268 computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
269 ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
270 computed_count *= frequency * unlikely_count_fraction;
272 if (computed_count >= profile_info->runs)
277 if ((!profile_info || !flag_branch_probabilities)
278 && (cgraph_node::get (fun->decl)->frequency
279 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
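/* A worked instance of the count check above, under assumed defaults: with
   UNLIKELY_BB_COUNT_FRACTION == 20 and profile_info->runs == 1000, a count
   is small enough to be considered never executed only when
   count * 20 < 1000, i.e. the block ran fewer than 50 times across the
   1000 training runs.  */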
285 /* Return true in case BB is probably never executed. */
288 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
290 return probably_never_executed (fun, bb->count, bb->frequency);
294 /* Return true in case edge E is probably never executed. */
297 probably_never_executed_edge_p (struct function *fun, edge e)
299 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
302 /* Return true if function should be optimized for size. */
305 cgraph_node::optimize_for_size_p (void)
309 if (frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
315 /* Return true when function FUN should always be optimized for size. */
318 optimize_function_for_size_p (struct function *fun)
322 if (!fun || !fun->decl)
325 cgraph_node *n = cgraph_node::get (fun->decl);
326 return n && n->optimize_for_size_p ();
329 /* Return true when function FUN should always be optimized for speed. */
332 optimize_function_for_speed_p (struct function *fun)
334 return !optimize_function_for_size_p (fun);
337 /* Return TRUE when BB should be optimized for size. */
340 optimize_bb_for_size_p (const_basic_block bb)
342 return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
345 /* Return TRUE when BB should be optimized for speed. */
348 optimize_bb_for_speed_p (const_basic_block bb)
350 return !optimize_bb_for_size_p (bb);
353 /* Return TRUE when edge E should be optimized for size. */
356 optimize_edge_for_size_p (edge e)
358 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
361 /* Return TRUE when edge E should be optimized for speed. */
364 optimize_edge_for_speed_p (edge e)
366 return !optimize_edge_for_size_p (e);
369 /* Return TRUE when the current instruction should be optimized for size. */
372 optimize_insn_for_size_p (void)
374 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
377 /* Return TRUE when the current instruction should be optimized for speed. */
380 optimize_insn_for_speed_p (void)
382 return !optimize_insn_for_size_p ();
385 /* Return TRUE when LOOP should be optimized for size. */
388 optimize_loop_for_size_p (struct loop *loop)
390 return optimize_bb_for_size_p (loop->header);
393 /* Return TRUE when LOOP should be optimized for speed. */
396 optimize_loop_for_speed_p (struct loop *loop)
398 return optimize_bb_for_speed_p (loop->header);
401 /* Return TRUE when LOOP nest should be optimized for speed. */
404 optimize_loop_nest_for_speed_p (struct loop *loop)
406 struct loop *l = loop;
407 if (optimize_loop_for_speed_p (loop))
410 while (l && l != loop)
412 if (optimize_loop_for_speed_p (l))
420 while (l != loop && !l->next)
429 /* Return TRUE when LOOP nest should be optimized for size. */
432 optimize_loop_nest_for_size_p (struct loop *loop)
434 return !optimize_loop_nest_for_speed_p (loop);
437 /* Return true when edge E is likely to be well predictable by branch prediction. */
441 predictable_edge_p (edge e)
443 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
446 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
447 || (REG_BR_PROB_BASE - e->probability
448 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
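/* For intuition, a sketch under an assumed default of
   PARAM_PREDICTABLE_BRANCH_OUTCOME == 2: with REG_BR_PROB_BASE == 10000,
   an edge counts as predictable when its probability is at most 200 or at
   least 9800, i.e. the branch goes one way at least 98% of the time.  */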
454 /* Set RTL expansion for BB profile. */
457 rtl_profile_for_bb (basic_block bb)
459 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
462 /* Set RTL expansion for edge profile. */
465 rtl_profile_for_edge (edge e)
467 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
470 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
472 default_rtl_profile (void)
474 crtl->maybe_hot_insn_p = true;
477 /* Return true if one of the outgoing edges is already predicted by PREDICTOR. */
481 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
484 if (!INSN_P (BB_END (bb)))
486 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
487 if (REG_NOTE_KIND (note) == REG_BR_PRED
488 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
493 /* Structure representing predictions at the tree level. */
495 struct edge_prediction {
496 struct edge_prediction *ep_next;
498 enum br_predictor ep_predictor;
502 /* This map contains for a basic block the list of predictions for the outgoing edges. */
505 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
507 /* Return true if one of the outgoing edges is already predicted by PREDICTOR. */
511 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
513 struct edge_prediction *i;
514 edge_prediction **preds = bb_predictions->get (bb);
519 for (i = *preds; i; i = i->ep_next)
520 if (i->ep_predictor == predictor)
525 /* Return true when the probability of edge is reliable.
527 The profile guessing code is good at predicting branch outcome (i.e.
528 taken/not taken), which it predicts right slightly over 75% of the time.
529 It is however notoriously poor at predicting the probability itself.
530 In general the guessed profile appears a lot flatter (with probabilities closer
531 to 50%) than the reality, so it is a bad idea to use it to drive optimizations
532 such as those disabling dynamic branch prediction for well predictable branches.
535 There are two exceptions - edges leading to noreturn edges and edges
536 predicted by number of iterations heuristics are predicted well. This macro
537 should be able to distinguish those, but at the moment it simply checks for
538 the noreturn heuristic, which is the only one giving probability over 99% or below
539 1%. In the future we might want to propagate reliability information across the
540 CFG if we find this information useful in multiple places. */
542 probability_reliable_p (int prob)
544 return (profile_status_for_fn (cfun) == PROFILE_READ
545 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
546 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
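/* Worked numbers for the test above, assuming REG_BR_PROB_BASE == 10000:
   HITRATE (1) == 100 and HITRATE (99) == 9900, so a guessed profile is
   treated as reliable only when the predicted probability is at most 1%
   or at least 99% - in practice the noreturn-style predictions.  */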
549 /* Same predicate as above, working on edges. */
551 edge_probability_reliable_p (const_edge e)
553 return probability_reliable_p (e->probability);
556 /* Same predicate as edge_probability_reliable_p, working on notes. */
558 br_prob_note_reliable_p (const_rtx note)
560 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
561 return probability_reliable_p (XINT (note, 0));
565 predict_insn (rtx insn, enum br_predictor predictor, int probability)
567 gcc_assert (any_condjump_p (insn));
568 if (!flag_guess_branch_prob)
571 add_reg_note (insn, REG_BR_PRED,
572 gen_rtx_CONCAT (VOIDmode,
573 GEN_INT ((int) predictor),
574 GEN_INT ((int) probability)));
577 /* Predict insn by given predictor. */
580 predict_insn_def (rtx insn, enum br_predictor predictor,
581 enum prediction taken)
583 int probability = predictor_info[(int) predictor].hitrate;
586 probability = REG_BR_PROB_BASE - probability;
588 predict_insn (insn, predictor, probability);
591 /* Predict edge E with given probability if possible. */
594 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
597 last_insn = BB_END (e->src);
599 /* We can store the branch prediction information only about
600 conditional jumps. */
601 if (!any_condjump_p (last_insn))
604 /* We always store probability of branching. */
605 if (e->flags & EDGE_FALLTHRU)
606 probability = REG_BR_PROB_BASE - probability;
608 predict_insn (last_insn, predictor, probability);
611 /* Predict edge E with the given PROBABILITY. */
613 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
615 gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
616 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
618 && flag_guess_branch_prob && optimize)
620 struct edge_prediction *i = XNEW (struct edge_prediction);
621 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
625 i->ep_probability = probability;
626 i->ep_predictor = predictor;
631 /* Remove all predictions on given basic block that are attached to edge E. */
634 remove_predictions_associated_with_edge (edge e)
639 edge_prediction **preds = bb_predictions->get (e->src);
643 struct edge_prediction **prediction = preds;
644 struct edge_prediction *next;
648 if ((*prediction)->ep_edge == e)
650 next = (*prediction)->ep_next;
655 prediction = &((*prediction)->ep_next);
660 /* Clears the list of predictions stored for BB. */
663 clear_bb_predictions (basic_block bb)
665 edge_prediction **preds = bb_predictions->get (bb);
666 struct edge_prediction *pred, *next;
671 for (pred = *preds; pred; pred = next)
673 next = pred->ep_next;
679 /* Return true when we can store prediction on insn INSN.
680 At the moment we represent predictions only on conditional
681 jumps, not at computed jumps or other complicated cases. */
683 can_predict_insn_p (const_rtx insn)
685 return (JUMP_P (insn)
686 && any_condjump_p (insn)
687 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
690 /* Predict edge E by given predictor if possible. */
693 predict_edge_def (edge e, enum br_predictor predictor,
694 enum prediction taken)
696 int probability = predictor_info[(int) predictor].hitrate;
699 probability = REG_BR_PROB_BASE - probability;
701 predict_edge (e, predictor, probability);
704 /* Invert all branch predictions or probability notes in the INSN. This needs
705 to be done each time we invert the condition used by the jump. */
708 invert_br_probabilities (rtx insn)
712 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
713 if (REG_NOTE_KIND (note) == REG_BR_PROB)
714 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
715 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
716 XEXP (XEXP (note, 0), 1)
717 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
720 /* Dump information about the branch prediction to the output file. */
723 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
724 basic_block bb, int used)
732 FOR_EACH_EDGE (e, ei, bb->succs)
733 if (! (e->flags & EDGE_FALLTHRU))
736 fprintf (file, " %s heuristics%s: %.1f%%",
737 predictor_info[predictor].name,
738 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
742 fprintf (file, " exec %"PRId64, bb->count);
745 fprintf (file, " hit %"PRId64, e->count);
746 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
750 fprintf (file, "\n");
753 /* We cannot predict the probabilities of outgoing edges of bb. Set them
754 evenly and hope for the best. */
756 set_even_probabilities (basic_block bb)
762 FOR_EACH_EDGE (e, ei, bb->succs)
763 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
765 FOR_EACH_EDGE (e, ei, bb->succs)
766 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
767 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
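/* For example (an illustrative sketch): a basic block with three non-EH,
   non-fake successors gives each edge probability
   (10000 + 1) / 3 == 3333, assuming REG_BR_PROB_BASE == 10000.  */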
772 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
773 note if not already present. Remove now useless REG_BR_PRED notes. */
776 combine_predictions_for_insn (rtx insn, basic_block bb)
781 int best_probability = PROB_EVEN;
782 enum br_predictor best_predictor = END_PREDICTORS;
783 int combined_probability = REG_BR_PROB_BASE / 2;
785 bool first_match = false;
788 if (!can_predict_insn_p (insn))
790 set_even_probabilities (bb);
794 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
795 pnote = &REG_NOTES (insn);
797 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
800 /* We implement "first match" heuristics and use probability guessed
801 by predictor with smallest index. */
802 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
803 if (REG_NOTE_KIND (note) == REG_BR_PRED)
805 enum br_predictor predictor = ((enum br_predictor)
806 INTVAL (XEXP (XEXP (note, 0), 0)));
807 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
810 if (best_predictor > predictor)
811 best_probability = probability, best_predictor = predictor;
813 d = (combined_probability * probability
814 + (REG_BR_PROB_BASE - combined_probability)
815 * (REG_BR_PROB_BASE - probability));
817 /* Use FP math to avoid overflows of 32-bit integers. */
819 /* If one probability is 0% and one 100%, avoid division by zero. */
820 combined_probability = REG_BR_PROB_BASE / 2;
822 combined_probability = (((double) combined_probability) * probability
823 * REG_BR_PROB_BASE / d + 0.5);
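/* A worked instance of the Dempster-Shafer combination above, assuming
   REG_BR_PROB_BASE == 10000: merging an existing estimate of 7000 (70%)
   with a new prediction of 8000 (80%) gives

     d = 7000 * 8000 + 3000 * 2000 == 62000000
     combined = 7000.0 * 8000 * 10000 / d + 0.5 ~= 9032

   so two agreeing predictors reinforce each other beyond either alone.  */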
826 /* Decide which heuristic to use. In case we didn't match anything,
827 use the no_prediction heuristic; in case we did match, use either
828 first match or Dempster-Shafer theory, depending on the flags. */
830 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
834 dump_prediction (dump_file, PRED_NO_PREDICTION,
835 combined_probability, bb, true);
838 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
840 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
845 combined_probability = best_probability;
846 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
850 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
852 enum br_predictor predictor = ((enum br_predictor)
853 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
854 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
856 dump_prediction (dump_file, predictor, probability, bb,
857 !first_match || best_predictor == predictor);
858 *pnote = XEXP (*pnote, 1);
861 pnote = &XEXP (*pnote, 1);
866 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
868 /* Save the prediction into CFG in case we are seeing a non-degenerate conditional jump. */
870 if (!single_succ_p (bb))
872 BRANCH_EDGE (bb)->probability = combined_probability;
873 FALLTHRU_EDGE (bb)->probability
874 = REG_BR_PROB_BASE - combined_probability;
877 else if (!single_succ_p (bb))
879 int prob = XINT (prob_note, 0);
881 BRANCH_EDGE (bb)->probability = prob;
882 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
885 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
888 /* Combine predictions into single probability and store them into CFG.
889 Remove now useless prediction entries. */
892 combine_predictions_for_bb (basic_block bb)
894 int best_probability = PROB_EVEN;
895 enum br_predictor best_predictor = END_PREDICTORS;
896 int combined_probability = REG_BR_PROB_BASE / 2;
898 bool first_match = false;
900 struct edge_prediction *pred;
902 edge e, first = NULL, second = NULL;
905 FOR_EACH_EDGE (e, ei, bb->succs)
906 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
909 if (first && !second)
915 /* When there is no successor or only one choice, prediction is easy.
917 We are lazy for now and predict only basic blocks with two outgoing
918 edges. It is possible to predict the generic case too, but we have to
919 ignore first match heuristics and do more involved combining. Implement this later. */
924 set_even_probabilities (bb);
925 clear_bb_predictions (bb);
927 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
933 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
935 edge_prediction **preds = bb_predictions->get (bb);
938 /* We implement "first match" heuristics and use probability guessed
939 by predictor with smallest index. */
940 for (pred = *preds; pred; pred = pred->ep_next)
942 enum br_predictor predictor = pred->ep_predictor;
943 int probability = pred->ep_probability;
945 if (pred->ep_edge != first)
946 probability = REG_BR_PROB_BASE - probability;
949 /* First match heuristics would be wildly confused if we predicted both directions. */
951 if (best_predictor > predictor)
953 struct edge_prediction *pred2;
954 int prob = probability;
956 for (pred2 = (struct edge_prediction *) *preds;
957 pred2; pred2 = pred2->ep_next)
958 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
960 int probability2 = pred2->ep_probability;
962 if (pred2->ep_edge != first)
963 probability2 = REG_BR_PROB_BASE - probability2;
965 if ((probability < REG_BR_PROB_BASE / 2) !=
966 (probability2 < REG_BR_PROB_BASE / 2))
969 /* If the same predictor later gave better result, go for it! */
970 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
971 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
975 best_probability = prob, best_predictor = predictor;
978 d = (combined_probability * probability
979 + (REG_BR_PROB_BASE - combined_probability)
980 * (REG_BR_PROB_BASE - probability));
982 /* Use FP math to avoid overflows of 32-bit integers. */
984 /* If one probability is 0% and one 100%, avoid division by zero. */
985 combined_probability = REG_BR_PROB_BASE / 2;
987 combined_probability = (((double) combined_probability)
989 * REG_BR_PROB_BASE / d + 0.5);
993 /* Decide which heuristic to use. In case we didn't match anything,
994 use the no_prediction heuristic; in case we did match, use either
995 first match or Dempster-Shafer theory, depending on the flags. */
997 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
1001 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
1004 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1006 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1011 combined_probability = best_probability;
1012 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1016 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1018 enum br_predictor predictor = pred->ep_predictor;
1019 int probability = pred->ep_probability;
1021 if (pred->ep_edge != EDGE_SUCC (bb, 0))
1022 probability = REG_BR_PROB_BASE - probability;
1023 dump_prediction (dump_file, predictor, probability, bb,
1024 !first_match || best_predictor == predictor);
1027 clear_bb_predictions (bb);
1031 first->probability = combined_probability;
1032 second->probability = REG_BR_PROB_BASE - combined_probability;
1036 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1037 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1039 T1 and T2 should be one of the following cases:
1040 1. T1 is SSA_NAME, T2 is NULL
1041 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1042 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1045 strips_small_constant (tree t1, tree t2)
1052 else if (TREE_CODE (t1) == SSA_NAME)
1054 else if (tree_fits_shwi_p (t1))
1055 value = tree_to_shwi (t1);
1061 else if (tree_fits_shwi_p (t2))
1062 value = tree_to_shwi (t2);
1063 else if (TREE_CODE (t2) == SSA_NAME)
1071 if (value <= 4 && value >= -4)
1077 /* Return the SSA_NAME in T or T's operands.
1078 Return NULL if SSA_NAME cannot be found. */
1081 get_base_value (tree t)
1083 if (TREE_CODE (t) == SSA_NAME)
1086 if (!BINARY_CLASS_P (t))
1089 switch (TREE_OPERAND_LENGTH (t))
1092 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1094 return strips_small_constant (TREE_OPERAND (t, 0),
1095 TREE_OPERAND (t, 1));
1101 /* Check the compare STMT in LOOP. If it compares an induction
1102 variable to a loop invariant, return true, and save
1103 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1104 Otherwise return false and set LOOP_INVARIANT to NULL. */
1107 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1108 tree *loop_invariant,
1109 enum tree_code *compare_code,
1113 tree op0, op1, bound, base;
1115 enum tree_code code;
1118 code = gimple_cond_code (stmt);
1119 *loop_invariant = NULL;
1135 op0 = gimple_cond_lhs (stmt);
1136 op1 = gimple_cond_rhs (stmt);
1138 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1139 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1141 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1143 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1145 if (TREE_CODE (iv0.step) != INTEGER_CST
1146 || TREE_CODE (iv1.step) != INTEGER_CST)
1148 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1149 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1152 if (integer_zerop (iv0.step))
1154 if (code != NE_EXPR && code != EQ_EXPR)
1155 code = invert_tree_comparison (code, false);
1158 if (tree_fits_shwi_p (iv1.step))
1167 if (tree_fits_shwi_p (iv0.step))
1173 if (TREE_CODE (bound) != INTEGER_CST)
1174 bound = get_base_value (bound);
1177 if (TREE_CODE (base) != INTEGER_CST)
1178 base = get_base_value (base);
1182 *loop_invariant = bound;
1183 *compare_code = code;
1185 *loop_iv_base = base;
1189 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1192 expr_coherent_p (tree t1, tree t2)
1195 tree ssa_name_1 = NULL;
1196 tree ssa_name_2 = NULL;
1198 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1199 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1204 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1206 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1209 /* Check to see if t1 is expressed/defined with t2. */
1210 stmt = SSA_NAME_DEF_STMT (t1);
1211 gcc_assert (stmt != NULL);
1212 if (is_gimple_assign (stmt))
1214 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1215 if (ssa_name_1 && ssa_name_1 == t2)
1219 /* Check to see if t2 is expressed/defined with t1. */
1220 stmt = SSA_NAME_DEF_STMT (t2);
1221 gcc_assert (stmt != NULL);
1222 if (is_gimple_assign (stmt))
1224 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1225 if (ssa_name_2 && ssa_name_2 == t1)
1229 /* Compare if t1 and t2's def_stmts are identical. */
1230 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1236 /* Predict branch probability of BB when BB contains a branch that compares
1237 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1238 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1241 for (int i = 0; i < bound; i++) {
1248 In this loop, we will predict the branch inside the loop to be taken. */
1251 predict_iv_comparison (struct loop *loop, basic_block bb,
1252 tree loop_bound_var,
1253 tree loop_iv_base_var,
1254 enum tree_code loop_bound_code,
1255 int loop_bound_step)
1258 tree compare_var, compare_base;
1259 enum tree_code compare_code;
1260 tree compare_step_var;
1264 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1265 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1266 || predicted_by_p (bb, PRED_LOOP_EXIT))
1269 stmt = last_stmt (bb);
1270 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1272 if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1278 /* Find the taken edge. */
1279 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1280 if (then_edge->flags & EDGE_TRUE_VALUE)
1283 /* When comparing an IV to a loop invariant, NE is more likely to be
1284 taken while EQ is more likely to be not-taken. */
1285 if (compare_code == NE_EXPR)
1287 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1290 else if (compare_code == EQ_EXPR)
1292 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1296 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1299 /* If loop bound, base and compare bound are all constants, we can
1300 calculate the probability directly. */
1301 if (tree_fits_shwi_p (loop_bound_var)
1302 && tree_fits_shwi_p (compare_var)
1303 && tree_fits_shwi_p (compare_base))
1306 bool overflow, overall_overflow = false;
1307 widest_int compare_count, tem;
1309 /* (loop_bound - base) / compare_step */
1310 tem = wi::sub (wi::to_widest (loop_bound_var),
1311 wi::to_widest (compare_base), SIGNED, &overflow);
1312 overall_overflow |= overflow;
1313 widest_int loop_count = wi::div_trunc (tem,
1314 wi::to_widest (compare_step_var),
1316 overall_overflow |= overflow;
1318 if (!wi::neg_p (wi::to_widest (compare_step_var))
1319 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1321 /* (loop_bound - compare_bound) / compare_step */
1322 tem = wi::sub (wi::to_widest (loop_bound_var),
1323 wi::to_widest (compare_var), SIGNED, &overflow);
1324 overall_overflow |= overflow;
1325 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1327 overall_overflow |= overflow;
1331 /* (compare_bound - base) / compare_step */
1332 tem = wi::sub (wi::to_widest (compare_var),
1333 wi::to_widest (compare_base), SIGNED, &overflow);
1334 overall_overflow |= overflow;
1335 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1337 overall_overflow |= overflow;
1339 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1341 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1343 if (wi::neg_p (compare_count))
1345 if (wi::neg_p (loop_count))
1347 if (loop_count == 0)
1349 else if (wi::cmps (compare_count, loop_count) == 1)
1350 probability = REG_BR_PROB_BASE;
1353 tem = compare_count * REG_BR_PROB_BASE;
1354 tem = wi::udiv_trunc (tem, loop_count);
1355 probability = tem.to_uhwi ();
1358 if (!overall_overflow)
1359 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
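/* A hypothetical concrete case of the computation above: for
   for (i = 0; i < 100; i++) with an in-loop test if (i < 30), loop_count
   is (100 - 0) / 1 == 100 and compare_count is (30 - 0) / 1 == 30, so the
   then-edge gets probability 30 * REG_BR_PROB_BASE / 100, i.e. 30%.  */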
1364 if (expr_coherent_p (loop_bound_var, compare_var))
1366 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1367 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1368 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1369 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1370 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1371 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1372 else if (loop_bound_code == NE_EXPR)
1374 /* If the loop backedge condition is "(i != bound)", we do
1375 the comparison based on the step of IV:
1376 * step < 0 : backedge condition is like (i > bound)
1377 * step > 0 : backedge condition is like (i < bound) */
1378 gcc_assert (loop_bound_step != 0);
1379 if (loop_bound_step > 0
1380 && (compare_code == LT_EXPR
1381 || compare_code == LE_EXPR))
1382 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1383 else if (loop_bound_step < 0
1384 && (compare_code == GT_EXPR
1385 || compare_code == GE_EXPR))
1386 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1388 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1391 /* The branch is predicted not-taken if loop_bound_code is
1392 the opposite of compare_code. */
1393 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1395 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1398 for (i = s; i < h; i++)
1400 The branch should be predicted taken. */
1401 if (loop_bound_step > 0
1402 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1403 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1404 else if (loop_bound_step < 0
1405 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1406 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1408 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1412 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1413 exits result from short-circuit conditions that will generate an if_tmp. E.g.:
1416 if (foo() || global > 10)
1419 This will be translated into:
1424 if foo() goto BB6 else goto BB5
1426 if global > 10 goto BB6 else goto BB7
1430 iftmp = (PHI 0(BB5), 1(BB6))
1431 if iftmp == 1 goto BB8 else goto BB3
1433 outside of the loop...
1435 The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1436 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1437 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1438 exits to predict them using PRED_LOOP_EXIT. */
1441 predict_extra_loop_exits (edge exit_edge)
1444 bool check_value_one;
1446 tree cmp_rhs, cmp_lhs;
1447 gimple cmp_stmt = last_stmt (exit_edge->src);
1449 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1451 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1452 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1453 if (!TREE_CONSTANT (cmp_rhs)
1454 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1456 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1459 /* If check_value_one is true, only the phi_args with value '1' will lead
1460 to loop exit. Otherwise, only the phi_args with value '0' will lead to loop exit. */
1462 check_value_one = (((integer_onep (cmp_rhs))
1463 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1464 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1466 phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1467 if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1470 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1474 tree val = gimple_phi_arg_def (phi_stmt, i);
1475 edge e = gimple_phi_arg_edge (phi_stmt, i);
1477 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1479 if ((check_value_one ^ integer_onep (val)) == 1)
1481 if (EDGE_COUNT (e->src->succs) != 1)
1483 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1487 FOR_EACH_EDGE (e1, ei, e->src->preds)
1488 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1492 /* Predict edge probabilities by exploiting loop structure. */
1495 predict_loops (void)
1499 /* Try to predict out blocks in a loop that are not part of a natural loop. */
1501 FOR_EACH_LOOP (loop, 0)
1503 basic_block bb, *bbs;
1504 unsigned j, n_exits;
1506 struct tree_niter_desc niter_desc;
1508 struct nb_iter_bound *nb_iter;
1509 enum tree_code loop_bound_code = ERROR_MARK;
1510 tree loop_bound_step = NULL;
1511 tree loop_bound_var = NULL;
1512 tree loop_iv_base = NULL;
1515 exits = get_loop_exit_edges (loop);
1516 n_exits = exits.length ();
1523 FOR_EACH_VEC_ELT (exits, j, ex)
1526 HOST_WIDE_INT nitercst;
1527 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1529 enum br_predictor predictor;
1531 predict_extra_loop_exits (ex);
1533 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1534 niter = niter_desc.niter;
1535 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1536 niter = loop_niter_by_eval (loop, ex);
1538 if (TREE_CODE (niter) == INTEGER_CST)
1540 if (tree_fits_uhwi_p (niter)
1542 && compare_tree_int (niter, max - 1) == -1)
1543 nitercst = tree_to_uhwi (niter) + 1;
1546 predictor = PRED_LOOP_ITERATIONS;
1548 /* If we have just one exit and we can derive some information about
1549 the number of iterations of the loop from the statements inside
1550 the loop, use it to predict this exit. */
1551 else if (n_exits == 1)
1553 nitercst = estimated_stmt_executions_int (loop);
1559 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1564 /* If the prediction for number of iterations is zero, do not
1565 predict the exit edges. */
1569 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1570 predict_edge (ex, predictor, probability);
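/* E.g. (illustrative only): a loop whose exit test is known to run 10
   times yields nitercst == 10, so the exit edge is predicted with
   probability (REG_BR_PROB_BASE + 5) / 10 - roughly 10% per iteration.  */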
1574 /* Find information about loop bound variables. */
1575 for (nb_iter = loop->bounds; nb_iter;
1576 nb_iter = nb_iter->next)
1578 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1580 stmt = nb_iter->stmt;
1583 if (!stmt && last_stmt (loop->header)
1584 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1585 stmt = last_stmt (loop->header);
1587 is_comparison_with_loop_invariant_p (stmt, loop,
1593 bbs = get_loop_body (loop);
1595 for (j = 0; j < loop->num_nodes; j++)
1597 int header_found = 0;
1603 /* Bypass loop heuristics on continue statement. These
1604 statements construct loops via "non-loop" constructs
1605 in the source language and are better to be handled separately. */
1607 if (predicted_by_p (bb, PRED_CONTINUE))
1610 /* Loop branch heuristics - predict an edge back to a
1611 loop's head as taken. */
1612 if (bb == loop->latch)
1614 e = find_edge (loop->latch, loop->header);
1618 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1622 /* Loop exit heuristics - predict as not taken an edge exiting the loop
1623 if the conditional has no loop header successors. */
1625 /* If we already used more reliable loop exit predictors, do not
1626 bother with PRED_LOOP_EXIT. */
1627 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1628 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1630 /* For a loop with many exits we don't want to predict all exits
1631 with a pretty large probability, because if all exits are
1632 considered in a row, the loop would be predicted to iterate
1633 almost never. The code to divide probability by the number of
1634 exits is very rough. It should compute the number of exits
1635 taken in each path through the function (not the overall number
1636 of exits that might be a lot higher for loops with wide switch
1637 statements in them) and compute the n-th root.
1639 We limit the minimal probability by 2% to avoid
1640 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1641 as this was causing a regression in the perl benchmark containing such a wide loop. */
1644 int probability = ((REG_BR_PROB_BASE
1645 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1647 if (probability < HITRATE (2))
1648 probability = HITRATE (2);
1649 FOR_EACH_EDGE (e, ei, bb->succs)
1650 if (e->dest->index < NUM_FIXED_BLOCKS
1651 || !flow_bb_inside_loop_p (loop, e->dest))
1652 predict_edge (e, PRED_LOOP_EXIT, probability);
1655 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1657 tree_to_shwi (loop_bound_step));
1660 /* Free basic blocks from get_loop_body. */
1665 /* Attempt to predict probabilities of BB outgoing edges using local properties. */
1668 bb_estimate_probability_locally (basic_block bb)
1670 rtx last_insn = BB_END (bb);
1673 if (! can_predict_insn_p (last_insn))
1675 cond = get_condition (last_insn, NULL, false, false);
1679 /* Try "pointer heuristic."
1680 A comparison ptr == 0 is predicted as false.
1681 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1682 if (COMPARISON_P (cond)
1683 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1684 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1686 if (GET_CODE (cond) == EQ)
1687 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1688 else if (GET_CODE (cond) == NE)
1689 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1693 /* Try "opcode heuristic."
1694 EQ tests are usually false and NE tests are usually true. Also,
1695 most quantities are positive, so we can make the appropriate guesses
1696 about signed comparisons against zero. */
1697 switch (GET_CODE (cond))
1700 /* Unconditional branch. */
1701 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1702 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1707 /* Floating point comparisons appear to behave in a very
1708 unpredictable way because of the special role of = tests in FP code. */
1710 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1712 /* Comparisons with 0 are often used for booleans and there is
1713 nothing useful to predict about them. */
1714 else if (XEXP (cond, 1) == const0_rtx
1715 || XEXP (cond, 0) == const0_rtx)
1718 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1723 /* Floating point comparisons appear to behave in a very
1724 unpredictable way because of the special role of = tests in FP code. */
1726 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1728 /* Comparisons with 0 are often used for booleans and there is
1729 nothing useful to predict about them. */
1730 else if (XEXP (cond, 1) == const0_rtx
1731 || XEXP (cond, 0) == const0_rtx)
1734 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1738 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1742 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1747 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1748 || XEXP (cond, 1) == constm1_rtx)
1749 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1754 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1755 || XEXP (cond, 1) == constm1_rtx)
1756 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1764 /* Set edge->probability for each successor edge of BB. */
1766 guess_outgoing_edge_probabilities (basic_block bb)
1768 bb_estimate_probability_locally (bb);
1769 combine_predictions_for_insn (BB_END (bb), bb);
1772 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1774 /* Helper function for expr_expected_value. */
1777 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1778 tree op1, bitmap visited, enum br_predictor *predictor)
1783 *predictor = PRED_UNCONDITIONAL;
1785 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1787 if (TREE_CONSTANT (op0))
1790 if (code != SSA_NAME)
1793 def = SSA_NAME_DEF_STMT (op0);
1795 /* If we were already here, break the infinite cycle. */
1796 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1799 if (gimple_code (def) == GIMPLE_PHI)
1801 /* All the arguments of the PHI node must have the same expected constant value. */
1803 int i, n = gimple_phi_num_args (def);
1804 tree val = NULL, new_val;
1806 for (i = 0; i < n; i++)
1808 tree arg = PHI_ARG_DEF (def, i);
1809 enum br_predictor predictor2;
1811 /* If this PHI has itself as an argument, we cannot
1812 determine the string length of this argument. However,
1813 if we can find an expected constant value for the other
1814 PHI args then we can still be sure that this is
1815 likely a constant. So be optimistic and just
1816 continue with the next argument. */
1817 if (arg == PHI_RESULT (def))
1820 new_val = expr_expected_value (arg, visited, &predictor2);
1822 /* It is difficult to combine value predictors. Simply assume
1823 that the later predictor is weaker and take its prediction. */
1824 if (predictor && *predictor < predictor2)
1825 *predictor = predictor2;
1830 else if (!operand_equal_p (val, new_val, false))
1835 if (is_gimple_assign (def))
1837 if (gimple_assign_lhs (def) != op0)
1840 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1841 gimple_assign_rhs1 (def),
1842 gimple_assign_rhs_code (def),
1843 gimple_assign_rhs2 (def),
1844 visited, predictor);
1847 if (is_gimple_call (def))
1849 tree decl = gimple_call_fndecl (def);
1852 if (gimple_call_internal_p (def)
1853 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1855 gcc_assert (gimple_call_num_args (def) == 3);
1856 tree val = gimple_call_arg (def, 0);
1857 if (TREE_CONSTANT (val))
1861 *predictor = PRED_BUILTIN_EXPECT;
1862 tree val2 = gimple_call_arg (def, 2);
1863 gcc_assert (TREE_CODE (val2) == INTEGER_CST
1864 && tree_fits_uhwi_p (val2)
1865 && tree_to_uhwi (val2) < END_PREDICTORS);
1866 *predictor = (enum br_predictor) tree_to_uhwi (val2);
1868 return gimple_call_arg (def, 1);
1872 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1873 switch (DECL_FUNCTION_CODE (decl))
1875 case BUILT_IN_EXPECT:
1878 if (gimple_call_num_args (def) != 2)
1880 val = gimple_call_arg (def, 0);
1881 if (TREE_CONSTANT (val))
1884 *predictor = PRED_BUILTIN_EXPECT;
1885 return gimple_call_arg (def, 1);
1888 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1889 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1890 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1891 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1892 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1893 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1894 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1895 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1896 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1897 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1898 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1899 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1900 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1901 /* Assume that any given atomic operation has low contention,
1902 and thus the compare-and-swap operation succeeds. */
1904 *predictor = PRED_COMPARE_AND_SWAP;
1905 return boolean_true_node;
1912 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1915 enum br_predictor predictor2;
1916 op0 = expr_expected_value (op0, visited, predictor);
1919 op1 = expr_expected_value (op1, visited, &predictor2);
1920 if (predictor && *predictor < predictor2)
1921 *predictor = predictor2;
1924 res = fold_build2 (code, type, op0, op1);
1925 if (TREE_CONSTANT (res))
1929 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1932 op0 = expr_expected_value (op0, visited, predictor);
1935 res = fold_build1 (code, type, op0);
1936 if (TREE_CONSTANT (res))
1943 /* Return the constant EXPR will likely have at execution time, NULL if unknown.
1944 The function is used by the builtin_expect branch predictor, so the evidence
1945 must come from this construct and additional possible constant folding.
1947 We may want to implement more involved value guessing (such as value range
1948 propagation based prediction), but such tricks shall go to a new implementation. */
1952 expr_expected_value (tree expr, bitmap visited,
1953 enum br_predictor *predictor)
1955 enum tree_code code;
1958 if (TREE_CONSTANT (expr))
1961 *predictor = PRED_UNCONDITIONAL;
1965 extract_ops_from_tree (expr, &code, &op0, &op1);
1966 return expr_expected_value_1 (TREE_TYPE (expr),
1967 op0, code, op1, visited, predictor);
1970 /* Predict using the opcode of the last statement in the basic block. */
1972 tree_predict_by_opcode (basic_block bb)
1974 gimple stmt = last_stmt (bb);
1982 enum br_predictor predictor;
1984 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1986 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1987 if (then_edge->flags & EDGE_TRUE_VALUE)
1989 op0 = gimple_cond_lhs (stmt);
1990 op1 = gimple_cond_rhs (stmt);
1991 cmp = gimple_cond_code (stmt);
1992 type = TREE_TYPE (op0);
1993 visited = BITMAP_ALLOC (NULL);
1994 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1996 BITMAP_FREE (visited);
1997 if (val && TREE_CODE (val) == INTEGER_CST)
1999 if (predictor == PRED_BUILTIN_EXPECT)
2001 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2003 gcc_assert (percent >= 0 && percent <= 100);
2004 if (integer_zerop (val))
2005 percent = 100 - percent;
2006 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2009 predict_edge (then_edge, predictor,
2010 integer_zerop (val) ? NOT_TAKEN : TAKEN);
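/* At the source level the construct feeding this predictor is e.g.

     if (__builtin_expect (ptr == NULL, 0))
       error_path ();

   where error_path is a hypothetical stand-in. Assuming the default
   BUILTIN_EXPECT_PROBABILITY of 90, the then-edge above is predicted
   with HITRATE (100 - 90), i.e. taken about 10% of the time.  */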
2012 /* Try "pointer heuristic."
2013 A comparison ptr == 0 is predicted as false.
2014 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2015 if (POINTER_TYPE_P (type))
2018 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2019 else if (cmp == NE_EXPR)
2020 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2024 /* Try "opcode heuristic."
2025 EQ tests are usually false and NE tests are usually true. Also,
2026 most quantities are positive, so we can make the appropriate guesses
2027 about signed comparisons against zero. */
2032 /* Floating point comparisons appear to behave in a very
2033 unpredictable way because of the special role of = tests in FP code. */
2035 if (FLOAT_TYPE_P (type))
2037 /* Comparisons with 0 are often used for booleans and there is
2038 nothing useful to predict about them. */
2039 else if (integer_zerop (op0) || integer_zerop (op1))
2042 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2047 /* Floating point comparisons appear to behave in a very
2048 unpredictable way because of the special role of = tests in FP code. */
2050 if (FLOAT_TYPE_P (type))
2052 /* Comparisons with 0 are often used for booleans and there is
2053 nothing useful to predict about them. */
2054 else if (integer_zerop (op0)
2055 || integer_zerop (op1))
2058 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2062 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2065 case UNORDERED_EXPR:
2066 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2071 if (integer_zerop (op1)
2072 || integer_onep (op1)
2073 || integer_all_onesp (op1)
2076 || real_minus_onep (op1))
2077 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2082 if (integer_zerop (op1)
2083 || integer_onep (op1)
2084 || integer_all_onesp (op1)
2087 || real_minus_onep (op1))
2088 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2096 /* Try to guess whether the return value means an error code. */
2098 static enum br_predictor
2099 return_prediction (tree val, enum prediction *prediction)
2103 return PRED_NO_PREDICTION;
2104 /* Different heuristics for pointers and scalars. */
2105 if (POINTER_TYPE_P (TREE_TYPE (val)))
2107 /* NULL is usually not returned. */
2108 if (integer_zerop (val))
2110 *prediction = NOT_TAKEN;
2111 return PRED_NULL_RETURN;
2114 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2116 /* Negative return values are often used to indicate errors. */
2118 if (TREE_CODE (val) == INTEGER_CST
2119 && tree_int_cst_sgn (val) < 0)
2121 *prediction = NOT_TAKEN;
2122 return PRED_NEGATIVE_RETURN;
2124 /* Constant return values seem to be commonly taken.
2125 Zero/one often represent booleans, so exclude them from the heuristics. */
2127 if (TREE_CONSTANT (val)
2128 && (!integer_zerop (val) && !integer_onep (val)))
2130 *prediction = TAKEN;
2131 return PRED_CONST_RETURN;
2134 return PRED_NO_PREDICTION;
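/* Hypothetical returns classified by the heuristics above:

     return NULL;   -> PRED_NULL_RETURN, predicted not taken
     return -1;     -> PRED_NEGATIVE_RETURN, predicted not taken
     return 42;     -> PRED_CONST_RETURN, predicted taken

   while return 0; and return 1; are left unpredicted, since zero and one
   usually represent booleans.  */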
2137 /* Find the basic block with the return expression and look for a possible
2138 return value, trying to apply RETURN_PREDICTION heuristics. */
2140 apply_return_prediction (void)
2142 gimple return_stmt = NULL;
2146 int phi_num_args, i;
2147 enum br_predictor pred;
2148 enum prediction direction;
2151 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2153 return_stmt = last_stmt (e->src);
2155 && gimple_code (return_stmt) == GIMPLE_RETURN)
2160 return_val = gimple_return_retval (return_stmt);
2163 if (TREE_CODE (return_val) != SSA_NAME
2164 || !SSA_NAME_DEF_STMT (return_val)
2165 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2167 phi = SSA_NAME_DEF_STMT (return_val);
2168 phi_num_args = gimple_phi_num_args (phi);
2169 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2171 /* Avoid the degenerate case where all return values from the function
2172 belong to the same category (i.e. they are all positive constants),
2173 so we can hardly say anything about them. */
2174 for (i = 1; i < phi_num_args; i++)
2175 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2177 if (i != phi_num_args)
2178 for (i = 0; i < phi_num_args; i++)
2180 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2181 if (pred != PRED_NO_PREDICTION)
2182 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2187 /* Look for basic blocks that contain unlikely-to-happen events
2188 (such as noreturn calls) and mark all paths leading to execution
2189 of these basic blocks as unlikely. */
2192 tree_bb_level_predictions (void)
2195 bool has_return_edges = false;
2199 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2200 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2202 has_return_edges = true;
2206 apply_return_prediction ();
2208 FOR_EACH_BB_FN (bb, cfun)
2210 gimple_stmt_iterator gsi;
2212 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2214 gimple stmt = gsi_stmt (gsi);
2217 if (is_gimple_call (stmt))
2219 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2220 && has_return_edges)
2221 predict_paths_leading_to (bb, PRED_NORETURN,
2223 decl = gimple_call_fndecl (stmt);
2225 && lookup_attribute ("cold",
2226 DECL_ATTRIBUTES (decl)))
2227 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2230 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2232 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2233 gimple_predict_outcome (stmt));
2234 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2235 hints to callers. */
2241 #ifdef ENABLE_CHECKING
2243 /* Callback for hash_map::traverse, asserts that the pointer map is empty. */
2247 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2250 gcc_assert (!value);
2255 /* Predict branch probabilities and estimate profile for basic block BB. */
2258 tree_estimate_probability_bb (basic_block bb)
2264 FOR_EACH_EDGE (e, ei, bb->succs)
2266 /* Predict edges to user labels with attributes. */
2267 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2269 gimple_stmt_iterator gi;
2270 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2272 gimple stmt = gsi_stmt (gi);
2275 if (gimple_code (stmt) != GIMPLE_LABEL)
2277 decl = gimple_label_label (stmt);
2278 if (DECL_ARTIFICIAL (decl))
2281 /* Finally, we have a user-defined label. */
2282 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2283 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2284 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2285 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2289 /* Predict early returns to be probable, as we've already taken
2290 care of error returns and other cases are often used for
2291 fast paths through the function.
2293 Since we've already removed the return statements, we are
2294 looking for a CFG like:
2304 if (e->dest != bb->next_bb
2305 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2306 && single_succ_p (e->dest)
2307 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2308 && (last = last_stmt (e->dest)) != NULL
2309 && gimple_code (last) == GIMPLE_RETURN)
2314 if (single_succ_p (bb))
2316 FOR_EACH_EDGE (e1, ei1, bb->preds)
2317 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2318 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2319 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2320 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2323 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2324 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2325 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2326 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2329 /* Look for a block we are guarding (i.e. we dominate it,
2330 but it doesn't postdominate us). */
2331 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2332 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2333 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2335 gimple_stmt_iterator bi;
2337 /* The call heuristic claims that a guarded function call
2338 is improbable. This is because such calls are often used
2339 to signal exceptional situations such as printing error messages. */
2341 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2344 gimple stmt = gsi_stmt (bi);
2345 if (is_gimple_call (stmt)
2346 /* Constant and pure calls are hardly used to signal
2347 something exceptional. */
2348 && gimple_has_side_effects (stmt))
2350 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2356 tree_predict_by_opcode (bb);
2359 /* Predict branch probabilities and estimate profile of the tree CFG.
2360 This function can be called from the loop optimizers to recompute
2361 the profile information. */
2364 tree_estimate_probability (void)
2368 add_noreturn_fake_exit_edges ();
2369 connect_infinite_loops_to_exit ();
2370 /* We use loop_niter_by_eval, which requires that the loops have preheaders. */
2372 create_preheaders (CP_SIMPLE_PREHEADERS);
2373 calculate_dominance_info (CDI_POST_DOMINATORS);
2375 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2376 tree_bb_level_predictions ();
2377 record_loop_exits ();
2379 if (number_of_loops (cfun) > 1)
2382 FOR_EACH_BB_FN (bb, cfun)
2383 tree_estimate_probability_bb (bb);
2385 FOR_EACH_BB_FN (bb, cfun)
2386 combine_predictions_for_bb (bb);
2388 #ifdef ENABLE_CHECKING
2389 bb_predictions->traverse<void *, assert_is_empty> (NULL);
#endif
2391 delete bb_predictions;
2392 bb_predictions = NULL;
2394 estimate_bb_frequencies (false);
2395 free_dominance_info (CDI_POST_DOMINATORS);
2396 remove_fake_exit_edges ();
2399 /* Predict, using predictor PRED, the edges into successors of CUR whose
2400 sources are not postdominated by BB, and recurse to all postdominators. */
2403 predict_paths_for_bb (basic_block cur, basic_block bb,
2404 enum br_predictor pred,
2405 enum prediction taken,
2412 /* We are looking for all edges forming the edge cut induced by the
2413 set of all blocks postdominated by BB. */
2414 FOR_EACH_EDGE (e, ei, cur->preds)
2415 if (e->src->index >= NUM_FIXED_BLOCKS
2416 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2422 /* Ignore fake and EH edges; we predict them as not taken anyway. */
2423 if (e->flags & (EDGE_EH | EDGE_FAKE))
2425 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2427 /* See if there is an edge from e->src that is not abnormal
2428 and does not lead to BB. */
2429 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2431 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2432 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2438 /* If there is a non-abnormal path leaving e->src, predict the edge
2439 using the predictor. Otherwise we need to look for paths leading to e->src.
2442 The second case may lead to an infinite loop when we are predicting
2443 regions that are only reachable by abnormal edges. We simply
2444 prevent visiting a given BB twice. */
2446 predict_edge_def (e, pred, taken);
2447 else if (bitmap_set_bit (visited, e->src->index))
2448 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2450 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2452 son = next_dom_son (CDI_POST_DOMINATORS, son))
2453 predict_paths_for_bb (son, bb, pred, taken, visited);
2456 /* Set branch probabilities along all paths leading to BB according to
predictor PRED and the expected outcome TAKEN. */
2460 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2461 enum prediction taken)
2463 bitmap visited = BITMAP_ALLOC (NULL);
2464 predict_paths_for_bb (bb, bb, pred, taken, visited);
2465 BITMAP_FREE (visited);
2468 /* Like predict_paths_leading_to, but takes an edge instead of a basic block. */
2471 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2472 enum prediction taken)
2474 bool has_nonloop_edge = false;
2478 basic_block bb = e->src;
2479 FOR_EACH_EDGE (e2, ei, bb->succs)
2480 if (e2->dest != e->src && e2->dest != e->dest
2481 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2482 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2484 has_nonloop_edge = true;
2487 if (!has_nonloop_edge)
2489 bitmap visited = BITMAP_ALLOC (NULL);
2490 predict_paths_for_bb (bb, bb, pred, taken, visited);
2491 BITMAP_FREE (visited);
2494 predict_edge_def (e, pred, taken);
2497 /* This is used to carry information about basic blocks. It is
2498 attached to the AUX field of the standard CFG block. */
2500 typedef struct block_info_def
{
2502 /* Estimated frequency of execution of the basic block. */
  sreal frequency;
2505 /* To keep the queue of basic blocks to process. */
  basic_block next;
2508 /* Number of predecessors we need to visit first. */
  int npredecessors;
} *block_info;
2512 /* Similar information for edges. */
2513 typedef struct edge_info_def
{
2515 /* For a back edge, the probability that the edge is taken provided the
2516 loop header is reached. The estimated number of iterations of the loop
2517 can then be computed as 1 / (1 - back_edge_prob). */
2518 sreal back_edge_prob;
2519 /* True if the edge is a back edge in the natural loop. */
2520 unsigned int back_edge:1;
} *edge_info;
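/* A worked example (illustrative numbers, not taken from predict.def):
   a back edge with back_edge_prob == 0.9, i.e. the loop continues nine
   times out of ten, gives an estimated 1 / (1 - 0.9) == 10 iterations.  */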
2523 #define BLOCK_INFO(B) ((block_info) (B)->aux)
2524 #define EDGE_INFO(E) ((edge_info) (E)->aux)
2526 /* Helper function for estimate_bb_frequencies.
2527 Propagate the frequencies in blocks marked in
2528 TOVISIT, starting in HEAD. */
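/* Sketch of the algorithm as implemented below: every block in TOVISIT
   first gets an npredecessors count of its still-to-be-visited,
   non-back-edge predecessors; blocks are then processed in topological
   order, each becoming ready once that count drops to zero.  Back edges
   instead accumulate into cyclic_probability, which scales the loop
   header's frequency by 1 / (1 - cyclic_probability).  */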
2531 propagate_freq (basic_block head, bitmap tovisit)
2540 /* For each basic block we need to visit, count the number of its
2541 predecessors that we need to visit first. */
2542 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2547 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2549 FOR_EACH_EDGE (e, ei, bb->preds)
2551 bool visit = bitmap_bit_p (tovisit, e->src->index);
2553 if (visit && !(e->flags & EDGE_DFS_BACK))
2555 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2557 "Irreducible region hit, ignoring edge to %i->%i\n",
2558 e->src->index, bb->index);
2560 BLOCK_INFO (bb)->npredecessors = count;
2561 /* When the function never returns, we will never process the exit block. */
2562 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2563 bb->count = bb->frequency = 0;
2566 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2568 for (bb = head; bb; bb = nextbb)
2571 sreal cyclic_probability, frequency;
2573 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2574 memcpy (&frequency, &real_zero, sizeof (real_zero));
2576 nextbb = BLOCK_INFO (bb)->next;
2577 BLOCK_INFO (bb)->next = NULL;
2579 /* Compute frequency of basic block. */
2582 #ifdef ENABLE_CHECKING
2583 FOR_EACH_EDGE (e, ei, bb->preds)
2584 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2585 || (e->flags & EDGE_DFS_BACK));
#endif
2588 FOR_EACH_EDGE (e, ei, bb->preds)
2589 if (EDGE_INFO (e)->back_edge)
2591 sreal_add (&cyclic_probability, &cyclic_probability,
2592 &EDGE_INFO (e)->back_edge_prob);
2594 else if (!(e->flags & EDGE_DFS_BACK))
2598 /* frequency += (e->probability
2599 * BLOCK_INFO (e->src)->frequency /
2600 REG_BR_PROB_BASE); */
2602 sreal_init (&tmp, e->probability, 0);
2603 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2604 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2605 sreal_add (&frequency, &frequency, &tmp);
2608 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2610 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2611 sizeof (frequency));
2615 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2617 memcpy (&cyclic_probability, &real_almost_one,
2618 sizeof (real_almost_one));
2621 /* BLOCK_INFO (bb)->frequency = frequency
2622 / (1 - cyclic_probability) */
2624 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2625 sreal_div (&BLOCK_INFO (bb)->frequency,
2626 &frequency, &cyclic_probability);
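/* For example (illustrative): if the acyclic edges contribute frequency 1
   and cyclic_probability is 0.75, the header's frequency becomes
   1 / (1 - 0.75) == 4, i.e. the loop is expected to iterate four times
   per entry.  */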
2630 bitmap_clear_bit (tovisit, bb->index);
2632 e = find_edge (bb, head);
2637 /* EDGE_INFO (e)->back_edge_prob
2638 = ((e->probability * BLOCK_INFO (bb)->frequency)
2639 / REG_BR_PROB_BASE); */
2641 sreal_init (&tmp, e->probability, 0);
2642 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2643 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2644 &tmp, &real_inv_br_prob_base);
2647 /* Propagate to successor blocks. */
2648 FOR_EACH_EDGE (e, ei, bb->succs)
2649 if (!(e->flags & EDGE_DFS_BACK)
2650 && BLOCK_INFO (e->dest)->npredecessors)
2652 BLOCK_INFO (e->dest)->npredecessors--;
2653 if (!BLOCK_INFO (e->dest)->npredecessors)
2658 BLOCK_INFO (last)->next = e->dest;
2666 /* Estimate frequencies in loops at the same nest level. */
2669 estimate_loops_at_level (struct loop *first_loop)
2673 for (loop = first_loop; loop; loop = loop->next)
2678 bitmap tovisit = BITMAP_ALLOC (NULL);
2680 estimate_loops_at_level (loop->inner);
2682 /* Find current loop back edge and mark it. */
2683 e = loop_latch_edge (loop);
2684 EDGE_INFO (e)->back_edge = 1;
2686 bbs = get_loop_body (loop);
2687 for (i = 0; i < loop->num_nodes; i++)
2688 bitmap_set_bit (tovisit, bbs[i]->index);
2690 propagate_freq (loop->header, tovisit);
2691 BITMAP_FREE (tovisit);
2695 /* Propagates frequencies through structure of loops. */
2698 estimate_loops (void)
2700 bitmap tovisit = BITMAP_ALLOC (NULL);
2703 /* Start by estimating the frequencies in the loops. */
2704 if (number_of_loops (cfun) > 1)
2705 estimate_loops_at_level (current_loops->tree_root->inner);
2707 /* Now propagate the frequencies through all the blocks. */
2708 FOR_ALL_BB_FN (bb, cfun)
2710 bitmap_set_bit (tovisit, bb->index);
2712 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2713 BITMAP_FREE (tovisit);
2716 /* Drop the profile for NODE to guessed, and update its frequency based on
2717 whether it is expected to be hot given the CALL_COUNT. */
2720 drop_profile (struct cgraph_node *node, gcov_type call_count)
2722 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2723 /* In the case where this was called by another function with a
2724 dropped profile, call_count will be 0. Since there are no
2725 non-zero call counts to this function, we don't know for sure
2726 whether it is hot, and therefore it will be marked normal below. */
2727 bool hot = maybe_hot_count_p (NULL, call_count);
2731 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2732 node->name (), node->order,
2733 hot ? "Function is hot" : "Function is normal");
2734 /* We only expect to miss profiles for functions that are reached
2735 via non-zero call edges in cases where the function may have
2736 been linked from another module or library (COMDATs and extern
2737 templates). See the comments below for handle_missing_profiles.
2738 Also, only warn in cases where the missing counts exceed the
2739 number of training runs. In certain cases with an execv followed
2740 by a no-return call the profile for the no-return call is not
2741 dumped and there can be a mismatch. */
2742 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2743 && call_count > profile_info->runs)
2745 if (flag_profile_correction)
2749 "Missing counts for called function %s/%i\n",
2750 node->name (), node->order);
2753 warning (0, "Missing counts for called function %s/%i",
2754 node->name (), node->order);
2757 profile_status_for_fn (fn)
2758 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2760 node->frequency = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2763 /* In the case of COMDAT routines, multiple object files will contain the same
2764 function and the linker will select one for the binary. In that case
2765 all the other copies from the profile-instrumented binary will be missing
2766 profile counts. Look for cases where this happened, due to non-zero
2767 call counts going to 0-count functions, and drop the profile to guessed
2768 so that we can use the estimated probabilities and avoid optimizing only for size.
2771 The other case where the profile may be missing is when the routine
2772 is not going to be emitted to the object file, e.g. for "extern template"
2773 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2774 all other cases of non-zero calls to 0-count functions. */
2777 handle_missing_profiles (void)
2779 struct cgraph_node *node;
2780 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2781 vec<struct cgraph_node *> worklist;
2782 worklist.create (64);
2784 /* See if a 0-count function has non-0 count callers. In that case we
2785 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2786 FOR_EACH_DEFINED_FUNCTION (node)
2788 struct cgraph_edge *e;
2789 gcov_type call_count = 0;
2790 gcov_type max_tp_first_run = 0;
2791 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2795 for (e = node->callers; e; e = e->next_caller)
2797 call_count += e->count;
2799 if (e->caller->tp_first_run > max_tp_first_run)
2800 max_tp_first_run = e->caller->tp_first_run;
2803 /* If the time profile is missing, assign the maximum that comes from
2804 the caller functions. */
2805 if (!node->tp_first_run && max_tp_first_run)
2806 node->tp_first_run = max_tp_first_run + 1;
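/* Drop the profile only when the lost calls are significant relative to
   the training set: the test below fires when the function averaged at
   least one call per UNLIKELY_BB_COUNT_FRACTION training runs, judging
   by its callers' counts.  */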
2810 && (call_count * unlikely_count_fraction >= profile_info->runs))
2812 drop_profile (node, call_count);
2813 worklist.safe_push (node);
2817 /* Propagate the profile dropping to other 0-count COMDATs that are
2818 potentially called by COMDATs we already dropped the profile on. */
2819 while (worklist.length () > 0)
2821 struct cgraph_edge *e;
2823 node = worklist.pop ();
2824 for (e = node->callees; e; e = e->next_callee)
2826 struct cgraph_node *callee = e->callee;
2827 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2829 if (callee->count > 0)
2831 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2832 && profile_status_for_fn (fn) == PROFILE_READ)
2834 drop_profile (callee, 0);
2835 worklist.safe_push (callee);
2839 worklist.release ();
2842 /* Convert counts measured by profile driven feedback to frequencies.
2843 Return nonzero iff there was any nonzero execution count. */
2846 counts_to_freqs (void)
2848 gcov_type count_max, true_count_max = 0;
2851 /* Don't overwrite the estimated frequencies when the profile for
2852 the function is missing. We may drop this function to PROFILE_GUESSED
2853 later in drop_profile (). */
2854 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2857 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2858 true_count_max = MAX (bb->count, true_count_max);
2860 count_max = MAX (true_count_max, 1);
2861 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2862 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
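/* For example (illustrative numbers): with count_max == 1000, a block
   with count == 250 gets frequency (250 * BB_FREQ_MAX + 500) / 1000,
   i.e. one quarter of BB_FREQ_MAX, rounded to nearest.  */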
2864 return true_count_max;
2867 /* Return true if the function is likely to be expensive, so there is no point
2868 in optimizing the performance of the prologue or epilogue, or in inlining,
2869 at the expense of code size growth. THRESHOLD is the limit on the number of
2870 instructions the function can execute on average and still not be considered expensive. */
2873 expensive_function_p (int threshold)
2875 unsigned int sum = 0;
2879 /* We cannot compute this accurately for large thresholds due to scaled frequencies. */
2881 gcc_assert (threshold <= BB_FREQ_MAX);
2883 /* Frequencies are out of range. This either means that the function contains
2884 an internal loop executing more than BB_FREQ_MAX times, or that profile feedback
2885 is available and the function has not been executed at all. */
2886 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2889 /* At most BB_FREQ_MAX^2, so overflow won't happen. */
2890 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2891 FOR_EACH_BB_FN (bb, cfun)
2895 FOR_BB_INSNS (bb, insn)
2896 if (active_insn_p (insn))
2898 sum += bb->frequency;
2907 /* Estimate and propagate basic block frequencies using the given branch
2908 probabilities. If FORCE is true, the frequencies are used to estimate
2909 the counts even when there are already non-zero profile counts. */
2912 estimate_bb_frequencies (bool force)
2917 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2919 static int real_values_initialized = 0;
2921 if (!real_values_initialized)
2923 real_values_initialized = 1;
2924 sreal_init (&real_zero, 0, 0);
2925 sreal_init (&real_one, 1, 0);
2926 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2927 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2928 sreal_init (&real_one_half, 1, -1);
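/* sreal_init takes a significand and a binary exponent, so the (1, -1)
   above encodes 1 * 2^-1 == 0.5, used below for round-to-nearest.  */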
2929 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2930 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2933 mark_dfs_back_edges ();
2935 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability = REG_BR_PROB_BASE;
2938 /* Set up block info for each basic block. */
2939 alloc_aux_for_blocks (sizeof (struct block_info_def));
2940 alloc_aux_for_edges (sizeof (struct edge_info_def));
2941 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2946 FOR_EACH_EDGE (e, ei, bb->succs)
2948 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2949 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2950 &EDGE_INFO (e)->back_edge_prob,
2951 &real_inv_br_prob_base);
2955 /* First compute frequencies locally for each loop from innermost
2956 to outermost to examine frequencies for back edges. */
2959 memcpy (&freq_max, &real_zero, sizeof (real_zero));
2960 FOR_EACH_BB_FN (bb, cfun)
2961 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2962 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2964 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
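/* freq_max now holds the factor that rescales the largest propagated
   frequency to BB_FREQ_MAX; each block's sreal frequency is multiplied
   by it and rounded to nearest (via real_one_half) below.  */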
2965 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2969 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2970 sreal_add (&tmp, &tmp, &real_one_half);
2971 bb->frequency = sreal_to_int (&tmp);
2974 free_aux_for_blocks ();
2975 free_aux_for_edges ();
2977 compute_function_frequency ();
2980 /* Decide whether the function is hot, cold, or unlikely executed. */
2982 compute_function_frequency (void)
2985 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2987 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2988 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2989 node->only_called_at_startup = true;
2990 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2991 node->only_called_at_exit = true;
2993 if (profile_status_for_fn (cfun) != PROFILE_READ)
2995 int flags = flags_from_decl_or_type (current_function_decl);
2996 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2998 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2999 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3001 node->frequency = NODE_FREQUENCY_HOT;
3002 else if (flags & ECF_NORETURN)
3003 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3004 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3005 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3006 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3007 || DECL_STATIC_DESTRUCTOR (current_function_decl))
3008 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3012 /* Only try to drop the function into unlikely executed the first time;
3013 after inlining, round-off errors may confuse us.
3014 The ipa-profile pass will drop functions called only from unlikely
3015 functions to unlikely, and that is most of what we care about. */
3016 if (!cfun->after_inlining)
3017 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3018 FOR_EACH_BB_FN (bb, cfun)
3020 if (maybe_hot_bb_p (cfun, bb))
3022 node->frequency = NODE_FREQUENCY_HOT;
3025 if (!probably_never_executed_bb_p (cfun, bb))
3026 node->frequency = NODE_FREQUENCY_NORMAL;
3030 /* Build PREDICT_EXPR. */
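/* For example, build_predict_expr (PRED_HOT_LABEL, TAKEN) (an
   illustrative combination) yields a PREDICT_EXPR hinting that the path
   containing it is likely executed; after gimplification such hints
   become GIMPLE_PREDICT statements and are turned into edge
   predictions.  */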
3032 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3034 tree t = build1 (PREDICT_EXPR, void_type_node,
3035 build_int_cst (integer_type_node, predictor));
3036 SET_PREDICT_EXPR_OUTCOME (t, taken);
3041 predictor_name (enum br_predictor predictor)
3043 return predictor_info[predictor].name;
3046 /* Predict branch probabilities and estimate profile of the tree CFG. */
3050 const pass_data pass_data_profile =
3052 GIMPLE_PASS, /* type */
3053 "profile_estimate", /* name */
3054 OPTGROUP_NONE, /* optinfo_flags */
3055 TV_BRANCH_PROB, /* tv_id */
3056 PROP_cfg, /* properties_required */
3057 0, /* properties_provided */
3058 0, /* properties_destroyed */
3059 0, /* todo_flags_start */
3060 0, /* todo_flags_finish */
3063 class pass_profile : public gimple_opt_pass
3066 pass_profile (gcc::context *ctxt)
3067 : gimple_opt_pass (pass_data_profile, ctxt)
3070 /* opt_pass methods: */
3071 virtual bool gate (function *) { return flag_guess_branch_prob; }
3072 virtual unsigned int execute (function *);
3074 }; // class pass_profile
3077 pass_profile::execute (function *fun)
3081 loop_optimizer_init (LOOPS_NORMAL);
3082 if (dump_file && (dump_flags & TDF_DETAILS))
3083 flow_loops_dump (dump_file, NULL, 0);
3085 mark_irreducible_loops ();
3087 nb_loops = number_of_loops (fun);
3091 tree_estimate_probability ();
3096 loop_optimizer_finalize ();
3097 if (dump_file && (dump_flags & TDF_DETAILS))
3098 gimple_dump_cfg (dump_file, dump_flags);
3099 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3100 profile_status_for_fn (fun) = PROFILE_GUESSED;
3107 make_pass_profile (gcc::context *ctxt)
3109 return new pass_profile (ctxt);
3114 const pass_data pass_data_strip_predict_hints =
3116 GIMPLE_PASS, /* type */
3117 "*strip_predict_hints", /* name */
3118 OPTGROUP_NONE, /* optinfo_flags */
3119 TV_BRANCH_PROB, /* tv_id */
3120 PROP_cfg, /* properties_required */
3121 0, /* properties_provided */
3122 0, /* properties_destroyed */
3123 0, /* todo_flags_start */
3124 0, /* todo_flags_finish */
3127 class pass_strip_predict_hints : public gimple_opt_pass
3130 pass_strip_predict_hints (gcc::context *ctxt)
3131 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3134 /* opt_pass methods: */
3135 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3136 virtual unsigned int execute (function *);
3138 }; // class pass_strip_predict_hints
3140 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3141 we no longer need. */
3143 pass_strip_predict_hints::execute (function *fun)
3149 FOR_EACH_BB_FN (bb, fun)
3151 gimple_stmt_iterator bi;
3152 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3154 gimple stmt = gsi_stmt (bi);
3156 if (gimple_code (stmt) == GIMPLE_PREDICT)
3158 gsi_remove (&bi, true);
3161 else if (is_gimple_call (stmt))
3163 tree fndecl = gimple_call_fndecl (stmt);
3166 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3167 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3168 && gimple_call_num_args (stmt) == 2)
3169 || (gimple_call_internal_p (stmt)
3170 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
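/* Rewrite x = __builtin_expect (e, v) into the plain copy x = e; the
   expectation value v only carried the hint, which is no longer
   needed, so just the first argument is kept.  */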
3172 var = gimple_call_lhs (stmt);
3176 ass_stmt = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3177 gsi_replace (&bi, ass_stmt, true);
3181 gsi_remove (&bi, true);
3195 make_pass_strip_predict_hints (gcc::context *ctxt)
3197 return new pass_strip_predict_hints (ctxt);
3200 /* Rebuild function frequencies. Passes are in general expected to
3201 maintain the profile by hand; however, in some cases this is not possible:
3202 for example, when inlining several functions with loops, frequencies might run
3203 out of scale and thus need to be recomputed. */
3206 rebuild_frequencies (void)
3208 timevar_push (TV_REBUILD_FREQUENCIES);
3210 /* When the max bb count in the function is small, there is a higher
3211 chance that there were truncation errors in the integer scaling
3212 of counts by inlining and other optimizations. This could lead
3213 to incorrect classification of code as being cold when it isn't.
3214 In that case, force the estimation of bb counts/frequencies from the
3215 branch probabilities, rather than computing frequencies from counts,
3216 which may also lead to frequencies incorrectly reduced to 0. There
3217 is less precision in the probabilities, so we only do this for small max counts. */
3219 gcov_type count_max = 0;
3221 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3222 count_max = MAX (bb->count, count_max);
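/* REG_BR_PROB_BASE is 10000, so the test below distrusts any read
   profile whose hottest block count is under 1000.  */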
3224 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3225 || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3227 loop_optimizer_init (0);
3228 add_noreturn_fake_exit_edges ();
3229 mark_irreducible_loops ();
3230 connect_infinite_loops_to_exit ();
3231 estimate_bb_frequencies (true);
3232 remove_fake_exit_edges ();
3233 loop_optimizer_finalize ();
3235 else if (profile_status_for_fn (cfun) == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
3239 timevar_pop (TV_REBUILD_FREQUENCIES);