gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "backend.h"
  34 #include "rtl.h"
  35 #include "tree.h"
  36 #include "gimple.h"
  37 #include "cfghooks.h"
  38 #include "tree-pass.h"
  39 #include "ssa.h"
  40 #include "emit-rtl.h"
  41 #include "cgraph.h"
  42 #include "coverage.h"
  43 #include "diagnostic-core.h"
  44 #include "gimple-predict.h"
  45 #include "fold-const.h"
  46 #include "calls.h"
  47 #include "cfganal.h"
  48 #include "profile.h"
  49 #include "sreal.h"
  50 #include "params.h"
  51 #include "cfgloop.h"
  52 #include "gimple-iterator.h"
  53 #include "tree-cfg.h"
  54 #include "tree-ssa-loop-niter.h"
  55 #include "tree-ssa-loop.h"
  56 #include "tree-scalar-evolution.h"
  57
  58 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  59                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  60 static sreal real_almost_one, real_br_prob_base,
  61              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  62
  63 static void combine_predictions_for_insn (rtx_insn *, basic_block);
  64 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  65 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  66 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  67 static bool can_predict_insn_p (const rtx_insn *);
  68
  69 /* Information we hold about each branch predictor.
  70    Filled using information from predict.def.  */
  71
  72 struct predictor_info
  73 {
  74   const char *const name;       /* Name used in the debugging dumps.  */
  75   const int hitrate;            /* Expected hitrate used by
  76                                    predict_insn_def call.  */
  77   const int flags;
  78 };
  79
  80 /* Use given predictor without Dempster-Shaffer theory if it matches
  81    using first_match heuristics.  */
  82 #define PRED_FLAG_FIRST_MATCH 1
  83
  84 /* Recompute hitrate in percent to our representation.  */
  85
  86 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
  87
  88 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
  89 static const struct predictor_info predictor_info[]= {
  90 #include "predict.def"
  91
  92   /* Upper bound on predictors.  */
  93   {NULL, 0, 0}
  94 };
  95 #undef DEF_PREDICTOR
  96
  97 /* Return TRUE if frequency FREQ is considered to be hot.  */
  98
  99 static inline bool
 100 maybe_hot_frequency_p (struct function *fun, int freq)
 101 {
 102   struct cgraph_node *node = cgraph_node::get (fun->decl);
 103   if (!profile_info
 104       || !opt_for_fn (fun->decl, flag_branch_probabilities))
 105     {
 106       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 107         return false;
 108       if (node->frequency == NODE_FREQUENCY_HOT)
 109         return true;
 110     }
 111   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 112     return true;
 113   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 114       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
 115     return false;
 116   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 117     return false;
 118   if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
 119               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 120     return false;
 121   return true;
 122 }
 123
 124 static gcov_type min_count = -1;
 125
 126 /* Determine the threshold for hot BB counts.  */
 127
 128 gcov_type
 129 get_hot_bb_threshold ()
 130 {
 131   gcov_working_set_t *ws;
 132   if (min_count == -1)
 133     {
 134       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 135       gcc_assert (ws);
 136       min_count = ws->min_counter;
 137     }
 138   return min_count;
 139 }
 140
 141 /* Set the threshold for hot BB counts.  */
 142
 143 void
 144 set_hot_bb_threshold (gcov_type min)
 145 {
 146   min_count = min;
 147 }
 148
 149 /* Return TRUE if frequency FREQ is considered to be hot.  */
 150
 151 bool
 152 maybe_hot_count_p (struct function *fun, gcov_type count)
 153 {
 154   if (fun && profile_status_for_fn (fun) != PROFILE_READ)
 155     return true;
 156   /* Code executed at most once is not hot.  */
 157   if (profile_info->runs >= count)
 158     return false;
 159   return (count >= get_hot_bb_threshold ());
 160 }
 161
 162 /* Return true in case BB can be CPU intensive and should be optimized
 163    for maximal performance.  */
 164
 165 bool
 166 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 167 {
 168   gcc_checking_assert (fun);
 169   if (profile_status_for_fn (fun) == PROFILE_READ)
 170     return maybe_hot_count_p (fun, bb->count);
 171   return maybe_hot_frequency_p (fun, bb->frequency);
 172 }
 173
 174 /* Return true in case BB can be CPU intensive and should be optimized
 175    for maximal performance.  */
 176
 177 bool
 178 maybe_hot_edge_p (edge e)
 179 {
 180   if (profile_status_for_fn (cfun) == PROFILE_READ)
 181     return maybe_hot_count_p (cfun, e->count);
 182   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 183 }
 184
 185 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 186    node frequency reflects never being executed.  */
 187
 188 static bool
 189 probably_never_executed (struct function *fun,
 190                          gcov_type count, int frequency)
 191 {
 192   gcc_checking_assert (fun);
 193   if (profile_status_for_fn (fun) == PROFILE_READ)
 194     {
 195       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 196       if (count * unlikely_count_fraction >= profile_info->runs)
 197         return false;
 198       if (!frequency)
 199         return true;
 200       if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
 201         return false;
 202       if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
 203         {
 204           gcov_type computed_count;
 205           /* Check for possibility of overflow, in which case entry bb count
 206              is large enough to do the division first without losing much
 207              precision.  */
 208           if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
 209               REG_BR_PROB_BASE)
 210             {
 211               gcov_type scaled_count
 212                   = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
 213              unlikely_count_fraction;
 214               computed_count = RDIV (scaled_count,
 215                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 216             }
 217           else
 218             {
 219               computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
 220                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 221               computed_count *= frequency * unlikely_count_fraction;
 222             }
 223           if (computed_count >= profile_info->runs)
 224             return false;
 225         }
 226       return true;
 227     }
 228   if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
 229       && (cgraph_node::get (fun->decl)->frequency
 230           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 231     return true;
 232   return false;
 233 }
 234
 235
 236 /* Return true in case BB is probably never executed.  */
 237
 238 bool
 239 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 240 {
 241   return probably_never_executed (fun, bb->count, bb->frequency);
 242 }
 243
 244
 245 /* Return true in case edge E is probably never executed.  */
 246
 247 bool
 248 probably_never_executed_edge_p (struct function *fun, edge e)
 249 {
 250   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 251 }
 252
 253 /* Return true when current function should always be optimized for size.  */
 254
 255 bool
 256 optimize_function_for_size_p (struct function *fun)
 257 {
 258   if (!fun || !fun->decl)
 259     return optimize_size;
 260   cgraph_node *n = cgraph_node::get (fun->decl);
 261   return n && n->optimize_for_size_p ();
 262 }
 263
 264 /* Return true when current function should always be optimized for speed.  */
 265
 266 bool
 267 optimize_function_for_speed_p (struct function *fun)
 268 {
 269   return !optimize_function_for_size_p (fun);
 270 }
 271
 272 /* Return TRUE when BB should be optimized for size.  */
 273
 274 bool
 275 optimize_bb_for_size_p (const_basic_block bb)
 276 {
 277   return (optimize_function_for_size_p (cfun)
 278           || (bb && !maybe_hot_bb_p (cfun, bb)));
 279 }
 280
 281 /* Return TRUE when BB should be optimized for speed.  */
 282
 283 bool
 284 optimize_bb_for_speed_p (const_basic_block bb)
 285 {
 286   return !optimize_bb_for_size_p (bb);
 287 }
 288
 289 /* Return TRUE when BB should be optimized for size.  */
 290
 291 bool
 292 optimize_edge_for_size_p (edge e)
 293 {
 294   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 295 }
 296
 297 /* Return TRUE when BB should be optimized for speed.  */
 298
 299 bool
 300 optimize_edge_for_speed_p (edge e)
 301 {
 302   return !optimize_edge_for_size_p (e);
 303 }
 304
 305 /* Return TRUE when BB should be optimized for size.  */
 306
 307 bool
 308 optimize_insn_for_size_p (void)
 309 {
 310   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 311 }
 312
 313 /* Return TRUE when BB should be optimized for speed.  */
 314
 315 bool
 316 optimize_insn_for_speed_p (void)
 317 {
 318   return !optimize_insn_for_size_p ();
 319 }
 320
 321 /* Return TRUE when LOOP should be optimized for size.  */
 322
 323 bool
 324 optimize_loop_for_size_p (struct loop *loop)
 325 {
 326   return optimize_bb_for_size_p (loop->header);
 327 }
 328
 329 /* Return TRUE when LOOP should be optimized for speed.  */
 330
 331 bool
 332 optimize_loop_for_speed_p (struct loop *loop)
 333 {
 334   return optimize_bb_for_speed_p (loop->header);
 335 }
 336
 337 /* Return TRUE when LOOP nest should be optimized for speed.  */
 338
 339 bool
 340 optimize_loop_nest_for_speed_p (struct loop *loop)
 341 {
 342   struct loop *l = loop;
 343   if (optimize_loop_for_speed_p (loop))
 344     return true;
 345   l = loop->inner;
 346   while (l && l != loop)
 347     {
 348       if (optimize_loop_for_speed_p (l))
 349         return true;
 350       if (l->inner)
 351         l = l->inner;
 352       else if (l->next)
 353         l = l->next;
 354       else
 355         {
 356           while (l != loop && !l->next)
 357             l = loop_outer (l);
 358           if (l != loop)
 359             l = l->next;
 360         }
 361     }
 362   return false;
 363 }
 364
 365 /* Return TRUE when LOOP nest should be optimized for size.  */
 366
 367 bool
 368 optimize_loop_nest_for_size_p (struct loop *loop)
 369 {
 370   return !optimize_loop_nest_for_speed_p (loop);
 371 }
 372
 373 /* Return true when edge E is likely to be well predictable by branch
 374    predictor.  */
 375
 376 bool
 377 predictable_edge_p (edge e)
 378 {
 379   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 380     return false;
 381   if ((e->probability
 382        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 383       || (REG_BR_PROB_BASE - e->probability
 384           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 385     return true;
 386   return false;
 387 }
 388
 389
 390 /* Set RTL expansion for BB profile.  */
 391
 392 void
 393 rtl_profile_for_bb (basic_block bb)
 394 {
 395   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 396 }
 397
 398 /* Set RTL expansion for edge profile.  */
 399
 400 void
 401 rtl_profile_for_edge (edge e)
 402 {
 403   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 404 }
 405
 406 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 407 void
 408 default_rtl_profile (void)
 409 {
 410   crtl->maybe_hot_insn_p = true;
 411 }
 412
 413 /* Return true if the one of outgoing edges is already predicted by
 414    PREDICTOR.  */
 415
 416 bool
 417 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 418 {
 419   rtx note;
 420   if (!INSN_P (BB_END (bb)))
 421     return false;
 422   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 423     if (REG_NOTE_KIND (note) == REG_BR_PRED
 424         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 425       return true;
 426   return false;
 427 }
 428
 429 /*  Structure representing predictions in tree level. */
 430
 431 struct edge_prediction {
 432     struct edge_prediction *ep_next;
 433     edge ep_edge;
 434     enum br_predictor ep_predictor;
 435     int ep_probability;
 436 };
 437
 438 /* This map contains for a basic block the list of predictions for the
 439    outgoing edges.  */
 440
 441 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
 442
 443 /* Return true if the one of outgoing edges is already predicted by
 444    PREDICTOR.  */
 445
 446 bool
 447 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 448 {
 449   struct edge_prediction *i;
 450   edge_prediction **preds = bb_predictions->get (bb);
 451
 452   if (!preds)
 453     return false;
 454
 455   for (i = *preds; i; i = i->ep_next)
 456     if (i->ep_predictor == predictor)
 457       return true;
 458   return false;
 459 }
 460
 461 /* Return true when the probability of edge is reliable.
 462
 463    The profile guessing code is good at predicting branch outcome (ie.
 464    taken/not taken), that is predicted right slightly over 75% of time.
 465    It is however notoriously poor on predicting the probability itself.
 466    In general the profile appear a lot flatter (with probabilities closer
 467    to 50%) than the reality so it is bad idea to use it to drive optimization
 468    such as those disabling dynamic branch prediction for well predictable
 469    branches.
 470
 471    There are two exceptions - edges leading to noreturn edges and edges
 472    predicted by number of iterations heuristics are predicted well.  This macro
 473    should be able to distinguish those, but at the moment it simply check for
 474    noreturn heuristic that is only one giving probability over 99% or bellow
 475    1%.  In future we might want to propagate reliability information across the
 476    CFG if we find this information useful on multiple places.   */
 477 static bool
 478 probability_reliable_p (int prob)
 479 {
 480   return (profile_status_for_fn (cfun) == PROFILE_READ
 481           || (profile_status_for_fn (cfun) == PROFILE_GUESSED
 482               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 483 }
 484
 485 /* Same predicate as above, working on edges.  */
 486 bool
 487 edge_probability_reliable_p (const_edge e)
 488 {
 489   return probability_reliable_p (e->probability);
 490 }
 491
 492 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 493 bool
 494 br_prob_note_reliable_p (const_rtx note)
 495 {
 496   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 497   return probability_reliable_p (XINT (note, 0));
 498 }
 499
 500 static void
 501 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
 502 {
 503   gcc_assert (any_condjump_p (insn));
 504   if (!flag_guess_branch_prob)
 505     return;
 506
 507   add_reg_note (insn, REG_BR_PRED,
 508                 gen_rtx_CONCAT (VOIDmode,
 509                                 GEN_INT ((int) predictor),
 510                                 GEN_INT ((int) probability)));
 511 }
 512
 513 /* Predict insn by given predictor.  */
 514
 515 void
 516 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
 517                   enum prediction taken)
 518 {
 519    int probability = predictor_info[(int) predictor].hitrate;
 520
 521    if (taken != TAKEN)
 522      probability = REG_BR_PROB_BASE - probability;
 523
 524    predict_insn (insn, predictor, probability);
 525 }
 526
 527 /* Predict edge E with given probability if possible.  */
 528
 529 void
 530 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 531 {
 532   rtx_insn *last_insn;
 533   last_insn = BB_END (e->src);
 534
 535   /* We can store the branch prediction information only about
 536      conditional jumps.  */
 537   if (!any_condjump_p (last_insn))
 538     return;
 539
 540   /* We always store probability of branching.  */
 541   if (e->flags & EDGE_FALLTHRU)
 542     probability = REG_BR_PROB_BASE - probability;
 543
 544   predict_insn (last_insn, predictor, probability);
 545 }
 546
 547 /* Predict edge E with the given PROBABILITY.  */
 548 void
 549 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 550 {
 551   gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
 552   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
 553        1)
 554       && flag_guess_branch_prob && optimize)
 555     {
 556       struct edge_prediction *i = XNEW (struct edge_prediction);
 557       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
 558
 559       i->ep_next = preds;
 560       preds = i;
 561       i->ep_probability = probability;
 562       i->ep_predictor = predictor;
 563       i->ep_edge = e;
 564     }
 565 }
 566
 567 /* Remove all predictions on given basic block that are attached
 568    to edge E.  */
 569 void
 570 remove_predictions_associated_with_edge (edge e)
 571 {
 572   if (!bb_predictions)
 573     return;
 574
 575   edge_prediction **preds = bb_predictions->get (e->src);
 576
 577   if (preds)
 578     {
 579       struct edge_prediction **prediction = preds;
 580       struct edge_prediction *next;
 581
 582       while (*prediction)
 583         {
 584           if ((*prediction)->ep_edge == e)
 585             {
 586               next = (*prediction)->ep_next;
 587               free (*prediction);
 588               *prediction = next;
 589             }
 590           else
 591             prediction = &((*prediction)->ep_next);
 592         }
 593     }
 594 }
 595
 596 /* Clears the list of predictions stored for BB.  */
 597
 598 static void
 599 clear_bb_predictions (basic_block bb)
 600 {
 601   edge_prediction **preds = bb_predictions->get (bb);
 602   struct edge_prediction *pred, *next;
 603
 604   if (!preds)
 605     return;
 606
 607   for (pred = *preds; pred; pred = next)
 608     {
 609       next = pred->ep_next;
 610       free (pred);
 611     }
 612   *preds = NULL;
 613 }
 614
 615 /* Return true when we can store prediction on insn INSN.
 616    At the moment we represent predictions only on conditional
 617    jumps, not at computed jump or other complicated cases.  */
 618 static bool
 619 can_predict_insn_p (const rtx_insn *insn)
 620 {
 621   return (JUMP_P (insn)
 622           && any_condjump_p (insn)
 623           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 624 }
 625
 626 /* Predict edge E by given predictor if possible.  */
 627
 628 void
 629 predict_edge_def (edge e, enum br_predictor predictor,
 630                   enum prediction taken)
 631 {
 632    int probability = predictor_info[(int) predictor].hitrate;
 633
 634    if (taken != TAKEN)
 635      probability = REG_BR_PROB_BASE - probability;
 636
 637    predict_edge (e, predictor, probability);
 638 }
 639
 640 /* Invert all branch predictions or probability notes in the INSN.  This needs
 641    to be done each time we invert the condition used by the jump.  */
 642
 643 void
 644 invert_br_probabilities (rtx insn)
 645 {
 646   rtx note;
 647
 648   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 649     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 650       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 651     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 652       XEXP (XEXP (note, 0), 1)
 653         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 654 }
 655
 656 /* Dump information about the branch prediction to the output file.  */
 657
 658 static void
 659 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 660                  basic_block bb, int used)
 661 {
 662   edge e;
 663   edge_iterator ei;
 664
 665   if (!file)
 666     return;
 667
 668   FOR_EACH_EDGE (e, ei, bb->succs)
 669     if (! (e->flags & EDGE_FALLTHRU))
 670       break;
 671
 672   fprintf (file, "  %s heuristics%s: %.1f%%",
 673            predictor_info[predictor].name,
 674            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 675
 676   if (bb->count)
 677     {
 678       fprintf (file, "  exec %" PRId64, bb->count);
 679       if (e)
 680         {
 681           fprintf (file, " hit %" PRId64, e->count);
 682           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 683         }
 684     }
 685
 686   fprintf (file, "\n");
 687 }
 688
 689 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 690    evenly and hope for the best.  */
 691 static void
 692 set_even_probabilities (basic_block bb)
 693 {
 694   int nedges = 0;
 695   edge e;
 696   edge_iterator ei;
 697
 698   FOR_EACH_EDGE (e, ei, bb->succs)
 699     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 700       nedges ++;
 701   FOR_EACH_EDGE (e, ei, bb->succs)
 702     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 703       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 704     else
 705       e->probability = 0;
 706 }
 707
 708 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 709    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 710
 711 static void
 712 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
 713 {
 714   rtx prob_note;
 715   rtx *pnote;
 716   rtx note;
 717   int best_probability = PROB_EVEN;
 718   enum br_predictor best_predictor = END_PREDICTORS;
 719   int combined_probability = REG_BR_PROB_BASE / 2;
 720   int d;
 721   bool first_match = false;
 722   bool found = false;
 723
 724   if (!can_predict_insn_p (insn))
 725     {
 726       set_even_probabilities (bb);
 727       return;
 728     }
 729
 730   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 731   pnote = &REG_NOTES (insn);
 732   if (dump_file)
 733     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 734              bb->index);
 735
 736   /* We implement "first match" heuristics and use probability guessed
 737      by predictor with smallest index.  */
 738   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 739     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 740       {
 741         enum br_predictor predictor = ((enum br_predictor)
 742                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 743         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 744
 745         found = true;
 746         if (best_predictor > predictor)
 747           best_probability = probability, best_predictor = predictor;
 748
 749         d = (combined_probability * probability
 750              + (REG_BR_PROB_BASE - combined_probability)
 751              * (REG_BR_PROB_BASE - probability));
 752
 753         /* Use FP math to avoid overflows of 32bit integers.  */
 754         if (d == 0)
 755           /* If one probability is 0% and one 100%, avoid division by zero.  */
 756           combined_probability = REG_BR_PROB_BASE / 2;
 757         else
 758           combined_probability = (((double) combined_probability) * probability
 759                                   * REG_BR_PROB_BASE / d + 0.5);
 760       }
 761
 762   /* Decide which heuristic to use.  In case we didn't match anything,
 763      use no_prediction heuristic, in case we did match, use either
 764      first match or Dempster-Shaffer theory depending on the flags.  */
 765
 766   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 767     first_match = true;
 768
 769   if (!found)
 770     dump_prediction (dump_file, PRED_NO_PREDICTION,
 771                      combined_probability, bb, true);
 772   else
 773     {
 774       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 775                        bb, !first_match);
 776       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 777                        bb, first_match);
 778     }
 779
 780   if (first_match)
 781     combined_probability = best_probability;
 782   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 783
 784   while (*pnote)
 785     {
 786       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 787         {
 788           enum br_predictor predictor = ((enum br_predictor)
 789                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 790           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 791
 792           dump_prediction (dump_file, predictor, probability, bb,
 793                            !first_match || best_predictor == predictor);
 794           *pnote = XEXP (*pnote, 1);
 795         }
 796       else
 797         pnote = &XEXP (*pnote, 1);
 798     }
 799
 800   if (!prob_note)
 801     {
 802       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 803
 804       /* Save the prediction into CFG in case we are seeing non-degenerated
 805          conditional jump.  */
 806       if (!single_succ_p (bb))
 807         {
 808           BRANCH_EDGE (bb)->probability = combined_probability;
 809           FALLTHRU_EDGE (bb)->probability
 810             = REG_BR_PROB_BASE - combined_probability;
 811         }
 812     }
 813   else if (!single_succ_p (bb))
 814     {
 815       int prob = XINT (prob_note, 0);
 816
 817       BRANCH_EDGE (bb)->probability = prob;
 818       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 819     }
 820   else
 821     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 822 }
 823
 824 /* Combine predictions into single probability and store them into CFG.
 825    Remove now useless prediction entries.  */
 826
 827 static void
 828 combine_predictions_for_bb (basic_block bb)
 829 {
 830   int best_probability = PROB_EVEN;
 831   enum br_predictor best_predictor = END_PREDICTORS;
 832   int combined_probability = REG_BR_PROB_BASE / 2;
 833   int d;
 834   bool first_match = false;
 835   bool found = false;
 836   struct edge_prediction *pred;
 837   int nedges = 0;
 838   edge e, first = NULL, second = NULL;
 839   edge_iterator ei;
 840
 841   FOR_EACH_EDGE (e, ei, bb->succs)
 842     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 843       {
 844         nedges ++;
 845         if (first && !second)
 846           second = e;
 847         if (!first)
 848           first = e;
 849       }
 850
 851   /* When there is no successor or only one choice, prediction is easy.
 852
 853      We are lazy for now and predict only basic blocks with two outgoing
 854      edges.  It is possible to predict generic case too, but we have to
 855      ignore first match heuristics and do more involved combining.  Implement
 856      this later.  */
 857   if (nedges != 2)
 858     {
 859       if (!bb->count)
 860         set_even_probabilities (bb);
 861       clear_bb_predictions (bb);
 862       if (dump_file)
 863         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 864                  nedges, bb->index);
 865       return;
 866     }
 867
 868   if (dump_file)
 869     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 870
 871   edge_prediction **preds = bb_predictions->get (bb);
 872   if (preds)
 873     {
 874       /* We implement "first match" heuristics and use probability guessed
 875          by predictor with smallest index.  */
 876       for (pred = *preds; pred; pred = pred->ep_next)
 877         {
 878           enum br_predictor predictor = pred->ep_predictor;
 879           int probability = pred->ep_probability;
 880
 881           if (pred->ep_edge != first)
 882             probability = REG_BR_PROB_BASE - probability;
 883
 884           found = true;
 885           /* First match heuristics would be widly confused if we predicted
 886              both directions.  */
 887           if (best_predictor > predictor)
 888             {
 889               struct edge_prediction *pred2;
 890               int prob = probability;
 891
 892               for (pred2 = (struct edge_prediction *) *preds;
 893                    pred2; pred2 = pred2->ep_next)
 894                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 895                  {
 896                    int probability2 = pred->ep_probability;
 897
 898                    if (pred2->ep_edge != first)
 899                      probability2 = REG_BR_PROB_BASE - probability2;
 900
 901                    if ((probability < REG_BR_PROB_BASE / 2) !=
 902                        (probability2 < REG_BR_PROB_BASE / 2))
 903                      break;
 904
 905                    /* If the same predictor later gave better result, go for it! */
 906                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 907                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 908                      prob = probability2;
 909                  }
 910               if (!pred2)
 911                 best_probability = prob, best_predictor = predictor;
 912             }
 913
 914           d = (combined_probability * probability
 915                + (REG_BR_PROB_BASE - combined_probability)
 916                * (REG_BR_PROB_BASE - probability));
 917
 918           /* Use FP math to avoid overflows of 32bit integers.  */
 919           if (d == 0)
 920             /* If one probability is 0% and one 100%, avoid division by zero.  */
 921             combined_probability = REG_BR_PROB_BASE / 2;
 922           else
 923             combined_probability = (((double) combined_probability)
 924                                     * probability
 925                                     * REG_BR_PROB_BASE / d + 0.5);
 926         }
 927     }
 928
 929   /* Decide which heuristic to use.  In case we didn't match anything,
 930      use no_prediction heuristic, in case we did match, use either
 931      first match or Dempster-Shaffer theory depending on the flags.  */
 932
 933   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 934     first_match = true;
 935
 936   if (!found)
 937     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 938   else
 939     {
 940       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 941                        !first_match);
 942       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 943                        first_match);
 944     }
 945
 946   if (first_match)
 947     combined_probability = best_probability;
 948   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 949
 950   if (preds)
 951     {
 952       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 953         {
 954           enum br_predictor predictor = pred->ep_predictor;
 955           int probability = pred->ep_probability;
 956
 957           if (pred->ep_edge != EDGE_SUCC (bb, 0))
 958             probability = REG_BR_PROB_BASE - probability;
 959           dump_prediction (dump_file, predictor, probability, bb,
 960                            !first_match || best_predictor == predictor);
 961         }
 962     }
 963   clear_bb_predictions (bb);
 964
 965   if (!bb->count)
 966     {
 967       first->probability = combined_probability;
 968       second->probability = REG_BR_PROB_BASE - combined_probability;
 969     }
 970 }
 971
 972 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
 973    Return the SSA_NAME if the condition satisfies, NULL otherwise.
 974
 975    T1 and T2 should be one of the following cases:
 976      1. T1 is SSA_NAME, T2 is NULL
 977      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
 978      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
 979
 980 static tree
 981 strips_small_constant (tree t1, tree t2)
 982 {
 983   tree ret = NULL;
 984   int value = 0;
 985
 986   if (!t1)
 987     return NULL;
 988   else if (TREE_CODE (t1) == SSA_NAME)
 989     ret = t1;
 990   else if (tree_fits_shwi_p (t1))
 991     value = tree_to_shwi (t1);
 992   else
 993     return NULL;
 994
 995   if (!t2)
 996     return ret;
 997   else if (tree_fits_shwi_p (t2))
 998     value = tree_to_shwi (t2);
 999   else if (TREE_CODE (t2) == SSA_NAME)
1000     {
1001       if (ret)
1002         return NULL;
1003       else
1004         ret = t2;
1005     }
1006
1007   if (value <= 4 && value >= -4)
1008     return ret;
1009   else
1010     return NULL;
1011 }
1012
1013 /* Return the SSA_NAME in T or T's operands.
1014    Return NULL if SSA_NAME cannot be found.  */
1015
1016 static tree
1017 get_base_value (tree t)
1018 {
1019   if (TREE_CODE (t) == SSA_NAME)
1020     return t;
1021
1022   if (!BINARY_CLASS_P (t))
1023     return NULL;
1024
1025   switch (TREE_OPERAND_LENGTH (t))
1026     {
1027     case 1:
1028       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1029     case 2:
1030       return strips_small_constant (TREE_OPERAND (t, 0),
1031                                     TREE_OPERAND (t, 1));
1032     default:
1033       return NULL;
1034     }
1035 }
1036
1037 /* Check the compare STMT in LOOP. If it compares an induction
1038    variable to a loop invariant, return true, and save
1039    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1040    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1041
1042 static bool
1043 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1044                                      tree *loop_invariant,
1045                                      enum tree_code *compare_code,
1046                                      tree *loop_step,
1047                                      tree *loop_iv_base)
1048 {
1049   tree op0, op1, bound, base;
1050   affine_iv iv0, iv1;
1051   enum tree_code code;
1052   tree step;
1053
1054   code = gimple_cond_code (stmt);
1055   *loop_invariant = NULL;
1056
1057   switch (code)
1058     {
1059     case GT_EXPR:
1060     case GE_EXPR:
1061     case NE_EXPR:
1062     case LT_EXPR:
1063     case LE_EXPR:
1064     case EQ_EXPR:
1065       break;
1066
1067     default:
1068       return false;
1069     }
1070
1071   op0 = gimple_cond_lhs (stmt);
1072   op1 = gimple_cond_rhs (stmt);
1073
1074   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1075        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1076     return false;
1077   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1078     return false;
1079   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1080     return false;
1081   if (TREE_CODE (iv0.step) != INTEGER_CST
1082       || TREE_CODE (iv1.step) != INTEGER_CST)
1083     return false;
1084   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1085       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1086     return false;
1087
1088   if (integer_zerop (iv0.step))
1089     {
1090       if (code != NE_EXPR && code != EQ_EXPR)
1091         code = invert_tree_comparison (code, false);
1092       bound = iv0.base;
1093       base = iv1.base;
1094       if (tree_fits_shwi_p (iv1.step))
1095         step = iv1.step;
1096       else
1097         return false;
1098     }
1099   else
1100     {
1101       bound = iv1.base;
1102       base = iv0.base;
1103       if (tree_fits_shwi_p (iv0.step))
1104         step = iv0.step;
1105       else
1106         return false;
1107     }
1108
1109   if (TREE_CODE (bound) != INTEGER_CST)
1110     bound = get_base_value (bound);
1111   if (!bound)
1112     return false;
1113   if (TREE_CODE (base) != INTEGER_CST)
1114     base = get_base_value (base);
1115   if (!base)
1116     return false;
1117
1118   *loop_invariant = bound;
1119   *compare_code = code;
1120   *loop_step = step;
1121   *loop_iv_base = base;
1122   return true;
1123 }
1124
1125 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1126
1127 static bool
1128 expr_coherent_p (tree t1, tree t2)
1129 {
1130   gimple *stmt;
1131   tree ssa_name_1 = NULL;
1132   tree ssa_name_2 = NULL;
1133
1134   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1135   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1136
1137   if (t1 == t2)
1138     return true;
1139
1140   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1141     return true;
1142   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1143     return false;
1144
1145   /* Check to see if t1 is expressed/defined with t2.  */
1146   stmt = SSA_NAME_DEF_STMT (t1);
1147   gcc_assert (stmt != NULL);
1148   if (is_gimple_assign (stmt))
1149     {
1150       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1151       if (ssa_name_1 && ssa_name_1 == t2)
1152         return true;
1153     }
1154
1155   /* Check to see if t2 is expressed/defined with t1.  */
1156   stmt = SSA_NAME_DEF_STMT (t2);
1157   gcc_assert (stmt != NULL);
1158   if (is_gimple_assign (stmt))
1159     {
1160       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1161       if (ssa_name_2 && ssa_name_2 == t1)
1162         return true;
1163     }
1164
1165   /* Compare if t1 and t2's def_stmts are identical.  */
1166   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1167     return true;
1168   else
1169     return false;
1170 }
1171
1172 /* Predict branch probability of BB when BB contains a branch that compares
1173    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1174    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1175
1176    E.g.
1177      for (int i = 0; i < bound; i++) {
1178        if (i < bound - 2)
1179          computation_1();
1180        else
1181          computation_2();
1182      }
1183
1184   In this loop, we will predict the branch inside the loop to be taken.  */
1185
1186 static void
1187 predict_iv_comparison (struct loop *loop, basic_block bb,
1188                        tree loop_bound_var,
1189                        tree loop_iv_base_var,
1190                        enum tree_code loop_bound_code,
1191                        int loop_bound_step)
1192 {
1193   gimple *stmt;
1194   tree compare_var, compare_base;
1195   enum tree_code compare_code;
1196   tree compare_step_var;
1197   edge then_edge;
1198   edge_iterator ei;
1199
1200   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1201       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1202       || predicted_by_p (bb, PRED_LOOP_EXIT))
1203     return;
1204
1205   stmt = last_stmt (bb);
1206   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1207     return;
1208   if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1209                                             loop, &compare_var,
1210                                             &compare_code,
1211                                             &compare_step_var,
1212                                             &compare_base))
1213     return;
1214
1215   /* Find the taken edge.  */
1216   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1217     if (then_edge->flags & EDGE_TRUE_VALUE)
1218       break;
1219
1220   /* When comparing an IV to a loop invariant, NE is more likely to be
1221      taken while EQ is more likely to be not-taken.  */
1222   if (compare_code == NE_EXPR)
1223     {
1224       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1225       return;
1226     }
1227   else if (compare_code == EQ_EXPR)
1228     {
1229       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1230       return;
1231     }
1232
1233   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1234     return;
1235
1236   /* If loop bound, base and compare bound are all constants, we can
1237      calculate the probability directly.  */
1238   if (tree_fits_shwi_p (loop_bound_var)
1239       && tree_fits_shwi_p (compare_var)
1240       && tree_fits_shwi_p (compare_base))
1241     {
1242       int probability;
1243       bool overflow, overall_overflow = false;
1244       widest_int compare_count, tem;
1245
1246       /* (loop_bound - base) / compare_step */
1247       tem = wi::sub (wi::to_widest (loop_bound_var),
1248                      wi::to_widest (compare_base), SIGNED, &overflow);
1249       overall_overflow |= overflow;
1250       widest_int loop_count = wi::div_trunc (tem,
1251                                              wi::to_widest (compare_step_var),
1252                                              SIGNED, &overflow);
1253       overall_overflow |= overflow;
1254
1255       if (!wi::neg_p (wi::to_widest (compare_step_var))
1256           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1257         {
1258           /* (loop_bound - compare_bound) / compare_step */
1259           tem = wi::sub (wi::to_widest (loop_bound_var),
1260                          wi::to_widest (compare_var), SIGNED, &overflow);
1261           overall_overflow |= overflow;
1262           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1263                                          SIGNED, &overflow);
1264           overall_overflow |= overflow;
1265         }
1266       else
1267         {
1268           /* (compare_bound - base) / compare_step */
1269           tem = wi::sub (wi::to_widest (compare_var),
1270                          wi::to_widest (compare_base), SIGNED, &overflow);
1271           overall_overflow |= overflow;
1272           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1273                                          SIGNED, &overflow);
1274           overall_overflow |= overflow;
1275         }
1276       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1277         ++compare_count;
1278       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1279         ++loop_count;
1280       if (wi::neg_p (compare_count))
1281         compare_count = 0;
1282       if (wi::neg_p (loop_count))
1283         loop_count = 0;
1284       if (loop_count == 0)
1285         probability = 0;
1286       else if (wi::cmps (compare_count, loop_count) == 1)
1287         probability = REG_BR_PROB_BASE;
1288       else
1289         {
1290           tem = compare_count * REG_BR_PROB_BASE;
1291           tem = wi::udiv_trunc (tem, loop_count);
1292           probability = tem.to_uhwi ();
1293         }
1294
1295       if (!overall_overflow)
1296         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1297
1298       return;
1299     }
1300
1301   if (expr_coherent_p (loop_bound_var, compare_var))
1302     {
1303       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1304           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1305         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1306       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1307                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1308         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1309       else if (loop_bound_code == NE_EXPR)
1310         {
1311           /* If the loop backedge condition is "(i != bound)", we do
1312              the comparison based on the step of IV:
1313              * step < 0 : backedge condition is like (i > bound)
1314              * step > 0 : backedge condition is like (i < bound)  */
1315           gcc_assert (loop_bound_step != 0);
1316           if (loop_bound_step > 0
1317               && (compare_code == LT_EXPR
1318                   || compare_code == LE_EXPR))
1319             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1320           else if (loop_bound_step < 0
1321                    && (compare_code == GT_EXPR
1322                        || compare_code == GE_EXPR))
1323             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1324           else
1325             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1326         }
1327       else
1328         /* The branch is predicted not-taken if loop_bound_code is
1329            opposite with compare_code.  */
1330         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1331     }
1332   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1333     {
1334       /* For cases like:
1335            for (i = s; i < h; i++)
1336              if (i > s + 2) ....
1337          The branch should be predicted taken.  */
1338       if (loop_bound_step > 0
1339           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1340         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1341       else if (loop_bound_step < 0
1342                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1343         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1344       else
1345         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1346     }
1347 }
1348
1349 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1350    exits are resulted from short-circuit conditions that will generate an
1351    if_tmp. E.g.:
1352
1353    if (foo() || global > 10)
1354      break;
1355
1356    This will be translated into:
1357
1358    BB3:
1359      loop header...
1360    BB4:
1361      if foo() goto BB6 else goto BB5
1362    BB5:
1363      if global > 10 goto BB6 else goto BB7
1364    BB6:
1365      goto BB7
1366    BB7:
1367      iftmp = (PHI 0(BB5), 1(BB6))
1368      if iftmp == 1 goto BB8 else goto BB3
1369    BB8:
1370      outside of the loop...
1371
1372    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1373    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1374    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1375    exits to predict them using PRED_LOOP_EXIT.  */
1376
1377 static void
1378 predict_extra_loop_exits (edge exit_edge)
1379 {
1380   unsigned i;
1381   bool check_value_one;
1382   gimple *lhs_def_stmt;
1383   gphi *phi_stmt;
1384   tree cmp_rhs, cmp_lhs;
1385   gimple *last;
1386   gcond *cmp_stmt;
1387
1388   last = last_stmt (exit_edge->src);
1389   if (!last)
1390     return;
1391   cmp_stmt = dyn_cast <gcond *> (last);
1392   if (!cmp_stmt)
1393     return;
1394
1395   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1396   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1397   if (!TREE_CONSTANT (cmp_rhs)
1398       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1399     return;
1400   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1401     return;
1402
1403   /* If check_value_one is true, only the phi_args with value '1' will lead
1404      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1405      loop exit.  */
1406   check_value_one = (((integer_onep (cmp_rhs))
1407                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1408                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1409
1410   lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1411   if (!lhs_def_stmt)
1412     return;
1413
1414   phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1415   if (!phi_stmt)
1416     return;
1417
1418   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1419     {
1420       edge e1;
1421       edge_iterator ei;
1422       tree val = gimple_phi_arg_def (phi_stmt, i);
1423       edge e = gimple_phi_arg_edge (phi_stmt, i);
1424
1425       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1426         continue;
1427       if ((check_value_one ^ integer_onep (val)) == 1)
1428         continue;
1429       if (EDGE_COUNT (e->src->succs) != 1)
1430         {
1431           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1432           continue;
1433         }
1434
1435       FOR_EACH_EDGE (e1, ei, e->src->preds)
1436         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1437     }
1438 }
1439
1440 /* Predict edge probabilities by exploiting loop structure.  */
1441
1442 static void
1443 predict_loops (void)
1444 {
1445   struct loop *loop;
1446
1447   /* Try to predict out blocks in a loop that are not part of a
1448      natural loop.  */
1449   FOR_EACH_LOOP (loop, 0)
1450     {
1451       basic_block bb, *bbs;
1452       unsigned j, n_exits;
1453       vec<edge> exits;
1454       struct tree_niter_desc niter_desc;
1455       edge ex;
1456       struct nb_iter_bound *nb_iter;
1457       enum tree_code loop_bound_code = ERROR_MARK;
1458       tree loop_bound_step = NULL;
1459       tree loop_bound_var = NULL;
1460       tree loop_iv_base = NULL;
1461       gcond *stmt = NULL;
1462
1463       exits = get_loop_exit_edges (loop);
1464       n_exits = exits.length ();
1465       if (!n_exits)
1466         {
1467           exits.release ();
1468           continue;
1469         }
1470
1471       FOR_EACH_VEC_ELT (exits, j, ex)
1472         {
1473           tree niter = NULL;
1474           HOST_WIDE_INT nitercst;
1475           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1476           int probability;
1477           enum br_predictor predictor;
1478
1479           predict_extra_loop_exits (ex);
1480
1481           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1482             niter = niter_desc.niter;
1483           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1484             niter = loop_niter_by_eval (loop, ex);
1485
1486           if (TREE_CODE (niter) == INTEGER_CST)
1487             {
1488               if (tree_fits_uhwi_p (niter)
1489                   && max
1490                   && compare_tree_int (niter, max - 1) == -1)
1491                 nitercst = tree_to_uhwi (niter) + 1;
1492               else
1493                 nitercst = max;
1494               predictor = PRED_LOOP_ITERATIONS;
1495             }
1496           /* If we have just one exit and we can derive some information about
1497              the number of iterations of the loop from the statements inside
1498              the loop, use it to predict this exit.  */
1499           else if (n_exits == 1)
1500             {
1501               nitercst = estimated_stmt_executions_int (loop);
1502               if (nitercst < 0)
1503                 continue;
1504               if (nitercst > max)
1505                 nitercst = max;
1506
1507               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1508             }
1509           else
1510             continue;
1511
1512           /* If the prediction for number of iterations is zero, do not
1513              predict the exit edges.  */
1514           if (nitercst == 0)
1515             continue;
1516
1517           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1518           predict_edge (ex, predictor, probability);
1519         }
1520       exits.release ();
1521
1522       /* Find information about loop bound variables.  */
1523       for (nb_iter = loop->bounds; nb_iter;
1524            nb_iter = nb_iter->next)
1525         if (nb_iter->stmt
1526             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1527           {
1528             stmt = as_a <gcond *> (nb_iter->stmt);
1529             break;
1530           }
1531       if (!stmt && last_stmt (loop->header)
1532           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1533         stmt = as_a <gcond *> (last_stmt (loop->header));
1534       if (stmt)
1535         is_comparison_with_loop_invariant_p (stmt, loop,
1536                                              &loop_bound_var,
1537                                              &loop_bound_code,
1538                                              &loop_bound_step,
1539                                              &loop_iv_base);
1540
1541       bbs = get_loop_body (loop);
1542
1543       for (j = 0; j < loop->num_nodes; j++)
1544         {
1545           int header_found = 0;
1546           edge e;
1547           edge_iterator ei;
1548
1549           bb = bbs[j];
1550
1551           /* Bypass loop heuristics on continue statement.  These
1552              statements construct loops via "non-loop" constructs
1553              in the source language and are better to be handled
1554              separately.  */
1555           if (predicted_by_p (bb, PRED_CONTINUE))
1556             continue;
1557
1558           /* Loop branch heuristics - predict an edge back to a
1559              loop's head as taken.  */
1560           if (bb == loop->latch)
1561             {
1562               e = find_edge (loop->latch, loop->header);
1563               if (e)
1564                 {
1565                   header_found = 1;
1566                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1567                 }
1568             }
1569
1570           /* Loop exit heuristics - predict an edge exiting the loop if the
1571              conditional has no loop header successors as not taken.  */
1572           if (!header_found
1573               /* If we already used more reliable loop exit predictors, do not
1574                  bother with PRED_LOOP_EXIT.  */
1575               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1576               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1577             {
1578               /* For loop with many exits we don't want to predict all exits
1579                  with the pretty large probability, because if all exits are
1580                  considered in row, the loop would be predicted to iterate
1581                  almost never.  The code to divide probability by number of
1582                  exits is very rough.  It should compute the number of exits
1583                  taken in each patch through function (not the overall number
1584                  of exits that might be a lot higher for loops with wide switch
1585                  statements in them) and compute n-th square root.
1586
1587                  We limit the minimal probability by 2% to avoid
1588                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1589                  as this was causing regression in perl benchmark containing such
1590                  a wide loop.  */
1591
1592               int probability = ((REG_BR_PROB_BASE
1593                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1594                                  / n_exits);
1595               if (probability < HITRATE (2))
1596                 probability = HITRATE (2);
1597               FOR_EACH_EDGE (e, ei, bb->succs)
1598                 if (e->dest->index < NUM_FIXED_BLOCKS
1599                     || !flow_bb_inside_loop_p (loop, e->dest))
1600                   predict_edge (e, PRED_LOOP_EXIT, probability);
1601             }
1602           if (loop_bound_var)
1603             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1604                                    loop_bound_code,
1605                                    tree_to_shwi (loop_bound_step));
1606         }
1607
1608       /* Free basic blocks from get_loop_body.  */
1609       free (bbs);
1610     }
1611 }
1612
1613 /* Attempt to predict probabilities of BB outgoing edges using local
1614    properties.  */
1615 static void
1616 bb_estimate_probability_locally (basic_block bb)
1617 {
1618   rtx_insn *last_insn = BB_END (bb);
1619   rtx cond;
1620
1621   if (! can_predict_insn_p (last_insn))
1622     return;
1623   cond = get_condition (last_insn, NULL, false, false);
1624   if (! cond)
1625     return;
1626
1627   /* Try "pointer heuristic."
1628      A comparison ptr == 0 is predicted as false.
1629      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1630   if (COMPARISON_P (cond)
1631       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1632           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1633     {
1634       if (GET_CODE (cond) == EQ)
1635         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1636       else if (GET_CODE (cond) == NE)
1637         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1638     }
1639   else
1640
1641   /* Try "opcode heuristic."
1642      EQ tests are usually false and NE tests are usually true. Also,
1643      most quantities are positive, so we can make the appropriate guesses
1644      about signed comparisons against zero.  */
1645     switch (GET_CODE (cond))
1646       {
1647       case CONST_INT:
1648         /* Unconditional branch.  */
1649         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1650                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1651         break;
1652
1653       case EQ:
1654       case UNEQ:
1655         /* Floating point comparisons appears to behave in a very
1656            unpredictable way because of special role of = tests in
1657            FP code.  */
1658         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1659           ;
1660         /* Comparisons with 0 are often used for booleans and there is
1661            nothing useful to predict about them.  */
1662         else if (XEXP (cond, 1) == const0_rtx
1663                  || XEXP (cond, 0) == const0_rtx)
1664           ;
1665         else
1666           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1667         break;
1668
1669       case NE:
1670       case LTGT:
1671         /* Floating point comparisons appears to behave in a very
1672            unpredictable way because of special role of = tests in
1673            FP code.  */
1674         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1675           ;
1676         /* Comparisons with 0 are often used for booleans and there is
1677            nothing useful to predict about them.  */
1678         else if (XEXP (cond, 1) == const0_rtx
1679                  || XEXP (cond, 0) == const0_rtx)
1680           ;
1681         else
1682           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1683         break;
1684
1685       case ORDERED:
1686         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1687         break;
1688
1689       case UNORDERED:
1690         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1691         break;
1692
1693       case LE:
1694       case LT:
1695         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1696             || XEXP (cond, 1) == constm1_rtx)
1697           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1698         break;
1699
1700       case GE:
1701       case GT:
1702         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1703             || XEXP (cond, 1) == constm1_rtx)
1704           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1705         break;
1706
1707       default:
1708         break;
1709       }
1710 }
1711
1712 /* Set edge->probability for each successor edge of BB.  */
1713 void
1714 guess_outgoing_edge_probabilities (basic_block bb)
1715 {
1716   bb_estimate_probability_locally (bb);
1717   combine_predictions_for_insn (BB_END (bb), bb);
1718 }
1719 \f
1720 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1721
1722 /* Helper function for expr_expected_value.  */
1723
1724 static tree
1725 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1726                        tree op1, bitmap visited, enum br_predictor *predictor)
1727 {
1728   gimple *def;
1729
1730   if (predictor)
1731     *predictor = PRED_UNCONDITIONAL;
1732
1733   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1734     {
1735       if (TREE_CONSTANT (op0))
1736         return op0;
1737
1738       if (code != SSA_NAME)
1739         return NULL_TREE;
1740
1741       def = SSA_NAME_DEF_STMT (op0);
1742
1743       /* If we were already here, break the infinite cycle.  */
1744       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1745         return NULL;
1746
1747       if (gimple_code (def) == GIMPLE_PHI)
1748         {
1749           /* All the arguments of the PHI node must have the same constant
1750              length.  */
1751           int i, n = gimple_phi_num_args (def);
1752           tree val = NULL, new_val;
1753
1754           for (i = 0; i < n; i++)
1755             {
1756               tree arg = PHI_ARG_DEF (def, i);
1757               enum br_predictor predictor2;
1758
1759               /* If this PHI has itself as an argument, we cannot
1760                  determine the string length of this argument.  However,
1761                  if we can find an expected constant value for the other
1762                  PHI args then we can still be sure that this is
1763                  likely a constant.  So be optimistic and just
1764                  continue with the next argument.  */
1765               if (arg == PHI_RESULT (def))
1766                 continue;
1767
1768               new_val = expr_expected_value (arg, visited, &predictor2);
1769
1770               /* It is difficult to combine value predictors.  Simply assume
1771                  that later predictor is weaker and take its prediction.  */
1772               if (predictor && *predictor < predictor2)
1773                 *predictor = predictor2;
1774               if (!new_val)
1775                 return NULL;
1776               if (!val)
1777                 val = new_val;
1778               else if (!operand_equal_p (val, new_val, false))
1779                 return NULL;
1780             }
1781           return val;
1782         }
1783       if (is_gimple_assign (def))
1784         {
1785           if (gimple_assign_lhs (def) != op0)
1786             return NULL;
1787
1788           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1789                                         gimple_assign_rhs1 (def),
1790                                         gimple_assign_rhs_code (def),
1791                                         gimple_assign_rhs2 (def),
1792                                         visited, predictor);
1793         }
1794
1795       if (is_gimple_call (def))
1796         {
1797           tree decl = gimple_call_fndecl (def);
1798           if (!decl)
1799             {
1800               if (gimple_call_internal_p (def)
1801                   && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1802                 {
1803                   gcc_assert (gimple_call_num_args (def) == 3);
1804                   tree val = gimple_call_arg (def, 0);
1805                   if (TREE_CONSTANT (val))
1806                     return val;
1807                   if (predictor)
1808                     {
1809                       tree val2 = gimple_call_arg (def, 2);
1810                       gcc_assert (TREE_CODE (val2) == INTEGER_CST
1811                                   && tree_fits_uhwi_p (val2)
1812                                   && tree_to_uhwi (val2) < END_PREDICTORS);
1813                       *predictor = (enum br_predictor) tree_to_uhwi (val2);
1814                     }
1815                   return gimple_call_arg (def, 1);
1816                 }
1817               return NULL;
1818             }
1819           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1820             switch (DECL_FUNCTION_CODE (decl))
1821               {
1822               case BUILT_IN_EXPECT:
1823                 {
1824                   tree val;
1825                   if (gimple_call_num_args (def) != 2)
1826                     return NULL;
1827                   val = gimple_call_arg (def, 0);
1828                   if (TREE_CONSTANT (val))
1829                     return val;
1830                   if (predictor)
1831                     *predictor = PRED_BUILTIN_EXPECT;
1832                   return gimple_call_arg (def, 1);
1833                 }
1834
1835               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1836               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1837               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1838               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1839               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1840               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1841               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1842               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1843               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1844               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1845               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1846               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1847               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1848                 /* Assume that any given atomic operation has low contention,
1849                    and thus the compare-and-swap operation succeeds.  */
1850                 if (predictor)
1851                   *predictor = PRED_COMPARE_AND_SWAP;
1852                 return boolean_true_node;
1853               default:
1854                 break;
1855             }
1856         }
1857
1858       return NULL;
1859     }
1860
1861   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1862     {
1863       tree res;
1864       enum br_predictor predictor2;
1865       op0 = expr_expected_value (op0, visited, predictor);
1866       if (!op0)
1867         return NULL;
1868       op1 = expr_expected_value (op1, visited, &predictor2);
1869       if (predictor && *predictor < predictor2)
1870         *predictor = predictor2;
1871       if (!op1)
1872         return NULL;
1873       res = fold_build2 (code, type, op0, op1);
1874       if (TREE_CONSTANT (res))
1875         return res;
1876       return NULL;
1877     }
1878   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1879     {
1880       tree res;
1881       op0 = expr_expected_value (op0, visited, predictor);
1882       if (!op0)
1883         return NULL;
1884       res = fold_build1 (code, type, op0);
1885       if (TREE_CONSTANT (res))
1886         return res;
1887       return NULL;
1888     }
1889   return NULL;
1890 }
1891
1892 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1893    The function is used by builtin_expect branch predictor so the evidence
1894    must come from this construct and additional possible constant folding.
1895
1896    We may want to implement more involved value guess (such as value range
1897    propagation based prediction), but such tricks shall go to new
1898    implementation.  */
1899
1900 static tree
1901 expr_expected_value (tree expr, bitmap visited,
1902                      enum br_predictor *predictor)
1903 {
1904   enum tree_code code;
1905   tree op0, op1;
1906
1907   if (TREE_CONSTANT (expr))
1908     {
1909       if (predictor)
1910         *predictor = PRED_UNCONDITIONAL;
1911       return expr;
1912     }
1913
1914   extract_ops_from_tree (expr, &code, &op0, &op1);
1915   return expr_expected_value_1 (TREE_TYPE (expr),
1916                                 op0, code, op1, visited, predictor);
1917 }
1918 \f
1919 /* Predict using opcode of the last statement in basic block.  */
1920 static void
1921 tree_predict_by_opcode (basic_block bb)
1922 {
1923   gimple *stmt = last_stmt (bb);
1924   edge then_edge;
1925   tree op0, op1;
1926   tree type;
1927   tree val;
1928   enum tree_code cmp;
1929   bitmap visited;
1930   edge_iterator ei;
1931   enum br_predictor predictor;
1932
1933   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1934     return;
1935   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1936     if (then_edge->flags & EDGE_TRUE_VALUE)
1937       break;
1938   op0 = gimple_cond_lhs (stmt);
1939   op1 = gimple_cond_rhs (stmt);
1940   cmp = gimple_cond_code (stmt);
1941   type = TREE_TYPE (op0);
1942   visited = BITMAP_ALLOC (NULL);
1943   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1944                                &predictor);
1945   BITMAP_FREE (visited);
1946   if (val && TREE_CODE (val) == INTEGER_CST)
1947     {
1948       if (predictor == PRED_BUILTIN_EXPECT)
1949         {
1950           int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1951
1952           gcc_assert (percent >= 0 && percent <= 100);
1953           if (integer_zerop (val))
1954             percent = 100 - percent;
1955           predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1956         }
1957       else
1958         predict_edge (then_edge, predictor,
1959                       integer_zerop (val) ? NOT_TAKEN : TAKEN);
1960     }
1961   /* Try "pointer heuristic."
1962      A comparison ptr == 0 is predicted as false.
1963      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1964   if (POINTER_TYPE_P (type))
1965     {
1966       if (cmp == EQ_EXPR)
1967         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1968       else if (cmp == NE_EXPR)
1969         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1970     }
1971   else
1972
1973   /* Try "opcode heuristic."
1974      EQ tests are usually false and NE tests are usually true. Also,
1975      most quantities are positive, so we can make the appropriate guesses
1976      about signed comparisons against zero.  */
1977     switch (cmp)
1978       {
1979       case EQ_EXPR:
1980       case UNEQ_EXPR:
1981         /* Floating point comparisons appears to behave in a very
1982            unpredictable way because of special role of = tests in
1983            FP code.  */
1984         if (FLOAT_TYPE_P (type))
1985           ;
1986         /* Comparisons with 0 are often used for booleans and there is
1987            nothing useful to predict about them.  */
1988         else if (integer_zerop (op0) || integer_zerop (op1))
1989           ;
1990         else
1991           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1992         break;
1993
1994       case NE_EXPR:
1995       case LTGT_EXPR:
1996         /* Floating point comparisons appears to behave in a very
1997            unpredictable way because of special role of = tests in
1998            FP code.  */
1999         if (FLOAT_TYPE_P (type))
2000           ;
2001         /* Comparisons with 0 are often used for booleans and there is
2002            nothing useful to predict about them.  */
2003         else if (integer_zerop (op0)
2004                  || integer_zerop (op1))
2005           ;
2006         else
2007           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2008         break;
2009
2010       case ORDERED_EXPR:
2011         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2012         break;
2013
2014       case UNORDERED_EXPR:
2015         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2016         break;
2017
2018       case LE_EXPR:
2019       case LT_EXPR:
2020         if (integer_zerop (op1)
2021             || integer_onep (op1)
2022             || integer_all_onesp (op1)
2023             || real_zerop (op1)
2024             || real_onep (op1)
2025             || real_minus_onep (op1))
2026           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2027         break;
2028
2029       case GE_EXPR:
2030       case GT_EXPR:
2031         if (integer_zerop (op1)
2032             || integer_onep (op1)
2033             || integer_all_onesp (op1)
2034             || real_zerop (op1)
2035             || real_onep (op1)
2036             || real_minus_onep (op1))
2037           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2038         break;
2039
2040       default:
2041         break;
2042       }
2043 }
2044
2045 /* Try to guess whether the value of return means error code.  */
2046
2047 static enum br_predictor
2048 return_prediction (tree val, enum prediction *prediction)
2049 {
2050   /* VOID.  */
2051   if (!val)
2052     return PRED_NO_PREDICTION;
2053   /* Different heuristics for pointers and scalars.  */
2054   if (POINTER_TYPE_P (TREE_TYPE (val)))
2055     {
2056       /* NULL is usually not returned.  */
2057       if (integer_zerop (val))
2058         {
2059           *prediction = NOT_TAKEN;
2060           return PRED_NULL_RETURN;
2061         }
2062     }
2063   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2064     {
2065       /* Negative return values are often used to indicate
2066          errors.  */
2067       if (TREE_CODE (val) == INTEGER_CST
2068           && tree_int_cst_sgn (val) < 0)
2069         {
2070           *prediction = NOT_TAKEN;
2071           return PRED_NEGATIVE_RETURN;
2072         }
2073       /* Constant return values seems to be commonly taken.
2074          Zero/one often represent booleans so exclude them from the
2075          heuristics.  */
2076       if (TREE_CONSTANT (val)
2077           && (!integer_zerop (val) && !integer_onep (val)))
2078         {
2079           *prediction = TAKEN;
2080           return PRED_CONST_RETURN;
2081         }
2082     }
2083   return PRED_NO_PREDICTION;
2084 }
2085
2086 /* Find the basic block with return expression and look up for possible
2087    return value trying to apply RETURN_PREDICTION heuristics.  */
2088 static void
2089 apply_return_prediction (void)
2090 {
2091   greturn *return_stmt = NULL;
2092   tree return_val;
2093   edge e;
2094   gphi *phi;
2095   int phi_num_args, i;
2096   enum br_predictor pred;
2097   enum prediction direction;
2098   edge_iterator ei;
2099
2100   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2101     {
2102       gimple *last = last_stmt (e->src);
2103       if (last
2104           && gimple_code (last) == GIMPLE_RETURN)
2105         {
2106           return_stmt = as_a <greturn *> (last);
2107           break;
2108         }
2109     }
2110   if (!e)
2111     return;
2112   return_val = gimple_return_retval (return_stmt);
2113   if (!return_val)
2114     return;
2115   if (TREE_CODE (return_val) != SSA_NAME
2116       || !SSA_NAME_DEF_STMT (return_val)
2117       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2118     return;
2119   phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2120   phi_num_args = gimple_phi_num_args (phi);
2121   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2122
2123   /* Avoid the degenerate case where all return values form the function
2124      belongs to same category (ie they are all positive constants)
2125      so we can hardly say something about them.  */
2126   for (i = 1; i < phi_num_args; i++)
2127     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2128       break;
2129   if (i != phi_num_args)
2130     for (i = 0; i < phi_num_args; i++)
2131       {
2132         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2133         if (pred != PRED_NO_PREDICTION)
2134           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2135                                          direction);
2136       }
2137 }
2138
2139 /* Look for basic block that contains unlikely to happen events
2140    (such as noreturn calls) and mark all paths leading to execution
2141    of this basic blocks as unlikely.  */
2142
2143 static void
2144 tree_bb_level_predictions (void)
2145 {
2146   basic_block bb;
2147   bool has_return_edges = false;
2148   edge e;
2149   edge_iterator ei;
2150
2151   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2152     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2153       {
2154         has_return_edges = true;
2155         break;
2156       }
2157
2158   apply_return_prediction ();
2159
2160   FOR_EACH_BB_FN (bb, cfun)
2161     {
2162       gimple_stmt_iterator gsi;
2163
2164       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2165         {
2166           gimple *stmt = gsi_stmt (gsi);
2167           tree decl;
2168
2169           if (is_gimple_call (stmt))
2170             {
2171               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2172                   && has_return_edges)
2173                 predict_paths_leading_to (bb, PRED_NORETURN,
2174                                           NOT_TAKEN);
2175               decl = gimple_call_fndecl (stmt);
2176               if (decl
2177                   && lookup_attribute ("cold",
2178                                        DECL_ATTRIBUTES (decl)))
2179                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2180                                           NOT_TAKEN);
2181             }
2182           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2183             {
2184               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2185                                         gimple_predict_outcome (stmt));
2186               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2187                  hints to callers.  */
2188             }
2189         }
2190     }
2191 }
2192
2193 /* Callback for hash_map::traverse, asserts that the pointer map is
2194    empty.  */
2195
2196 bool
2197 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2198                  void *)
2199 {
2200   gcc_assert (!value);
2201   return false;
2202 }
2203
2204 /* Predict branch probabilities and estimate profile for basic block BB.  */
2205
2206 static void
2207 tree_estimate_probability_bb (basic_block bb)
2208 {
2209   edge e;
2210   edge_iterator ei;
2211   gimple *last;
2212
2213   FOR_EACH_EDGE (e, ei, bb->succs)
2214     {
2215       /* Predict edges to user labels with attributes.  */
2216       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2217         {
2218           gimple_stmt_iterator gi;
2219           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2220             {
2221               glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2222               tree decl;
2223
2224               if (!label_stmt)
2225                 break;
2226               decl = gimple_label_label (label_stmt);
2227               if (DECL_ARTIFICIAL (decl))
2228                 continue;
2229
2230               /* Finally, we have a user-defined label.  */
2231               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2232                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2233               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2234                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2235             }
2236         }
2237
2238       /* Predict early returns to be probable, as we've already taken
2239          care for error returns and other cases are often used for
2240          fast paths through function.
2241
2242          Since we've already removed the return statements, we are
2243          looking for CFG like:
2244
2245          if (conditional)
2246          {
2247          ..
2248          goto return_block
2249          }
2250          some other blocks
2251          return_block:
2252          return_stmt.  */
2253       if (e->dest != bb->next_bb
2254           && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2255           && single_succ_p (e->dest)
2256           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2257           && (last = last_stmt (e->dest)) != NULL
2258           && gimple_code (last) == GIMPLE_RETURN)
2259         {
2260           edge e1;
2261           edge_iterator ei1;
2262
2263           if (single_succ_p (bb))
2264             {
2265               FOR_EACH_EDGE (e1, ei1, bb->preds)
2266                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2267                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2268                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2269                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2270             }
2271           else
2272             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2273                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2274                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2275               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2276         }
2277
2278       /* Look for block we are guarding (ie we dominate it,
2279          but it doesn't postdominate us).  */
2280       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2281           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2282           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2283         {
2284           gimple_stmt_iterator bi;
2285
2286           /* The call heuristic claims that a guarded function call
2287              is improbable.  This is because such calls are often used
2288              to signal exceptional situations such as printing error
2289              messages.  */
2290           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2291                gsi_next (&bi))
2292             {
2293               gimple *stmt = gsi_stmt (bi);
2294               if (is_gimple_call (stmt)
2295                   /* Constant and pure calls are hardly used to signalize
2296                      something exceptional.  */
2297                   && gimple_has_side_effects (stmt))
2298                 {
2299                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2300                   break;
2301                 }
2302             }
2303         }
2304     }
2305   tree_predict_by_opcode (bb);
2306 }
2307
2308 /* Predict branch probabilities and estimate profile of the tree CFG.
2309    This function can be called from the loop optimizers to recompute
2310    the profile information.  */
2311
2312 void
2313 tree_estimate_probability (void)
2314 {
2315   basic_block bb;
2316
2317   add_noreturn_fake_exit_edges ();
2318   connect_infinite_loops_to_exit ();
2319   /* We use loop_niter_by_eval, which requires that the loops have
2320      preheaders.  */
2321   create_preheaders (CP_SIMPLE_PREHEADERS);
2322   calculate_dominance_info (CDI_POST_DOMINATORS);
2323
2324   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2325   tree_bb_level_predictions ();
2326   record_loop_exits ();
2327
2328   if (number_of_loops (cfun) > 1)
2329     predict_loops ();
2330
2331   FOR_EACH_BB_FN (bb, cfun)
2332     tree_estimate_probability_bb (bb);
2333
2334   FOR_EACH_BB_FN (bb, cfun)
2335     combine_predictions_for_bb (bb);
2336
2337   if (flag_checking)
2338     bb_predictions->traverse<void *, assert_is_empty> (NULL);
2339
2340   delete bb_predictions;
2341   bb_predictions = NULL;
2342
2343   estimate_bb_frequencies (false);
2344   free_dominance_info (CDI_POST_DOMINATORS);
2345   remove_fake_exit_edges ();
2346 }
2347 \f
2348 /* Predict edges to successors of CUR whose sources are not postdominated by
2349    BB by PRED and recurse to all postdominators.  */
2350
2351 static void
2352 predict_paths_for_bb (basic_block cur, basic_block bb,
2353                       enum br_predictor pred,
2354                       enum prediction taken,
2355                       bitmap visited)
2356 {
2357   edge e;
2358   edge_iterator ei;
2359   basic_block son;
2360
2361   /* We are looking for all edges forming edge cut induced by
2362      set of all blocks postdominated by BB.  */
2363   FOR_EACH_EDGE (e, ei, cur->preds)
2364     if (e->src->index >= NUM_FIXED_BLOCKS
2365         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2366     {
2367       edge e2;
2368       edge_iterator ei2;
2369       bool found = false;
2370
2371       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2372       if (e->flags & (EDGE_EH | EDGE_FAKE))
2373         continue;
2374       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2375
2376       /* See if there is an edge from e->src that is not abnormal
2377          and does not lead to BB.  */
2378       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2379         if (e2 != e
2380             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2381             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2382           {
2383             found = true;
2384             break;
2385           }
2386
2387       /* If there is non-abnormal path leaving e->src, predict edge
2388          using predictor.  Otherwise we need to look for paths
2389          leading to e->src.
2390
2391          The second may lead to infinite loop in the case we are predicitng
2392          regions that are only reachable by abnormal edges.  We simply
2393          prevent visiting given BB twice.  */
2394       if (found)
2395         predict_edge_def (e, pred, taken);
2396       else if (bitmap_set_bit (visited, e->src->index))
2397         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2398     }
2399   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2400        son;
2401        son = next_dom_son (CDI_POST_DOMINATORS, son))
2402     predict_paths_for_bb (son, bb, pred, taken, visited);
2403 }
2404
2405 /* Sets branch probabilities according to PREDiction and
2406    FLAGS.  */
2407
2408 static void
2409 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2410                           enum prediction taken)
2411 {
2412   bitmap visited = BITMAP_ALLOC (NULL);
2413   predict_paths_for_bb (bb, bb, pred, taken, visited);
2414   BITMAP_FREE (visited);
2415 }
2416
2417 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2418
2419 static void
2420 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2421                                enum prediction taken)
2422 {
2423   bool has_nonloop_edge = false;
2424   edge_iterator ei;
2425   edge e2;
2426
2427   basic_block bb = e->src;
2428   FOR_EACH_EDGE (e2, ei, bb->succs)
2429     if (e2->dest != e->src && e2->dest != e->dest
2430         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2431         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2432       {
2433         has_nonloop_edge = true;
2434         break;
2435       }
2436   if (!has_nonloop_edge)
2437     {
2438       bitmap visited = BITMAP_ALLOC (NULL);
2439       predict_paths_for_bb (bb, bb, pred, taken, visited);
2440       BITMAP_FREE (visited);
2441     }
2442   else
2443     predict_edge_def (e, pred, taken);
2444 }
2445 \f
2446 /* This is used to carry information about basic blocks.  It is
2447    attached to the AUX field of the standard CFG block.  */
2448
2449 struct block_info
2450 {
2451   /* Estimated frequency of execution of basic_block.  */
2452   sreal frequency;
2453
2454   /* To keep queue of basic blocks to process.  */
2455   basic_block next;
2456
2457   /* Number of predecessors we need to visit first.  */
2458   int npredecessors;
2459 };
2460
2461 /* Similar information for edges.  */
2462 struct edge_prob_info
2463 {
2464   /* In case edge is a loopback edge, the probability edge will be reached
2465      in case header is.  Estimated number of iterations of the loop can be
2466      then computed as 1 / (1 - back_edge_prob).  */
2467   sreal back_edge_prob;
2468   /* True if the edge is a loopback edge in the natural loop.  */
2469   unsigned int back_edge:1;
2470 };
2471
2472 #define BLOCK_INFO(B)   ((block_info *) (B)->aux)
2473 #undef EDGE_INFO
2474 #define EDGE_INFO(E)    ((edge_prob_info *) (E)->aux)
2475
2476 /* Helper function for estimate_bb_frequencies.
2477    Propagate the frequencies in blocks marked in
2478    TOVISIT, starting in HEAD.  */
2479
2480 static void
2481 propagate_freq (basic_block head, bitmap tovisit)
2482 {
2483   basic_block bb;
2484   basic_block last;
2485   unsigned i;
2486   edge e;
2487   basic_block nextbb;
2488   bitmap_iterator bi;
2489
2490   /* For each basic block we need to visit count number of his predecessors
2491      we need to visit first.  */
2492   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2493     {
2494       edge_iterator ei;
2495       int count = 0;
2496
2497       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2498
2499       FOR_EACH_EDGE (e, ei, bb->preds)
2500         {
2501           bool visit = bitmap_bit_p (tovisit, e->src->index);
2502
2503           if (visit && !(e->flags & EDGE_DFS_BACK))
2504             count++;
2505           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2506             fprintf (dump_file,
2507                      "Irreducible region hit, ignoring edge to %i->%i\n",
2508                      e->src->index, bb->index);
2509         }
2510       BLOCK_INFO (bb)->npredecessors = count;
2511       /* When function never returns, we will never process exit block.  */
2512       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2513         bb->count = bb->frequency = 0;
2514     }
2515
2516   BLOCK_INFO (head)->frequency = 1;
2517   last = head;
2518   for (bb = head; bb; bb = nextbb)
2519     {
2520       edge_iterator ei;
2521       sreal cyclic_probability = 0;
2522       sreal frequency = 0;
2523
2524       nextbb = BLOCK_INFO (bb)->next;
2525       BLOCK_INFO (bb)->next = NULL;
2526
2527       /* Compute frequency of basic block.  */
2528       if (bb != head)
2529         {
2530           if (flag_checking)
2531             FOR_EACH_EDGE (e, ei, bb->preds)
2532               gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2533                           || (e->flags & EDGE_DFS_BACK));
2534
2535           FOR_EACH_EDGE (e, ei, bb->preds)
2536             if (EDGE_INFO (e)->back_edge)
2537               {
2538                 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2539               }
2540             else if (!(e->flags & EDGE_DFS_BACK))
2541               {
2542                 /*  frequency += (e->probability
2543                                   * BLOCK_INFO (e->src)->frequency /
2544                                   REG_BR_PROB_BASE);  */
2545
2546                 sreal tmp = e->probability;
2547                 tmp *= BLOCK_INFO (e->src)->frequency;
2548                 tmp *= real_inv_br_prob_base;
2549                 frequency += tmp;
2550               }
2551
2552           if (cyclic_probability == 0)
2553             {
2554               BLOCK_INFO (bb)->frequency = frequency;
2555             }
2556           else
2557             {
2558               if (cyclic_probability > real_almost_one)
2559                 cyclic_probability = real_almost_one;
2560
2561               /* BLOCK_INFO (bb)->frequency = frequency
2562                                               / (1 - cyclic_probability) */
2563
2564               cyclic_probability = sreal (1) - cyclic_probability;
2565               BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2566             }
2567         }
2568
2569       bitmap_clear_bit (tovisit, bb->index);
2570
2571       e = find_edge (bb, head);
2572       if (e)
2573         {
2574           /* EDGE_INFO (e)->back_edge_prob
2575              = ((e->probability * BLOCK_INFO (bb)->frequency)
2576              / REG_BR_PROB_BASE); */
2577
2578           sreal tmp = e->probability;
2579           tmp *= BLOCK_INFO (bb)->frequency;
2580           EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2581         }
2582
2583       /* Propagate to successor blocks.  */
2584       FOR_EACH_EDGE (e, ei, bb->succs)
2585         if (!(e->flags & EDGE_DFS_BACK)
2586             && BLOCK_INFO (e->dest)->npredecessors)
2587           {
2588             BLOCK_INFO (e->dest)->npredecessors--;
2589             if (!BLOCK_INFO (e->dest)->npredecessors)
2590               {
2591                 if (!nextbb)
2592                   nextbb = e->dest;
2593                 else
2594                   BLOCK_INFO (last)->next = e->dest;
2595
2596                 last = e->dest;
2597               }
2598           }
2599     }
2600 }
2601
2602 /* Estimate frequencies in loops at same nest level.  */
2603
2604 static void
2605 estimate_loops_at_level (struct loop *first_loop)
2606 {
2607   struct loop *loop;
2608
2609   for (loop = first_loop; loop; loop = loop->next)
2610     {
2611       edge e;
2612       basic_block *bbs;
2613       unsigned i;
2614       bitmap tovisit = BITMAP_ALLOC (NULL);
2615
2616       estimate_loops_at_level (loop->inner);
2617
2618       /* Find current loop back edge and mark it.  */
2619       e = loop_latch_edge (loop);
2620       EDGE_INFO (e)->back_edge = 1;
2621
2622       bbs = get_loop_body (loop);
2623       for (i = 0; i < loop->num_nodes; i++)
2624         bitmap_set_bit (tovisit, bbs[i]->index);
2625       free (bbs);
2626       propagate_freq (loop->header, tovisit);
2627       BITMAP_FREE (tovisit);
2628     }
2629 }
2630
2631 /* Propagates frequencies through structure of loops.  */
2632
2633 static void
2634 estimate_loops (void)
2635 {
2636   bitmap tovisit = BITMAP_ALLOC (NULL);
2637   basic_block bb;
2638
2639   /* Start by estimating the frequencies in the loops.  */
2640   if (number_of_loops (cfun) > 1)
2641     estimate_loops_at_level (current_loops->tree_root->inner);
2642
2643   /* Now propagate the frequencies through all the blocks.  */
2644   FOR_ALL_BB_FN (bb, cfun)
2645     {
2646       bitmap_set_bit (tovisit, bb->index);
2647     }
2648   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2649   BITMAP_FREE (tovisit);
2650 }
2651
2652 /* Drop the profile for NODE to guessed, and update its frequency based on
2653    whether it is expected to be hot given the CALL_COUNT.  */
2654
2655 static void
2656 drop_profile (struct cgraph_node *node, gcov_type call_count)
2657 {
2658   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2659   /* In the case where this was called by another function with a
2660      dropped profile, call_count will be 0. Since there are no
2661      non-zero call counts to this function, we don't know for sure
2662      whether it is hot, and therefore it will be marked normal below.  */
2663   bool hot = maybe_hot_count_p (NULL, call_count);
2664
2665   if (dump_file)
2666     fprintf (dump_file,
2667              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2668              node->name (), node->order,
2669              hot ? "Function is hot" : "Function is normal");
2670   /* We only expect to miss profiles for functions that are reached
2671      via non-zero call edges in cases where the function may have
2672      been linked from another module or library (COMDATs and extern
2673      templates). See the comments below for handle_missing_profiles.
2674      Also, only warn in cases where the missing counts exceed the
2675      number of training runs. In certain cases with an execv followed
2676      by a no-return call the profile for the no-return call is not
2677      dumped and there can be a mismatch.  */
2678   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2679       && call_count > profile_info->runs)
2680     {
2681       if (flag_profile_correction)
2682         {
2683           if (dump_file)
2684             fprintf (dump_file,
2685                      "Missing counts for called function %s/%i\n",
2686                      node->name (), node->order);
2687         }
2688       else
2689         warning (0, "Missing counts for called function %s/%i",
2690                  node->name (), node->order);
2691     }
2692
2693   profile_status_for_fn (fn)
2694       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2695   node->frequency
2696       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2697 }
2698
2699 /* In the case of COMDAT routines, multiple object files will contain the same
2700    function and the linker will select one for the binary. In that case
2701    all the other copies from the profile instrument binary will be missing
2702    profile counts. Look for cases where this happened, due to non-zero
2703    call counts going to 0-count functions, and drop the profile to guessed
2704    so that we can use the estimated probabilities and avoid optimizing only
2705    for size.
2706
2707    The other case where the profile may be missing is when the routine
2708    is not going to be emitted to the object file, e.g. for "extern template"
2709    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2710    all other cases of non-zero calls to 0-count functions.  */
2711
2712 void
2713 handle_missing_profiles (void)
2714 {
2715   struct cgraph_node *node;
2716   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2717   vec<struct cgraph_node *> worklist;
2718   worklist.create (64);
2719
2720   /* See if 0 count function has non-0 count callers.  In this case we
2721      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2722   FOR_EACH_DEFINED_FUNCTION (node)
2723     {
2724       struct cgraph_edge *e;
2725       gcov_type call_count = 0;
2726       gcov_type max_tp_first_run = 0;
2727       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2728
2729       if (node->count)
2730         continue;
2731       for (e = node->callers; e; e = e->next_caller)
2732       {
2733         call_count += e->count;
2734
2735         if (e->caller->tp_first_run > max_tp_first_run)
2736           max_tp_first_run = e->caller->tp_first_run;
2737       }
2738
2739       /* If time profile is missing, let assign the maximum that comes from
2740          caller functions.  */
2741       if (!node->tp_first_run && max_tp_first_run)
2742         node->tp_first_run = max_tp_first_run + 1;
2743
2744       if (call_count
2745           && fn && fn->cfg
2746           && (call_count * unlikely_count_fraction >= profile_info->runs))
2747         {
2748           drop_profile (node, call_count);
2749           worklist.safe_push (node);
2750         }
2751     }
2752
2753   /* Propagate the profile dropping to other 0-count COMDATs that are
2754      potentially called by COMDATs we already dropped the profile on.  */
2755   while (worklist.length () > 0)
2756     {
2757       struct cgraph_edge *e;
2758
2759       node = worklist.pop ();
2760       for (e = node->callees; e; e = e->next_caller)
2761         {
2762           struct cgraph_node *callee = e->callee;
2763           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2764
2765           if (callee->count > 0)
2766             continue;
2767           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2768               && profile_status_for_fn (fn) == PROFILE_READ)
2769             {
2770               drop_profile (node, 0);
2771               worklist.safe_push (callee);
2772             }
2773         }
2774     }
2775   worklist.release ();
2776 }
2777
2778 /* Convert counts measured by profile driven feedback to frequencies.
2779    Return nonzero iff there was any nonzero execution count.  */
2780
2781 int
2782 counts_to_freqs (void)
2783 {
2784   gcov_type count_max, true_count_max = 0;
2785   basic_block bb;
2786
2787   /* Don't overwrite the estimated frequencies when the profile for
2788      the function is missing.  We may drop this function PROFILE_GUESSED
2789      later in drop_profile ().  */
2790   if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2791     return 0;
2792
2793   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2794     true_count_max = MAX (bb->count, true_count_max);
2795
2796   count_max = MAX (true_count_max, 1);
2797   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2798     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2799
2800   return true_count_max;
2801 }
2802
2803 /* Return true if function is likely to be expensive, so there is no point to
2804    optimize performance of prologue, epilogue or do inlining at the expense
2805    of code size growth.  THRESHOLD is the limit of number of instructions
2806    function can execute at average to be still considered not expensive.  */
2807
2808 bool
2809 expensive_function_p (int threshold)
2810 {
2811   unsigned int sum = 0;
2812   basic_block bb;
2813   unsigned int limit;
2814
2815   /* We can not compute accurately for large thresholds due to scaled
2816      frequencies.  */
2817   gcc_assert (threshold <= BB_FREQ_MAX);
2818
2819   /* Frequencies are out of range.  This either means that function contains
2820      internal loop executing more than BB_FREQ_MAX times or profile feedback
2821      is available and function has not been executed at all.  */
2822   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2823     return true;
2824
2825   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2826   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2827   FOR_EACH_BB_FN (bb, cfun)
2828     {
2829       rtx_insn *insn;
2830
2831       FOR_BB_INSNS (bb, insn)
2832         if (active_insn_p (insn))
2833           {
2834             sum += bb->frequency;
2835             if (sum > limit)
2836               return true;
2837         }
2838     }
2839
2840   return false;
2841 }
2842
2843 /* Estimate and propagate basic block frequencies using the given branch
2844    probabilities.  If FORCE is true, the frequencies are used to estimate
2845    the counts even when there are already non-zero profile counts.  */
2846
2847 void
2848 estimate_bb_frequencies (bool force)
2849 {
2850   basic_block bb;
2851   sreal freq_max;
2852
2853   if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2854     {
2855       static int real_values_initialized = 0;
2856
2857       if (!real_values_initialized)
2858         {
2859           real_values_initialized = 1;
2860           real_br_prob_base = REG_BR_PROB_BASE;
2861           real_bb_freq_max = BB_FREQ_MAX;
2862           real_one_half = sreal (1, -1);
2863           real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2864           real_almost_one = sreal (1) - real_inv_br_prob_base;
2865         }
2866
2867       mark_dfs_back_edges ();
2868
2869       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2870          REG_BR_PROB_BASE;
2871
2872       /* Set up block info for each basic block.  */
2873       alloc_aux_for_blocks (sizeof (block_info));
2874       alloc_aux_for_edges (sizeof (edge_prob_info));
2875       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2876         {
2877           edge e;
2878           edge_iterator ei;
2879
2880           FOR_EACH_EDGE (e, ei, bb->succs)
2881             {
2882               EDGE_INFO (e)->back_edge_prob = e->probability;
2883               EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2884             }
2885         }
2886
2887       /* First compute frequencies locally for each loop from innermost
2888          to outermost to examine frequencies for back edges.  */
2889       estimate_loops ();
2890
2891       freq_max = 0;
2892       FOR_EACH_BB_FN (bb, cfun)
2893         if (freq_max < BLOCK_INFO (bb)->frequency)
2894           freq_max = BLOCK_INFO (bb)->frequency;
2895
2896       freq_max = real_bb_freq_max / freq_max;
2897       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2898         {
2899           sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2900           bb->frequency = tmp.to_int ();
2901         }
2902
2903       free_aux_for_blocks ();
2904       free_aux_for_edges ();
2905     }
2906   compute_function_frequency ();
2907 }
2908
2909 /* Decide whether function is hot, cold or unlikely executed.  */
2910 void
2911 compute_function_frequency (void)
2912 {
2913   basic_block bb;
2914   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2915
2916   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2917       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2918     node->only_called_at_startup = true;
2919   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2920     node->only_called_at_exit = true;
2921
2922   if (profile_status_for_fn (cfun) != PROFILE_READ)
2923     {
2924       int flags = flags_from_decl_or_type (current_function_decl);
2925       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2926           != NULL)
2927         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2928       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2929                != NULL)
2930         node->frequency = NODE_FREQUENCY_HOT;
2931       else if (flags & ECF_NORETURN)
2932         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2933       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2934         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2935       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2936                || DECL_STATIC_DESTRUCTOR (current_function_decl))
2937         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2938       return;
2939     }
2940
2941   /* Only first time try to drop function into unlikely executed.
2942      After inlining the roundoff errors may confuse us.
2943      Ipa-profile pass will drop functions only called from unlikely
2944      functions to unlikely and that is most of what we care about.  */
2945   if (!cfun->after_inlining)
2946     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2947   FOR_EACH_BB_FN (bb, cfun)
2948     {
2949       if (maybe_hot_bb_p (cfun, bb))
2950         {
2951           node->frequency = NODE_FREQUENCY_HOT;
2952           return;
2953         }
2954       if (!probably_never_executed_bb_p (cfun, bb))
2955         node->frequency = NODE_FREQUENCY_NORMAL;
2956     }
2957 }
2958
2959 /* Build PREDICT_EXPR.  */
2960 tree
2961 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2962 {
2963   tree t = build1 (PREDICT_EXPR, void_type_node,
2964                    build_int_cst (integer_type_node, predictor));
2965   SET_PREDICT_EXPR_OUTCOME (t, taken);
2966   return t;
2967 }
2968
2969 const char *
2970 predictor_name (enum br_predictor predictor)
2971 {
2972   return predictor_info[predictor].name;
2973 }
2974
2975 /* Predict branch probabilities and estimate profile of the tree CFG. */
2976
2977 namespace {
2978
2979 const pass_data pass_data_profile =
2980 {
2981   GIMPLE_PASS, /* type */
2982   "profile_estimate", /* name */
2983   OPTGROUP_NONE, /* optinfo_flags */
2984   TV_BRANCH_PROB, /* tv_id */
2985   PROP_cfg, /* properties_required */
2986   0, /* properties_provided */
2987   0, /* properties_destroyed */
2988   0, /* todo_flags_start */
2989   0, /* todo_flags_finish */
2990 };
2991
2992 class pass_profile : public gimple_opt_pass
2993 {
2994 public:
2995   pass_profile (gcc::context *ctxt)
2996     : gimple_opt_pass (pass_data_profile, ctxt)
2997   {}
2998
2999   /* opt_pass methods: */
3000   virtual bool gate (function *) { return flag_guess_branch_prob; }
3001   virtual unsigned int execute (function *);
3002
3003 }; // class pass_profile
3004
3005 unsigned int
3006 pass_profile::execute (function *fun)
3007 {
3008   unsigned nb_loops;
3009
3010   if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3011     return 0;
3012
3013   loop_optimizer_init (LOOPS_NORMAL);
3014   if (dump_file && (dump_flags & TDF_DETAILS))
3015     flow_loops_dump (dump_file, NULL, 0);
3016
3017   mark_irreducible_loops ();
3018
3019   nb_loops = number_of_loops (fun);
3020   if (nb_loops > 1)
3021     scev_initialize ();
3022
3023   tree_estimate_probability ();
3024
3025   if (nb_loops > 1)
3026     scev_finalize ();
3027
3028   loop_optimizer_finalize ();
3029   if (dump_file && (dump_flags & TDF_DETAILS))
3030     gimple_dump_cfg (dump_file, dump_flags);
3031  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3032     profile_status_for_fn (fun) = PROFILE_GUESSED;
3033   return 0;
3034 }
3035
3036 } // anon namespace
3037
3038 gimple_opt_pass *
3039 make_pass_profile (gcc::context *ctxt)
3040 {
3041   return new pass_profile (ctxt);
3042 }
3043
3044 namespace {
3045
3046 const pass_data pass_data_strip_predict_hints =
3047 {
3048   GIMPLE_PASS, /* type */
3049   "*strip_predict_hints", /* name */
3050   OPTGROUP_NONE, /* optinfo_flags */
3051   TV_BRANCH_PROB, /* tv_id */
3052   PROP_cfg, /* properties_required */
3053   0, /* properties_provided */
3054   0, /* properties_destroyed */
3055   0, /* todo_flags_start */
3056   0, /* todo_flags_finish */
3057 };
3058
3059 class pass_strip_predict_hints : public gimple_opt_pass
3060 {
3061 public:
3062   pass_strip_predict_hints (gcc::context *ctxt)
3063     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3064   {}
3065
3066   /* opt_pass methods: */
3067   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3068   virtual unsigned int execute (function *);
3069
3070 }; // class pass_strip_predict_hints
3071
3072 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3073    we no longer need.  */
3074 unsigned int
3075 pass_strip_predict_hints::execute (function *fun)
3076 {
3077   basic_block bb;
3078   gimple *ass_stmt;
3079   tree var;
3080
3081   FOR_EACH_BB_FN (bb, fun)
3082     {
3083       gimple_stmt_iterator bi;
3084       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3085         {
3086           gimple *stmt = gsi_stmt (bi);
3087
3088           if (gimple_code (stmt) == GIMPLE_PREDICT)
3089             {
3090               gsi_remove (&bi, true);
3091               continue;
3092             }
3093           else if (is_gimple_call (stmt))
3094             {
3095               tree fndecl = gimple_call_fndecl (stmt);
3096
3097               if ((fndecl
3098                    && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3099                    && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3100                    && gimple_call_num_args (stmt) == 2)
3101                   || (gimple_call_internal_p (stmt)
3102                       && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3103                 {
3104                   var = gimple_call_lhs (stmt);
3105                   if (var)
3106                     {
3107                       ass_stmt
3108                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3109                       gsi_replace (&bi, ass_stmt, true);
3110                     }
3111                   else
3112                     {
3113                       gsi_remove (&bi, true);
3114                       continue;
3115                     }
3116                 }
3117             }
3118           gsi_next (&bi);
3119         }
3120     }
3121   return 0;
3122 }
3123
3124 } // anon namespace
3125
3126 gimple_opt_pass *
3127 make_pass_strip_predict_hints (gcc::context *ctxt)
3128 {
3129   return new pass_strip_predict_hints (ctxt);
3130 }
3131
3132 /* Rebuild function frequencies.  Passes are in general expected to
3133    maintain profile by hand, however in some cases this is not possible:
3134    for example when inlining several functions with loops freuqencies might run
3135    out of scale and thus needs to be recomputed.  */
3136
3137 void
3138 rebuild_frequencies (void)
3139 {
3140   timevar_push (TV_REBUILD_FREQUENCIES);
3141
3142   /* When the max bb count in the function is small, there is a higher
3143      chance that there were truncation errors in the integer scaling
3144      of counts by inlining and other optimizations. This could lead
3145      to incorrect classification of code as being cold when it isn't.
3146      In that case, force the estimation of bb counts/frequencies from the
3147      branch probabilities, rather than computing frequencies from counts,
3148      which may also lead to frequencies incorrectly reduced to 0. There
3149      is less precision in the probabilities, so we only do this for small
3150      max counts.  */
3151   gcov_type count_max = 0;
3152   basic_block bb;
3153   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3154     count_max = MAX (bb->count, count_max);
3155
3156   if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3157       || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3158           && count_max < REG_BR_PROB_BASE/10))
3159     {
3160       loop_optimizer_init (0);
3161       add_noreturn_fake_exit_edges ();
3162       mark_irreducible_loops ();
3163       connect_infinite_loops_to_exit ();
3164       estimate_bb_frequencies (true);
3165       remove_fake_exit_edges ();
3166       loop_optimizer_finalize ();
3167     }
3168   else if (profile_status_for_fn (cfun) == PROFILE_READ)
3169     counts_to_freqs ();
3170   else
3171     gcc_unreachable ();
3172   timevar_pop (TV_REBUILD_FREQUENCIES);
3173 }