gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "tree.h"
  35 #include "rtl.h"
  36 #include "tm_p.h"
  37 #include "hard-reg-set.h"
  38 #include "basic-block.h"
  39 #include "insn-config.h"
  40 #include "regs.h"
  41 #include "flags.h"
  42 #include "function.h"
  43 #include "except.h"
  44 #include "diagnostic-core.h"
  45 #include "recog.h"
  46 #include "expr.h"
  47 #include "predict.h"
  48 #include "coverage.h"
  49 #include "sreal.h"
  50 #include "params.h"
  51 #include "target.h"
  52 #include "cfgloop.h"
  53 #include "tree-ssa.h"
  54 #include "ggc.h"
  55 #include "tree-pass.h"
  56 #include "tree-scalar-evolution.h"
  57 #include "cfgloop.h"
  58 #include "pointer-set.h"
  59
  60 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  61                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  62 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  63              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  64
  65 /* Random guesstimation given names.
  66    PROV_VERY_UNLIKELY should be small enough so basic block predicted
  67    by it gets below HOT_BB_FREQUENCY_FRACTION.  */
  68 #define PROB_VERY_UNLIKELY      (REG_BR_PROB_BASE / 2000 - 1)
  69 #define PROB_EVEN               (REG_BR_PROB_BASE / 2)
  70 #define PROB_VERY_LIKELY        (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
  71 #define PROB_ALWAYS             (REG_BR_PROB_BASE)
  72
  73 static void combine_predictions_for_insn (rtx, basic_block);
  74 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  75 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  76 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  77 static bool can_predict_insn_p (const_rtx);
  78
  79 /* Information we hold about each branch predictor.
  80    Filled using information from predict.def.  */
  81
  82 struct predictor_info
  83 {
  84   const char *const name;       /* Name used in the debugging dumps.  */
  85   const int hitrate;            /* Expected hitrate used by
  86                                    predict_insn_def call.  */
  87   const int flags;
  88 };
  89
  90 /* Use given predictor without Dempster-Shaffer theory if it matches
  91    using first_match heuristics.  */
  92 #define PRED_FLAG_FIRST_MATCH 1
  93
  94 /* Recompute hitrate in percent to our representation.  */
  95
  96 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
  97
  98 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
  99 static const struct predictor_info predictor_info[]= {
 100 #include "predict.def"
 101
 102   /* Upper bound on predictors.  */
 103   {NULL, 0, 0}
 104 };
 105 #undef DEF_PREDICTOR
 106
 107 /* Return TRUE if frequency FREQ is considered to be hot.  */
 108
 109 static inline bool
 110 maybe_hot_frequency_p (struct function *fun, int freq)
 111 {
 112   struct cgraph_node *node = cgraph_get_node (fun->decl);
 113   if (!profile_info || !flag_branch_probabilities)
 114     {
 115       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 116         return false;
 117       if (node->frequency == NODE_FREQUENCY_HOT)
 118         return true;
 119     }
 120   if (profile_status_for_function (fun) == PROFILE_ABSENT)
 121     return true;
 122   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 123       && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
 124     return false;
 125   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 126     return false;
 127   if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
 128               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 129     return false;
 130   return true;
 131 }
 132
 133 static gcov_type min_count = -1;
 134
 135 /* Determine the threshold for hot BB counts.  */
 136
 137 gcov_type
 138 get_hot_bb_threshold ()
 139 {
 140   gcov_working_set_t *ws;
 141   if (min_count == -1)
 142     {
 143       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 144       gcc_assert (ws);
 145       min_count = ws->min_counter;
 146     }
 147   return min_count;
 148 }
 149
 150 /* Set the threshold for hot BB counts.  */
 151
 152 void
 153 set_hot_bb_threshold (gcov_type min)
 154 {
 155   min_count = min;
 156 }
 157
 158 /* Return TRUE if frequency FREQ is considered to be hot.  */
 159
 160 static inline bool
 161 maybe_hot_count_p (struct function *fun, gcov_type count)
 162 {
 163   if (fun && profile_status_for_function (fun) != PROFILE_READ)
 164     return true;
 165   /* Code executed at most once is not hot.  */
 166   if (profile_info->runs >= count)
 167     return false;
 168   return (count >= get_hot_bb_threshold ());
 169 }
 170
 171 /* Return true in case BB can be CPU intensive and should be optimized
 172    for maximal performance.  */
 173
 174 bool
 175 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 176 {
 177   gcc_checking_assert (fun);
 178   if (profile_status_for_function (fun) == PROFILE_READ)
 179     return maybe_hot_count_p (fun, bb->count);
 180   return maybe_hot_frequency_p (fun, bb->frequency);
 181 }
 182
 183 /* Return true if the call can be hot.  */
 184
 185 bool
 186 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
 187 {
 188   if (profile_info && flag_branch_probabilities
 189       && !maybe_hot_count_p (NULL,
 190                              edge->count))
 191     return false;
 192   if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
 193       || (edge->callee
 194           && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 195     return false;
 196   if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
 197       && (edge->callee
 198           && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
 199     return false;
 200   if (optimize_size)
 201     return false;
 202   if (edge->caller->frequency == NODE_FREQUENCY_HOT)
 203     return true;
 204   if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 205       && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
 206     return false;
 207   if (flag_guess_branch_prob)
 208     {
 209       if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
 210           || edge->frequency <= (CGRAPH_FREQ_BASE
 211                                  / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 212         return false;
 213     }
 214   return true;
 215 }
 216
 217 /* Return true in case BB can be CPU intensive and should be optimized
 218    for maximal performance.  */
 219
 220 bool
 221 maybe_hot_edge_p (edge e)
 222 {
 223   if (profile_status == PROFILE_READ)
 224     return maybe_hot_count_p (cfun, e->count);
 225   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 226 }
 227
 228
 229
 230 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 231    node frequency reflects never being executed.  */
 232
 233 static bool
 234 probably_never_executed (struct function *fun,
 235                          gcov_type count, int frequency)
 236 {
 237   gcc_checking_assert (fun);
 238   if (profile_status_for_function (fun) == PROFILE_READ)
 239     {
 240       if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0)
 241         return false;
 242       if (!frequency)
 243         return true;
 244       if (!ENTRY_BLOCK_PTR->frequency)
 245         return false;
 246       if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE)
 247         {
 248           return (RDIV (frequency * ENTRY_BLOCK_PTR->count,
 249                         ENTRY_BLOCK_PTR->frequency)
 250                   < REG_BR_PROB_BASE / 4);
 251         }
 252       return true;
 253     }
 254   if ((!profile_info || !flag_branch_probabilities)
 255       && (cgraph_get_node (fun->decl)->frequency
 256           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 257     return true;
 258   return false;
 259 }
 260
 261
 262 /* Return true in case BB is probably never executed.  */
 263
 264 bool
 265 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 266 {
 267   return probably_never_executed (fun, bb->count, bb->frequency);
 268 }
 269
 270
 271 /* Return true in case edge E is probably never executed.  */
 272
 273 bool
 274 probably_never_executed_edge_p (struct function *fun, edge e)
 275 {
 276   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 277 }
 278
 279 /* Return true if NODE should be optimized for size.  */
 280
 281 bool
 282 cgraph_optimize_for_size_p (struct cgraph_node *node)
 283 {
 284   if (optimize_size)
 285     return true;
 286   if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 287     return true;
 288   else
 289     return false;
 290 }
 291
 292 /* Return true when current function should always be optimized for size.  */
 293
 294 bool
 295 optimize_function_for_size_p (struct function *fun)
 296 {
 297   if (optimize_size)
 298     return true;
 299   if (!fun || !fun->decl)
 300     return false;
 301   return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
 302 }
 303
 304 /* Return true when current function should always be optimized for speed.  */
 305
 306 bool
 307 optimize_function_for_speed_p (struct function *fun)
 308 {
 309   return !optimize_function_for_size_p (fun);
 310 }
 311
 312 /* Return TRUE when BB should be optimized for size.  */
 313
 314 bool
 315 optimize_bb_for_size_p (const_basic_block bb)
 316 {
 317   return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
 318 }
 319
 320 /* Return TRUE when BB should be optimized for speed.  */
 321
 322 bool
 323 optimize_bb_for_speed_p (const_basic_block bb)
 324 {
 325   return !optimize_bb_for_size_p (bb);
 326 }
 327
 328 /* Return TRUE when BB should be optimized for size.  */
 329
 330 bool
 331 optimize_edge_for_size_p (edge e)
 332 {
 333   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 334 }
 335
 336 /* Return TRUE when BB should be optimized for speed.  */
 337
 338 bool
 339 optimize_edge_for_speed_p (edge e)
 340 {
 341   return !optimize_edge_for_size_p (e);
 342 }
 343
 344 /* Return TRUE when BB should be optimized for size.  */
 345
 346 bool
 347 optimize_insn_for_size_p (void)
 348 {
 349   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 350 }
 351
 352 /* Return TRUE when BB should be optimized for speed.  */
 353
 354 bool
 355 optimize_insn_for_speed_p (void)
 356 {
 357   return !optimize_insn_for_size_p ();
 358 }
 359
 360 /* Return TRUE when LOOP should be optimized for size.  */
 361
 362 bool
 363 optimize_loop_for_size_p (struct loop *loop)
 364 {
 365   return optimize_bb_for_size_p (loop->header);
 366 }
 367
 368 /* Return TRUE when LOOP should be optimized for speed.  */
 369
 370 bool
 371 optimize_loop_for_speed_p (struct loop *loop)
 372 {
 373   return optimize_bb_for_speed_p (loop->header);
 374 }
 375
 376 /* Return TRUE when LOOP nest should be optimized for speed.  */
 377
 378 bool
 379 optimize_loop_nest_for_speed_p (struct loop *loop)
 380 {
 381   struct loop *l = loop;
 382   if (optimize_loop_for_speed_p (loop))
 383     return true;
 384   l = loop->inner;
 385   while (l && l != loop)
 386     {
 387       if (optimize_loop_for_speed_p (l))
 388         return true;
 389       if (l->inner)
 390         l = l->inner;
 391       else if (l->next)
 392         l = l->next;
 393       else
 394         {
 395           while (l != loop && !l->next)
 396             l = loop_outer (l);
 397           if (l != loop)
 398             l = l->next;
 399         }
 400     }
 401   return false;
 402 }
 403
 404 /* Return TRUE when LOOP nest should be optimized for size.  */
 405
 406 bool
 407 optimize_loop_nest_for_size_p (struct loop *loop)
 408 {
 409   return !optimize_loop_nest_for_speed_p (loop);
 410 }
 411
 412 /* Return true when edge E is likely to be well predictable by branch
 413    predictor.  */
 414
 415 bool
 416 predictable_edge_p (edge e)
 417 {
 418   if (profile_status == PROFILE_ABSENT)
 419     return false;
 420   if ((e->probability
 421        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 422       || (REG_BR_PROB_BASE - e->probability
 423           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 424     return true;
 425   return false;
 426 }
 427
 428
 429 /* Set RTL expansion for BB profile.  */
 430
 431 void
 432 rtl_profile_for_bb (basic_block bb)
 433 {
 434   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 435 }
 436
 437 /* Set RTL expansion for edge profile.  */
 438
 439 void
 440 rtl_profile_for_edge (edge e)
 441 {
 442   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 443 }
 444
 445 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 446 void
 447 default_rtl_profile (void)
 448 {
 449   crtl->maybe_hot_insn_p = true;
 450 }
 451
 452 /* Return true if the one of outgoing edges is already predicted by
 453    PREDICTOR.  */
 454
 455 bool
 456 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 457 {
 458   rtx note;
 459   if (!INSN_P (BB_END (bb)))
 460     return false;
 461   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 462     if (REG_NOTE_KIND (note) == REG_BR_PRED
 463         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 464       return true;
 465   return false;
 466 }
 467
 468 /* This map contains for a basic block the list of predictions for the
 469    outgoing edges.  */
 470
 471 static struct pointer_map_t *bb_predictions;
 472
 473 /*  Structure representing predictions in tree level. */
 474
 475 struct edge_prediction {
 476     struct edge_prediction *ep_next;
 477     edge ep_edge;
 478     enum br_predictor ep_predictor;
 479     int ep_probability;
 480 };
 481
 482 /* Return true if the one of outgoing edges is already predicted by
 483    PREDICTOR.  */
 484
 485 bool
 486 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 487 {
 488   struct edge_prediction *i;
 489   void **preds = pointer_map_contains (bb_predictions, bb);
 490
 491   if (!preds)
 492     return false;
 493
 494   for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
 495     if (i->ep_predictor == predictor)
 496       return true;
 497   return false;
 498 }
 499
 500 /* Return true when the probability of edge is reliable.
 501
 502    The profile guessing code is good at predicting branch outcome (ie.
 503    taken/not taken), that is predicted right slightly over 75% of time.
 504    It is however notoriously poor on predicting the probability itself.
 505    In general the profile appear a lot flatter (with probabilities closer
 506    to 50%) than the reality so it is bad idea to use it to drive optimization
 507    such as those disabling dynamic branch prediction for well predictable
 508    branches.
 509
 510    There are two exceptions - edges leading to noreturn edges and edges
 511    predicted by number of iterations heuristics are predicted well.  This macro
 512    should be able to distinguish those, but at the moment it simply check for
 513    noreturn heuristic that is only one giving probability over 99% or bellow
 514    1%.  In future we might want to propagate reliability information across the
 515    CFG if we find this information useful on multiple places.   */
 516 static bool
 517 probability_reliable_p (int prob)
 518 {
 519   return (profile_status == PROFILE_READ
 520           || (profile_status == PROFILE_GUESSED
 521               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 522 }
 523
 524 /* Same predicate as above, working on edges.  */
 525 bool
 526 edge_probability_reliable_p (const_edge e)
 527 {
 528   return probability_reliable_p (e->probability);
 529 }
 530
 531 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 532 bool
 533 br_prob_note_reliable_p (const_rtx note)
 534 {
 535   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 536   return probability_reliable_p (XINT (note, 0));
 537 }
 538
 539 static void
 540 predict_insn (rtx insn, enum br_predictor predictor, int probability)
 541 {
 542   gcc_assert (any_condjump_p (insn));
 543   if (!flag_guess_branch_prob)
 544     return;
 545
 546   add_reg_note (insn, REG_BR_PRED,
 547                 gen_rtx_CONCAT (VOIDmode,
 548                                 GEN_INT ((int) predictor),
 549                                 GEN_INT ((int) probability)));
 550 }
 551
 552 /* Predict insn by given predictor.  */
 553
 554 void
 555 predict_insn_def (rtx insn, enum br_predictor predictor,
 556                   enum prediction taken)
 557 {
 558    int probability = predictor_info[(int) predictor].hitrate;
 559
 560    if (taken != TAKEN)
 561      probability = REG_BR_PROB_BASE - probability;
 562
 563    predict_insn (insn, predictor, probability);
 564 }
 565
 566 /* Predict edge E with given probability if possible.  */
 567
 568 void
 569 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 570 {
 571   rtx last_insn;
 572   last_insn = BB_END (e->src);
 573
 574   /* We can store the branch prediction information only about
 575      conditional jumps.  */
 576   if (!any_condjump_p (last_insn))
 577     return;
 578
 579   /* We always store probability of branching.  */
 580   if (e->flags & EDGE_FALLTHRU)
 581     probability = REG_BR_PROB_BASE - probability;
 582
 583   predict_insn (last_insn, predictor, probability);
 584 }
 585
 586 /* Predict edge E with the given PROBABILITY.  */
 587 void
 588 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 589 {
 590   gcc_assert (profile_status != PROFILE_GUESSED);
 591   if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
 592       && flag_guess_branch_prob && optimize)
 593     {
 594       struct edge_prediction *i = XNEW (struct edge_prediction);
 595       void **preds = pointer_map_insert (bb_predictions, e->src);
 596
 597       i->ep_next = (struct edge_prediction *) *preds;
 598       *preds = i;
 599       i->ep_probability = probability;
 600       i->ep_predictor = predictor;
 601       i->ep_edge = e;
 602     }
 603 }
 604
 605 /* Remove all predictions on given basic block that are attached
 606    to edge E.  */
 607 void
 608 remove_predictions_associated_with_edge (edge e)
 609 {
 610   void **preds;
 611
 612   if (!bb_predictions)
 613     return;
 614
 615   preds = pointer_map_contains (bb_predictions, e->src);
 616
 617   if (preds)
 618     {
 619       struct edge_prediction **prediction = (struct edge_prediction **) preds;
 620       struct edge_prediction *next;
 621
 622       while (*prediction)
 623         {
 624           if ((*prediction)->ep_edge == e)
 625             {
 626               next = (*prediction)->ep_next;
 627               free (*prediction);
 628               *prediction = next;
 629             }
 630           else
 631             prediction = &((*prediction)->ep_next);
 632         }
 633     }
 634 }
 635
 636 /* Clears the list of predictions stored for BB.  */
 637
 638 static void
 639 clear_bb_predictions (basic_block bb)
 640 {
 641   void **preds = pointer_map_contains (bb_predictions, bb);
 642   struct edge_prediction *pred, *next;
 643
 644   if (!preds)
 645     return;
 646
 647   for (pred = (struct edge_prediction *) *preds; pred; pred = next)
 648     {
 649       next = pred->ep_next;
 650       free (pred);
 651     }
 652   *preds = NULL;
 653 }
 654
 655 /* Return true when we can store prediction on insn INSN.
 656    At the moment we represent predictions only on conditional
 657    jumps, not at computed jump or other complicated cases.  */
 658 static bool
 659 can_predict_insn_p (const_rtx insn)
 660 {
 661   return (JUMP_P (insn)
 662           && any_condjump_p (insn)
 663           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 664 }
 665
 666 /* Predict edge E by given predictor if possible.  */
 667
 668 void
 669 predict_edge_def (edge e, enum br_predictor predictor,
 670                   enum prediction taken)
 671 {
 672    int probability = predictor_info[(int) predictor].hitrate;
 673
 674    if (taken != TAKEN)
 675      probability = REG_BR_PROB_BASE - probability;
 676
 677    predict_edge (e, predictor, probability);
 678 }
 679
 680 /* Invert all branch predictions or probability notes in the INSN.  This needs
 681    to be done each time we invert the condition used by the jump.  */
 682
 683 void
 684 invert_br_probabilities (rtx insn)
 685 {
 686   rtx note;
 687
 688   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 689     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 690       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 691     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 692       XEXP (XEXP (note, 0), 1)
 693         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 694 }
 695
 696 /* Dump information about the branch prediction to the output file.  */
 697
 698 static void
 699 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 700                  basic_block bb, int used)
 701 {
 702   edge e;
 703   edge_iterator ei;
 704
 705   if (!file)
 706     return;
 707
 708   FOR_EACH_EDGE (e, ei, bb->succs)
 709     if (! (e->flags & EDGE_FALLTHRU))
 710       break;
 711
 712   fprintf (file, "  %s heuristics%s: %.1f%%",
 713            predictor_info[predictor].name,
 714            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 715
 716   if (bb->count)
 717     {
 718       fprintf (file, "  exec ");
 719       fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
 720       if (e)
 721         {
 722           fprintf (file, " hit ");
 723           fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
 724           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 725         }
 726     }
 727
 728   fprintf (file, "\n");
 729 }
 730
 731 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 732    evenly and hope for the best.  */
 733 static void
 734 set_even_probabilities (basic_block bb)
 735 {
 736   int nedges = 0;
 737   edge e;
 738   edge_iterator ei;
 739
 740   FOR_EACH_EDGE (e, ei, bb->succs)
 741     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 742       nedges ++;
 743   FOR_EACH_EDGE (e, ei, bb->succs)
 744     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 745       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 746     else
 747       e->probability = 0;
 748 }
 749
 750 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 751    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 752
 753 static void
 754 combine_predictions_for_insn (rtx insn, basic_block bb)
 755 {
 756   rtx prob_note;
 757   rtx *pnote;
 758   rtx note;
 759   int best_probability = PROB_EVEN;
 760   enum br_predictor best_predictor = END_PREDICTORS;
 761   int combined_probability = REG_BR_PROB_BASE / 2;
 762   int d;
 763   bool first_match = false;
 764   bool found = false;
 765
 766   if (!can_predict_insn_p (insn))
 767     {
 768       set_even_probabilities (bb);
 769       return;
 770     }
 771
 772   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 773   pnote = &REG_NOTES (insn);
 774   if (dump_file)
 775     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 776              bb->index);
 777
 778   /* We implement "first match" heuristics and use probability guessed
 779      by predictor with smallest index.  */
 780   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 781     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 782       {
 783         enum br_predictor predictor = ((enum br_predictor)
 784                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 785         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 786
 787         found = true;
 788         if (best_predictor > predictor)
 789           best_probability = probability, best_predictor = predictor;
 790
 791         d = (combined_probability * probability
 792              + (REG_BR_PROB_BASE - combined_probability)
 793              * (REG_BR_PROB_BASE - probability));
 794
 795         /* Use FP math to avoid overflows of 32bit integers.  */
 796         if (d == 0)
 797           /* If one probability is 0% and one 100%, avoid division by zero.  */
 798           combined_probability = REG_BR_PROB_BASE / 2;
 799         else
 800           combined_probability = (((double) combined_probability) * probability
 801                                   * REG_BR_PROB_BASE / d + 0.5);
 802       }
 803
 804   /* Decide which heuristic to use.  In case we didn't match anything,
 805      use no_prediction heuristic, in case we did match, use either
 806      first match or Dempster-Shaffer theory depending on the flags.  */
 807
 808   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 809     first_match = true;
 810
 811   if (!found)
 812     dump_prediction (dump_file, PRED_NO_PREDICTION,
 813                      combined_probability, bb, true);
 814   else
 815     {
 816       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 817                        bb, !first_match);
 818       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 819                        bb, first_match);
 820     }
 821
 822   if (first_match)
 823     combined_probability = best_probability;
 824   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 825
 826   while (*pnote)
 827     {
 828       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 829         {
 830           enum br_predictor predictor = ((enum br_predictor)
 831                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 832           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 833
 834           dump_prediction (dump_file, predictor, probability, bb,
 835                            !first_match || best_predictor == predictor);
 836           *pnote = XEXP (*pnote, 1);
 837         }
 838       else
 839         pnote = &XEXP (*pnote, 1);
 840     }
 841
 842   if (!prob_note)
 843     {
 844       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 845
 846       /* Save the prediction into CFG in case we are seeing non-degenerated
 847          conditional jump.  */
 848       if (!single_succ_p (bb))
 849         {
 850           BRANCH_EDGE (bb)->probability = combined_probability;
 851           FALLTHRU_EDGE (bb)->probability
 852             = REG_BR_PROB_BASE - combined_probability;
 853         }
 854     }
 855   else if (!single_succ_p (bb))
 856     {
 857       int prob = XINT (prob_note, 0);
 858
 859       BRANCH_EDGE (bb)->probability = prob;
 860       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 861     }
 862   else
 863     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 864 }
 865
 866 /* Combine predictions into single probability and store them into CFG.
 867    Remove now useless prediction entries.  */
 868
 869 static void
 870 combine_predictions_for_bb (basic_block bb)
 871 {
 872   int best_probability = PROB_EVEN;
 873   enum br_predictor best_predictor = END_PREDICTORS;
 874   int combined_probability = REG_BR_PROB_BASE / 2;
 875   int d;
 876   bool first_match = false;
 877   bool found = false;
 878   struct edge_prediction *pred;
 879   int nedges = 0;
 880   edge e, first = NULL, second = NULL;
 881   edge_iterator ei;
 882   void **preds;
 883
 884   FOR_EACH_EDGE (e, ei, bb->succs)
 885     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 886       {
 887         nedges ++;
 888         if (first && !second)
 889           second = e;
 890         if (!first)
 891           first = e;
 892       }
 893
 894   /* When there is no successor or only one choice, prediction is easy.
 895
 896      We are lazy for now and predict only basic blocks with two outgoing
 897      edges.  It is possible to predict generic case too, but we have to
 898      ignore first match heuristics and do more involved combining.  Implement
 899      this later.  */
 900   if (nedges != 2)
 901     {
 902       if (!bb->count)
 903         set_even_probabilities (bb);
 904       clear_bb_predictions (bb);
 905       if (dump_file)
 906         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 907                  nedges, bb->index);
 908       return;
 909     }
 910
 911   if (dump_file)
 912     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 913
 914   preds = pointer_map_contains (bb_predictions, bb);
 915   if (preds)
 916     {
 917       /* We implement "first match" heuristics and use probability guessed
 918          by predictor with smallest index.  */
 919       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 920         {
 921           enum br_predictor predictor = pred->ep_predictor;
 922           int probability = pred->ep_probability;
 923
 924           if (pred->ep_edge != first)
 925             probability = REG_BR_PROB_BASE - probability;
 926
 927           found = true;
 928           /* First match heuristics would be widly confused if we predicted
 929              both directions.  */
 930           if (best_predictor > predictor)
 931             {
 932               struct edge_prediction *pred2;
 933               int prob = probability;
 934
 935               for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
 936                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 937                  {
 938                    int probability2 = pred->ep_probability;
 939
 940                    if (pred2->ep_edge != first)
 941                      probability2 = REG_BR_PROB_BASE - probability2;
 942
 943                    if ((probability < REG_BR_PROB_BASE / 2) !=
 944                        (probability2 < REG_BR_PROB_BASE / 2))
 945                      break;
 946
 947                    /* If the same predictor later gave better result, go for it! */
 948                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 949                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 950                      prob = probability2;
 951                  }
 952               if (!pred2)
 953                 best_probability = prob, best_predictor = predictor;
 954             }
 955
 956           d = (combined_probability * probability
 957                + (REG_BR_PROB_BASE - combined_probability)
 958                * (REG_BR_PROB_BASE - probability));
 959
 960           /* Use FP math to avoid overflows of 32bit integers.  */
 961           if (d == 0)
 962             /* If one probability is 0% and one 100%, avoid division by zero.  */
 963             combined_probability = REG_BR_PROB_BASE / 2;
 964           else
 965             combined_probability = (((double) combined_probability)
 966                                     * probability
 967                                     * REG_BR_PROB_BASE / d + 0.5);
 968         }
 969     }
 970
 971   /* Decide which heuristic to use.  In case we didn't match anything,
 972      use no_prediction heuristic, in case we did match, use either
 973      first match or Dempster-Shaffer theory depending on the flags.  */
 974
 975   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 976     first_match = true;
 977
 978   if (!found)
 979     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 980   else
 981     {
 982       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 983                        !first_match);
 984       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 985                        first_match);
 986     }
 987
 988   if (first_match)
 989     combined_probability = best_probability;
 990   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 991
 992   if (preds)
 993     {
 994       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 995         {
 996           enum br_predictor predictor = pred->ep_predictor;
 997           int probability = pred->ep_probability;
 998
 999           if (pred->ep_edge != EDGE_SUCC (bb, 0))
1000             probability = REG_BR_PROB_BASE - probability;
1001           dump_prediction (dump_file, predictor, probability, bb,
1002                            !first_match || best_predictor == predictor);
1003         }
1004     }
1005   clear_bb_predictions (bb);
1006
1007   if (!bb->count)
1008     {
1009       first->probability = combined_probability;
1010       second->probability = REG_BR_PROB_BASE - combined_probability;
1011     }
1012 }
1013
1014 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1015    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1016
1017    T1 and T2 should be one of the following cases:
1018      1. T1 is SSA_NAME, T2 is NULL
1019      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1020      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1021
1022 static tree
1023 strips_small_constant (tree t1, tree t2)
1024 {
1025   tree ret = NULL;
1026   int value = 0;
1027
1028   if (!t1)
1029     return NULL;
1030   else if (TREE_CODE (t1) == SSA_NAME)
1031     ret = t1;
1032   else if (host_integerp (t1, 0))
1033     value = tree_low_cst (t1, 0);
1034   else
1035     return NULL;
1036
1037   if (!t2)
1038     return ret;
1039   else if (host_integerp (t2, 0))
1040     value = tree_low_cst (t2, 0);
1041   else if (TREE_CODE (t2) == SSA_NAME)
1042     {
1043       if (ret)
1044         return NULL;
1045       else
1046         ret = t2;
1047     }
1048
1049   if (value <= 4 && value >= -4)
1050     return ret;
1051   else
1052     return NULL;
1053 }
1054
1055 /* Return the SSA_NAME in T or T's operands.
1056    Return NULL if SSA_NAME cannot be found.  */
1057
1058 static tree
1059 get_base_value (tree t)
1060 {
1061   if (TREE_CODE (t) == SSA_NAME)
1062     return t;
1063
1064   if (!BINARY_CLASS_P (t))
1065     return NULL;
1066
1067   switch (TREE_OPERAND_LENGTH (t))
1068     {
1069     case 1:
1070       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1071     case 2:
1072       return strips_small_constant (TREE_OPERAND (t, 0),
1073                                     TREE_OPERAND (t, 1));
1074     default:
1075       return NULL;
1076     }
1077 }
1078
1079 /* Check the compare STMT in LOOP. If it compares an induction
1080    variable to a loop invariant, return true, and save
1081    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1082    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1083
1084 static bool
1085 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1086                                      tree *loop_invariant,
1087                                      enum tree_code *compare_code,
1088                                      tree *loop_step,
1089                                      tree *loop_iv_base)
1090 {
1091   tree op0, op1, bound, base;
1092   affine_iv iv0, iv1;
1093   enum tree_code code;
1094   tree step;
1095
1096   code = gimple_cond_code (stmt);
1097   *loop_invariant = NULL;
1098
1099   switch (code)
1100     {
1101     case GT_EXPR:
1102     case GE_EXPR:
1103     case NE_EXPR:
1104     case LT_EXPR:
1105     case LE_EXPR:
1106     case EQ_EXPR:
1107       break;
1108
1109     default:
1110       return false;
1111     }
1112
1113   op0 = gimple_cond_lhs (stmt);
1114   op1 = gimple_cond_rhs (stmt);
1115
1116   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1117        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1118     return false;
1119   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1120     return false;
1121   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1122     return false;
1123   if (TREE_CODE (iv0.step) != INTEGER_CST
1124       || TREE_CODE (iv1.step) != INTEGER_CST)
1125     return false;
1126   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1127       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1128     return false;
1129
1130   if (integer_zerop (iv0.step))
1131     {
1132       if (code != NE_EXPR && code != EQ_EXPR)
1133         code = invert_tree_comparison (code, false);
1134       bound = iv0.base;
1135       base = iv1.base;
1136       if (host_integerp (iv1.step, 0))
1137         step = iv1.step;
1138       else
1139         return false;
1140     }
1141   else
1142     {
1143       bound = iv1.base;
1144       base = iv0.base;
1145       if (host_integerp (iv0.step, 0))
1146         step = iv0.step;
1147       else
1148         return false;
1149     }
1150
1151   if (TREE_CODE (bound) != INTEGER_CST)
1152     bound = get_base_value (bound);
1153   if (!bound)
1154     return false;
1155   if (TREE_CODE (base) != INTEGER_CST)
1156     base = get_base_value (base);
1157   if (!base)
1158     return false;
1159
1160   *loop_invariant = bound;
1161   *compare_code = code;
1162   *loop_step = step;
1163   *loop_iv_base = base;
1164   return true;
1165 }
1166
1167 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1168
1169 static bool
1170 expr_coherent_p (tree t1, tree t2)
1171 {
1172   gimple stmt;
1173   tree ssa_name_1 = NULL;
1174   tree ssa_name_2 = NULL;
1175
1176   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1177   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1178
1179   if (t1 == t2)
1180     return true;
1181
1182   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1183     return true;
1184   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1185     return false;
1186
1187   /* Check to see if t1 is expressed/defined with t2.  */
1188   stmt = SSA_NAME_DEF_STMT (t1);
1189   gcc_assert (stmt != NULL);
1190   if (is_gimple_assign (stmt))
1191     {
1192       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1193       if (ssa_name_1 && ssa_name_1 == t2)
1194         return true;
1195     }
1196
1197   /* Check to see if t2 is expressed/defined with t1.  */
1198   stmt = SSA_NAME_DEF_STMT (t2);
1199   gcc_assert (stmt != NULL);
1200   if (is_gimple_assign (stmt))
1201     {
1202       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1203       if (ssa_name_2 && ssa_name_2 == t1)
1204         return true;
1205     }
1206
1207   /* Compare if t1 and t2's def_stmts are identical.  */
1208   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1209     return true;
1210   else
1211     return false;
1212 }
1213
1214 /* Predict branch probability of BB when BB contains a branch that compares
1215    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1216    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1217
1218    E.g.
1219      for (int i = 0; i < bound; i++) {
1220        if (i < bound - 2)
1221          computation_1();
1222        else
1223          computation_2();
1224      }
1225
1226   In this loop, we will predict the branch inside the loop to be taken.  */
1227
1228 static void
1229 predict_iv_comparison (struct loop *loop, basic_block bb,
1230                        tree loop_bound_var,
1231                        tree loop_iv_base_var,
1232                        enum tree_code loop_bound_code,
1233                        int loop_bound_step)
1234 {
1235   gimple stmt;
1236   tree compare_var, compare_base;
1237   enum tree_code compare_code;
1238   tree compare_step_var;
1239   edge then_edge;
1240   edge_iterator ei;
1241
1242   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1243       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1244       || predicted_by_p (bb, PRED_LOOP_EXIT))
1245     return;
1246
1247   stmt = last_stmt (bb);
1248   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1249     return;
1250   if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1251                                             &compare_code,
1252                                             &compare_step_var,
1253                                             &compare_base))
1254     return;
1255
1256   /* Find the taken edge.  */
1257   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1258     if (then_edge->flags & EDGE_TRUE_VALUE)
1259       break;
1260
1261   /* When comparing an IV to a loop invariant, NE is more likely to be
1262      taken while EQ is more likely to be not-taken.  */
1263   if (compare_code == NE_EXPR)
1264     {
1265       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1266       return;
1267     }
1268   else if (compare_code == EQ_EXPR)
1269     {
1270       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1271       return;
1272     }
1273
1274   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1275     return;
1276
1277   /* If loop bound, base and compare bound are all constants, we can
1278      calculate the probability directly.  */
1279   if (host_integerp (loop_bound_var, 0)
1280       && host_integerp (compare_var, 0)
1281       && host_integerp (compare_base, 0))
1282     {
1283       int probability;
1284       bool of, overflow = false;
1285       double_int mod, compare_count, tem, loop_count;
1286
1287       double_int loop_bound = tree_to_double_int (loop_bound_var);
1288       double_int compare_bound = tree_to_double_int (compare_var);
1289       double_int base = tree_to_double_int (compare_base);
1290       double_int compare_step = tree_to_double_int (compare_step_var);
1291
1292       /* (loop_bound - base) / compare_step */
1293       tem = loop_bound.sub_with_overflow (base, &of);
1294       overflow |= of;
1295       loop_count = tem.divmod_with_overflow (compare_step,
1296                                               0, TRUNC_DIV_EXPR,
1297                                               &mod, &of);
1298       overflow |= of;
1299
1300       if ((!compare_step.is_negative ())
1301           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1302         {
1303           /* (loop_bound - compare_bound) / compare_step */
1304           tem = loop_bound.sub_with_overflow (compare_bound, &of);
1305           overflow |= of;
1306           compare_count = tem.divmod_with_overflow (compare_step,
1307                                                      0, TRUNC_DIV_EXPR,
1308                                                      &mod, &of);
1309           overflow |= of;
1310         }
1311       else
1312         {
1313           /* (compare_bound - base) / compare_step */
1314           tem = compare_bound.sub_with_overflow (base, &of);
1315           overflow |= of;
1316           compare_count = tem.divmod_with_overflow (compare_step,
1317                                                      0, TRUNC_DIV_EXPR,
1318                                                      &mod, &of);
1319           overflow |= of;
1320         }
1321       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1322         ++compare_count;
1323       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1324         ++loop_count;
1325       if (compare_count.is_negative ())
1326         compare_count = double_int_zero;
1327       if (loop_count.is_negative ())
1328         loop_count = double_int_zero;
1329       if (loop_count.is_zero ())
1330         probability = 0;
1331       else if (compare_count.scmp (loop_count) == 1)
1332         probability = REG_BR_PROB_BASE;
1333       else
1334         {
1335           /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1336              could overflow, shift both loop_count and compare_count right
1337              a bit so that it doesn't overflow.  Note both counts are known not
1338              to be negative at this point.  */
1339           int clz_bits = clz_hwi (loop_count.high);
1340           gcc_assert (REG_BR_PROB_BASE < 32768);
1341           if (clz_bits < 16)
1342             {
1343               loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1344               compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1345             }
1346           tem = compare_count.mul_with_sign (double_int::from_shwi
1347                                             (REG_BR_PROB_BASE), true, &of);
1348           gcc_assert (!of);
1349           tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1350           probability = tem.to_uhwi ();
1351         }
1352
1353       if (!overflow)
1354         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1355
1356       return;
1357     }
1358
1359   if (expr_coherent_p (loop_bound_var, compare_var))
1360     {
1361       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1362           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1363         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1364       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1365                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1366         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1367       else if (loop_bound_code == NE_EXPR)
1368         {
1369           /* If the loop backedge condition is "(i != bound)", we do
1370              the comparison based on the step of IV:
1371              * step < 0 : backedge condition is like (i > bound)
1372              * step > 0 : backedge condition is like (i < bound)  */
1373           gcc_assert (loop_bound_step != 0);
1374           if (loop_bound_step > 0
1375               && (compare_code == LT_EXPR
1376                   || compare_code == LE_EXPR))
1377             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1378           else if (loop_bound_step < 0
1379                    && (compare_code == GT_EXPR
1380                        || compare_code == GE_EXPR))
1381             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1382           else
1383             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1384         }
1385       else
1386         /* The branch is predicted not-taken if loop_bound_code is
1387            opposite with compare_code.  */
1388         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1389     }
1390   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1391     {
1392       /* For cases like:
1393            for (i = s; i < h; i++)
1394              if (i > s + 2) ....
1395          The branch should be predicted taken.  */
1396       if (loop_bound_step > 0
1397           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1398         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1399       else if (loop_bound_step < 0
1400                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1401         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1402       else
1403         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1404     }
1405 }
1406
1407 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1408    exits are resulted from short-circuit conditions that will generate an
1409    if_tmp. E.g.:
1410
1411    if (foo() || global > 10)
1412      break;
1413
1414    This will be translated into:
1415
1416    BB3:
1417      loop header...
1418    BB4:
1419      if foo() goto BB6 else goto BB5
1420    BB5:
1421      if global > 10 goto BB6 else goto BB7
1422    BB6:
1423      goto BB7
1424    BB7:
1425      iftmp = (PHI 0(BB5), 1(BB6))
1426      if iftmp == 1 goto BB8 else goto BB3
1427    BB8:
1428      outside of the loop...
1429
1430    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1431    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1432    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1433    exits to predict them using PRED_LOOP_EXIT.  */
1434
1435 static void
1436 predict_extra_loop_exits (edge exit_edge)
1437 {
1438   unsigned i;
1439   bool check_value_one;
1440   gimple phi_stmt;
1441   tree cmp_rhs, cmp_lhs;
1442   gimple cmp_stmt = last_stmt (exit_edge->src);
1443
1444   if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1445     return;
1446   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1447   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1448   if (!TREE_CONSTANT (cmp_rhs)
1449       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1450     return;
1451   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1452     return;
1453
1454   /* If check_value_one is true, only the phi_args with value '1' will lead
1455      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1456      loop exit.  */
1457   check_value_one = (((integer_onep (cmp_rhs))
1458                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1459                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1460
1461   phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1462   if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1463     return;
1464
1465   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1466     {
1467       edge e1;
1468       edge_iterator ei;
1469       tree val = gimple_phi_arg_def (phi_stmt, i);
1470       edge e = gimple_phi_arg_edge (phi_stmt, i);
1471
1472       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1473         continue;
1474       if ((check_value_one ^ integer_onep (val)) == 1)
1475         continue;
1476       if (EDGE_COUNT (e->src->succs) != 1)
1477         {
1478           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1479           continue;
1480         }
1481
1482       FOR_EACH_EDGE (e1, ei, e->src->preds)
1483         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1484     }
1485 }
1486
1487 /* Predict edge probabilities by exploiting loop structure.  */
1488
1489 static void
1490 predict_loops (void)
1491 {
1492   loop_iterator li;
1493   struct loop *loop;
1494
1495   /* Try to predict out blocks in a loop that are not part of a
1496      natural loop.  */
1497   FOR_EACH_LOOP (li, loop, 0)
1498     {
1499       basic_block bb, *bbs;
1500       unsigned j, n_exits;
1501       vec<edge> exits;
1502       struct tree_niter_desc niter_desc;
1503       edge ex;
1504       struct nb_iter_bound *nb_iter;
1505       enum tree_code loop_bound_code = ERROR_MARK;
1506       tree loop_bound_step = NULL;
1507       tree loop_bound_var = NULL;
1508       tree loop_iv_base = NULL;
1509       gimple stmt = NULL;
1510
1511       exits = get_loop_exit_edges (loop);
1512       n_exits = exits.length ();
1513       if (!n_exits)
1514         {
1515           exits.release ();
1516           continue;
1517         }
1518
1519       FOR_EACH_VEC_ELT (exits, j, ex)
1520         {
1521           tree niter = NULL;
1522           HOST_WIDE_INT nitercst;
1523           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1524           int probability;
1525           enum br_predictor predictor;
1526
1527           predict_extra_loop_exits (ex);
1528
1529           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1530             niter = niter_desc.niter;
1531           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1532             niter = loop_niter_by_eval (loop, ex);
1533
1534           if (TREE_CODE (niter) == INTEGER_CST)
1535             {
1536               if (host_integerp (niter, 1)
1537                   && max
1538                   && compare_tree_int (niter, max - 1) == -1)
1539                 nitercst = tree_low_cst (niter, 1) + 1;
1540               else
1541                 nitercst = max;
1542               predictor = PRED_LOOP_ITERATIONS;
1543             }
1544           /* If we have just one exit and we can derive some information about
1545              the number of iterations of the loop from the statements inside
1546              the loop, use it to predict this exit.  */
1547           else if (n_exits == 1)
1548             {
1549               nitercst = estimated_stmt_executions_int (loop);
1550               if (nitercst < 0)
1551                 continue;
1552               if (nitercst > max)
1553                 nitercst = max;
1554
1555               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1556             }
1557           else
1558             continue;
1559
1560           /* If the prediction for number of iterations is zero, do not
1561              predict the exit edges.  */
1562           if (nitercst == 0)
1563             continue;
1564
1565           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1566           predict_edge (ex, predictor, probability);
1567         }
1568       exits.release ();
1569
1570       /* Find information about loop bound variables.  */
1571       for (nb_iter = loop->bounds; nb_iter;
1572            nb_iter = nb_iter->next)
1573         if (nb_iter->stmt
1574             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1575           {
1576             stmt = nb_iter->stmt;
1577             break;
1578           }
1579       if (!stmt && last_stmt (loop->header)
1580           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1581         stmt = last_stmt (loop->header);
1582       if (stmt)
1583         is_comparison_with_loop_invariant_p (stmt, loop,
1584                                              &loop_bound_var,
1585                                              &loop_bound_code,
1586                                              &loop_bound_step,
1587                                              &loop_iv_base);
1588
1589       bbs = get_loop_body (loop);
1590
1591       for (j = 0; j < loop->num_nodes; j++)
1592         {
1593           int header_found = 0;
1594           edge e;
1595           edge_iterator ei;
1596
1597           bb = bbs[j];
1598
1599           /* Bypass loop heuristics on continue statement.  These
1600              statements construct loops via "non-loop" constructs
1601              in the source language and are better to be handled
1602              separately.  */
1603           if (predicted_by_p (bb, PRED_CONTINUE))
1604             continue;
1605
1606           /* Loop branch heuristics - predict an edge back to a
1607              loop's head as taken.  */
1608           if (bb == loop->latch)
1609             {
1610               e = find_edge (loop->latch, loop->header);
1611               if (e)
1612                 {
1613                   header_found = 1;
1614                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1615                 }
1616             }
1617
1618           /* Loop exit heuristics - predict an edge exiting the loop if the
1619              conditional has no loop header successors as not taken.  */
1620           if (!header_found
1621               /* If we already used more reliable loop exit predictors, do not
1622                  bother with PRED_LOOP_EXIT.  */
1623               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1624               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1625             {
1626               /* For loop with many exits we don't want to predict all exits
1627                  with the pretty large probability, because if all exits are
1628                  considered in row, the loop would be predicted to iterate
1629                  almost never.  The code to divide probability by number of
1630                  exits is very rough.  It should compute the number of exits
1631                  taken in each patch through function (not the overall number
1632                  of exits that might be a lot higher for loops with wide switch
1633                  statements in them) and compute n-th square root.
1634
1635                  We limit the minimal probability by 2% to avoid
1636                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1637                  as this was causing regression in perl benchmark containing such
1638                  a wide loop.  */
1639
1640               int probability = ((REG_BR_PROB_BASE
1641                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1642                                  / n_exits);
1643               if (probability < HITRATE (2))
1644                 probability = HITRATE (2);
1645               FOR_EACH_EDGE (e, ei, bb->succs)
1646                 if (e->dest->index < NUM_FIXED_BLOCKS
1647                     || !flow_bb_inside_loop_p (loop, e->dest))
1648                   predict_edge (e, PRED_LOOP_EXIT, probability);
1649             }
1650           if (loop_bound_var)
1651             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1652                                    loop_bound_code,
1653                                    tree_low_cst (loop_bound_step, 0));
1654         }
1655
1656       /* Free basic blocks from get_loop_body.  */
1657       free (bbs);
1658     }
1659 }
1660
1661 /* Attempt to predict probabilities of BB outgoing edges using local
1662    properties.  */
1663 static void
1664 bb_estimate_probability_locally (basic_block bb)
1665 {
1666   rtx last_insn = BB_END (bb);
1667   rtx cond;
1668
1669   if (! can_predict_insn_p (last_insn))
1670     return;
1671   cond = get_condition (last_insn, NULL, false, false);
1672   if (! cond)
1673     return;
1674
1675   /* Try "pointer heuristic."
1676      A comparison ptr == 0 is predicted as false.
1677      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1678   if (COMPARISON_P (cond)
1679       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1680           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1681     {
1682       if (GET_CODE (cond) == EQ)
1683         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1684       else if (GET_CODE (cond) == NE)
1685         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1686     }
1687   else
1688
1689   /* Try "opcode heuristic."
1690      EQ tests are usually false and NE tests are usually true. Also,
1691      most quantities are positive, so we can make the appropriate guesses
1692      about signed comparisons against zero.  */
1693     switch (GET_CODE (cond))
1694       {
1695       case CONST_INT:
1696         /* Unconditional branch.  */
1697         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1698                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1699         break;
1700
1701       case EQ:
1702       case UNEQ:
1703         /* Floating point comparisons appears to behave in a very
1704            unpredictable way because of special role of = tests in
1705            FP code.  */
1706         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1707           ;
1708         /* Comparisons with 0 are often used for booleans and there is
1709            nothing useful to predict about them.  */
1710         else if (XEXP (cond, 1) == const0_rtx
1711                  || XEXP (cond, 0) == const0_rtx)
1712           ;
1713         else
1714           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1715         break;
1716
1717       case NE:
1718       case LTGT:
1719         /* Floating point comparisons appears to behave in a very
1720            unpredictable way because of special role of = tests in
1721            FP code.  */
1722         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1723           ;
1724         /* Comparisons with 0 are often used for booleans and there is
1725            nothing useful to predict about them.  */
1726         else if (XEXP (cond, 1) == const0_rtx
1727                  || XEXP (cond, 0) == const0_rtx)
1728           ;
1729         else
1730           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1731         break;
1732
1733       case ORDERED:
1734         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1735         break;
1736
1737       case UNORDERED:
1738         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1739         break;
1740
1741       case LE:
1742       case LT:
1743         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1744             || XEXP (cond, 1) == constm1_rtx)
1745           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1746         break;
1747
1748       case GE:
1749       case GT:
1750         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1751             || XEXP (cond, 1) == constm1_rtx)
1752           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1753         break;
1754
1755       default:
1756         break;
1757       }
1758 }
1759
1760 /* Set edge->probability for each successor edge of BB.  */
1761 void
1762 guess_outgoing_edge_probabilities (basic_block bb)
1763 {
1764   bb_estimate_probability_locally (bb);
1765   combine_predictions_for_insn (BB_END (bb), bb);
1766 }
1767 \f
1768 static tree expr_expected_value (tree, bitmap);
1769
1770 /* Helper function for expr_expected_value.  */
1771
1772 static tree
1773 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1774                        tree op1, bitmap visited)
1775 {
1776   gimple def;
1777
1778   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1779     {
1780       if (TREE_CONSTANT (op0))
1781         return op0;
1782
1783       if (code != SSA_NAME)
1784         return NULL_TREE;
1785
1786       def = SSA_NAME_DEF_STMT (op0);
1787
1788       /* If we were already here, break the infinite cycle.  */
1789       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1790         return NULL;
1791
1792       if (gimple_code (def) == GIMPLE_PHI)
1793         {
1794           /* All the arguments of the PHI node must have the same constant
1795              length.  */
1796           int i, n = gimple_phi_num_args (def);
1797           tree val = NULL, new_val;
1798
1799           for (i = 0; i < n; i++)
1800             {
1801               tree arg = PHI_ARG_DEF (def, i);
1802
1803               /* If this PHI has itself as an argument, we cannot
1804                  determine the string length of this argument.  However,
1805                  if we can find an expected constant value for the other
1806                  PHI args then we can still be sure that this is
1807                  likely a constant.  So be optimistic and just
1808                  continue with the next argument.  */
1809               if (arg == PHI_RESULT (def))
1810                 continue;
1811
1812               new_val = expr_expected_value (arg, visited);
1813               if (!new_val)
1814                 return NULL;
1815               if (!val)
1816                 val = new_val;
1817               else if (!operand_equal_p (val, new_val, false))
1818                 return NULL;
1819             }
1820           return val;
1821         }
1822       if (is_gimple_assign (def))
1823         {
1824           if (gimple_assign_lhs (def) != op0)
1825             return NULL;
1826
1827           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1828                                         gimple_assign_rhs1 (def),
1829                                         gimple_assign_rhs_code (def),
1830                                         gimple_assign_rhs2 (def),
1831                                         visited);
1832         }
1833
1834       if (is_gimple_call (def))
1835         {
1836           tree decl = gimple_call_fndecl (def);
1837           if (!decl)
1838             return NULL;
1839           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1840             switch (DECL_FUNCTION_CODE (decl))
1841               {
1842               case BUILT_IN_EXPECT:
1843                 {
1844                   tree val;
1845                   if (gimple_call_num_args (def) != 2)
1846                     return NULL;
1847                   val = gimple_call_arg (def, 0);
1848                   if (TREE_CONSTANT (val))
1849                     return val;
1850                   return gimple_call_arg (def, 1);
1851                 }
1852
1853               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1854               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1855               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1856               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1857               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1858               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1859               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1860               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1861               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1862               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1863               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1864               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1865               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1866                 /* Assume that any given atomic operation has low contention,
1867                    and thus the compare-and-swap operation succeeds.  */
1868                 return boolean_true_node;
1869             }
1870         }
1871
1872       return NULL;
1873     }
1874
1875   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1876     {
1877       tree res;
1878       op0 = expr_expected_value (op0, visited);
1879       if (!op0)
1880         return NULL;
1881       op1 = expr_expected_value (op1, visited);
1882       if (!op1)
1883         return NULL;
1884       res = fold_build2 (code, type, op0, op1);
1885       if (TREE_CONSTANT (res))
1886         return res;
1887       return NULL;
1888     }
1889   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1890     {
1891       tree res;
1892       op0 = expr_expected_value (op0, visited);
1893       if (!op0)
1894         return NULL;
1895       res = fold_build1 (code, type, op0);
1896       if (TREE_CONSTANT (res))
1897         return res;
1898       return NULL;
1899     }
1900   return NULL;
1901 }
1902
1903 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1904    The function is used by builtin_expect branch predictor so the evidence
1905    must come from this construct and additional possible constant folding.
1906
1907    We may want to implement more involved value guess (such as value range
1908    propagation based prediction), but such tricks shall go to new
1909    implementation.  */
1910
1911 static tree
1912 expr_expected_value (tree expr, bitmap visited)
1913 {
1914   enum tree_code code;
1915   tree op0, op1;
1916
1917   if (TREE_CONSTANT (expr))
1918     return expr;
1919
1920   extract_ops_from_tree (expr, &code, &op0, &op1);
1921   return expr_expected_value_1 (TREE_TYPE (expr),
1922                                 op0, code, op1, visited);
1923 }
1924
1925 \f
1926 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1927    we no longer need.  */
1928 static unsigned int
1929 strip_predict_hints (void)
1930 {
1931   basic_block bb;
1932   gimple ass_stmt;
1933   tree var;
1934
1935   FOR_EACH_BB (bb)
1936     {
1937       gimple_stmt_iterator bi;
1938       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1939         {
1940           gimple stmt = gsi_stmt (bi);
1941
1942           if (gimple_code (stmt) == GIMPLE_PREDICT)
1943             {
1944               gsi_remove (&bi, true);
1945               continue;
1946             }
1947           else if (gimple_code (stmt) == GIMPLE_CALL)
1948             {
1949               tree fndecl = gimple_call_fndecl (stmt);
1950
1951               if (fndecl
1952                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1953                   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1954                   && gimple_call_num_args (stmt) == 2)
1955                 {
1956                   var = gimple_call_lhs (stmt);
1957                   if (var)
1958                     {
1959                       ass_stmt
1960                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1961                       gsi_replace (&bi, ass_stmt, true);
1962                     }
1963                   else
1964                     {
1965                       gsi_remove (&bi, true);
1966                       continue;
1967                     }
1968                 }
1969             }
1970           gsi_next (&bi);
1971         }
1972     }
1973   return 0;
1974 }
1975 \f
1976 /* Predict using opcode of the last statement in basic block.  */
1977 static void
1978 tree_predict_by_opcode (basic_block bb)
1979 {
1980   gimple stmt = last_stmt (bb);
1981   edge then_edge;
1982   tree op0, op1;
1983   tree type;
1984   tree val;
1985   enum tree_code cmp;
1986   bitmap visited;
1987   edge_iterator ei;
1988
1989   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1990     return;
1991   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1992     if (then_edge->flags & EDGE_TRUE_VALUE)
1993       break;
1994   op0 = gimple_cond_lhs (stmt);
1995   op1 = gimple_cond_rhs (stmt);
1996   cmp = gimple_cond_code (stmt);
1997   type = TREE_TYPE (op0);
1998   visited = BITMAP_ALLOC (NULL);
1999   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
2000   BITMAP_FREE (visited);
2001   if (val)
2002     {
2003       int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2004
2005       gcc_assert (percent >= 0 && percent <= 100);
2006       if (integer_zerop (val))
2007         percent = 100 - percent;
2008       predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2009     }
2010   /* Try "pointer heuristic."
2011      A comparison ptr == 0 is predicted as false.
2012      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2013   if (POINTER_TYPE_P (type))
2014     {
2015       if (cmp == EQ_EXPR)
2016         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2017       else if (cmp == NE_EXPR)
2018         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2019     }
2020   else
2021
2022   /* Try "opcode heuristic."
2023      EQ tests are usually false and NE tests are usually true. Also,
2024      most quantities are positive, so we can make the appropriate guesses
2025      about signed comparisons against zero.  */
2026     switch (cmp)
2027       {
2028       case EQ_EXPR:
2029       case UNEQ_EXPR:
2030         /* Floating point comparisons appears to behave in a very
2031            unpredictable way because of special role of = tests in
2032            FP code.  */
2033         if (FLOAT_TYPE_P (type))
2034           ;
2035         /* Comparisons with 0 are often used for booleans and there is
2036            nothing useful to predict about them.  */
2037         else if (integer_zerop (op0) || integer_zerop (op1))
2038           ;
2039         else
2040           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2041         break;
2042
2043       case NE_EXPR:
2044       case LTGT_EXPR:
2045         /* Floating point comparisons appears to behave in a very
2046            unpredictable way because of special role of = tests in
2047            FP code.  */
2048         if (FLOAT_TYPE_P (type))
2049           ;
2050         /* Comparisons with 0 are often used for booleans and there is
2051            nothing useful to predict about them.  */
2052         else if (integer_zerop (op0)
2053                  || integer_zerop (op1))
2054           ;
2055         else
2056           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2057         break;
2058
2059       case ORDERED_EXPR:
2060         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2061         break;
2062
2063       case UNORDERED_EXPR:
2064         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2065         break;
2066
2067       case LE_EXPR:
2068       case LT_EXPR:
2069         if (integer_zerop (op1)
2070             || integer_onep (op1)
2071             || integer_all_onesp (op1)
2072             || real_zerop (op1)
2073             || real_onep (op1)
2074             || real_minus_onep (op1))
2075           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2076         break;
2077
2078       case GE_EXPR:
2079       case GT_EXPR:
2080         if (integer_zerop (op1)
2081             || integer_onep (op1)
2082             || integer_all_onesp (op1)
2083             || real_zerop (op1)
2084             || real_onep (op1)
2085             || real_minus_onep (op1))
2086           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2087         break;
2088
2089       default:
2090         break;
2091       }
2092 }
2093
2094 /* Try to guess whether the value of return means error code.  */
2095
2096 static enum br_predictor
2097 return_prediction (tree val, enum prediction *prediction)
2098 {
2099   /* VOID.  */
2100   if (!val)
2101     return PRED_NO_PREDICTION;
2102   /* Different heuristics for pointers and scalars.  */
2103   if (POINTER_TYPE_P (TREE_TYPE (val)))
2104     {
2105       /* NULL is usually not returned.  */
2106       if (integer_zerop (val))
2107         {
2108           *prediction = NOT_TAKEN;
2109           return PRED_NULL_RETURN;
2110         }
2111     }
2112   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2113     {
2114       /* Negative return values are often used to indicate
2115          errors.  */
2116       if (TREE_CODE (val) == INTEGER_CST
2117           && tree_int_cst_sgn (val) < 0)
2118         {
2119           *prediction = NOT_TAKEN;
2120           return PRED_NEGATIVE_RETURN;
2121         }
2122       /* Constant return values seems to be commonly taken.
2123          Zero/one often represent booleans so exclude them from the
2124          heuristics.  */
2125       if (TREE_CONSTANT (val)
2126           && (!integer_zerop (val) && !integer_onep (val)))
2127         {
2128           *prediction = TAKEN;
2129           return PRED_CONST_RETURN;
2130         }
2131     }
2132   return PRED_NO_PREDICTION;
2133 }
2134
2135 /* Find the basic block with return expression and look up for possible
2136    return value trying to apply RETURN_PREDICTION heuristics.  */
2137 static void
2138 apply_return_prediction (void)
2139 {
2140   gimple return_stmt = NULL;
2141   tree return_val;
2142   edge e;
2143   gimple phi;
2144   int phi_num_args, i;
2145   enum br_predictor pred;
2146   enum prediction direction;
2147   edge_iterator ei;
2148
2149   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2150     {
2151       return_stmt = last_stmt (e->src);
2152       if (return_stmt
2153           && gimple_code (return_stmt) == GIMPLE_RETURN)
2154         break;
2155     }
2156   if (!e)
2157     return;
2158   return_val = gimple_return_retval (return_stmt);
2159   if (!return_val)
2160     return;
2161   if (TREE_CODE (return_val) != SSA_NAME
2162       || !SSA_NAME_DEF_STMT (return_val)
2163       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2164     return;
2165   phi = SSA_NAME_DEF_STMT (return_val);
2166   phi_num_args = gimple_phi_num_args (phi);
2167   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2168
2169   /* Avoid the degenerate case where all return values form the function
2170      belongs to same category (ie they are all positive constants)
2171      so we can hardly say something about them.  */
2172   for (i = 1; i < phi_num_args; i++)
2173     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2174       break;
2175   if (i != phi_num_args)
2176     for (i = 0; i < phi_num_args; i++)
2177       {
2178         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2179         if (pred != PRED_NO_PREDICTION)
2180           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2181                                          direction);
2182       }
2183 }
2184
2185 /* Look for basic block that contains unlikely to happen events
2186    (such as noreturn calls) and mark all paths leading to execution
2187    of this basic blocks as unlikely.  */
2188
2189 static void
2190 tree_bb_level_predictions (void)
2191 {
2192   basic_block bb;
2193   bool has_return_edges = false;
2194   edge e;
2195   edge_iterator ei;
2196
2197   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2198     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2199       {
2200         has_return_edges = true;
2201         break;
2202       }
2203
2204   apply_return_prediction ();
2205
2206   FOR_EACH_BB (bb)
2207     {
2208       gimple_stmt_iterator gsi;
2209
2210       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2211         {
2212           gimple stmt = gsi_stmt (gsi);
2213           tree decl;
2214
2215           if (is_gimple_call (stmt))
2216             {
2217               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2218                   && has_return_edges)
2219                 predict_paths_leading_to (bb, PRED_NORETURN,
2220                                           NOT_TAKEN);
2221               decl = gimple_call_fndecl (stmt);
2222               if (decl
2223                   && lookup_attribute ("cold",
2224                                        DECL_ATTRIBUTES (decl)))
2225                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2226                                           NOT_TAKEN);
2227             }
2228           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2229             {
2230               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2231                                         gimple_predict_outcome (stmt));
2232               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2233                  hints to callers.  */
2234             }
2235         }
2236     }
2237 }
2238
2239 #ifdef ENABLE_CHECKING
2240
2241 /* Callback for pointer_map_traverse, asserts that the pointer map is
2242    empty.  */
2243
2244 static bool
2245 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2246                  void *data ATTRIBUTE_UNUSED)
2247 {
2248   gcc_assert (!*value);
2249   return false;
2250 }
2251 #endif
2252
2253 /* Predict branch probabilities and estimate profile for basic block BB.  */
2254
2255 static void
2256 tree_estimate_probability_bb (basic_block bb)
2257 {
2258   edge e;
2259   edge_iterator ei;
2260   gimple last;
2261
2262   FOR_EACH_EDGE (e, ei, bb->succs)
2263     {
2264       /* Predict edges to user labels with attributes.  */
2265       if (e->dest != EXIT_BLOCK_PTR)
2266         {
2267           gimple_stmt_iterator gi;
2268           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2269             {
2270               gimple stmt = gsi_stmt (gi);
2271               tree decl;
2272
2273               if (gimple_code (stmt) != GIMPLE_LABEL)
2274                 break;
2275               decl = gimple_label_label (stmt);
2276               if (DECL_ARTIFICIAL (decl))
2277                 continue;
2278
2279               /* Finally, we have a user-defined label.  */
2280               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2281                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2282               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2283                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2284             }
2285         }
2286
2287       /* Predict early returns to be probable, as we've already taken
2288          care for error returns and other cases are often used for
2289          fast paths through function.
2290
2291          Since we've already removed the return statements, we are
2292          looking for CFG like:
2293
2294          if (conditional)
2295          {
2296          ..
2297          goto return_block
2298          }
2299          some other blocks
2300          return_block:
2301          return_stmt.  */
2302       if (e->dest != bb->next_bb
2303           && e->dest != EXIT_BLOCK_PTR
2304           && single_succ_p (e->dest)
2305           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
2306           && (last = last_stmt (e->dest)) != NULL
2307           && gimple_code (last) == GIMPLE_RETURN)
2308         {
2309           edge e1;
2310           edge_iterator ei1;
2311
2312           if (single_succ_p (bb))
2313             {
2314               FOR_EACH_EDGE (e1, ei1, bb->preds)
2315                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2316                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2317                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2318                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2319             }
2320           else
2321             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2322                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2323                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2324               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2325         }
2326
2327       /* Look for block we are guarding (ie we dominate it,
2328          but it doesn't postdominate us).  */
2329       if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
2330           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2331           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2332         {
2333           gimple_stmt_iterator bi;
2334
2335           /* The call heuristic claims that a guarded function call
2336              is improbable.  This is because such calls are often used
2337              to signal exceptional situations such as printing error
2338              messages.  */
2339           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2340                gsi_next (&bi))
2341             {
2342               gimple stmt = gsi_stmt (bi);
2343               if (is_gimple_call (stmt)
2344                   /* Constant and pure calls are hardly used to signalize
2345                      something exceptional.  */
2346                   && gimple_has_side_effects (stmt))
2347                 {
2348                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2349                   break;
2350                 }
2351             }
2352         }
2353     }
2354   tree_predict_by_opcode (bb);
2355 }
2356
2357 /* Predict branch probabilities and estimate profile of the tree CFG.
2358    This function can be called from the loop optimizers to recompute
2359    the profile information.  */
2360
2361 void
2362 tree_estimate_probability (void)
2363 {
2364   basic_block bb;
2365
2366   add_noreturn_fake_exit_edges ();
2367   connect_infinite_loops_to_exit ();
2368   /* We use loop_niter_by_eval, which requires that the loops have
2369      preheaders.  */
2370   create_preheaders (CP_SIMPLE_PREHEADERS);
2371   calculate_dominance_info (CDI_POST_DOMINATORS);
2372
2373   bb_predictions = pointer_map_create ();
2374   tree_bb_level_predictions ();
2375   record_loop_exits ();
2376
2377   if (number_of_loops (cfun) > 1)
2378     predict_loops ();
2379
2380   FOR_EACH_BB (bb)
2381     tree_estimate_probability_bb (bb);
2382
2383   FOR_EACH_BB (bb)
2384     combine_predictions_for_bb (bb);
2385
2386 #ifdef ENABLE_CHECKING
2387   pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2388 #endif
2389   pointer_map_destroy (bb_predictions);
2390   bb_predictions = NULL;
2391
2392   estimate_bb_frequencies (false);
2393   free_dominance_info (CDI_POST_DOMINATORS);
2394   remove_fake_exit_edges ();
2395 }
2396
2397 /* Predict branch probabilities and estimate profile of the tree CFG.
2398    This is the driver function for PASS_PROFILE.  */
2399
2400 static unsigned int
2401 tree_estimate_probability_driver (void)
2402 {
2403   unsigned nb_loops;
2404
2405   loop_optimizer_init (LOOPS_NORMAL);
2406   if (dump_file && (dump_flags & TDF_DETAILS))
2407     flow_loops_dump (dump_file, NULL, 0);
2408
2409   mark_irreducible_loops ();
2410
2411   nb_loops = number_of_loops (cfun);
2412   if (nb_loops > 1)
2413     scev_initialize ();
2414
2415   tree_estimate_probability ();
2416
2417   if (nb_loops > 1)
2418     scev_finalize ();
2419
2420   loop_optimizer_finalize ();
2421   if (dump_file && (dump_flags & TDF_DETAILS))
2422     gimple_dump_cfg (dump_file, dump_flags);
2423   if (profile_status == PROFILE_ABSENT)
2424     profile_status = PROFILE_GUESSED;
2425   return 0;
2426 }
2427 \f
2428 /* Predict edges to successors of CUR whose sources are not postdominated by
2429    BB by PRED and recurse to all postdominators.  */
2430
2431 static void
2432 predict_paths_for_bb (basic_block cur, basic_block bb,
2433                       enum br_predictor pred,
2434                       enum prediction taken,
2435                       bitmap visited)
2436 {
2437   edge e;
2438   edge_iterator ei;
2439   basic_block son;
2440
2441   /* We are looking for all edges forming edge cut induced by
2442      set of all blocks postdominated by BB.  */
2443   FOR_EACH_EDGE (e, ei, cur->preds)
2444     if (e->src->index >= NUM_FIXED_BLOCKS
2445         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2446     {
2447       edge e2;
2448       edge_iterator ei2;
2449       bool found = false;
2450
2451       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2452       if (e->flags & (EDGE_EH | EDGE_FAKE))
2453         continue;
2454       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2455
2456       /* See if there is an edge from e->src that is not abnormal
2457          and does not lead to BB.  */
2458       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2459         if (e2 != e
2460             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2461             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2462           {
2463             found = true;
2464             break;
2465           }
2466
2467       /* If there is non-abnormal path leaving e->src, predict edge
2468          using predictor.  Otherwise we need to look for paths
2469          leading to e->src.
2470
2471          The second may lead to infinite loop in the case we are predicitng
2472          regions that are only reachable by abnormal edges.  We simply
2473          prevent visiting given BB twice.  */
2474       if (found)
2475         predict_edge_def (e, pred, taken);
2476       else if (bitmap_set_bit (visited, e->src->index))
2477         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2478     }
2479   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2480        son;
2481        son = next_dom_son (CDI_POST_DOMINATORS, son))
2482     predict_paths_for_bb (son, bb, pred, taken, visited);
2483 }
2484
2485 /* Sets branch probabilities according to PREDiction and
2486    FLAGS.  */
2487
2488 static void
2489 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2490                           enum prediction taken)
2491 {
2492   bitmap visited = BITMAP_ALLOC (NULL);
2493   predict_paths_for_bb (bb, bb, pred, taken, visited);
2494   BITMAP_FREE (visited);
2495 }
2496
2497 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2498
2499 static void
2500 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2501                                enum prediction taken)
2502 {
2503   bool has_nonloop_edge = false;
2504   edge_iterator ei;
2505   edge e2;
2506
2507   basic_block bb = e->src;
2508   FOR_EACH_EDGE (e2, ei, bb->succs)
2509     if (e2->dest != e->src && e2->dest != e->dest
2510         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2511         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2512       {
2513         has_nonloop_edge = true;
2514         break;
2515       }
2516   if (!has_nonloop_edge)
2517     {
2518       bitmap visited = BITMAP_ALLOC (NULL);
2519       predict_paths_for_bb (bb, bb, pred, taken, visited);
2520       BITMAP_FREE (visited);
2521     }
2522   else
2523     predict_edge_def (e, pred, taken);
2524 }
2525 \f
2526 /* This is used to carry information about basic blocks.  It is
2527    attached to the AUX field of the standard CFG block.  */
2528
2529 typedef struct block_info_def
2530 {
2531   /* Estimated frequency of execution of basic_block.  */
2532   sreal frequency;
2533
2534   /* To keep queue of basic blocks to process.  */
2535   basic_block next;
2536
2537   /* Number of predecessors we need to visit first.  */
2538   int npredecessors;
2539 } *block_info;
2540
2541 /* Similar information for edges.  */
2542 typedef struct edge_info_def
2543 {
2544   /* In case edge is a loopback edge, the probability edge will be reached
2545      in case header is.  Estimated number of iterations of the loop can be
2546      then computed as 1 / (1 - back_edge_prob).  */
2547   sreal back_edge_prob;
2548   /* True if the edge is a loopback edge in the natural loop.  */
2549   unsigned int back_edge:1;
2550 } *edge_info;
2551
2552 #define BLOCK_INFO(B)   ((block_info) (B)->aux)
2553 #define EDGE_INFO(E)    ((edge_info) (E)->aux)
2554
2555 /* Helper function for estimate_bb_frequencies.
2556    Propagate the frequencies in blocks marked in
2557    TOVISIT, starting in HEAD.  */
2558
2559 static void
2560 propagate_freq (basic_block head, bitmap tovisit)
2561 {
2562   basic_block bb;
2563   basic_block last;
2564   unsigned i;
2565   edge e;
2566   basic_block nextbb;
2567   bitmap_iterator bi;
2568
2569   /* For each basic block we need to visit count number of his predecessors
2570      we need to visit first.  */
2571   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2572     {
2573       edge_iterator ei;
2574       int count = 0;
2575
2576       bb = BASIC_BLOCK (i);
2577
2578       FOR_EACH_EDGE (e, ei, bb->preds)
2579         {
2580           bool visit = bitmap_bit_p (tovisit, e->src->index);
2581
2582           if (visit && !(e->flags & EDGE_DFS_BACK))
2583             count++;
2584           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2585             fprintf (dump_file,
2586                      "Irreducible region hit, ignoring edge to %i->%i\n",
2587                      e->src->index, bb->index);
2588         }
2589       BLOCK_INFO (bb)->npredecessors = count;
2590       /* When function never returns, we will never process exit block.  */
2591       if (!count && bb == EXIT_BLOCK_PTR)
2592         bb->count = bb->frequency = 0;
2593     }
2594
2595   memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2596   last = head;
2597   for (bb = head; bb; bb = nextbb)
2598     {
2599       edge_iterator ei;
2600       sreal cyclic_probability, frequency;
2601
2602       memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2603       memcpy (&frequency, &real_zero, sizeof (real_zero));
2604
2605       nextbb = BLOCK_INFO (bb)->next;
2606       BLOCK_INFO (bb)->next = NULL;
2607
2608       /* Compute frequency of basic block.  */
2609       if (bb != head)
2610         {
2611 #ifdef ENABLE_CHECKING
2612           FOR_EACH_EDGE (e, ei, bb->preds)
2613             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2614                         || (e->flags & EDGE_DFS_BACK));
2615 #endif
2616
2617           FOR_EACH_EDGE (e, ei, bb->preds)
2618             if (EDGE_INFO (e)->back_edge)
2619               {
2620                 sreal_add (&cyclic_probability, &cyclic_probability,
2621                            &EDGE_INFO (e)->back_edge_prob);
2622               }
2623             else if (!(e->flags & EDGE_DFS_BACK))
2624               {
2625                 sreal tmp;
2626
2627                 /*  frequency += (e->probability
2628                                   * BLOCK_INFO (e->src)->frequency /
2629                                   REG_BR_PROB_BASE);  */
2630
2631                 sreal_init (&tmp, e->probability, 0);
2632                 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2633                 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2634                 sreal_add (&frequency, &frequency, &tmp);
2635               }
2636
2637           if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2638             {
2639               memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2640                       sizeof (frequency));
2641             }
2642           else
2643             {
2644               if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2645                 {
2646                   memcpy (&cyclic_probability, &real_almost_one,
2647                           sizeof (real_almost_one));
2648                 }
2649
2650               /* BLOCK_INFO (bb)->frequency = frequency
2651                                               / (1 - cyclic_probability) */
2652
2653               sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2654               sreal_div (&BLOCK_INFO (bb)->frequency,
2655                          &frequency, &cyclic_probability);
2656             }
2657         }
2658
2659       bitmap_clear_bit (tovisit, bb->index);
2660
2661       e = find_edge (bb, head);
2662       if (e)
2663         {
2664           sreal tmp;
2665
2666           /* EDGE_INFO (e)->back_edge_prob
2667              = ((e->probability * BLOCK_INFO (bb)->frequency)
2668              / REG_BR_PROB_BASE); */
2669
2670           sreal_init (&tmp, e->probability, 0);
2671           sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2672           sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2673                      &tmp, &real_inv_br_prob_base);
2674         }
2675
2676       /* Propagate to successor blocks.  */
2677       FOR_EACH_EDGE (e, ei, bb->succs)
2678         if (!(e->flags & EDGE_DFS_BACK)
2679             && BLOCK_INFO (e->dest)->npredecessors)
2680           {
2681             BLOCK_INFO (e->dest)->npredecessors--;
2682             if (!BLOCK_INFO (e->dest)->npredecessors)
2683               {
2684                 if (!nextbb)
2685                   nextbb = e->dest;
2686                 else
2687                   BLOCK_INFO (last)->next = e->dest;
2688
2689                 last = e->dest;
2690               }
2691           }
2692     }
2693 }
2694
2695 /* Estimate frequencies in loops at same nest level.  */
2696
2697 static void
2698 estimate_loops_at_level (struct loop *first_loop)
2699 {
2700   struct loop *loop;
2701
2702   for (loop = first_loop; loop; loop = loop->next)
2703     {
2704       edge e;
2705       basic_block *bbs;
2706       unsigned i;
2707       bitmap tovisit = BITMAP_ALLOC (NULL);
2708
2709       estimate_loops_at_level (loop->inner);
2710
2711       /* Find current loop back edge and mark it.  */
2712       e = loop_latch_edge (loop);
2713       EDGE_INFO (e)->back_edge = 1;
2714
2715       bbs = get_loop_body (loop);
2716       for (i = 0; i < loop->num_nodes; i++)
2717         bitmap_set_bit (tovisit, bbs[i]->index);
2718       free (bbs);
2719       propagate_freq (loop->header, tovisit);
2720       BITMAP_FREE (tovisit);
2721     }
2722 }
2723
2724 /* Propagates frequencies through structure of loops.  */
2725
2726 static void
2727 estimate_loops (void)
2728 {
2729   bitmap tovisit = BITMAP_ALLOC (NULL);
2730   basic_block bb;
2731
2732   /* Start by estimating the frequencies in the loops.  */
2733   if (number_of_loops (cfun) > 1)
2734     estimate_loops_at_level (current_loops->tree_root->inner);
2735
2736   /* Now propagate the frequencies through all the blocks.  */
2737   FOR_ALL_BB (bb)
2738     {
2739       bitmap_set_bit (tovisit, bb->index);
2740     }
2741   propagate_freq (ENTRY_BLOCK_PTR, tovisit);
2742   BITMAP_FREE (tovisit);
2743 }
2744
2745 /* Convert counts measured by profile driven feedback to frequencies.
2746    Return nonzero iff there was any nonzero execution count.  */
2747
2748 int
2749 counts_to_freqs (void)
2750 {
2751   gcov_type count_max, true_count_max = 0;
2752   basic_block bb;
2753
2754   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2755     true_count_max = MAX (bb->count, true_count_max);
2756
2757   count_max = MAX (true_count_max, 1);
2758   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2759     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2760
2761   return true_count_max;
2762 }
2763
2764 /* Return true if function is likely to be expensive, so there is no point to
2765    optimize performance of prologue, epilogue or do inlining at the expense
2766    of code size growth.  THRESHOLD is the limit of number of instructions
2767    function can execute at average to be still considered not expensive.  */
2768
2769 bool
2770 expensive_function_p (int threshold)
2771 {
2772   unsigned int sum = 0;
2773   basic_block bb;
2774   unsigned int limit;
2775
2776   /* We can not compute accurately for large thresholds due to scaled
2777      frequencies.  */
2778   gcc_assert (threshold <= BB_FREQ_MAX);
2779
2780   /* Frequencies are out of range.  This either means that function contains
2781      internal loop executing more than BB_FREQ_MAX times or profile feedback
2782      is available and function has not been executed at all.  */
2783   if (ENTRY_BLOCK_PTR->frequency == 0)
2784     return true;
2785
2786   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2787   limit = ENTRY_BLOCK_PTR->frequency * threshold;
2788   FOR_EACH_BB (bb)
2789     {
2790       rtx insn;
2791
2792       FOR_BB_INSNS (bb, insn)
2793         if (active_insn_p (insn))
2794           {
2795             sum += bb->frequency;
2796             if (sum > limit)
2797               return true;
2798         }
2799     }
2800
2801   return false;
2802 }
2803
2804 /* Estimate and propagate basic block frequencies using the given branch
2805    probabilities.  If FORCE is true, the frequencies are used to estimate
2806    the counts even when there are already non-zero profile counts.  */
2807
2808 void
2809 estimate_bb_frequencies (bool force)
2810 {
2811   basic_block bb;
2812   sreal freq_max;
2813
2814   if (force || profile_status != PROFILE_READ || !counts_to_freqs ())
2815     {
2816       static int real_values_initialized = 0;
2817
2818       if (!real_values_initialized)
2819         {
2820           real_values_initialized = 1;
2821           sreal_init (&real_zero, 0, 0);
2822           sreal_init (&real_one, 1, 0);
2823           sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2824           sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2825           sreal_init (&real_one_half, 1, -1);
2826           sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2827           sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2828         }
2829
2830       mark_dfs_back_edges ();
2831
2832       single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2833
2834       /* Set up block info for each basic block.  */
2835       alloc_aux_for_blocks (sizeof (struct block_info_def));
2836       alloc_aux_for_edges (sizeof (struct edge_info_def));
2837       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2838         {
2839           edge e;
2840           edge_iterator ei;
2841
2842           FOR_EACH_EDGE (e, ei, bb->succs)
2843             {
2844               sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2845               sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2846                          &EDGE_INFO (e)->back_edge_prob,
2847                          &real_inv_br_prob_base);
2848             }
2849         }
2850
2851       /* First compute frequencies locally for each loop from innermost
2852          to outermost to examine frequencies for back edges.  */
2853       estimate_loops ();
2854
2855       memcpy (&freq_max, &real_zero, sizeof (real_zero));
2856       FOR_EACH_BB (bb)
2857         if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2858           memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2859
2860       sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2861       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2862         {
2863           sreal tmp;
2864
2865           sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2866           sreal_add (&tmp, &tmp, &real_one_half);
2867           bb->frequency = sreal_to_int (&tmp);
2868         }
2869
2870       free_aux_for_blocks ();
2871       free_aux_for_edges ();
2872     }
2873   compute_function_frequency ();
2874 }
2875
2876 /* Decide whether function is hot, cold or unlikely executed.  */
2877 void
2878 compute_function_frequency (void)
2879 {
2880   basic_block bb;
2881   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2882
2883   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2884       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2885     node->only_called_at_startup = true;
2886   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2887     node->only_called_at_exit = true;
2888
2889   if (profile_status != PROFILE_READ)
2890     {
2891       int flags = flags_from_decl_or_type (current_function_decl);
2892       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2893           != NULL)
2894         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2895       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2896                != NULL)
2897         node->frequency = NODE_FREQUENCY_HOT;
2898       else if (flags & ECF_NORETURN)
2899         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2900       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2901         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2902       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2903                || DECL_STATIC_DESTRUCTOR (current_function_decl))
2904         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2905       return;
2906     }
2907
2908   /* Only first time try to drop function into unlikely executed.
2909      After inlining the roundoff errors may confuse us.
2910      Ipa-profile pass will drop functions only called from unlikely
2911      functions to unlikely and that is most of what we care about.  */
2912   if (!cfun->after_inlining)
2913     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2914   FOR_EACH_BB (bb)
2915     {
2916       if (maybe_hot_bb_p (cfun, bb))
2917         {
2918           node->frequency = NODE_FREQUENCY_HOT;
2919           return;
2920         }
2921       if (!probably_never_executed_bb_p (cfun, bb))
2922         node->frequency = NODE_FREQUENCY_NORMAL;
2923     }
2924 }
2925
2926 static bool
2927 gate_estimate_probability (void)
2928 {
2929   return flag_guess_branch_prob;
2930 }
2931
2932 /* Build PREDICT_EXPR.  */
2933 tree
2934 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2935 {
2936   tree t = build1 (PREDICT_EXPR, void_type_node,
2937                    build_int_cst (integer_type_node, predictor));
2938   SET_PREDICT_EXPR_OUTCOME (t, taken);
2939   return t;
2940 }
2941
2942 const char *
2943 predictor_name (enum br_predictor predictor)
2944 {
2945   return predictor_info[predictor].name;
2946 }
2947
2948 namespace {
2949
2950 const pass_data pass_data_profile =
2951 {
2952   GIMPLE_PASS, /* type */
2953   "profile_estimate", /* name */
2954   OPTGROUP_NONE, /* optinfo_flags */
2955   true, /* has_gate */
2956   true, /* has_execute */
2957   TV_BRANCH_PROB, /* tv_id */
2958   PROP_cfg, /* properties_required */
2959   0, /* properties_provided */
2960   0, /* properties_destroyed */
2961   0, /* todo_flags_start */
2962   TODO_verify_ssa, /* todo_flags_finish */
2963 };
2964
2965 class pass_profile : public gimple_opt_pass
2966 {
2967 public:
2968   pass_profile (gcc::context *ctxt)
2969     : gimple_opt_pass (pass_data_profile, ctxt)
2970   {}
2971
2972   /* opt_pass methods: */
2973   bool gate () { return gate_estimate_probability (); }
2974   unsigned int execute () { return tree_estimate_probability_driver (); }
2975
2976 }; // class pass_profile
2977
2978 } // anon namespace
2979
2980 gimple_opt_pass *
2981 make_pass_profile (gcc::context *ctxt)
2982 {
2983   return new pass_profile (ctxt);
2984 }
2985
2986 namespace {
2987
2988 const pass_data pass_data_strip_predict_hints =
2989 {
2990   GIMPLE_PASS, /* type */
2991   "*strip_predict_hints", /* name */
2992   OPTGROUP_NONE, /* optinfo_flags */
2993   false, /* has_gate */
2994   true, /* has_execute */
2995   TV_BRANCH_PROB, /* tv_id */
2996   PROP_cfg, /* properties_required */
2997   0, /* properties_provided */
2998   0, /* properties_destroyed */
2999   0, /* todo_flags_start */
3000   TODO_verify_ssa, /* todo_flags_finish */
3001 };
3002
3003 class pass_strip_predict_hints : public gimple_opt_pass
3004 {
3005 public:
3006   pass_strip_predict_hints (gcc::context *ctxt)
3007     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3008   {}
3009
3010   /* opt_pass methods: */
3011   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3012   unsigned int execute () { return strip_predict_hints (); }
3013
3014 }; // class pass_strip_predict_hints
3015
3016 } // anon namespace
3017
3018 gimple_opt_pass *
3019 make_pass_strip_predict_hints (gcc::context *ctxt)
3020 {
3021   return new pass_strip_predict_hints (ctxt);
3022 }
3023
3024 /* Rebuild function frequencies.  Passes are in general expected to
3025    maintain profile by hand, however in some cases this is not possible:
3026    for example when inlining several functions with loops freuqencies might run
3027    out of scale and thus needs to be recomputed.  */
3028
3029 void
3030 rebuild_frequencies (void)
3031 {
3032   timevar_push (TV_REBUILD_FREQUENCIES);
3033
3034   /* When the max bb count in the function is small, there is a higher
3035      chance that there were truncation errors in the integer scaling
3036      of counts by inlining and other optimizations. This could lead
3037      to incorrect classification of code as being cold when it isn't.
3038      In that case, force the estimation of bb counts/frequencies from the
3039      branch probabilities, rather than computing frequencies from counts,
3040      which may also lead to frequencies incorrectly reduced to 0. There
3041      is less precision in the probabilities, so we only do this for small
3042      max counts.  */
3043   gcov_type count_max = 0;
3044   basic_block bb;
3045   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
3046     count_max = MAX (bb->count, count_max);
3047
3048   if (profile_status == PROFILE_GUESSED
3049       || (profile_status == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3050     {
3051       loop_optimizer_init (0);
3052       add_noreturn_fake_exit_edges ();
3053       mark_irreducible_loops ();
3054       connect_infinite_loops_to_exit ();
3055       estimate_bb_frequencies (true);
3056       remove_fake_exit_edges ();
3057       loop_optimizer_finalize ();
3058     }
3059   else if (profile_status == PROFILE_READ)
3060     counts_to_freqs ();
3061   else
3062     gcc_unreachable ();
3063   timevar_pop (TV_REBUILD_FREQUENCIES);
3064 }