gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "tree.h"
  35 #include "calls.h"
  36 #include "rtl.h"
  37 #include "tm_p.h"
  38 #include "hard-reg-set.h"
  39 #include "basic-block.h"
  40 #include "insn-config.h"
  41 #include "regs.h"
  42 #include "flags.h"
  43 #include "function.h"
  44 #include "except.h"
  45 #include "diagnostic-core.h"
  46 #include "recog.h"
  47 #include "expr.h"
  48 #include "predict.h"
  49 #include "coverage.h"
  50 #include "sreal.h"
  51 #include "params.h"
  52 #include "target.h"
  53 #include "cfgloop.h"
  54 #include "gimple.h"
  55 #include "gimple-iterator.h"
  56 #include "gimple-ssa.h"
  57 #include "cgraph.h"
  58 #include "tree-cfg.h"
  59 #include "tree-phinodes.h"
  60 #include "ssa-iterators.h"
  61 #include "tree-ssa-loop-niter.h"
  62 #include "tree-ssa-loop.h"
  63 #include "ggc.h"
  64 #include "tree-pass.h"
  65 #include "tree-scalar-evolution.h"
  66 #include "cfgloop.h"
  67 #include "pointer-set.h"
  68
  69 /* sreal constants, named in declaration order after the values
  70    0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
     1/REG_BR_PROB_BASE, 0.5 and BB_FREQ_MAX.  They are only declared
     here; their initialization is not part of this section.  */
  71 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  72              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  73
  74 /* Random guesstimation given names, all on the REG_BR_PROB_BASE scale.
  75    PROB_VERY_UNLIKELY should be small enough so basic block predicted
  76    by it gets below HOT_BB_FREQUENCY_FRACTION.  */
  77 #define PROB_VERY_UNLIKELY      (REG_BR_PROB_BASE / 2000 - 1)
  78 #define PROB_EVEN               (REG_BR_PROB_BASE / 2)
  79 #define PROB_VERY_LIKELY        (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
  80 #define PROB_ALWAYS             (REG_BR_PROB_BASE)
  81
     /* Forward declarations of file-local (static) functions.  */
  82 static void combine_predictions_for_insn (rtx, basic_block);
  83 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  84 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  85 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  86 static bool can_predict_insn_p (const_rtx);
  87
  88 /* Information we hold about each branch predictor.
  89    Filled using information from predict.def.  */
  90
  91 struct predictor_info
  92 {
  93   const char *const name;       /* Name used in the debugging dumps.  */
  94   const int hitrate;            /* Expected hitrate read by predict_insn_def
  95                                    and predict_edge_def.  */
  96   const int flags;              /* Tested against PRED_FLAG_FIRST_MATCH.  */
  97 };
  98
  99 /* Use given predictor without Dempster-Shafer theory if it matches
 100    using first_match heuristics.  */
 101 #define PRED_FLAG_FIRST_MATCH 1
 102
 103 /* Recompute hitrate in percent to our representation, i.e. scale VAL
      from a 0..100 range to 0..REG_BR_PROB_BASE, rounding to nearest.  */
 104
 105 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
 106
 107 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
     /* One entry per DEF_PREDICTOR line of predict.def, in file order; the
        ENUM argument is not stored.  */
 108 static const struct predictor_info predictor_info[]= {
 109 #include "predict.def"
 110
 111   /* Upper bound on predictors: an all-zero terminating entry.  */
 112   {NULL, 0, 0}
 113 };
 114 #undef DEF_PREDICTOR
 115
 116 /* Return TRUE if frequency FREQ is considered to be hot.  */
 117
 118 static inline bool
 119 maybe_hot_frequency_p (struct function *fun, int freq)
 120 {
 121   struct cgraph_node *node = cgraph_get_node (fun->decl);
 122   if (!profile_info || !flag_branch_probabilities)
 123     {
 124       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 125         return false;
 126       if (node->frequency == NODE_FREQUENCY_HOT)
 127         return true;
 128     }
 129   if (profile_status_for_function (fun) == PROFILE_ABSENT)
 130     return true;
 131   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 132       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
 133     return false;
 134   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 135     return false;
 136   if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
 137               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 138     return false;
 139   return true;
 140 }
 141
 142 static gcov_type min_count = -1;
 143
 144 /* Determine the threshold for hot BB counts.  */
 145
 146 gcov_type
 147 get_hot_bb_threshold ()
 148 {
 149   gcov_working_set_t *ws;
 150   if (min_count == -1)
 151     {
 152       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 153       gcc_assert (ws);
 154       min_count = ws->min_counter;
 155     }
 156   return min_count;
 157 }
 158
 159 /* Set the threshold for hot BB counts.  */
 160
 161 void
 162 set_hot_bb_threshold (gcov_type min)
 163 {
 164   min_count = min;
 165 }
 166
 167 /* Return TRUE if frequency FREQ is considered to be hot.  */
 168
 169 static inline bool
 170 maybe_hot_count_p (struct function *fun, gcov_type count)
 171 {
 172   if (fun && profile_status_for_function (fun) != PROFILE_READ)
 173     return true;
 174   /* Code executed at most once is not hot.  */
 175   if (profile_info->runs >= count)
 176     return false;
 177   return (count >= get_hot_bb_threshold ());
 178 }
 179
 180 /* Return true in case BB can be CPU intensive and should be optimized
 181    for maximal performance.  */
 182
 183 bool
 184 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 185 {
 186   gcc_checking_assert (fun);
 187   if (profile_status_for_function (fun) == PROFILE_READ)
 188     return maybe_hot_count_p (fun, bb->count);
 189   return maybe_hot_frequency_p (fun, bb->frequency);
 190 }
 191
 192 /* Return true if the call can be hot.  */
 193
 194 bool
 195 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
 196 {
 197   if (profile_info && flag_branch_probabilities
 198       && !maybe_hot_count_p (NULL,
 199                              edge->count))
 200     return false;
 201   if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
 202       || (edge->callee
 203           && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 204     return false;
 205   if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
 206       && (edge->callee
 207           && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
 208     return false;
 209   if (optimize_size)
 210     return false;
 211   if (edge->caller->frequency == NODE_FREQUENCY_HOT)
 212     return true;
 213   if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 214       && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
 215     return false;
 216   if (flag_guess_branch_prob)
 217     {
 218       if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
 219           || edge->frequency <= (CGRAPH_FREQ_BASE
 220                                  / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 221         return false;
 222     }
 223   return true;
 224 }
 225
 226 /* Return true in case BB can be CPU intensive and should be optimized
 227    for maximal performance.  */
 228
 229 bool
 230 maybe_hot_edge_p (edge e)
 231 {
 232   if (profile_status == PROFILE_READ)
 233     return maybe_hot_count_p (cfun, e->count);
 234   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 235 }
 236
 237
 238
 239 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 240    node frequency reflects never being executed.  */
 241
 242 static bool
 243 probably_never_executed (struct function *fun,
 244                          gcov_type count, int frequency)
 245 {
 246   gcc_checking_assert (fun);
 247   if (profile_status_for_function (fun) == PROFILE_READ)
 248     {
 249       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 250       if (count * unlikely_count_fraction >= profile_info->runs)
 251         return false;
 252       if (!frequency)
 253         return true;
 254       if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
 255         return false;
 256       if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
 257         {
 258           gcov_type computed_count;
 259           /* Check for possibility of overflow, in which case entry bb count
 260              is large enough to do the division first without losing much
 261              precision.  */
 262           if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
 263               REG_BR_PROB_BASE)
 264             {
 265               gcov_type scaled_count
 266                   = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
 267              unlikely_count_fraction;
 268               computed_count = RDIV (scaled_count,
 269                                      ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
 270             }
 271           else
 272             {
 273               computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
 274                                      ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
 275               computed_count *= frequency * unlikely_count_fraction;
 276             }
 277           if (computed_count >= profile_info->runs)
 278             return false;
 279         }
 280       return true;
 281     }
 282   if ((!profile_info || !flag_branch_probabilities)
 283       && (cgraph_get_node (fun->decl)->frequency
 284           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 285     return true;
 286   return false;
 287 }
 288
 289
 290 /* Return true in case BB is probably never executed.  */
 291
 292 bool
 293 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 294 {
 295   return probably_never_executed (fun, bb->count, bb->frequency);
 296 }
 297
 298
 299 /* Return true in case edge E is probably never executed.  */
 300
 301 bool
 302 probably_never_executed_edge_p (struct function *fun, edge e)
 303 {
 304   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 305 }
 306
 307 /* Return true if NODE should be optimized for size.  */
 308
 309 bool
 310 cgraph_optimize_for_size_p (struct cgraph_node *node)
 311 {
 312   if (optimize_size)
 313     return true;
 314   if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 315     return true;
 316   else
 317     return false;
 318 }
 319
 320 /* Return true when current function should always be optimized for size.  */
 321
 322 bool
 323 optimize_function_for_size_p (struct function *fun)
 324 {
 325   if (optimize_size)
 326     return true;
 327   if (!fun || !fun->decl)
 328     return false;
 329   return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
 330 }
 331
 332 /* Return true when current function should always be optimized for speed.  */
 333
 334 bool
 335 optimize_function_for_speed_p (struct function *fun)
 336 {
 337   return !optimize_function_for_size_p (fun);
 338 }
 339
 340 /* Return TRUE when BB should be optimized for size.  */
 341
 342 bool
 343 optimize_bb_for_size_p (const_basic_block bb)
 344 {
 345   return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
 346 }
 347
 348 /* Return TRUE when BB should be optimized for speed.  */
 349
 350 bool
 351 optimize_bb_for_speed_p (const_basic_block bb)
 352 {
 353   return !optimize_bb_for_size_p (bb);
 354 }
 355
 356 /* Return TRUE when BB should be optimized for size.  */
 357
 358 bool
 359 optimize_edge_for_size_p (edge e)
 360 {
 361   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 362 }
 363
 364 /* Return TRUE when BB should be optimized for speed.  */
 365
 366 bool
 367 optimize_edge_for_speed_p (edge e)
 368 {
 369   return !optimize_edge_for_size_p (e);
 370 }
 371
 372 /* Return TRUE when BB should be optimized for size.  */
 373
 374 bool
 375 optimize_insn_for_size_p (void)
 376 {
 377   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 378 }
 379
 380 /* Return TRUE when BB should be optimized for speed.  */
 381
 382 bool
 383 optimize_insn_for_speed_p (void)
 384 {
 385   return !optimize_insn_for_size_p ();
 386 }
 387
 388 /* Return TRUE when LOOP should be optimized for size.  */
 389
 390 bool
 391 optimize_loop_for_size_p (struct loop *loop)
 392 {
 393   return optimize_bb_for_size_p (loop->header);
 394 }
 395
 396 /* Return TRUE when LOOP should be optimized for speed.  */
 397
 398 bool
 399 optimize_loop_for_speed_p (struct loop *loop)
 400 {
 401   return optimize_bb_for_speed_p (loop->header);
 402 }
 403
 404 /* Return TRUE when LOOP nest should be optimized for speed.  */
 405
 406 bool
 407 optimize_loop_nest_for_speed_p (struct loop *loop)
 408 {
 409   struct loop *l = loop;
 410   if (optimize_loop_for_speed_p (loop))
 411     return true;
 412   l = loop->inner;
 413   while (l && l != loop)
 414     {
 415       if (optimize_loop_for_speed_p (l))
 416         return true;
 417       if (l->inner)
 418         l = l->inner;
 419       else if (l->next)
 420         l = l->next;
 421       else
 422         {
 423           while (l != loop && !l->next)
 424             l = loop_outer (l);
 425           if (l != loop)
 426             l = l->next;
 427         }
 428     }
 429   return false;
 430 }
 431
 432 /* Return TRUE when LOOP nest should be optimized for size.  */
 433
 434 bool
 435 optimize_loop_nest_for_size_p (struct loop *loop)
 436 {
 437   return !optimize_loop_nest_for_speed_p (loop);
 438 }
 439
 440 /* Return true when edge E is likely to be well predictable by branch
 441    predictor.  */
 442
 443 bool
 444 predictable_edge_p (edge e)
 445 {
 446   if (profile_status == PROFILE_ABSENT)
 447     return false;
 448   if ((e->probability
 449        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 450       || (REG_BR_PROB_BASE - e->probability
 451           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 452     return true;
 453   return false;
 454 }
 455
 456
 457 /* Set RTL expansion for BB profile.  */
 458
 459 void
 460 rtl_profile_for_bb (basic_block bb)
 461 {
 462   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 463 }
 464
 465 /* Set RTL expansion for edge profile.  */
 466
 467 void
 468 rtl_profile_for_edge (edge e)
 469 {
 470   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 471 }
 472
 473 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 474 void
 475 default_rtl_profile (void)
 476 {
 477   crtl->maybe_hot_insn_p = true;
 478 }
 479
 480 /* Return true if the one of outgoing edges is already predicted by
 481    PREDICTOR.  */
 482
 483 bool
 484 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 485 {
 486   rtx note;
 487   if (!INSN_P (BB_END (bb)))
 488     return false;
 489   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 490     if (REG_NOTE_KIND (note) == REG_BR_PRED
 491         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 492       return true;
 493   return false;
 494 }
 495
 496 /* This map contains for a basic block the list of predictions for the
 497    outgoing edges; each value is the head of a singly linked list of
      struct edge_prediction.  */
 498
 499 static struct pointer_map_t *bb_predictions;
 500
 501 /* Structure representing predictions in tree level.  */
 502
 503 struct edge_prediction {
 504     struct edge_prediction *ep_next;    /* Next prediction in the list.  */
 505     edge ep_edge;                       /* Edge the prediction is about.  */
 506     enum br_predictor ep_predictor;     /* Predictor that produced it.  */
 507     int ep_probability;                 /* Predicted probability of ep_edge.  */
 508 };
 509
 510 /* Return true if the one of outgoing edges is already predicted by
 511    PREDICTOR.  */
 512
 513 bool
 514 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 515 {
 516   struct edge_prediction *i;
 517   void **preds = pointer_map_contains (bb_predictions, bb);
 518
 519   if (!preds)
 520     return false;
 521
 522   for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
 523     if (i->ep_predictor == predictor)
 524       return true;
 525   return false;
 526 }
 527
 528 /* Return true when the probability of edge is reliable.
 529
 530    The profile guessing code is good at predicting branch outcome (ie.
 531    taken/not taken), that is predicted right slightly over 75% of time.
 532    It is however notoriously poor on predicting the probability itself.
 533    In general the profile appear a lot flatter (with probabilities closer
 534    to 50%) than the reality so it is bad idea to use it to drive optimization
 535    such as those disabling dynamic branch prediction for well predictable
 536    branches.
 537
 538    There are two exceptions - edges leading to noreturn edges and edges
 539    predicted by number of iterations heuristics are predicted well.  This macro
 540    should be able to distinguish those, but at the moment it simply check for
 541    noreturn heuristic that is only one giving probability over 99% or bellow
 542    1%.  In future we might want to propagate reliability information across the
 543    CFG if we find this information useful on multiple places.   */
 544 static bool
 545 probability_reliable_p (int prob)
 546 {
 547   return (profile_status == PROFILE_READ
 548           || (profile_status == PROFILE_GUESSED
 549               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 550 }
 551
 552 /* Same predicate as above, working on edges.  */
 553 bool
 554 edge_probability_reliable_p (const_edge e)
 555 {
 556   return probability_reliable_p (e->probability);
 557 }
 558
 559 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 560 bool
 561 br_prob_note_reliable_p (const_rtx note)
 562 {
 563   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 564   return probability_reliable_p (XINT (note, 0));
 565 }
 566
 567 static void
 568 predict_insn (rtx insn, enum br_predictor predictor, int probability)
 569 {
 570   gcc_assert (any_condjump_p (insn));
 571   if (!flag_guess_branch_prob)
 572     return;
 573
 574   add_reg_note (insn, REG_BR_PRED,
 575                 gen_rtx_CONCAT (VOIDmode,
 576                                 GEN_INT ((int) predictor),
 577                                 GEN_INT ((int) probability)));
 578 }
 579
 580 /* Predict insn by given predictor.  */
 581
 582 void
 583 predict_insn_def (rtx insn, enum br_predictor predictor,
 584                   enum prediction taken)
 585 {
 586    int probability = predictor_info[(int) predictor].hitrate;
 587
 588    if (taken != TAKEN)
 589      probability = REG_BR_PROB_BASE - probability;
 590
 591    predict_insn (insn, predictor, probability);
 592 }
 593
 594 /* Predict edge E with given probability if possible.  */
 595
 596 void
 597 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 598 {
 599   rtx last_insn;
 600   last_insn = BB_END (e->src);
 601
 602   /* We can store the branch prediction information only about
 603      conditional jumps.  */
 604   if (!any_condjump_p (last_insn))
 605     return;
 606
 607   /* We always store probability of branching.  */
 608   if (e->flags & EDGE_FALLTHRU)
 609     probability = REG_BR_PROB_BASE - probability;
 610
 611   predict_insn (last_insn, predictor, probability);
 612 }
 613
 614 /* Predict edge E with the given PROBABILITY.  */
 615 void
 616 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 617 {
 618   gcc_assert (profile_status != PROFILE_GUESSED);
 619   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
 620        1)
 621       && flag_guess_branch_prob && optimize)
 622     {
 623       struct edge_prediction *i = XNEW (struct edge_prediction);
 624       void **preds = pointer_map_insert (bb_predictions, e->src);
 625
 626       i->ep_next = (struct edge_prediction *) *preds;
 627       *preds = i;
 628       i->ep_probability = probability;
 629       i->ep_predictor = predictor;
 630       i->ep_edge = e;
 631     }
 632 }
 633
 634 /* Remove all predictions on given basic block that are attached
 635    to edge E.  */
 636 void
 637 remove_predictions_associated_with_edge (edge e)
 638 {
 639   void **preds;
 640
 641   if (!bb_predictions)
 642     return;
 643
 644   preds = pointer_map_contains (bb_predictions, e->src);
 645
 646   if (preds)
 647     {
 648       struct edge_prediction **prediction = (struct edge_prediction **) preds;
 649       struct edge_prediction *next;
 650
 651       while (*prediction)
 652         {
 653           if ((*prediction)->ep_edge == e)
 654             {
 655               next = (*prediction)->ep_next;
 656               free (*prediction);
 657               *prediction = next;
 658             }
 659           else
 660             prediction = &((*prediction)->ep_next);
 661         }
 662     }
 663 }
 664
 665 /* Clears the list of predictions stored for BB.  */
 666
 667 static void
 668 clear_bb_predictions (basic_block bb)
 669 {
 670   void **preds = pointer_map_contains (bb_predictions, bb);
 671   struct edge_prediction *pred, *next;
 672
 673   if (!preds)
 674     return;
 675
 676   for (pred = (struct edge_prediction *) *preds; pred; pred = next)
 677     {
 678       next = pred->ep_next;
 679       free (pred);
 680     }
 681   *preds = NULL;
 682 }
 683
 684 /* Return true when we can store prediction on insn INSN.
 685    At the moment we represent predictions only on conditional
 686    jumps, not at computed jump or other complicated cases.  */
 687 static bool
 688 can_predict_insn_p (const_rtx insn)
 689 {
 690   return (JUMP_P (insn)
 691           && any_condjump_p (insn)
 692           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 693 }
 694
 695 /* Predict edge E by given predictor if possible.  */
 696
 697 void
 698 predict_edge_def (edge e, enum br_predictor predictor,
 699                   enum prediction taken)
 700 {
 701    int probability = predictor_info[(int) predictor].hitrate;
 702
 703    if (taken != TAKEN)
 704      probability = REG_BR_PROB_BASE - probability;
 705
 706    predict_edge (e, predictor, probability);
 707 }
 708
 709 /* Invert all branch predictions or probability notes in the INSN.  This needs
 710    to be done each time we invert the condition used by the jump.  */
 711
 712 void
 713 invert_br_probabilities (rtx insn)
 714 {
 715   rtx note;
 716
 717   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 718     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 719       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 720     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 721       XEXP (XEXP (note, 0), 1)
 722         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 723 }
 724
 725 /* Dump information about the branch prediction to the output file.  */
 726
 727 static void
 728 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 729                  basic_block bb, int used)
 730 {
 731   edge e;
 732   edge_iterator ei;
 733
 734   if (!file)
 735     return;
 736
 737   FOR_EACH_EDGE (e, ei, bb->succs)
 738     if (! (e->flags & EDGE_FALLTHRU))
 739       break;
 740
 741   fprintf (file, "  %s heuristics%s: %.1f%%",
 742            predictor_info[predictor].name,
 743            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 744
 745   if (bb->count)
 746     {
 747       fprintf (file, "  exec ");
 748       fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
 749       if (e)
 750         {
 751           fprintf (file, " hit ");
 752           fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
 753           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 754         }
 755     }
 756
 757   fprintf (file, "\n");
 758 }
 759
 760 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 761    evenly and hope for the best.  */
 762 static void
 763 set_even_probabilities (basic_block bb)
 764 {
 765   int nedges = 0;
 766   edge e;
 767   edge_iterator ei;
 768
 769   FOR_EACH_EDGE (e, ei, bb->succs)
 770     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 771       nedges ++;
 772   FOR_EACH_EDGE (e, ei, bb->succs)
 773     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 774       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 775     else
 776       e->probability = 0;
 777 }
 778
 779 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 780    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 781
 782 static void
 783 combine_predictions_for_insn (rtx insn, basic_block bb)
 784 {
 785   rtx prob_note;
 786   rtx *pnote;
 787   rtx note;
 788   int best_probability = PROB_EVEN;
 789   enum br_predictor best_predictor = END_PREDICTORS;
 790   int combined_probability = REG_BR_PROB_BASE / 2;
 791   int d;
 792   bool first_match = false;
 793   bool found = false;
 794
 795   if (!can_predict_insn_p (insn))
 796     {
 797       set_even_probabilities (bb);
 798       return;
 799     }
 800
 801   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 802   pnote = &REG_NOTES (insn);
 803   if (dump_file)
 804     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 805              bb->index);
 806
 807   /* We implement "first match" heuristics and use probability guessed
 808      by predictor with smallest index.  */
 809   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 810     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 811       {
 812         enum br_predictor predictor = ((enum br_predictor)
 813                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 814         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 815
 816         found = true;
 817         if (best_predictor > predictor)
 818           best_probability = probability, best_predictor = predictor;
 819
 820         d = (combined_probability * probability
 821              + (REG_BR_PROB_BASE - combined_probability)
 822              * (REG_BR_PROB_BASE - probability));
 823
 824         /* Use FP math to avoid overflows of 32bit integers.  */
 825         if (d == 0)
 826           /* If one probability is 0% and one 100%, avoid division by zero.  */
 827           combined_probability = REG_BR_PROB_BASE / 2;
 828         else
 829           combined_probability = (((double) combined_probability) * probability
 830                                   * REG_BR_PROB_BASE / d + 0.5);
 831       }
 832
 833   /* Decide which heuristic to use.  In case we didn't match anything,
 834      use no_prediction heuristic, in case we did match, use either
 835      first match or Dempster-Shaffer theory depending on the flags.  */
 836
 837   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 838     first_match = true;
 839
 840   if (!found)
 841     dump_prediction (dump_file, PRED_NO_PREDICTION,
 842                      combined_probability, bb, true);
 843   else
 844     {
 845       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 846                        bb, !first_match);
 847       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 848                        bb, first_match);
 849     }
 850
 851   if (first_match)
 852     combined_probability = best_probability;
 853   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 854
 855   while (*pnote)
 856     {
 857       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 858         {
 859           enum br_predictor predictor = ((enum br_predictor)
 860                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 861           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 862
 863           dump_prediction (dump_file, predictor, probability, bb,
 864                            !first_match || best_predictor == predictor);
 865           *pnote = XEXP (*pnote, 1);
 866         }
 867       else
 868         pnote = &XEXP (*pnote, 1);
 869     }
 870
 871   if (!prob_note)
 872     {
 873       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 874
 875       /* Save the prediction into CFG in case we are seeing non-degenerated
 876          conditional jump.  */
 877       if (!single_succ_p (bb))
 878         {
 879           BRANCH_EDGE (bb)->probability = combined_probability;
 880           FALLTHRU_EDGE (bb)->probability
 881             = REG_BR_PROB_BASE - combined_probability;
 882         }
 883     }
 884   else if (!single_succ_p (bb))
 885     {
 886       int prob = XINT (prob_note, 0);
 887
 888       BRANCH_EDGE (bb)->probability = prob;
 889       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 890     }
 891   else
 892     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 893 }
 894
 895 /* Combine predictions into single probability and store them into CFG.
 896    Remove now useless prediction entries.  */
 897
 898 static void
 899 combine_predictions_for_bb (basic_block bb)
 900 {
 901   int best_probability = PROB_EVEN;
 902   enum br_predictor best_predictor = END_PREDICTORS;
 903   int combined_probability = REG_BR_PROB_BASE / 2;
 904   int d;
 905   bool first_match = false;
 906   bool found = false;
 907   struct edge_prediction *pred;
 908   int nedges = 0;
 909   edge e, first = NULL, second = NULL;
 910   edge_iterator ei;
 911   void **preds;
 912
 913   FOR_EACH_EDGE (e, ei, bb->succs)
 914     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 915       {
 916         nedges ++;
 917         if (first && !second)
 918           second = e;
 919         if (!first)
 920           first = e;
 921       }
 922
 923   /* When there is no successor or only one choice, prediction is easy.
 924
 925      We are lazy for now and predict only basic blocks with two outgoing
 926      edges.  It is possible to predict generic case too, but we have to
 927      ignore first match heuristics and do more involved combining.  Implement
 928      this later.  */
 929   if (nedges != 2)
 930     {
 931       if (!bb->count)
 932         set_even_probabilities (bb);
 933       clear_bb_predictions (bb);
 934       if (dump_file)
 935         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 936                  nedges, bb->index);
 937       return;
 938     }
 939
 940   if (dump_file)
 941     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 942
 943   preds = pointer_map_contains (bb_predictions, bb);
 944   if (preds)
 945     {
 946       /* We implement "first match" heuristics and use probability guessed
 947          by predictor with smallest index.  */
 948       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 949         {
 950           enum br_predictor predictor = pred->ep_predictor;
 951           int probability = pred->ep_probability;
 952
 953           if (pred->ep_edge != first)
 954             probability = REG_BR_PROB_BASE - probability;
 955
 956           found = true;
 957           /* First match heuristics would be widly confused if we predicted
 958              both directions.  */
 959           if (best_predictor > predictor)
 960             {
 961               struct edge_prediction *pred2;
 962               int prob = probability;
 963
 964               for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
 965                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 966                  {
 967                    int probability2 = pred->ep_probability;
 968
 969                    if (pred2->ep_edge != first)
 970                      probability2 = REG_BR_PROB_BASE - probability2;
 971
 972                    if ((probability < REG_BR_PROB_BASE / 2) !=
 973                        (probability2 < REG_BR_PROB_BASE / 2))
 974                      break;
 975
 976                    /* If the same predictor later gave better result, go for it! */
 977                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 978                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 979                      prob = probability2;
 980                  }
 981               if (!pred2)
 982                 best_probability = prob, best_predictor = predictor;
 983             }
 984
 985           d = (combined_probability * probability
 986                + (REG_BR_PROB_BASE - combined_probability)
 987                * (REG_BR_PROB_BASE - probability));
 988
 989           /* Use FP math to avoid overflows of 32bit integers.  */
 990           if (d == 0)
 991             /* If one probability is 0% and one 100%, avoid division by zero.  */
 992             combined_probability = REG_BR_PROB_BASE / 2;
 993           else
 994             combined_probability = (((double) combined_probability)
 995                                     * probability
 996                                     * REG_BR_PROB_BASE / d + 0.5);
 997         }
 998     }
 999
1000   /* Decide which heuristic to use.  In case we didn't match anything,
1001      use no_prediction heuristic, in case we did match, use either
1002      first match or Dempster-Shafer theory depending on the flags.  */
1003
1004   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
1005     first_match = true;
1006
1007   if (!found)
1008     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
1009   else
1010     {
1011       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1012                        !first_match);
1013       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1014                        first_match);
1015     }
1016
1017   if (first_match)
1018     combined_probability = best_probability;
1019   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1020
1021   if (preds)
1022     {
1023       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1024         {
1025           enum br_predictor predictor = pred->ep_predictor;
1026           int probability = pred->ep_probability;
1027
1028           if (pred->ep_edge != EDGE_SUCC (bb, 0))
1029             probability = REG_BR_PROB_BASE - probability;
1030           dump_prediction (dump_file, predictor, probability, bb,
1031                            !first_match || best_predictor == predictor);
1032         }
1033     }
1034   clear_bb_predictions (bb);
1035
1036   if (!bb->count)
1037     {
1038       first->probability = combined_probability;
1039       second->probability = REG_BR_PROB_BASE - combined_probability;
1040     }
1041 }
1042
1043 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1044    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1045
1046    T1 and T2 should be one of the following cases:
1047      1. T1 is SSA_NAME, T2 is NULL
1048      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1049      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1050
1051 static tree
1052 strips_small_constant (tree t1, tree t2)
1053 {
1054   tree ret = NULL;
1055   int value = 0;
1056
1057   if (!t1)
1058     return NULL;
1059   else if (TREE_CODE (t1) == SSA_NAME)
1060     ret = t1;
1061   else if (tree_fits_shwi_p (t1))
1062     value = tree_to_shwi (t1);
1063   else
1064     return NULL;
1065
1066   if (!t2)
1067     return ret;
1068   else if (tree_fits_shwi_p (t2))
1069     value = tree_to_shwi (t2);
1070   else if (TREE_CODE (t2) == SSA_NAME)
1071     {
1072       if (ret)
1073         return NULL;
1074       else
1075         ret = t2;
1076     }
1077
1078   if (value <= 4 && value >= -4)
1079     return ret;
1080   else
1081     return NULL;
1082 }
1083
1084 /* Return the SSA_NAME in T or T's operands.
1085    Return NULL if SSA_NAME cannot be found.  */
1086
1087 static tree
1088 get_base_value (tree t)
1089 {
1090   if (TREE_CODE (t) == SSA_NAME)
1091     return t;
1092
1093   if (!BINARY_CLASS_P (t))
1094     return NULL;
1095
1096   switch (TREE_OPERAND_LENGTH (t))
1097     {
1098     case 1:
1099       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1100     case 2:
1101       return strips_small_constant (TREE_OPERAND (t, 0),
1102                                     TREE_OPERAND (t, 1));
1103     default:
1104       return NULL;
1105     }
1106 }
1107
1108 /* Check the compare STMT in LOOP.  If it compares an induction
1109    variable to a loop invariant, return true, and save
1110    LOOP_INVARIANT, COMPARE_CODE, LOOP_STEP and LOOP_IV_BASE.
1111    Otherwise return false and set LOOP_INVARIANT to NULL; the other
        outputs are not written on failure.

        STMT must be a condition using GT, GE, NE, LT, LE or EQ whose
        operands are each an SSA_NAME or an INTEGER_CST.  Both operands have
        to be accepted by simple_iv with INTEGER_CST steps, exactly one of
        which is zero: that operand is taken as the invariant bound, the
        other one as the induction variable.  The non-zero step must fit a
        signed HOST_WIDE_INT.  A bound or base that is not an INTEGER_CST is
        reduced with get_base_value, and the check fails if that yields
        NULL.  */
1112
1113 static bool
1114 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1115                                      tree *loop_invariant,
1116                                      enum tree_code *compare_code,
1117                                      tree *loop_step,
1118                                      tree *loop_iv_base)
1119 {
1120   tree op0, op1, bound, base;
1121   affine_iv iv0, iv1;
1122   enum tree_code code;
1123   tree step;
1124
1125   code = gimple_cond_code (stmt);
       /* Cleared up front so that every early "return false" below leaves
          *LOOP_INVARIANT as NULL.  */
1126   *loop_invariant = NULL;
1127
       /* Only ordering and (in)equality comparisons are handled.  */
1128   switch (code)
1129     {
1130     case GT_EXPR:
1131     case GE_EXPR:
1132     case NE_EXPR:
1133     case LT_EXPR:
1134     case LE_EXPR:
1135     case EQ_EXPR:
1136       break;
1137
1138     default:
1139       return false;
1140     }
1141
1142   op0 = gimple_cond_lhs (stmt);
1143   op1 = gimple_cond_rhs (stmt);
1144
1145   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1146        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1147     return false;
1148   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1149     return false;
1150   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1151     return false;
1152   if (TREE_CODE (iv0.step) != INTEGER_CST
1153       || TREE_CODE (iv1.step) != INTEGER_CST)
1154     return false;
       /* Exactly one side may have a zero step (the invariant); reject the
          cases where both or neither do.  */
1155   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1156       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1157     return false;
1158
1159   if (integer_zerop (iv0.step))
1160     {
           /* OP0 is the invariant and OP1 the induction variable.
              NOTE(review): the code is rewritten with
              invert_tree_comparison rather than by swapping the operands --
              confirm this is the intended normalization.  */
1161       if (code != NE_EXPR && code != EQ_EXPR)
1162         code = invert_tree_comparison (code, false);
1163       bound = iv0.base;
1164       base = iv1.base;
1165       if (tree_fits_shwi_p (iv1.step))
1166         step = iv1.step;
1167       else
1168         return false;
1169     }
1170   else
1171     {
           /* OP0 is the induction variable and OP1 the invariant; the
              comparison code is used as written.  */
1172       bound = iv1.base;
1173       base = iv0.base;
1174       if (tree_fits_shwi_p (iv0.step))
1175         step = iv0.step;
1176       else
1177         return false;
1178     }
1179
       /* Non-constant bound and base are reduced to an underlying SSA_NAME;
          give up when none can be found.  */
1180   if (TREE_CODE (bound) != INTEGER_CST)
1181     bound = get_base_value (bound);
1182   if (!bound)
1183     return false;
1184   if (TREE_CODE (base) != INTEGER_CST)
1185     base = get_base_value (base);
1186   if (!base)
1187     return false;
1188
1189   *loop_invariant = bound;
1190   *compare_code = code;
1191   *loop_step = step;
1192   *loop_iv_base = base;
1193   return true;
1194 }
1195
1196 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1197
1198 static bool
1199 expr_coherent_p (tree t1, tree t2)
1200 {
1201   gimple stmt;
1202   tree ssa_name_1 = NULL;
1203   tree ssa_name_2 = NULL;
1204
1205   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1206   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1207
1208   if (t1 == t2)
1209     return true;
1210
1211   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1212     return true;
1213   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1214     return false;
1215
1216   /* Check to see if t1 is expressed/defined with t2.  */
1217   stmt = SSA_NAME_DEF_STMT (t1);
1218   gcc_assert (stmt != NULL);
1219   if (is_gimple_assign (stmt))
1220     {
1221       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1222       if (ssa_name_1 && ssa_name_1 == t2)
1223         return true;
1224     }
1225
1226   /* Check to see if t2 is expressed/defined with t1.  */
1227   stmt = SSA_NAME_DEF_STMT (t2);
1228   gcc_assert (stmt != NULL);
1229   if (is_gimple_assign (stmt))
1230     {
1231       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1232       if (ssa_name_2 && ssa_name_2 == t1)
1233         return true;
1234     }
1235
1236   /* Compare if t1 and t2's def_stmts are identical.  */
1237   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1238     return true;
1239   else
1240     return false;
1241 }
1242
1243 /* Predict branch probability of BB when BB contains a branch that compares
1244    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1245    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1246
1247    E.g.
1248      for (int i = 0; i < bound; i++) {
1249        if (i < bound - 2)
1250          computation_1();
1251        else
1252          computation_2();
1253      }
1254
1255   In this loop, we will predict the branch inside the loop to be taken.  */
1256
1257 static void
1258 predict_iv_comparison (struct loop *loop, basic_block bb,
1259                        tree loop_bound_var,
1260                        tree loop_iv_base_var,
1261                        enum tree_code loop_bound_code,
1262                        int loop_bound_step)
1263 {
1264   gimple stmt;
1265   tree compare_var, compare_base;
1266   enum tree_code compare_code;
1267   tree compare_step_var;
1268   edge then_edge;
1269   edge_iterator ei;
1270
1271   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1272       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1273       || predicted_by_p (bb, PRED_LOOP_EXIT))
1274     return;
1275
1276   stmt = last_stmt (bb);
1277   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1278     return;
1279   if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1280                                             &compare_code,
1281                                             &compare_step_var,
1282                                             &compare_base))
1283     return;
1284
1285   /* Find the taken edge.  */
1286   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1287     if (then_edge->flags & EDGE_TRUE_VALUE)
1288       break;
1289
1290   /* When comparing an IV to a loop invariant, NE is more likely to be
1291      taken while EQ is more likely to be not-taken.  */
1292   if (compare_code == NE_EXPR)
1293     {
1294       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1295       return;
1296     }
1297   else if (compare_code == EQ_EXPR)
1298     {
1299       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1300       return;
1301     }
1302
1303   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1304     return;
1305
1306   /* If loop bound, base and compare bound are all constants, we can
1307      calculate the probability directly.  */
1308   if (tree_fits_shwi_p (loop_bound_var)
1309       && tree_fits_shwi_p (compare_var)
1310       && tree_fits_shwi_p (compare_base))
1311     {
1312       int probability;
1313       bool of, overflow = false;
1314       double_int mod, compare_count, tem, loop_count;
1315
1316       double_int loop_bound = tree_to_double_int (loop_bound_var);
1317       double_int compare_bound = tree_to_double_int (compare_var);
1318       double_int base = tree_to_double_int (compare_base);
1319       double_int compare_step = tree_to_double_int (compare_step_var);
1320
1321       /* (loop_bound - base) / compare_step */
1322       tem = loop_bound.sub_with_overflow (base, &of);
1323       overflow |= of;
1324       loop_count = tem.divmod_with_overflow (compare_step,
1325                                               0, TRUNC_DIV_EXPR,
1326                                               &mod, &of);
1327       overflow |= of;
1328
1329       if ((!compare_step.is_negative ())
1330           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1331         {
1332           /* (loop_bound - compare_bound) / compare_step */
1333           tem = loop_bound.sub_with_overflow (compare_bound, &of);
1334           overflow |= of;
1335           compare_count = tem.divmod_with_overflow (compare_step,
1336                                                      0, TRUNC_DIV_EXPR,
1337                                                      &mod, &of);
1338           overflow |= of;
1339         }
1340       else
1341         {
1342           /* (compare_bound - base) / compare_step */
1343           tem = compare_bound.sub_with_overflow (base, &of);
1344           overflow |= of;
1345           compare_count = tem.divmod_with_overflow (compare_step,
1346                                                      0, TRUNC_DIV_EXPR,
1347                                                      &mod, &of);
1348           overflow |= of;
1349         }
1350       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1351         ++compare_count;
1352       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1353         ++loop_count;
1354       if (compare_count.is_negative ())
1355         compare_count = double_int_zero;
1356       if (loop_count.is_negative ())
1357         loop_count = double_int_zero;
1358       if (loop_count.is_zero ())
1359         probability = 0;
1360       else if (compare_count.scmp (loop_count) == 1)
1361         probability = REG_BR_PROB_BASE;
1362       else
1363         {
1364           /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1365              could overflow, shift both loop_count and compare_count right
1366              a bit so that it doesn't overflow.  Note both counts are known not
1367              to be negative at this point.  */
1368           int clz_bits = clz_hwi (loop_count.high);
1369           gcc_assert (REG_BR_PROB_BASE < 32768);
1370           if (clz_bits < 16)
1371             {
1372               loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1373               compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1374             }
1375           tem = compare_count.mul_with_sign (double_int::from_shwi
1376                                             (REG_BR_PROB_BASE), true, &of);
1377           gcc_assert (!of);
1378           tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1379           probability = tem.to_uhwi ();
1380         }
1381
1382       if (!overflow)
1383         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1384
1385       return;
1386     }
1387
1388   if (expr_coherent_p (loop_bound_var, compare_var))
1389     {
1390       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1391           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1392         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1393       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1394                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1395         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1396       else if (loop_bound_code == NE_EXPR)
1397         {
1398           /* If the loop backedge condition is "(i != bound)", we do
1399              the comparison based on the step of IV:
1400              * step < 0 : backedge condition is like (i > bound)
1401              * step > 0 : backedge condition is like (i < bound)  */
1402           gcc_assert (loop_bound_step != 0);
1403           if (loop_bound_step > 0
1404               && (compare_code == LT_EXPR
1405                   || compare_code == LE_EXPR))
1406             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1407           else if (loop_bound_step < 0
1408                    && (compare_code == GT_EXPR
1409                        || compare_code == GE_EXPR))
1410             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1411           else
1412             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1413         }
1414       else
1415         /* The branch is predicted not-taken if loop_bound_code is
1416            opposite with compare_code.  */
1417         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1418     }
1419   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1420     {
1421       /* For cases like:
1422            for (i = s; i < h; i++)
1423              if (i > s + 2) ....
1424          The branch should be predicted taken.  */
1425       if (loop_bound_step > 0
1426           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1427         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1428       else if (loop_bound_step < 0
1429                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1430         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1431       else
1432         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1433     }
1434 }
1435
1436 /* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
1437    exits result from short-circuit conditions that will generate an
1438    if_tmp. E.g.:
1439
1440    if (foo() || global > 10)
1441      break;
1442
1443    This will be translated into:
1444
1445    BB3:
1446      loop header...
1447    BB4:
1448      if foo() goto BB6 else goto BB5
1449    BB5:
1450      if global > 10 goto BB6 else goto BB7
1451    BB6:
1452      goto BB7
1453    BB7:
1454      iftmp = (PHI 0(BB5), 1(BB6))
1455      if iftmp == 1 goto BB8 else goto BB3
1456    BB8:
1457      outside of the loop...
1458
1459    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1460    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1461    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1462    exits to predict them using PRED_LOOP_EXIT.

        Nothing is predicted unless the source block of EXIT_EDGE ends in a
        GIMPLE_COND that compares an SSA_NAME defined by a PHI against a
        constant 0 or 1.  */
1463
1464 static void
1465 predict_extra_loop_exits (edge exit_edge)
1466 {
1467   unsigned i;
1468   bool check_value_one;
1469   gimple phi_stmt;
1470   tree cmp_rhs, cmp_lhs;
1471   gimple cmp_stmt = last_stmt (exit_edge->src);
1472
1473   if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1474     return;
1475   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1476   cmp_lhs = gimple_cond_lhs (cmp_stmt);
       /* The right-hand side has to be the constant 0 or 1 ...  */
1477   if (!TREE_CONSTANT (cmp_rhs)
1478       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1479     return;
       /* ... and the left-hand side an SSA_NAME whose definition we can
          inspect.  */
1480   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1481     return;
1482
1483   /* If check_value_one is true, only the phi_args with value '1' will lead
1484      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1485      loop exit.  */
1486   check_value_one = (((integer_onep (cmp_rhs))
1487                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1488                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1489
1490   phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1491   if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1492     return;
1493
1494   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1495     {
1496       edge e1;
1497       edge_iterator ei;
1498       tree val = gimple_phi_arg_def (phi_stmt, i);
1499       edge e = gimple_phi_arg_edge (phi_stmt, i);
1500
           /* Only constant 0/1 arguments can be matched against the exit
              value.  */
1501       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1502         continue;
           /* Skip arguments carrying the value that does not exit.  */
1503       if ((check_value_one ^ integer_onep (val)) == 1)
1504         continue;
           /* When the argument's source block has more than (or fewer than)
              one successor, the argument edge itself is the extra exit.  */
1505       if (EDGE_COUNT (e->src->succs) != 1)
1506         {
1507           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1508           continue;
1509         }
1510
           /* Otherwise the source block only forwards control (like BB6
              above), so predict every edge entering it instead.  */
1511       FOR_EACH_EDGE (e1, ei, e->src->preds)
1512         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1513     }
1514 }
1515
1516 /* Predict edge probabilities by exploiting loop structure.

        For every loop that has at least one exit edge this
          - predicts each exit edge from the (computed or estimated) number
            of iterations, using PRED_LOOP_ITERATIONS or
            PRED_LOOP_ITERATIONS_GUESSED;
          - looks for a condition describing the loop bound;
          - walks the loop body applying the loop branch, loop exit and IV
            comparison heuristics to each block.  */
1517
1518 static void
1519 predict_loops (void)
1520 {
1521   struct loop *loop;
1522
1523   /* Try to predict out blocks in a loop that are not part of a
1524      natural loop.  */
1525   FOR_EACH_LOOP (loop, 0)
1526     {
1527       basic_block bb, *bbs;
1528       unsigned j, n_exits;
1529       vec<edge> exits;
1530       struct tree_niter_desc niter_desc;
1531       edge ex;
1532       struct nb_iter_bound *nb_iter;
1533       enum tree_code loop_bound_code = ERROR_MARK;
1534       tree loop_bound_step = NULL;
1535       tree loop_bound_var = NULL;
1536       tree loop_iv_base = NULL;
1537       gimple stmt = NULL;
1538
           /* Loops without any exit edge are skipped entirely.  */
1539       exits = get_loop_exit_edges (loop);
1540       n_exits = exits.length ();
1541       if (!n_exits)
1542         {
1543           exits.release ();
1544           continue;
1545         }
1546
1547       FOR_EACH_VEC_ELT (exits, j, ex)
1548         {
1549           tree niter = NULL;
1550           HOST_WIDE_INT nitercst;
1551           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1552           int probability;
1553           enum br_predictor predictor;
1554
1555           predict_extra_loop_exits (ex);
1556
               /* Prefer the analytically computed iteration count; fall back
                  to loop_niter_by_eval when it is missing or not constant.  */
1557           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1558             niter = niter_desc.niter;
1559           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1560             niter = loop_niter_by_eval (loop, ex);
1561
1562           if (TREE_CODE (niter) == INTEGER_CST)
1563             {
                   /* Use NITER + 1, capped at MAX.  */
1564               if (tree_fits_uhwi_p (niter)
1565                   && max
1566                   && compare_tree_int (niter, max - 1) == -1)
1567                 nitercst = tree_to_uhwi (niter) + 1;
1568               else
1569                 nitercst = max;
1570               predictor = PRED_LOOP_ITERATIONS;
1571             }
1572           /* If we have just one exit and we can derive some information about
1573              the number of iterations of the loop from the statements inside
1574              the loop, use it to predict this exit.  */
1575           else if (n_exits == 1)
1576             {
1577               nitercst = estimated_stmt_executions_int (loop);
                   /* A negative result means no estimate is available.  */
1578               if (nitercst < 0)
1579                 continue;
1580               if (nitercst > max)
1581                 nitercst = max;
1582
1583               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1584             }
1585           else
1586             continue;
1587
1588           /* If the prediction for number of iterations is zero, do not
1589              predict the exit edges.  */
1590           if (nitercst == 0)
1591             continue;
1592
               /* REG_BR_PROB_BASE / NITERCST, rounded to nearest.  */
1593           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1594           predict_edge (ex, predictor, probability);
1595         }
1596       exits.release ();
1597
1598       /* Find information about loop bound variables.  */
           /* The first GIMPLE_COND recorded in LOOP->bounds is used; failing
              that, the condition ending the loop header, if any.  */
1599       for (nb_iter = loop->bounds; nb_iter;
1600            nb_iter = nb_iter->next)
1601         if (nb_iter->stmt
1602             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1603           {
1604             stmt = nb_iter->stmt;
1605             break;
1606           }
1607       if (!stmt && last_stmt (loop->header)
1608           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1609         stmt = last_stmt (loop->header);
           /* The return value is not needed: on failure LOOP_BOUND_VAR stays
              NULL, which disables the IV comparison heuristic below.  */
1610       if (stmt)
1611         is_comparison_with_loop_invariant_p (stmt, loop,
1612                                              &loop_bound_var,
1613                                              &loop_bound_code,
1614                                              &loop_bound_step,
1615                                              &loop_iv_base);
1616
1617       bbs = get_loop_body (loop);
1618
1619       for (j = 0; j < loop->num_nodes; j++)
1620         {
1621           int header_found = 0;
1622           edge e;
1623           edge_iterator ei;
1624
1625           bb = bbs[j];
1626
1627           /* Bypass loop heuristics on continue statement.  These
1628              statements construct loops via "non-loop" constructs
1629              in the source language and are better to be handled
1630              separately.  */
1631           if (predicted_by_p (bb, PRED_CONTINUE))
1632             continue;
1633
1634           /* Loop branch heuristics - predict an edge back to a
1635              loop's head as taken.  */
1636           if (bb == loop->latch)
1637             {
1638               e = find_edge (loop->latch, loop->header);
1639               if (e)
1640                 {
1641                   header_found = 1;
1642                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1643                 }
1644             }
1645
1646           /* Loop exit heuristics - predict an edge exiting the loop if the
1647              conditional has no loop header successors as not taken.  */
1648           if (!header_found
1649               /* If we already used more reliable loop exit predictors, do not
1650                  bother with PRED_LOOP_EXIT.  */
1651               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1652               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1653             {
1654               /* For loop with many exits we don't want to predict all exits
1655                  with the pretty large probability, because if all exits are
1656                  considered in row, the loop would be predicted to iterate
1657                  almost never.  The code to divide probability by number of
1658                  exits is very rough.  It should compute the number of exits
1659                  taken in each path through function (not the overall number
1660                  of exits that might be a lot higher for loops with wide switch
1661                  statements in them) and compute n-th square root.
1662
1663                  We limit the minimal probability by 2% to avoid
1664                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1665                  as this was causing regression in perl benchmark containing such
1666                  a wide loop.  */
1667
1668               int probability = ((REG_BR_PROB_BASE
1669                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1670                                  / n_exits);
1671               if (probability < HITRATE (2))
1672                 probability = HITRATE (2);
                   /* An edge leaves the loop when it targets one of the fixed
                      blocks or a block outside LOOP.  */
1673               FOR_EACH_EDGE (e, ei, bb->succs)
1674                 if (e->dest->index < NUM_FIXED_BLOCKS
1675                     || !flow_bb_inside_loop_p (loop, e->dest))
1676                   predict_edge (e, PRED_LOOP_EXIT, probability);
1677             }
               /* LOOP_BOUND_VAR is only non-NULL when the loop bound condition
                  was recognized above, in which case LOOP_BOUND_STEP is set
                  and known to fit a signed HOST_WIDE_INT.  */
1678           if (loop_bound_var)
1679             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1680                                    loop_bound_code,
1681                                    tree_to_shwi (loop_bound_step));
1682         }
1683
1684       /* Free basic blocks from get_loop_body.  */
1685       free (bbs);
1686     }
1687 }
1688
1689 /* Attempt to predict probabilities of BB outgoing edges using local
1690    properties.

        Only the condition of the insn ending BB is looked at.  Nothing is
        predicted when that insn is rejected by can_predict_insn_p or when
        get_condition cannot recover a condition for it.  Otherwise either
        the pointer heuristic or, failing that, the opcode heuristic records
        a prediction on the insn.  */
1691 static void
1692 bb_estimate_probability_locally (basic_block bb)
1693 {
1694   rtx last_insn = BB_END (bb);
1695   rtx cond;
1696
1697   if (! can_predict_insn_p (last_insn))
1698     return;
1699   cond = get_condition (last_insn, NULL, false, false);
1700   if (! cond)
1701     return;
1702
1703   /* Try "pointer heuristic."
1704      A comparison ptr == 0 is predicted as false.
1705      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
       /* It applies as soon as either operand is a register marked as
          holding a pointer; codes other than EQ and NE then get no
          prediction at all.  */
1706   if (COMPARISON_P (cond)
1707       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1708           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1709     {
1710       if (GET_CODE (cond) == EQ)
1711         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1712       else if (GET_CODE (cond) == NE)
1713         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1714     }
1715   else
1716
1717   /* Try "opcode heuristic."
1718      EQ tests are usually false and NE tests are usually true. Also,
1719      most quantities are positive, so we can make the appropriate guesses
1720      about signed comparisons against zero.  */
1721     switch (GET_CODE (cond))
1722       {
1723       case CONST_INT:
1724         /* Unconditional branch.  */
1725         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1726                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1727         break;
1728
1729       case EQ:
1730       case UNEQ:
1731         /* Floating point comparisons appear to behave in a very
1732            unpredictable way because of special role of = tests in
1733            FP code.  */
1734         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1735           ;
1736         /* Comparisons with 0 are often used for booleans and there is
1737            nothing useful to predict about them.  */
1738         else if (XEXP (cond, 1) == const0_rtx
1739                  || XEXP (cond, 0) == const0_rtx)
1740           ;
1741         else
1742           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1743         break;
1744
1745       case NE:
1746       case LTGT:
1747         /* Floating point comparisons appear to behave in a very
1748            unpredictable way because of special role of = tests in
1749            FP code.  */
1750         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1751           ;
1752         /* Comparisons with 0 are often used for booleans and there is
1753            nothing useful to predict about them.  */
1754         else if (XEXP (cond, 1) == const0_rtx
1755                  || XEXP (cond, 0) == const0_rtx)
1756           ;
1757         else
1758           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1759         break;
1760
1761       case ORDERED:
1762         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1763         break;
1764
1765       case UNORDERED:
1766         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1767         break;
1768
1769       case LE:
1770       case LT:
             /* "Most quantities are positive": a value is guessed not to be
                below (or at) 0, 1 or -1.  Other right-hand sides get no
                prediction.  */
1771         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1772             || XEXP (cond, 1) == constm1_rtx)
1773           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1774         break;
1775
1776       case GE:
1777       case GT:
             /* Mirror image of the LE/LT case above.  */
1778         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1779             || XEXP (cond, 1) == constm1_rtx)
1780           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1781         break;
1782
1783       default:
1784         break;
1785       }
1786 }
1787
1788 /* Set edge->probability for each successor edge of BB.

        The local heuristics are run on BB first, then
        combine_predictions_for_insn is called on BB's last insn --
        presumably to fold the recorded predictions into the edge
        probabilities; confirm against its definition.  */
1789 void
1790 guess_outgoing_edge_probabilities (basic_block bb)
1791 {
1792   bb_estimate_probability_locally (bb);
1793   combine_predictions_for_insn (BB_END (bb), bb);
1794 }
1795 \f
1796 static tree expr_expected_value (tree, bitmap);
1797
1798 /* Helper function for expr_expected_value.  */
1799
1800 static tree
1801 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1802                        tree op1, bitmap visited)
1803 {
1804   gimple def;
1805
1806   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1807     {
1808       if (TREE_CONSTANT (op0))
1809         return op0;
1810
1811       if (code != SSA_NAME)
1812         return NULL_TREE;
1813
1814       def = SSA_NAME_DEF_STMT (op0);
1815
1816       /* If we were already here, break the infinite cycle.  */
1817       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1818         return NULL;
1819
1820       if (gimple_code (def) == GIMPLE_PHI)
1821         {
1822           /* All the arguments of the PHI node must have the same constant
1823              length.  */
1824           int i, n = gimple_phi_num_args (def);
1825           tree val = NULL, new_val;
1826
1827           for (i = 0; i < n; i++)
1828             {
1829               tree arg = PHI_ARG_DEF (def, i);
1830
1831               /* If this PHI has itself as an argument, we cannot
1832                  determine the string length of this argument.  However,
1833                  if we can find an expected constant value for the other
1834                  PHI args then we can still be sure that this is
1835                  likely a constant.  So be optimistic and just
1836                  continue with the next argument.  */
1837               if (arg == PHI_RESULT (def))
1838                 continue;
1839
1840               new_val = expr_expected_value (arg, visited);
1841               if (!new_val)
1842                 return NULL;
1843               if (!val)
1844                 val = new_val;
1845               else if (!operand_equal_p (val, new_val, false))
1846                 return NULL;
1847             }
1848           return val;
1849         }
1850       if (is_gimple_assign (def))
1851         {
1852           if (gimple_assign_lhs (def) != op0)
1853             return NULL;
1854
1855           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1856                                         gimple_assign_rhs1 (def),
1857                                         gimple_assign_rhs_code (def),
1858                                         gimple_assign_rhs2 (def),
1859                                         visited);
1860         }
1861
1862       if (is_gimple_call (def))
1863         {
1864           tree decl = gimple_call_fndecl (def);
1865           if (!decl)
1866             return NULL;
1867           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1868             switch (DECL_FUNCTION_CODE (decl))
1869               {
1870               case BUILT_IN_EXPECT:
1871                 {
1872                   tree val;
1873                   if (gimple_call_num_args (def) != 2)
1874                     return NULL;
1875                   val = gimple_call_arg (def, 0);
1876                   if (TREE_CONSTANT (val))
1877                     return val;
1878                   return gimple_call_arg (def, 1);
1879                 }
1880
1881               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1882               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1883               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1884               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1885               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1886               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1887               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1888               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1889               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1890               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1891               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1892               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1893               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1894                 /* Assume that any given atomic operation has low contention,
1895                    and thus the compare-and-swap operation succeeds.  */
1896                 return boolean_true_node;
1897             }
1898         }
1899
1900       return NULL;
1901     }
1902
1903   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1904     {
1905       tree res;
1906       op0 = expr_expected_value (op0, visited);
1907       if (!op0)
1908         return NULL;
1909       op1 = expr_expected_value (op1, visited);
1910       if (!op1)
1911         return NULL;
1912       res = fold_build2 (code, type, op0, op1);
1913       if (TREE_CONSTANT (res))
1914         return res;
1915       return NULL;
1916     }
1917   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1918     {
1919       tree res;
1920       op0 = expr_expected_value (op0, visited);
1921       if (!op0)
1922         return NULL;
1923       res = fold_build1 (code, type, op0);
1924       if (TREE_CONSTANT (res))
1925         return res;
1926       return NULL;
1927     }
1928   return NULL;
1929 }
1930
1931 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1932    The function is used by builtin_expect branch predictor so the evidence
1933    must come from this construct and additional possible constant folding.
1934
1935    We may want to implement more involved value guess (such as value range
1936    propagation based prediction), but such tricks shall go to new
1937    implementation.  */
1938
1939 static tree
1940 expr_expected_value (tree expr, bitmap visited)
1941 {
1942   enum tree_code code;
1943   tree op0, op1;
1944
1945   if (TREE_CONSTANT (expr))
1946     return expr;
1947
1948   extract_ops_from_tree (expr, &code, &op0, &op1);
1949   return expr_expected_value_1 (TREE_TYPE (expr),
1950                                 op0, code, op1, visited);
1951 }
1952
1953 \f
1954 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1955    we no longer need.  */
1956 static unsigned int
1957 strip_predict_hints (void)
1958 {
1959   basic_block bb;
1960   gimple ass_stmt;
1961   tree var;
1962
1963   FOR_EACH_BB (bb)
1964     {
1965       gimple_stmt_iterator bi;
1966       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1967         {
1968           gimple stmt = gsi_stmt (bi);
1969
1970           if (gimple_code (stmt) == GIMPLE_PREDICT)
1971             {
1972               gsi_remove (&bi, true);
1973               continue;
1974             }
1975           else if (gimple_code (stmt) == GIMPLE_CALL)
1976             {
1977               tree fndecl = gimple_call_fndecl (stmt);
1978
1979               if (fndecl
1980                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1981                   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1982                   && gimple_call_num_args (stmt) == 2)
1983                 {
1984                   var = gimple_call_lhs (stmt);
1985                   if (var)
1986                     {
1987                       ass_stmt
1988                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1989                       gsi_replace (&bi, ass_stmt, true);
1990                     }
1991                   else
1992                     {
1993                       gsi_remove (&bi, true);
1994                       continue;
1995                     }
1996                 }
1997             }
1998           gsi_next (&bi);
1999         }
2000     }
2001   return 0;
2002 }
2003 \f
2004 /* Predict using opcode of the last statement in basic block.  */
2005 static void
2006 tree_predict_by_opcode (basic_block bb)
2007 {
2008   gimple stmt = last_stmt (bb);
2009   edge then_edge;
2010   tree op0, op1;
2011   tree type;
2012   tree val;
2013   enum tree_code cmp;
2014   bitmap visited;
2015   edge_iterator ei;
2016
2017   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2018     return;
2019   FOR_EACH_EDGE (then_edge, ei, bb->succs)
2020     if (then_edge->flags & EDGE_TRUE_VALUE)
2021       break;
2022   op0 = gimple_cond_lhs (stmt);
2023   op1 = gimple_cond_rhs (stmt);
2024   cmp = gimple_cond_code (stmt);
2025   type = TREE_TYPE (op0);
2026   visited = BITMAP_ALLOC (NULL);
2027   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
2028   BITMAP_FREE (visited);
2029   if (val)
2030     {
2031       int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2032
2033       gcc_assert (percent >= 0 && percent <= 100);
2034       if (integer_zerop (val))
2035         percent = 100 - percent;
2036       predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2037     }
2038   /* Try "pointer heuristic."
2039      A comparison ptr == 0 is predicted as false.
2040      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2041   if (POINTER_TYPE_P (type))
2042     {
2043       if (cmp == EQ_EXPR)
2044         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2045       else if (cmp == NE_EXPR)
2046         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2047     }
2048   else
2049
2050   /* Try "opcode heuristic."
2051      EQ tests are usually false and NE tests are usually true. Also,
2052      most quantities are positive, so we can make the appropriate guesses
2053      about signed comparisons against zero.  */
2054     switch (cmp)
2055       {
2056       case EQ_EXPR:
2057       case UNEQ_EXPR:
2058         /* Floating point comparisons appears to behave in a very
2059            unpredictable way because of special role of = tests in
2060            FP code.  */
2061         if (FLOAT_TYPE_P (type))
2062           ;
2063         /* Comparisons with 0 are often used for booleans and there is
2064            nothing useful to predict about them.  */
2065         else if (integer_zerop (op0) || integer_zerop (op1))
2066           ;
2067         else
2068           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2069         break;
2070
2071       case NE_EXPR:
2072       case LTGT_EXPR:
2073         /* Floating point comparisons appears to behave in a very
2074            unpredictable way because of special role of = tests in
2075            FP code.  */
2076         if (FLOAT_TYPE_P (type))
2077           ;
2078         /* Comparisons with 0 are often used for booleans and there is
2079            nothing useful to predict about them.  */
2080         else if (integer_zerop (op0)
2081                  || integer_zerop (op1))
2082           ;
2083         else
2084           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2085         break;
2086
2087       case ORDERED_EXPR:
2088         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2089         break;
2090
2091       case UNORDERED_EXPR:
2092         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2093         break;
2094
2095       case LE_EXPR:
2096       case LT_EXPR:
2097         if (integer_zerop (op1)
2098             || integer_onep (op1)
2099             || integer_all_onesp (op1)
2100             || real_zerop (op1)
2101             || real_onep (op1)
2102             || real_minus_onep (op1))
2103           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2104         break;
2105
2106       case GE_EXPR:
2107       case GT_EXPR:
2108         if (integer_zerop (op1)
2109             || integer_onep (op1)
2110             || integer_all_onesp (op1)
2111             || real_zerop (op1)
2112             || real_onep (op1)
2113             || real_minus_onep (op1))
2114           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2115         break;
2116
2117       default:
2118         break;
2119       }
2120 }
2121
2122 /* Try to guess whether the value of return means error code.  */
2123
2124 static enum br_predictor
2125 return_prediction (tree val, enum prediction *prediction)
2126 {
2127   /* VOID.  */
2128   if (!val)
2129     return PRED_NO_PREDICTION;
2130   /* Different heuristics for pointers and scalars.  */
2131   if (POINTER_TYPE_P (TREE_TYPE (val)))
2132     {
2133       /* NULL is usually not returned.  */
2134       if (integer_zerop (val))
2135         {
2136           *prediction = NOT_TAKEN;
2137           return PRED_NULL_RETURN;
2138         }
2139     }
2140   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2141     {
2142       /* Negative return values are often used to indicate
2143          errors.  */
2144       if (TREE_CODE (val) == INTEGER_CST
2145           && tree_int_cst_sgn (val) < 0)
2146         {
2147           *prediction = NOT_TAKEN;
2148           return PRED_NEGATIVE_RETURN;
2149         }
2150       /* Constant return values seems to be commonly taken.
2151          Zero/one often represent booleans so exclude them from the
2152          heuristics.  */
2153       if (TREE_CONSTANT (val)
2154           && (!integer_zerop (val) && !integer_onep (val)))
2155         {
2156           *prediction = TAKEN;
2157           return PRED_CONST_RETURN;
2158         }
2159     }
2160   return PRED_NO_PREDICTION;
2161 }
2162
2163 /* Find the basic block with return expression and look up for possible
2164    return value trying to apply RETURN_PREDICTION heuristics.  */
2165 static void
2166 apply_return_prediction (void)
2167 {
2168   gimple return_stmt = NULL;
2169   tree return_val;
2170   edge e;
2171   gimple phi;
2172   int phi_num_args, i;
2173   enum br_predictor pred;
2174   enum prediction direction;
2175   edge_iterator ei;
2176
2177   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2178     {
2179       return_stmt = last_stmt (e->src);
2180       if (return_stmt
2181           && gimple_code (return_stmt) == GIMPLE_RETURN)
2182         break;
2183     }
2184   if (!e)
2185     return;
2186   return_val = gimple_return_retval (return_stmt);
2187   if (!return_val)
2188     return;
2189   if (TREE_CODE (return_val) != SSA_NAME
2190       || !SSA_NAME_DEF_STMT (return_val)
2191       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2192     return;
2193   phi = SSA_NAME_DEF_STMT (return_val);
2194   phi_num_args = gimple_phi_num_args (phi);
2195   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2196
2197   /* Avoid the degenerate case where all return values form the function
2198      belongs to same category (ie they are all positive constants)
2199      so we can hardly say something about them.  */
2200   for (i = 1; i < phi_num_args; i++)
2201     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2202       break;
2203   if (i != phi_num_args)
2204     for (i = 0; i < phi_num_args; i++)
2205       {
2206         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2207         if (pred != PRED_NO_PREDICTION)
2208           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2209                                          direction);
2210       }
2211 }
2212
2213 /* Look for basic block that contains unlikely to happen events
2214    (such as noreturn calls) and mark all paths leading to execution
2215    of this basic blocks as unlikely.  */
2216
2217 static void
2218 tree_bb_level_predictions (void)
2219 {
2220   basic_block bb;
2221   bool has_return_edges = false;
2222   edge e;
2223   edge_iterator ei;
2224
2225   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2226     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2227       {
2228         has_return_edges = true;
2229         break;
2230       }
2231
2232   apply_return_prediction ();
2233
2234   FOR_EACH_BB (bb)
2235     {
2236       gimple_stmt_iterator gsi;
2237
2238       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2239         {
2240           gimple stmt = gsi_stmt (gsi);
2241           tree decl;
2242
2243           if (is_gimple_call (stmt))
2244             {
2245               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2246                   && has_return_edges)
2247                 predict_paths_leading_to (bb, PRED_NORETURN,
2248                                           NOT_TAKEN);
2249               decl = gimple_call_fndecl (stmt);
2250               if (decl
2251                   && lookup_attribute ("cold",
2252                                        DECL_ATTRIBUTES (decl)))
2253                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2254                                           NOT_TAKEN);
2255             }
2256           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2257             {
2258               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2259                                         gimple_predict_outcome (stmt));
2260               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2261                  hints to callers.  */
2262             }
2263         }
2264     }
2265 }
2266
#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse.  Asserts that the slot VALUE points
   to is empty, i.e. holds a null pointer.  KEY and DATA are unused.
   Always returns false.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
                 void *data ATTRIBUTE_UNUSED)
{
  void *slot = *value;

  gcc_assert (slot == NULL);
  return false;
}
#endif
2280
2281 /* Predict branch probabilities and estimate profile for basic block BB.  */
2282
2283 static void
2284 tree_estimate_probability_bb (basic_block bb)
2285 {
2286   edge e;
2287   edge_iterator ei;
2288   gimple last;
2289
2290   FOR_EACH_EDGE (e, ei, bb->succs)
2291     {
2292       /* Predict edges to user labels with attributes.  */
2293       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2294         {
2295           gimple_stmt_iterator gi;
2296           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2297             {
2298               gimple stmt = gsi_stmt (gi);
2299               tree decl;
2300
2301               if (gimple_code (stmt) != GIMPLE_LABEL)
2302                 break;
2303               decl = gimple_label_label (stmt);
2304               if (DECL_ARTIFICIAL (decl))
2305                 continue;
2306
2307               /* Finally, we have a user-defined label.  */
2308               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2309                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2310               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2311                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2312             }
2313         }
2314
2315       /* Predict early returns to be probable, as we've already taken
2316          care for error returns and other cases are often used for
2317          fast paths through function.
2318
2319          Since we've already removed the return statements, we are
2320          looking for CFG like:
2321
2322          if (conditional)
2323          {
2324          ..
2325          goto return_block
2326          }
2327          some other blocks
2328          return_block:
2329          return_stmt.  */
2330       if (e->dest != bb->next_bb
2331           && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2332           && single_succ_p (e->dest)
2333           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2334           && (last = last_stmt (e->dest)) != NULL
2335           && gimple_code (last) == GIMPLE_RETURN)
2336         {
2337           edge e1;
2338           edge_iterator ei1;
2339
2340           if (single_succ_p (bb))
2341             {
2342               FOR_EACH_EDGE (e1, ei1, bb->preds)
2343                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2344                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2345                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2346                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2347             }
2348           else
2349             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2350                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2351                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2352               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2353         }
2354
2355       /* Look for block we are guarding (ie we dominate it,
2356          but it doesn't postdominate us).  */
2357       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2358           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2359           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2360         {
2361           gimple_stmt_iterator bi;
2362
2363           /* The call heuristic claims that a guarded function call
2364              is improbable.  This is because such calls are often used
2365              to signal exceptional situations such as printing error
2366              messages.  */
2367           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2368                gsi_next (&bi))
2369             {
2370               gimple stmt = gsi_stmt (bi);
2371               if (is_gimple_call (stmt)
2372                   /* Constant and pure calls are hardly used to signalize
2373                      something exceptional.  */
2374                   && gimple_has_side_effects (stmt))
2375                 {
2376                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2377                   break;
2378                 }
2379             }
2380         }
2381     }
2382   tree_predict_by_opcode (bb);
2383 }
2384
2385 /* Predict branch probabilities and estimate profile of the tree CFG.
2386    This function can be called from the loop optimizers to recompute
2387    the profile information.  */
2388
2389 void
2390 tree_estimate_probability (void)
2391 {
2392   basic_block bb;
2393
2394   add_noreturn_fake_exit_edges ();
2395   connect_infinite_loops_to_exit ();
2396   /* We use loop_niter_by_eval, which requires that the loops have
2397      preheaders.  */
2398   create_preheaders (CP_SIMPLE_PREHEADERS);
2399   calculate_dominance_info (CDI_POST_DOMINATORS);
2400
2401   bb_predictions = pointer_map_create ();
2402   tree_bb_level_predictions ();
2403   record_loop_exits ();
2404
2405   if (number_of_loops (cfun) > 1)
2406     predict_loops ();
2407
2408   FOR_EACH_BB (bb)
2409     tree_estimate_probability_bb (bb);
2410
2411   FOR_EACH_BB (bb)
2412     combine_predictions_for_bb (bb);
2413
2414 #ifdef ENABLE_CHECKING
2415   pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2416 #endif
2417   pointer_map_destroy (bb_predictions);
2418   bb_predictions = NULL;
2419
2420   estimate_bb_frequencies (false);
2421   free_dominance_info (CDI_POST_DOMINATORS);
2422   remove_fake_exit_edges ();
2423 }
2424
2425 /* Predict branch probabilities and estimate profile of the tree CFG.
2426    This is the driver function for PASS_PROFILE.  */
2427
2428 static unsigned int
2429 tree_estimate_probability_driver (void)
2430 {
2431   unsigned nb_loops;
2432
2433   loop_optimizer_init (LOOPS_NORMAL);
2434   if (dump_file && (dump_flags & TDF_DETAILS))
2435     flow_loops_dump (dump_file, NULL, 0);
2436
2437   mark_irreducible_loops ();
2438
2439   nb_loops = number_of_loops (cfun);
2440   if (nb_loops > 1)
2441     scev_initialize ();
2442
2443   tree_estimate_probability ();
2444
2445   if (nb_loops > 1)
2446     scev_finalize ();
2447
2448   loop_optimizer_finalize ();
2449   if (dump_file && (dump_flags & TDF_DETAILS))
2450     gimple_dump_cfg (dump_file, dump_flags);
2451   if (profile_status == PROFILE_ABSENT)
2452     profile_status = PROFILE_GUESSED;
2453   return 0;
2454 }
2455 \f
2456 /* Predict edges to successors of CUR whose sources are not postdominated by
2457    BB by PRED and recurse to all postdominators.  */
2458
2459 static void
2460 predict_paths_for_bb (basic_block cur, basic_block bb,
2461                       enum br_predictor pred,
2462                       enum prediction taken,
2463                       bitmap visited)
2464 {
2465   edge e;
2466   edge_iterator ei;
2467   basic_block son;
2468
2469   /* We are looking for all edges forming edge cut induced by
2470      set of all blocks postdominated by BB.  */
2471   FOR_EACH_EDGE (e, ei, cur->preds)
2472     if (e->src->index >= NUM_FIXED_BLOCKS
2473         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2474     {
2475       edge e2;
2476       edge_iterator ei2;
2477       bool found = false;
2478
2479       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2480       if (e->flags & (EDGE_EH | EDGE_FAKE))
2481         continue;
2482       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2483
2484       /* See if there is an edge from e->src that is not abnormal
2485          and does not lead to BB.  */
2486       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2487         if (e2 != e
2488             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2489             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2490           {
2491             found = true;
2492             break;
2493           }
2494
2495       /* If there is non-abnormal path leaving e->src, predict edge
2496          using predictor.  Otherwise we need to look for paths
2497          leading to e->src.
2498
2499          The second may lead to infinite loop in the case we are predicitng
2500          regions that are only reachable by abnormal edges.  We simply
2501          prevent visiting given BB twice.  */
2502       if (found)
2503         predict_edge_def (e, pred, taken);
2504       else if (bitmap_set_bit (visited, e->src->index))
2505         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2506     }
2507   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2508        son;
2509        son = next_dom_son (CDI_POST_DOMINATORS, son))
2510     predict_paths_for_bb (son, bb, pred, taken, visited);
2511 }
2512
2513 /* Sets branch probabilities according to PREDiction and
2514    FLAGS.  */
2515
2516 static void
2517 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2518                           enum prediction taken)
2519 {
2520   bitmap visited = BITMAP_ALLOC (NULL);
2521   predict_paths_for_bb (bb, bb, pred, taken, visited);
2522   BITMAP_FREE (visited);
2523 }
2524
2525 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2526
2527 static void
2528 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2529                                enum prediction taken)
2530 {
2531   bool has_nonloop_edge = false;
2532   edge_iterator ei;
2533   edge e2;
2534
2535   basic_block bb = e->src;
2536   FOR_EACH_EDGE (e2, ei, bb->succs)
2537     if (e2->dest != e->src && e2->dest != e->dest
2538         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2539         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2540       {
2541         has_nonloop_edge = true;
2542         break;
2543       }
2544   if (!has_nonloop_edge)
2545     {
2546       bitmap visited = BITMAP_ALLOC (NULL);
2547       predict_paths_for_bb (bb, bb, pred, taken, visited);
2548       BITMAP_FREE (visited);
2549     }
2550   else
2551     predict_edge_def (e, pred, taken);
2552 }
2553 \f
2554 /* This is used to carry information about basic blocks.  It is
2555    attached to the AUX field of the standard CFG block.  */
2556
2557 typedef struct block_info_def
2558 {
2559   /* Estimated frequency of execution of basic_block.  */
2560   sreal frequency;
2561
2562   /* To keep queue of basic blocks to process.  */
2563   basic_block next;
2564
2565   /* Number of predecessors we need to visit first.  */
2566   int npredecessors;
2567 } *block_info;
2568
2569 /* Similar information for edges.  */
2570 typedef struct edge_info_def
2571 {
2572   /* In case edge is a loopback edge, the probability edge will be reached
2573      in case header is.  Estimated number of iterations of the loop can be
2574      then computed as 1 / (1 - back_edge_prob).  */
2575   sreal back_edge_prob;
2576   /* True if the edge is a loopback edge in the natural loop.  */
2577   unsigned int back_edge:1;
2578 } *edge_info;
2579
2580 #define BLOCK_INFO(B)   ((block_info) (B)->aux)
2581 #define EDGE_INFO(E)    ((edge_info) (E)->aux)
2582
2583 /* Helper function for estimate_bb_frequencies.
2584    Propagate the frequencies in blocks marked in
2585    TOVISIT, starting in HEAD.  */
2586
2587 static void
2588 propagate_freq (basic_block head, bitmap tovisit)
2589 {
2590   basic_block bb;
2591   basic_block last;
2592   unsigned i;
2593   edge e;
2594   basic_block nextbb;
2595   bitmap_iterator bi;
2596
2597   /* For each basic block we need to visit count number of his predecessors
2598      we need to visit first.  */
2599   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2600     {
2601       edge_iterator ei;
2602       int count = 0;
2603
2604       bb = BASIC_BLOCK (i);
2605
2606       FOR_EACH_EDGE (e, ei, bb->preds)
2607         {
2608           bool visit = bitmap_bit_p (tovisit, e->src->index);
2609
2610           if (visit && !(e->flags & EDGE_DFS_BACK))
2611             count++;
2612           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2613             fprintf (dump_file,
2614                      "Irreducible region hit, ignoring edge to %i->%i\n",
2615                      e->src->index, bb->index);
2616         }
2617       BLOCK_INFO (bb)->npredecessors = count;
2618       /* When function never returns, we will never process exit block.  */
2619       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2620         bb->count = bb->frequency = 0;
2621     }
2622
2623   memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2624   last = head;
2625   for (bb = head; bb; bb = nextbb)
2626     {
2627       edge_iterator ei;
2628       sreal cyclic_probability, frequency;
2629
2630       memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2631       memcpy (&frequency, &real_zero, sizeof (real_zero));
2632
2633       nextbb = BLOCK_INFO (bb)->next;
2634       BLOCK_INFO (bb)->next = NULL;
2635
2636       /* Compute frequency of basic block.  */
2637       if (bb != head)
2638         {
2639 #ifdef ENABLE_CHECKING
2640           FOR_EACH_EDGE (e, ei, bb->preds)
2641             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2642                         || (e->flags & EDGE_DFS_BACK));
2643 #endif
2644
2645           FOR_EACH_EDGE (e, ei, bb->preds)
2646             if (EDGE_INFO (e)->back_edge)
2647               {
2648                 sreal_add (&cyclic_probability, &cyclic_probability,
2649                            &EDGE_INFO (e)->back_edge_prob);
2650               }
2651             else if (!(e->flags & EDGE_DFS_BACK))
2652               {
2653                 sreal tmp;
2654
2655                 /*  frequency += (e->probability
2656                                   * BLOCK_INFO (e->src)->frequency /
2657                                   REG_BR_PROB_BASE);  */
2658
2659                 sreal_init (&tmp, e->probability, 0);
2660                 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2661                 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2662                 sreal_add (&frequency, &frequency, &tmp);
2663               }
2664
2665           if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2666             {
2667               memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2668                       sizeof (frequency));
2669             }
2670           else
2671             {
2672               if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2673                 {
2674                   memcpy (&cyclic_probability, &real_almost_one,
2675                           sizeof (real_almost_one));
2676                 }
2677
2678               /* BLOCK_INFO (bb)->frequency = frequency
2679                                               / (1 - cyclic_probability) */
2680
2681               sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2682               sreal_div (&BLOCK_INFO (bb)->frequency,
2683                          &frequency, &cyclic_probability);
2684             }
2685         }
2686
2687       bitmap_clear_bit (tovisit, bb->index);
2688
2689       e = find_edge (bb, head);
2690       if (e)
2691         {
2692           sreal tmp;
2693
2694           /* EDGE_INFO (e)->back_edge_prob
2695              = ((e->probability * BLOCK_INFO (bb)->frequency)
2696              / REG_BR_PROB_BASE); */
2697
2698           sreal_init (&tmp, e->probability, 0);
2699           sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2700           sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2701                      &tmp, &real_inv_br_prob_base);
2702         }
2703
2704       /* Propagate to successor blocks.  */
2705       FOR_EACH_EDGE (e, ei, bb->succs)
2706         if (!(e->flags & EDGE_DFS_BACK)
2707             && BLOCK_INFO (e->dest)->npredecessors)
2708           {
2709             BLOCK_INFO (e->dest)->npredecessors--;
2710             if (!BLOCK_INFO (e->dest)->npredecessors)
2711               {
2712                 if (!nextbb)
2713                   nextbb = e->dest;
2714                 else
2715                   BLOCK_INFO (last)->next = e->dest;
2716
2717                 last = e->dest;
2718               }
2719           }
2720     }
2721 }
2722
2723 /* Estimate frequencies in loops at same nest level.  */
2724
2725 static void
2726 estimate_loops_at_level (struct loop *first_loop)
2727 {
2728   struct loop *loop;
2729
2730   for (loop = first_loop; loop; loop = loop->next)
2731     {
2732       edge e;
2733       basic_block *bbs;
2734       unsigned i;
2735       bitmap tovisit = BITMAP_ALLOC (NULL);
2736
2737       estimate_loops_at_level (loop->inner);
2738
2739       /* Find current loop back edge and mark it.  */
2740       e = loop_latch_edge (loop);
2741       EDGE_INFO (e)->back_edge = 1;
2742
2743       bbs = get_loop_body (loop);
2744       for (i = 0; i < loop->num_nodes; i++)
2745         bitmap_set_bit (tovisit, bbs[i]->index);
2746       free (bbs);
2747       propagate_freq (loop->header, tovisit);
2748       BITMAP_FREE (tovisit);
2749     }
2750 }
2751
2752 /* Propagates frequencies through structure of loops.  */
2753
2754 static void
2755 estimate_loops (void)
2756 {
2757   bitmap tovisit = BITMAP_ALLOC (NULL);
2758   basic_block bb;
2759
2760   /* Start by estimating the frequencies in the loops.  */
2761   if (number_of_loops (cfun) > 1)
2762     estimate_loops_at_level (current_loops->tree_root->inner);
2763
2764   /* Now propagate the frequencies through all the blocks.  */
2765   FOR_ALL_BB (bb)
2766     {
2767       bitmap_set_bit (tovisit, bb->index);
2768     }
2769   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2770   BITMAP_FREE (tovisit);
2771 }
2772
2773 /* Drop the profile for NODE to guessed, and update its frequency based on
2774    whether it is expected to be hot given the CALL_COUNT.  */
2775
2776 static void
2777 drop_profile (struct cgraph_node *node, gcov_type call_count)
2778 {
2779   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2780   /* In the case where this was called by another function with a
2781      dropped profile, call_count will be 0. Since there are no
2782      non-zero call counts to this function, we don't know for sure
2783      whether it is hot, and therefore it will be marked normal below.  */
2784   bool hot = maybe_hot_count_p (NULL, call_count);
2785
2786   if (dump_file)
2787     fprintf (dump_file,
2788              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2789              node->name (), node->order,
2790              hot ? "Function is hot" : "Function is normal");
2791   /* We only expect to miss profiles for functions that are reached
2792      via non-zero call edges in cases where the function may have
2793      been linked from another module or library (COMDATs and extern
2794      templates). See the comments below for handle_missing_profiles.
2795      Also, only warn in cases where the missing counts exceed the
2796      number of training runs. In certain cases with an execv followed
2797      by a no-return call the profile for the no-return call is not
2798      dumped and there can be a mismatch.  */
2799   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2800       && call_count > profile_info->runs)
2801     {
2802       if (flag_profile_correction)
2803         {
2804           if (dump_file)
2805             fprintf (dump_file,
2806                      "Missing counts for called function %s/%i\n",
2807                      node->name (), node->order);
2808         }
2809       else
2810         warning (0, "Missing counts for called function %s/%i",
2811                  node->name (), node->order);
2812     }
2813
2814   profile_status_for_function (fn)
2815       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2816   node->frequency
2817       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2818 }
2819
2820 /* In the case of COMDAT routines, multiple object files will contain the same
2821    function and the linker will select one for the binary. In that case
2822    all the other copies from the profile instrument binary will be missing
2823    profile counts. Look for cases where this happened, due to non-zero
2824    call counts going to 0-count functions, and drop the profile to guessed
2825    so that we can use the estimated probabilities and avoid optimizing only
2826    for size.
2827
2828    The other case where the profile may be missing is when the routine
2829    is not going to be emitted to the object file, e.g. for "extern template"
2830    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2831    all other cases of non-zero calls to 0-count functions.  */
2832
2833 void
2834 handle_missing_profiles (void)
2835 {
2836   struct cgraph_node *node;
2837   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2838   vec<struct cgraph_node *> worklist;
2839   worklist.create (64);
2840
2841   /* See if 0 count function has non-0 count callers.  In this case we
2842      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2843   FOR_EACH_DEFINED_FUNCTION (node)
2844     {
2845       struct cgraph_edge *e;
2846       gcov_type call_count = 0;
2847       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2848
2849       if (node->count)
2850         continue;
2851       for (e = node->callers; e; e = e->next_caller)
2852         call_count += e->count;
2853       if (call_count
2854           && fn && fn->cfg
2855           && (call_count * unlikely_count_fraction >= profile_info->runs))
2856         {
2857           drop_profile (node, call_count);
2858           worklist.safe_push (node);
2859         }
2860     }
2861
2862   /* Propagate the profile dropping to other 0-count COMDATs that are
2863      potentially called by COMDATs we already dropped the profile on.  */
2864   while (worklist.length () > 0)
2865     {
2866       struct cgraph_edge *e;
2867
2868       node = worklist.pop ();
2869       for (e = node->callees; e; e = e->next_caller)
2870         {
2871           struct cgraph_node *callee = e->callee;
2872           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2873
2874           if (callee->count > 0)
2875             continue;
2876           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2877               && profile_status_for_function (fn) == PROFILE_READ)
2878             {
2879               drop_profile (node, 0);
2880               worklist.safe_push (callee);
2881             }
2882         }
2883     }
2884   worklist.release ();
2885 }
2886
2887 /* Convert counts measured by profile driven feedback to frequencies.
2888    Return nonzero iff there was any nonzero execution count.  */
2889
2890 int
2891 counts_to_freqs (void)
2892 {
2893   gcov_type count_max, true_count_max = 0;
2894   basic_block bb;
2895
2896   /* Don't overwrite the estimated frequencies when the profile for
2897      the function is missing.  We may drop this function PROFILE_GUESSED
2898      later in drop_profile ().  */
2899   if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2900     return 0;
2901
2902   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2903     true_count_max = MAX (bb->count, true_count_max);
2904
2905   count_max = MAX (true_count_max, 1);
2906   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2907     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2908
2909   return true_count_max;
2910 }
2911
2912 /* Return true if function is likely to be expensive, so there is no point to
2913    optimize performance of prologue, epilogue or do inlining at the expense
2914    of code size growth.  THRESHOLD is the limit of number of instructions
2915    function can execute at average to be still considered not expensive.  */
2916
2917 bool
2918 expensive_function_p (int threshold)
2919 {
2920   unsigned int sum = 0;
2921   basic_block bb;
2922   unsigned int limit;
2923
2924   /* We can not compute accurately for large thresholds due to scaled
2925      frequencies.  */
2926   gcc_assert (threshold <= BB_FREQ_MAX);
2927
2928   /* Frequencies are out of range.  This either means that function contains
2929      internal loop executing more than BB_FREQ_MAX times or profile feedback
2930      is available and function has not been executed at all.  */
2931   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2932     return true;
2933
2934   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2935   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2936   FOR_EACH_BB (bb)
2937     {
2938       rtx insn;
2939
2940       FOR_BB_INSNS (bb, insn)
2941         if (active_insn_p (insn))
2942           {
2943             sum += bb->frequency;
2944             if (sum > limit)
2945               return true;
2946         }
2947     }
2948
2949   return false;
2950 }
2951
2952 /* Estimate and propagate basic block frequencies using the given branch
2953    probabilities.  If FORCE is true, the frequencies are used to estimate
2954    the counts even when there are already non-zero profile counts.  */
2955
2956 void
2957 estimate_bb_frequencies (bool force)
2958 {
2959   basic_block bb;
2960   sreal freq_max;
2961
2962   if (force || profile_status != PROFILE_READ || !counts_to_freqs ())
2963     {
2964       static int real_values_initialized = 0;
2965
2966       if (!real_values_initialized)
2967         {
2968           real_values_initialized = 1;
2969           sreal_init (&real_zero, 0, 0);
2970           sreal_init (&real_one, 1, 0);
2971           sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2972           sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2973           sreal_init (&real_one_half, 1, -1);
2974           sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2975           sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2976         }
2977
2978       mark_dfs_back_edges ();
2979
2980       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2981          REG_BR_PROB_BASE;
2982
2983       /* Set up block info for each basic block.  */
2984       alloc_aux_for_blocks (sizeof (struct block_info_def));
2985       alloc_aux_for_edges (sizeof (struct edge_info_def));
2986       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2987         {
2988           edge e;
2989           edge_iterator ei;
2990
2991           FOR_EACH_EDGE (e, ei, bb->succs)
2992             {
2993               sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2994               sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2995                          &EDGE_INFO (e)->back_edge_prob,
2996                          &real_inv_br_prob_base);
2997             }
2998         }
2999
3000       /* First compute frequencies locally for each loop from innermost
3001          to outermost to examine frequencies for back edges.  */
3002       estimate_loops ();
3003
3004       memcpy (&freq_max, &real_zero, sizeof (real_zero));
3005       FOR_EACH_BB (bb)
3006         if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
3007           memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
3008
3009       sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
3010       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3011         {
3012           sreal tmp;
3013
3014           sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
3015           sreal_add (&tmp, &tmp, &real_one_half);
3016           bb->frequency = sreal_to_int (&tmp);
3017         }
3018
3019       free_aux_for_blocks ();
3020       free_aux_for_edges ();
3021     }
3022   compute_function_frequency ();
3023 }
3024
3025 /* Decide whether function is hot, cold or unlikely executed.  */
3026 void
3027 compute_function_frequency (void)
3028 {
3029   basic_block bb;
3030   struct cgraph_node *node = cgraph_get_node (current_function_decl);
3031
3032   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3033       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3034     node->only_called_at_startup = true;
3035   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3036     node->only_called_at_exit = true;
3037
3038   if (profile_status != PROFILE_READ)
3039     {
3040       int flags = flags_from_decl_or_type (current_function_decl);
3041       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3042           != NULL)
3043         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3044       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3045                != NULL)
3046         node->frequency = NODE_FREQUENCY_HOT;
3047       else if (flags & ECF_NORETURN)
3048         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3049       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3050         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3051       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3052                || DECL_STATIC_DESTRUCTOR (current_function_decl))
3053         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3054       return;
3055     }
3056
3057   /* Only first time try to drop function into unlikely executed.
3058      After inlining the roundoff errors may confuse us.
3059      Ipa-profile pass will drop functions only called from unlikely
3060      functions to unlikely and that is most of what we care about.  */
3061   if (!cfun->after_inlining)
3062     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3063   FOR_EACH_BB (bb)
3064     {
3065       if (maybe_hot_bb_p (cfun, bb))
3066         {
3067           node->frequency = NODE_FREQUENCY_HOT;
3068           return;
3069         }
3070       if (!probably_never_executed_bb_p (cfun, bb))
3071         node->frequency = NODE_FREQUENCY_NORMAL;
3072     }
3073 }
3074
3075 static bool
3076 gate_estimate_probability (void)
3077 {
3078   return flag_guess_branch_prob;
3079 }
3080
3081 /* Build PREDICT_EXPR.  */
3082 tree
3083 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3084 {
3085   tree t = build1 (PREDICT_EXPR, void_type_node,
3086                    build_int_cst (integer_type_node, predictor));
3087   SET_PREDICT_EXPR_OUTCOME (t, taken);
3088   return t;
3089 }
3090
3091 const char *
3092 predictor_name (enum br_predictor predictor)
3093 {
3094   return predictor_info[predictor].name;
3095 }
3096
3097 namespace {
3098
3099 const pass_data pass_data_profile =
3100 {
3101   GIMPLE_PASS, /* type */
3102   "profile_estimate", /* name */
3103   OPTGROUP_NONE, /* optinfo_flags */
3104   true, /* has_gate */
3105   true, /* has_execute */
3106   TV_BRANCH_PROB, /* tv_id */
3107   PROP_cfg, /* properties_required */
3108   0, /* properties_provided */
3109   0, /* properties_destroyed */
3110   0, /* todo_flags_start */
3111   TODO_verify_ssa, /* todo_flags_finish */
3112 };
3113
3114 class pass_profile : public gimple_opt_pass
3115 {
3116 public:
3117   pass_profile (gcc::context *ctxt)
3118     : gimple_opt_pass (pass_data_profile, ctxt)
3119   {}
3120
3121   /* opt_pass methods: */
3122   bool gate () { return gate_estimate_probability (); }
3123   unsigned int execute () { return tree_estimate_probability_driver (); }
3124
3125 }; // class pass_profile
3126
3127 } // anon namespace
3128
3129 gimple_opt_pass *
3130 make_pass_profile (gcc::context *ctxt)
3131 {
3132   return new pass_profile (ctxt);
3133 }
3134
3135 namespace {
3136
3137 const pass_data pass_data_strip_predict_hints =
3138 {
3139   GIMPLE_PASS, /* type */
3140   "*strip_predict_hints", /* name */
3141   OPTGROUP_NONE, /* optinfo_flags */
3142   false, /* has_gate */
3143   true, /* has_execute */
3144   TV_BRANCH_PROB, /* tv_id */
3145   PROP_cfg, /* properties_required */
3146   0, /* properties_provided */
3147   0, /* properties_destroyed */
3148   0, /* todo_flags_start */
3149   TODO_verify_ssa, /* todo_flags_finish */
3150 };
3151
3152 class pass_strip_predict_hints : public gimple_opt_pass
3153 {
3154 public:
3155   pass_strip_predict_hints (gcc::context *ctxt)
3156     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3157   {}
3158
3159   /* opt_pass methods: */
3160   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3161   unsigned int execute () { return strip_predict_hints (); }
3162
3163 }; // class pass_strip_predict_hints
3164
3165 } // anon namespace
3166
3167 gimple_opt_pass *
3168 make_pass_strip_predict_hints (gcc::context *ctxt)
3169 {
3170   return new pass_strip_predict_hints (ctxt);
3171 }
3172
3173 /* Rebuild function frequencies.  Passes are in general expected to
3174    maintain profile by hand, however in some cases this is not possible:
3175    for example when inlining several functions with loops freuqencies might run
3176    out of scale and thus needs to be recomputed.  */
3177
3178 void
3179 rebuild_frequencies (void)
3180 {
3181   timevar_push (TV_REBUILD_FREQUENCIES);
3182
3183   /* When the max bb count in the function is small, there is a higher
3184      chance that there were truncation errors in the integer scaling
3185      of counts by inlining and other optimizations. This could lead
3186      to incorrect classification of code as being cold when it isn't.
3187      In that case, force the estimation of bb counts/frequencies from the
3188      branch probabilities, rather than computing frequencies from counts,
3189      which may also lead to frequencies incorrectly reduced to 0. There
3190      is less precision in the probabilities, so we only do this for small
3191      max counts.  */
3192   gcov_type count_max = 0;
3193   basic_block bb;
3194   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3195     count_max = MAX (bb->count, count_max);
3196
3197   if (profile_status == PROFILE_GUESSED
3198       || (profile_status == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3199     {
3200       loop_optimizer_init (0);
3201       add_noreturn_fake_exit_edges ();
3202       mark_irreducible_loops ();
3203       connect_infinite_loops_to_exit ();
3204       estimate_bb_frequencies (true);
3205       remove_fake_exit_edges ();
3206       loop_optimizer_finalize ();
3207     }
3208   else if (profile_status == PROFILE_READ)
3209     counts_to_freqs ();
3210   else
3211     gcc_unreachable ();
3212   timevar_pop (TV_REBUILD_FREQUENCIES);
3213 }