gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "tree.h"
  35 #include "rtl.h"
  36 #include "tm_p.h"
  37 #include "hard-reg-set.h"
  38 #include "basic-block.h"
  39 #include "insn-config.h"
  40 #include "regs.h"
  41 #include "flags.h"
  42 #include "function.h"
  43 #include "except.h"
  44 #include "diagnostic-core.h"
  45 #include "recog.h"
  46 #include "expr.h"
  47 #include "predict.h"
  48 #include "coverage.h"
  49 #include "sreal.h"
  50 #include "params.h"
  51 #include "target.h"
  52 #include "cfgloop.h"
  53 #include "gimple.h"
  54 #include "gimple-iterator.h"
  55 #include "gimple-ssa.h"
  56 #include "cgraph.h"
  57 #include "tree-cfg.h"
  58 #include "tree-phinodes.h"
  59 #include "ssa-iterators.h"
  60 #include "tree-ssa-loop-niter.h"
  61 #include "tree-ssa-loop.h"
  62 #include "ggc.h"
  63 #include "tree-pass.h"
  64 #include "tree-scalar-evolution.h"
  65 #include "cfgloop.h"
  66 #include "pointer-set.h"
  67
  68 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  69                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  70 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  71              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  72
  73 /* Random guesstimation given names.
  74    PROV_VERY_UNLIKELY should be small enough so basic block predicted
  75    by it gets below HOT_BB_FREQUENCY_FRACTION.  */
  76 #define PROB_VERY_UNLIKELY      (REG_BR_PROB_BASE / 2000 - 1)
  77 #define PROB_EVEN               (REG_BR_PROB_BASE / 2)
  78 #define PROB_VERY_LIKELY        (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
  79 #define PROB_ALWAYS             (REG_BR_PROB_BASE)
  80
  81 static void combine_predictions_for_insn (rtx, basic_block);
  82 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  83 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  84 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  85 static bool can_predict_insn_p (const_rtx);
  86
  87 /* Information we hold about each branch predictor.
  88    Filled using information from predict.def.  */
  89
  90 struct predictor_info
  91 {
  92   const char *const name;       /* Name used in the debugging dumps.  */
  93   const int hitrate;            /* Expected hitrate used by
  94                                    predict_insn_def call.  */
  95   const int flags;
  96 };
  97
  98 /* Use given predictor without Dempster-Shaffer theory if it matches
  99    using first_match heuristics.  */
 100 #define PRED_FLAG_FIRST_MATCH 1
 101
 102 /* Recompute hitrate in percent to our representation.  */
 103
 104 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
 105
 106 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
 107 static const struct predictor_info predictor_info[]= {
 108 #include "predict.def"
 109
 110   /* Upper bound on predictors.  */
 111   {NULL, 0, 0}
 112 };
 113 #undef DEF_PREDICTOR
 114
 115 /* Return TRUE if frequency FREQ is considered to be hot.  */
 116
 117 static inline bool
 118 maybe_hot_frequency_p (struct function *fun, int freq)
 119 {
 120   struct cgraph_node *node = cgraph_get_node (fun->decl);
 121   if (!profile_info || !flag_branch_probabilities)
 122     {
 123       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 124         return false;
 125       if (node->frequency == NODE_FREQUENCY_HOT)
 126         return true;
 127     }
 128   if (profile_status_for_function (fun) == PROFILE_ABSENT)
 129     return true;
 130   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 131       && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
 132     return false;
 133   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 134     return false;
 135   if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
 136               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 137     return false;
 138   return true;
 139 }
 140
 141 static gcov_type min_count = -1;
 142
 143 /* Determine the threshold for hot BB counts.  */
 144
 145 gcov_type
 146 get_hot_bb_threshold ()
 147 {
 148   gcov_working_set_t *ws;
 149   if (min_count == -1)
 150     {
 151       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 152       gcc_assert (ws);
 153       min_count = ws->min_counter;
 154     }
 155   return min_count;
 156 }
 157
 158 /* Set the threshold for hot BB counts.  */
 159
 160 void
 161 set_hot_bb_threshold (gcov_type min)
 162 {
 163   min_count = min;
 164 }
 165
 166 /* Return TRUE if frequency FREQ is considered to be hot.  */
 167
 168 static inline bool
 169 maybe_hot_count_p (struct function *fun, gcov_type count)
 170 {
 171   if (fun && profile_status_for_function (fun) != PROFILE_READ)
 172     return true;
 173   /* Code executed at most once is not hot.  */
 174   if (profile_info->runs >= count)
 175     return false;
 176   return (count >= get_hot_bb_threshold ());
 177 }
 178
 179 /* Return true in case BB can be CPU intensive and should be optimized
 180    for maximal performance.  */
 181
 182 bool
 183 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 184 {
 185   gcc_checking_assert (fun);
 186   if (profile_status_for_function (fun) == PROFILE_READ)
 187     return maybe_hot_count_p (fun, bb->count);
 188   return maybe_hot_frequency_p (fun, bb->frequency);
 189 }
 190
 191 /* Return true if the call can be hot.  */
 192
 193 bool
 194 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
 195 {
 196   if (profile_info && flag_branch_probabilities
 197       && !maybe_hot_count_p (NULL,
 198                              edge->count))
 199     return false;
 200   if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
 201       || (edge->callee
 202           && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 203     return false;
 204   if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
 205       && (edge->callee
 206           && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
 207     return false;
 208   if (optimize_size)
 209     return false;
 210   if (edge->caller->frequency == NODE_FREQUENCY_HOT)
 211     return true;
 212   if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 213       && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
 214     return false;
 215   if (flag_guess_branch_prob)
 216     {
 217       if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
 218           || edge->frequency <= (CGRAPH_FREQ_BASE
 219                                  / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 220         return false;
 221     }
 222   return true;
 223 }
 224
 225 /* Return true in case BB can be CPU intensive and should be optimized
 226    for maximal performance.  */
 227
 228 bool
 229 maybe_hot_edge_p (edge e)
 230 {
 231   if (profile_status == PROFILE_READ)
 232     return maybe_hot_count_p (cfun, e->count);
 233   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 234 }
 235
 236
 237
 238 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 239    node frequency reflects never being executed.  */
 240
 241 static bool
 242 probably_never_executed (struct function *fun,
 243                          gcov_type count, int frequency)
 244 {
 245   gcc_checking_assert (fun);
 246   if (profile_status_for_function (fun) == PROFILE_READ)
 247     {
 248       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 249       if (count * unlikely_count_fraction >= profile_info->runs)
 250         return false;
 251       if (!frequency)
 252         return true;
 253       if (!ENTRY_BLOCK_PTR->frequency)
 254         return false;
 255       if (ENTRY_BLOCK_PTR->count)
 256         {
 257           gcov_type computed_count;
 258           /* Check for possibility of overflow, in which case entry bb count
 259              is large enough to do the division first without losing much
 260              precision.  */
 261           if (ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE * REG_BR_PROB_BASE)
 262             {
 263               gcov_type scaled_count
 264                   = frequency * ENTRY_BLOCK_PTR->count * unlikely_count_fraction;
 265               computed_count = RDIV (scaled_count, ENTRY_BLOCK_PTR->frequency);
 266             }
 267           else
 268             {
 269               computed_count = RDIV (ENTRY_BLOCK_PTR->count,
 270                                      ENTRY_BLOCK_PTR->frequency);
 271               computed_count *= frequency * unlikely_count_fraction;
 272             }
 273           if (computed_count >= profile_info->runs)
 274             return false;
 275         }
 276       return true;
 277     }
 278   if ((!profile_info || !flag_branch_probabilities)
 279       && (cgraph_get_node (fun->decl)->frequency
 280           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 281     return true;
 282   return false;
 283 }
 284
 285
 286 /* Return true in case BB is probably never executed.  */
 287
 288 bool
 289 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 290 {
 291   return probably_never_executed (fun, bb->count, bb->frequency);
 292 }
 293
 294
 295 /* Return true in case edge E is probably never executed.  */
 296
 297 bool
 298 probably_never_executed_edge_p (struct function *fun, edge e)
 299 {
 300   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 301 }
 302
 303 /* Return true if NODE should be optimized for size.  */
 304
 305 bool
 306 cgraph_optimize_for_size_p (struct cgraph_node *node)
 307 {
 308   if (optimize_size)
 309     return true;
 310   if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 311     return true;
 312   else
 313     return false;
 314 }
 315
 316 /* Return true when current function should always be optimized for size.  */
 317
 318 bool
 319 optimize_function_for_size_p (struct function *fun)
 320 {
 321   if (optimize_size)
 322     return true;
 323   if (!fun || !fun->decl)
 324     return false;
 325   return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
 326 }
 327
 328 /* Return true when current function should always be optimized for speed.  */
 329
 330 bool
 331 optimize_function_for_speed_p (struct function *fun)
 332 {
 333   return !optimize_function_for_size_p (fun);
 334 }
 335
 336 /* Return TRUE when BB should be optimized for size.  */
 337
 338 bool
 339 optimize_bb_for_size_p (const_basic_block bb)
 340 {
 341   return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
 342 }
 343
 344 /* Return TRUE when BB should be optimized for speed.  */
 345
 346 bool
 347 optimize_bb_for_speed_p (const_basic_block bb)
 348 {
 349   return !optimize_bb_for_size_p (bb);
 350 }
 351
 352 /* Return TRUE when BB should be optimized for size.  */
 353
 354 bool
 355 optimize_edge_for_size_p (edge e)
 356 {
 357   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 358 }
 359
 360 /* Return TRUE when BB should be optimized for speed.  */
 361
 362 bool
 363 optimize_edge_for_speed_p (edge e)
 364 {
 365   return !optimize_edge_for_size_p (e);
 366 }
 367
 368 /* Return TRUE when BB should be optimized for size.  */
 369
 370 bool
 371 optimize_insn_for_size_p (void)
 372 {
 373   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 374 }
 375
 376 /* Return TRUE when BB should be optimized for speed.  */
 377
 378 bool
 379 optimize_insn_for_speed_p (void)
 380 {
 381   return !optimize_insn_for_size_p ();
 382 }
 383
 384 /* Return TRUE when LOOP should be optimized for size.  */
 385
 386 bool
 387 optimize_loop_for_size_p (struct loop *loop)
 388 {
 389   return optimize_bb_for_size_p (loop->header);
 390 }
 391
 392 /* Return TRUE when LOOP should be optimized for speed.  */
 393
 394 bool
 395 optimize_loop_for_speed_p (struct loop *loop)
 396 {
 397   return optimize_bb_for_speed_p (loop->header);
 398 }
 399
 400 /* Return TRUE when LOOP nest should be optimized for speed.  */
 401
 402 bool
 403 optimize_loop_nest_for_speed_p (struct loop *loop)
 404 {
 405   struct loop *l = loop;
 406   if (optimize_loop_for_speed_p (loop))
 407     return true;
 408   l = loop->inner;
 409   while (l && l != loop)
 410     {
 411       if (optimize_loop_for_speed_p (l))
 412         return true;
 413       if (l->inner)
 414         l = l->inner;
 415       else if (l->next)
 416         l = l->next;
 417       else
 418         {
 419           while (l != loop && !l->next)
 420             l = loop_outer (l);
 421           if (l != loop)
 422             l = l->next;
 423         }
 424     }
 425   return false;
 426 }
 427
 428 /* Return TRUE when LOOP nest should be optimized for size.  */
 429
 430 bool
 431 optimize_loop_nest_for_size_p (struct loop *loop)
 432 {
 433   return !optimize_loop_nest_for_speed_p (loop);
 434 }
 435
 436 /* Return true when edge E is likely to be well predictable by branch
 437    predictor.  */
 438
 439 bool
 440 predictable_edge_p (edge e)
 441 {
 442   if (profile_status == PROFILE_ABSENT)
 443     return false;
 444   if ((e->probability
 445        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 446       || (REG_BR_PROB_BASE - e->probability
 447           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 448     return true;
 449   return false;
 450 }
 451
 452
 453 /* Set RTL expansion for BB profile.  */
 454
 455 void
 456 rtl_profile_for_bb (basic_block bb)
 457 {
 458   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 459 }
 460
 461 /* Set RTL expansion for edge profile.  */
 462
 463 void
 464 rtl_profile_for_edge (edge e)
 465 {
 466   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 467 }
 468
 469 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 470 void
 471 default_rtl_profile (void)
 472 {
 473   crtl->maybe_hot_insn_p = true;
 474 }
 475
 476 /* Return true if the one of outgoing edges is already predicted by
 477    PREDICTOR.  */
 478
 479 bool
 480 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 481 {
 482   rtx note;
 483   if (!INSN_P (BB_END (bb)))
 484     return false;
 485   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 486     if (REG_NOTE_KIND (note) == REG_BR_PRED
 487         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 488       return true;
 489   return false;
 490 }
 491
 492 /* This map contains for a basic block the list of predictions for the
 493    outgoing edges.  */
 494
 495 static struct pointer_map_t *bb_predictions;
 496
 497 /*  Structure representing predictions in tree level. */
 498
 499 struct edge_prediction {
 500     struct edge_prediction *ep_next;
 501     edge ep_edge;
 502     enum br_predictor ep_predictor;
 503     int ep_probability;
 504 };
 505
 506 /* Return true if the one of outgoing edges is already predicted by
 507    PREDICTOR.  */
 508
 509 bool
 510 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 511 {
 512   struct edge_prediction *i;
 513   void **preds = pointer_map_contains (bb_predictions, bb);
 514
 515   if (!preds)
 516     return false;
 517
 518   for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
 519     if (i->ep_predictor == predictor)
 520       return true;
 521   return false;
 522 }
 523
 524 /* Return true when the probability of edge is reliable.
 525
 526    The profile guessing code is good at predicting branch outcome (ie.
 527    taken/not taken), that is predicted right slightly over 75% of time.
 528    It is however notoriously poor on predicting the probability itself.
 529    In general the profile appear a lot flatter (with probabilities closer
 530    to 50%) than the reality so it is bad idea to use it to drive optimization
 531    such as those disabling dynamic branch prediction for well predictable
 532    branches.
 533
 534    There are two exceptions - edges leading to noreturn edges and edges
 535    predicted by number of iterations heuristics are predicted well.  This macro
 536    should be able to distinguish those, but at the moment it simply check for
 537    noreturn heuristic that is only one giving probability over 99% or bellow
 538    1%.  In future we might want to propagate reliability information across the
 539    CFG if we find this information useful on multiple places.   */
 540 static bool
 541 probability_reliable_p (int prob)
 542 {
 543   return (profile_status == PROFILE_READ
 544           || (profile_status == PROFILE_GUESSED
 545               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 546 }
 547
 548 /* Same predicate as above, working on edges.  */
 549 bool
 550 edge_probability_reliable_p (const_edge e)
 551 {
 552   return probability_reliable_p (e->probability);
 553 }
 554
 555 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 556 bool
 557 br_prob_note_reliable_p (const_rtx note)
 558 {
 559   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 560   return probability_reliable_p (XINT (note, 0));
 561 }
 562
 563 static void
 564 predict_insn (rtx insn, enum br_predictor predictor, int probability)
 565 {
 566   gcc_assert (any_condjump_p (insn));
 567   if (!flag_guess_branch_prob)
 568     return;
 569
 570   add_reg_note (insn, REG_BR_PRED,
 571                 gen_rtx_CONCAT (VOIDmode,
 572                                 GEN_INT ((int) predictor),
 573                                 GEN_INT ((int) probability)));
 574 }
 575
 576 /* Predict insn by given predictor.  */
 577
 578 void
 579 predict_insn_def (rtx insn, enum br_predictor predictor,
 580                   enum prediction taken)
 581 {
 582    int probability = predictor_info[(int) predictor].hitrate;
 583
 584    if (taken != TAKEN)
 585      probability = REG_BR_PROB_BASE - probability;
 586
 587    predict_insn (insn, predictor, probability);
 588 }
 589
 590 /* Predict edge E with given probability if possible.  */
 591
 592 void
 593 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 594 {
 595   rtx last_insn;
 596   last_insn = BB_END (e->src);
 597
 598   /* We can store the branch prediction information only about
 599      conditional jumps.  */
 600   if (!any_condjump_p (last_insn))
 601     return;
 602
 603   /* We always store probability of branching.  */
 604   if (e->flags & EDGE_FALLTHRU)
 605     probability = REG_BR_PROB_BASE - probability;
 606
 607   predict_insn (last_insn, predictor, probability);
 608 }
 609
 610 /* Predict edge E with the given PROBABILITY.  */
 611 void
 612 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 613 {
 614   gcc_assert (profile_status != PROFILE_GUESSED);
 615   if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
 616       && flag_guess_branch_prob && optimize)
 617     {
 618       struct edge_prediction *i = XNEW (struct edge_prediction);
 619       void **preds = pointer_map_insert (bb_predictions, e->src);
 620
 621       i->ep_next = (struct edge_prediction *) *preds;
 622       *preds = i;
 623       i->ep_probability = probability;
 624       i->ep_predictor = predictor;
 625       i->ep_edge = e;
 626     }
 627 }
 628
 629 /* Remove all predictions on given basic block that are attached
 630    to edge E.  */
 631 void
 632 remove_predictions_associated_with_edge (edge e)
 633 {
 634   void **preds;
 635
 636   if (!bb_predictions)
 637     return;
 638
 639   preds = pointer_map_contains (bb_predictions, e->src);
 640
 641   if (preds)
 642     {
 643       struct edge_prediction **prediction = (struct edge_prediction **) preds;
 644       struct edge_prediction *next;
 645
 646       while (*prediction)
 647         {
 648           if ((*prediction)->ep_edge == e)
 649             {
 650               next = (*prediction)->ep_next;
 651               free (*prediction);
 652               *prediction = next;
 653             }
 654           else
 655             prediction = &((*prediction)->ep_next);
 656         }
 657     }
 658 }
 659
 660 /* Clears the list of predictions stored for BB.  */
 661
 662 static void
 663 clear_bb_predictions (basic_block bb)
 664 {
 665   void **preds = pointer_map_contains (bb_predictions, bb);
 666   struct edge_prediction *pred, *next;
 667
 668   if (!preds)
 669     return;
 670
 671   for (pred = (struct edge_prediction *) *preds; pred; pred = next)
 672     {
 673       next = pred->ep_next;
 674       free (pred);
 675     }
 676   *preds = NULL;
 677 }
 678
 679 /* Return true when we can store prediction on insn INSN.
 680    At the moment we represent predictions only on conditional
 681    jumps, not at computed jump or other complicated cases.  */
 682 static bool
 683 can_predict_insn_p (const_rtx insn)
 684 {
 685   return (JUMP_P (insn)
 686           && any_condjump_p (insn)
 687           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 688 }
 689
 690 /* Predict edge E by given predictor if possible.  */
 691
 692 void
 693 predict_edge_def (edge e, enum br_predictor predictor,
 694                   enum prediction taken)
 695 {
 696    int probability = predictor_info[(int) predictor].hitrate;
 697
 698    if (taken != TAKEN)
 699      probability = REG_BR_PROB_BASE - probability;
 700
 701    predict_edge (e, predictor, probability);
 702 }
 703
 704 /* Invert all branch predictions or probability notes in the INSN.  This needs
 705    to be done each time we invert the condition used by the jump.  */
 706
 707 void
 708 invert_br_probabilities (rtx insn)
 709 {
 710   rtx note;
 711
 712   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 713     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 714       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 715     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 716       XEXP (XEXP (note, 0), 1)
 717         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 718 }
 719
 720 /* Dump information about the branch prediction to the output file.  */
 721
 722 static void
 723 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 724                  basic_block bb, int used)
 725 {
 726   edge e;
 727   edge_iterator ei;
 728
 729   if (!file)
 730     return;
 731
 732   FOR_EACH_EDGE (e, ei, bb->succs)
 733     if (! (e->flags & EDGE_FALLTHRU))
 734       break;
 735
 736   fprintf (file, "  %s heuristics%s: %.1f%%",
 737            predictor_info[predictor].name,
 738            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 739
 740   if (bb->count)
 741     {
 742       fprintf (file, "  exec ");
 743       fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
 744       if (e)
 745         {
 746           fprintf (file, " hit ");
 747           fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
 748           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 749         }
 750     }
 751
 752   fprintf (file, "\n");
 753 }
 754
 755 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 756    evenly and hope for the best.  */
 757 static void
 758 set_even_probabilities (basic_block bb)
 759 {
 760   int nedges = 0;
 761   edge e;
 762   edge_iterator ei;
 763
 764   FOR_EACH_EDGE (e, ei, bb->succs)
 765     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 766       nedges ++;
 767   FOR_EACH_EDGE (e, ei, bb->succs)
 768     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 769       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 770     else
 771       e->probability = 0;
 772 }
 773
 774 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 775    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 776
 777 static void
 778 combine_predictions_for_insn (rtx insn, basic_block bb)
 779 {
 780   rtx prob_note;
 781   rtx *pnote;
 782   rtx note;
 783   int best_probability = PROB_EVEN;
 784   enum br_predictor best_predictor = END_PREDICTORS;
 785   int combined_probability = REG_BR_PROB_BASE / 2;
 786   int d;
 787   bool first_match = false;
 788   bool found = false;
 789
 790   if (!can_predict_insn_p (insn))
 791     {
 792       set_even_probabilities (bb);
 793       return;
 794     }
 795
 796   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 797   pnote = &REG_NOTES (insn);
 798   if (dump_file)
 799     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 800              bb->index);
 801
 802   /* We implement "first match" heuristics and use probability guessed
 803      by predictor with smallest index.  */
 804   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 805     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 806       {
 807         enum br_predictor predictor = ((enum br_predictor)
 808                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 809         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 810
 811         found = true;
 812         if (best_predictor > predictor)
 813           best_probability = probability, best_predictor = predictor;
 814
 815         d = (combined_probability * probability
 816              + (REG_BR_PROB_BASE - combined_probability)
 817              * (REG_BR_PROB_BASE - probability));
 818
 819         /* Use FP math to avoid overflows of 32bit integers.  */
 820         if (d == 0)
 821           /* If one probability is 0% and one 100%, avoid division by zero.  */
 822           combined_probability = REG_BR_PROB_BASE / 2;
 823         else
 824           combined_probability = (((double) combined_probability) * probability
 825                                   * REG_BR_PROB_BASE / d + 0.5);
 826       }
 827
 828   /* Decide which heuristic to use.  In case we didn't match anything,
 829      use no_prediction heuristic, in case we did match, use either
 830      first match or Dempster-Shaffer theory depending on the flags.  */
 831
 832   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 833     first_match = true;
 834
 835   if (!found)
 836     dump_prediction (dump_file, PRED_NO_PREDICTION,
 837                      combined_probability, bb, true);
 838   else
 839     {
 840       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 841                        bb, !first_match);
 842       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 843                        bb, first_match);
 844     }
 845
 846   if (first_match)
 847     combined_probability = best_probability;
 848   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 849
 850   while (*pnote)
 851     {
 852       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 853         {
 854           enum br_predictor predictor = ((enum br_predictor)
 855                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 856           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 857
 858           dump_prediction (dump_file, predictor, probability, bb,
 859                            !first_match || best_predictor == predictor);
 860           *pnote = XEXP (*pnote, 1);
 861         }
 862       else
 863         pnote = &XEXP (*pnote, 1);
 864     }
 865
 866   if (!prob_note)
 867     {
 868       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 869
 870       /* Save the prediction into CFG in case we are seeing non-degenerated
 871          conditional jump.  */
 872       if (!single_succ_p (bb))
 873         {
 874           BRANCH_EDGE (bb)->probability = combined_probability;
 875           FALLTHRU_EDGE (bb)->probability
 876             = REG_BR_PROB_BASE - combined_probability;
 877         }
 878     }
 879   else if (!single_succ_p (bb))
 880     {
 881       int prob = XINT (prob_note, 0);
 882
 883       BRANCH_EDGE (bb)->probability = prob;
 884       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 885     }
 886   else
 887     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 888 }
 889
 890 /* Combine predictions into single probability and store them into CFG.
 891    Remove now useless prediction entries.  */
 892
 893 static void
 894 combine_predictions_for_bb (basic_block bb)
 895 {
 896   int best_probability = PROB_EVEN;
 897   enum br_predictor best_predictor = END_PREDICTORS;
 898   int combined_probability = REG_BR_PROB_BASE / 2;
 899   int d;
 900   bool first_match = false;
 901   bool found = false;
 902   struct edge_prediction *pred;
 903   int nedges = 0;
 904   edge e, first = NULL, second = NULL;
 905   edge_iterator ei;
 906   void **preds;
 907
 908   FOR_EACH_EDGE (e, ei, bb->succs)
 909     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 910       {
 911         nedges ++;
 912         if (first && !second)
 913           second = e;
 914         if (!first)
 915           first = e;
 916       }
 917
 918   /* When there is no successor or only one choice, prediction is easy.
 919
 920      We are lazy for now and predict only basic blocks with two outgoing
 921      edges.  It is possible to predict generic case too, but we have to
 922      ignore first match heuristics and do more involved combining.  Implement
 923      this later.  */
 924   if (nedges != 2)
 925     {
 926       if (!bb->count)
 927         set_even_probabilities (bb);
 928       clear_bb_predictions (bb);
 929       if (dump_file)
 930         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 931                  nedges, bb->index);
 932       return;
 933     }
 934
 935   if (dump_file)
 936     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 937
 938   preds = pointer_map_contains (bb_predictions, bb);
 939   if (preds)
 940     {
 941       /* We implement "first match" heuristics and use probability guessed
 942          by predictor with smallest index.  */
 943       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 944         {
 945           enum br_predictor predictor = pred->ep_predictor;
 946           int probability = pred->ep_probability;
 947
 948           if (pred->ep_edge != first)
 949             probability = REG_BR_PROB_BASE - probability;
 950
 951           found = true;
 952           /* First match heuristics would be widly confused if we predicted
 953              both directions.  */
 954           if (best_predictor > predictor)
 955             {
 956               struct edge_prediction *pred2;
 957               int prob = probability;
 958
 959               for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
 960                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 961                  {
 962                    int probability2 = pred->ep_probability;
 963
 964                    if (pred2->ep_edge != first)
 965                      probability2 = REG_BR_PROB_BASE - probability2;
 966
 967                    if ((probability < REG_BR_PROB_BASE / 2) !=
 968                        (probability2 < REG_BR_PROB_BASE / 2))
 969                      break;
 970
 971                    /* If the same predictor later gave better result, go for it! */
 972                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 973                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 974                      prob = probability2;
 975                  }
 976               if (!pred2)
 977                 best_probability = prob, best_predictor = predictor;
 978             }
 979
 980           d = (combined_probability * probability
 981                + (REG_BR_PROB_BASE - combined_probability)
 982                * (REG_BR_PROB_BASE - probability));
 983
 984           /* Use FP math to avoid overflows of 32bit integers.  */
 985           if (d == 0)
 986             /* If one probability is 0% and one 100%, avoid division by zero.  */
 987             combined_probability = REG_BR_PROB_BASE / 2;
 988           else
 989             combined_probability = (((double) combined_probability)
 990                                     * probability
 991                                     * REG_BR_PROB_BASE / d + 0.5);
 992         }
 993     }
 994
 995   /* Decide which heuristic to use.  In case we didn't match anything,
 996      use no_prediction heuristic, in case we did match, use either
 997      first match or Dempster-Shaffer theory depending on the flags.  */
 998
 999   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
1000     first_match = true;
1001
1002   if (!found)
1003     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
1004   else
1005     {
1006       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1007                        !first_match);
1008       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1009                        first_match);
1010     }
1011
1012   if (first_match)
1013     combined_probability = best_probability;
1014   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1015
1016   if (preds)
1017     {
1018       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1019         {
1020           enum br_predictor predictor = pred->ep_predictor;
1021           int probability = pred->ep_probability;
1022
1023           if (pred->ep_edge != EDGE_SUCC (bb, 0))
1024             probability = REG_BR_PROB_BASE - probability;
1025           dump_prediction (dump_file, predictor, probability, bb,
1026                            !first_match || best_predictor == predictor);
1027         }
1028     }
1029   clear_bb_predictions (bb);
1030
1031   if (!bb->count)
1032     {
1033       first->probability = combined_probability;
1034       second->probability = REG_BR_PROB_BASE - combined_probability;
1035     }
1036 }
1037
1038 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1039    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1040
1041    T1 and T2 should be one of the following cases:
1042      1. T1 is SSA_NAME, T2 is NULL
1043      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1044      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1045
1046 static tree
1047 strips_small_constant (tree t1, tree t2)
1048 {
1049   tree ret = NULL;
1050   int value = 0;
1051
1052   if (!t1)
1053     return NULL;
1054   else if (TREE_CODE (t1) == SSA_NAME)
1055     ret = t1;
1056   else if (host_integerp (t1, 0))
1057     value = tree_low_cst (t1, 0);
1058   else
1059     return NULL;
1060
1061   if (!t2)
1062     return ret;
1063   else if (host_integerp (t2, 0))
1064     value = tree_low_cst (t2, 0);
1065   else if (TREE_CODE (t2) == SSA_NAME)
1066     {
1067       if (ret)
1068         return NULL;
1069       else
1070         ret = t2;
1071     }
1072
1073   if (value <= 4 && value >= -4)
1074     return ret;
1075   else
1076     return NULL;
1077 }
1078
1079 /* Return the SSA_NAME in T or T's operands.
1080    Return NULL if SSA_NAME cannot be found.  */
1081
1082 static tree
1083 get_base_value (tree t)
1084 {
1085   if (TREE_CODE (t) == SSA_NAME)
1086     return t;
1087
1088   if (!BINARY_CLASS_P (t))
1089     return NULL;
1090
1091   switch (TREE_OPERAND_LENGTH (t))
1092     {
1093     case 1:
1094       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1095     case 2:
1096       return strips_small_constant (TREE_OPERAND (t, 0),
1097                                     TREE_OPERAND (t, 1));
1098     default:
1099       return NULL;
1100     }
1101 }
1102
1103 /* Check the compare STMT in LOOP. If it compares an induction
1104    variable to a loop invariant, return true, and save
1105    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1106    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1107
1108 static bool
1109 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1110                                      tree *loop_invariant,
1111                                      enum tree_code *compare_code,
1112                                      tree *loop_step,
1113                                      tree *loop_iv_base)
1114 {
1115   tree op0, op1, bound, base;
1116   affine_iv iv0, iv1;
1117   enum tree_code code;
1118   tree step;
1119
1120   code = gimple_cond_code (stmt);
1121   *loop_invariant = NULL;
1122
1123   switch (code)
1124     {
1125     case GT_EXPR:
1126     case GE_EXPR:
1127     case NE_EXPR:
1128     case LT_EXPR:
1129     case LE_EXPR:
1130     case EQ_EXPR:
1131       break;
1132
1133     default:
1134       return false;
1135     }
1136
1137   op0 = gimple_cond_lhs (stmt);
1138   op1 = gimple_cond_rhs (stmt);
1139
1140   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1141        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1142     return false;
1143   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1144     return false;
1145   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1146     return false;
1147   if (TREE_CODE (iv0.step) != INTEGER_CST
1148       || TREE_CODE (iv1.step) != INTEGER_CST)
1149     return false;
1150   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1151       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1152     return false;
1153
1154   if (integer_zerop (iv0.step))
1155     {
1156       if (code != NE_EXPR && code != EQ_EXPR)
1157         code = invert_tree_comparison (code, false);
1158       bound = iv0.base;
1159       base = iv1.base;
1160       if (host_integerp (iv1.step, 0))
1161         step = iv1.step;
1162       else
1163         return false;
1164     }
1165   else
1166     {
1167       bound = iv1.base;
1168       base = iv0.base;
1169       if (host_integerp (iv0.step, 0))
1170         step = iv0.step;
1171       else
1172         return false;
1173     }
1174
1175   if (TREE_CODE (bound) != INTEGER_CST)
1176     bound = get_base_value (bound);
1177   if (!bound)
1178     return false;
1179   if (TREE_CODE (base) != INTEGER_CST)
1180     base = get_base_value (base);
1181   if (!base)
1182     return false;
1183
1184   *loop_invariant = bound;
1185   *compare_code = code;
1186   *loop_step = step;
1187   *loop_iv_base = base;
1188   return true;
1189 }
1190
1191 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1192
1193 static bool
1194 expr_coherent_p (tree t1, tree t2)
1195 {
1196   gimple stmt;
1197   tree ssa_name_1 = NULL;
1198   tree ssa_name_2 = NULL;
1199
1200   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1201   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1202
1203   if (t1 == t2)
1204     return true;
1205
1206   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1207     return true;
1208   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1209     return false;
1210
1211   /* Check to see if t1 is expressed/defined with t2.  */
1212   stmt = SSA_NAME_DEF_STMT (t1);
1213   gcc_assert (stmt != NULL);
1214   if (is_gimple_assign (stmt))
1215     {
1216       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1217       if (ssa_name_1 && ssa_name_1 == t2)
1218         return true;
1219     }
1220
1221   /* Check to see if t2 is expressed/defined with t1.  */
1222   stmt = SSA_NAME_DEF_STMT (t2);
1223   gcc_assert (stmt != NULL);
1224   if (is_gimple_assign (stmt))
1225     {
1226       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1227       if (ssa_name_2 && ssa_name_2 == t1)
1228         return true;
1229     }
1230
1231   /* Compare if t1 and t2's def_stmts are identical.  */
1232   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1233     return true;
1234   else
1235     return false;
1236 }
1237
1238 /* Predict branch probability of BB when BB contains a branch that compares
1239    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1240    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1241
1242    E.g.
1243      for (int i = 0; i < bound; i++) {
1244        if (i < bound - 2)
1245          computation_1();
1246        else
1247          computation_2();
1248      }
1249
1250   In this loop, we will predict the branch inside the loop to be taken.  */
1251
1252 static void
1253 predict_iv_comparison (struct loop *loop, basic_block bb,
1254                        tree loop_bound_var,
1255                        tree loop_iv_base_var,
1256                        enum tree_code loop_bound_code,
1257                        int loop_bound_step)
1258 {
1259   gimple stmt;
1260   tree compare_var, compare_base;
1261   enum tree_code compare_code;
1262   tree compare_step_var;
1263   edge then_edge;
1264   edge_iterator ei;
1265
1266   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1267       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1268       || predicted_by_p (bb, PRED_LOOP_EXIT))
1269     return;
1270
1271   stmt = last_stmt (bb);
1272   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1273     return;
1274   if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1275                                             &compare_code,
1276                                             &compare_step_var,
1277                                             &compare_base))
1278     return;
1279
1280   /* Find the taken edge.  */
1281   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1282     if (then_edge->flags & EDGE_TRUE_VALUE)
1283       break;
1284
1285   /* When comparing an IV to a loop invariant, NE is more likely to be
1286      taken while EQ is more likely to be not-taken.  */
1287   if (compare_code == NE_EXPR)
1288     {
1289       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1290       return;
1291     }
1292   else if (compare_code == EQ_EXPR)
1293     {
1294       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1295       return;
1296     }
1297
1298   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1299     return;
1300
1301   /* If loop bound, base and compare bound are all constants, we can
1302      calculate the probability directly.  */
1303   if (host_integerp (loop_bound_var, 0)
1304       && host_integerp (compare_var, 0)
1305       && host_integerp (compare_base, 0))
1306     {
1307       int probability;
1308       bool of, overflow = false;
1309       double_int mod, compare_count, tem, loop_count;
1310
1311       double_int loop_bound = tree_to_double_int (loop_bound_var);
1312       double_int compare_bound = tree_to_double_int (compare_var);
1313       double_int base = tree_to_double_int (compare_base);
1314       double_int compare_step = tree_to_double_int (compare_step_var);
1315
1316       /* (loop_bound - base) / compare_step */
1317       tem = loop_bound.sub_with_overflow (base, &of);
1318       overflow |= of;
1319       loop_count = tem.divmod_with_overflow (compare_step,
1320                                               0, TRUNC_DIV_EXPR,
1321                                               &mod, &of);
1322       overflow |= of;
1323
1324       if ((!compare_step.is_negative ())
1325           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1326         {
1327           /* (loop_bound - compare_bound) / compare_step */
1328           tem = loop_bound.sub_with_overflow (compare_bound, &of);
1329           overflow |= of;
1330           compare_count = tem.divmod_with_overflow (compare_step,
1331                                                      0, TRUNC_DIV_EXPR,
1332                                                      &mod, &of);
1333           overflow |= of;
1334         }
1335       else
1336         {
1337           /* (compare_bound - base) / compare_step */
1338           tem = compare_bound.sub_with_overflow (base, &of);
1339           overflow |= of;
1340           compare_count = tem.divmod_with_overflow (compare_step,
1341                                                      0, TRUNC_DIV_EXPR,
1342                                                      &mod, &of);
1343           overflow |= of;
1344         }
1345       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1346         ++compare_count;
1347       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1348         ++loop_count;
1349       if (compare_count.is_negative ())
1350         compare_count = double_int_zero;
1351       if (loop_count.is_negative ())
1352         loop_count = double_int_zero;
1353       if (loop_count.is_zero ())
1354         probability = 0;
1355       else if (compare_count.scmp (loop_count) == 1)
1356         probability = REG_BR_PROB_BASE;
1357       else
1358         {
1359           /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1360              could overflow, shift both loop_count and compare_count right
1361              a bit so that it doesn't overflow.  Note both counts are known not
1362              to be negative at this point.  */
1363           int clz_bits = clz_hwi (loop_count.high);
1364           gcc_assert (REG_BR_PROB_BASE < 32768);
1365           if (clz_bits < 16)
1366             {
1367               loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1368               compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1369             }
1370           tem = compare_count.mul_with_sign (double_int::from_shwi
1371                                             (REG_BR_PROB_BASE), true, &of);
1372           gcc_assert (!of);
1373           tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1374           probability = tem.to_uhwi ();
1375         }
1376
1377       if (!overflow)
1378         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1379
1380       return;
1381     }
1382
1383   if (expr_coherent_p (loop_bound_var, compare_var))
1384     {
1385       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1386           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1387         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1388       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1389                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1390         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1391       else if (loop_bound_code == NE_EXPR)
1392         {
1393           /* If the loop backedge condition is "(i != bound)", we do
1394              the comparison based on the step of IV:
1395              * step < 0 : backedge condition is like (i > bound)
1396              * step > 0 : backedge condition is like (i < bound)  */
1397           gcc_assert (loop_bound_step != 0);
1398           if (loop_bound_step > 0
1399               && (compare_code == LT_EXPR
1400                   || compare_code == LE_EXPR))
1401             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1402           else if (loop_bound_step < 0
1403                    && (compare_code == GT_EXPR
1404                        || compare_code == GE_EXPR))
1405             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1406           else
1407             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1408         }
1409       else
1410         /* The branch is predicted not-taken if loop_bound_code is
1411            opposite with compare_code.  */
1412         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1413     }
1414   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1415     {
1416       /* For cases like:
1417            for (i = s; i < h; i++)
1418              if (i > s + 2) ....
1419          The branch should be predicted taken.  */
1420       if (loop_bound_step > 0
1421           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1422         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1423       else if (loop_bound_step < 0
1424                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1425         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1426       else
1427         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1428     }
1429 }
1430
1431 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1432    exits are resulted from short-circuit conditions that will generate an
1433    if_tmp. E.g.:
1434
1435    if (foo() || global > 10)
1436      break;
1437
1438    This will be translated into:
1439
1440    BB3:
1441      loop header...
1442    BB4:
1443      if foo() goto BB6 else goto BB5
1444    BB5:
1445      if global > 10 goto BB6 else goto BB7
1446    BB6:
1447      goto BB7
1448    BB7:
1449      iftmp = (PHI 0(BB5), 1(BB6))
1450      if iftmp == 1 goto BB8 else goto BB3
1451    BB8:
1452      outside of the loop...
1453
1454    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1455    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1456    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1457    exits to predict them using PRED_LOOP_EXIT.  */
1458
1459 static void
1460 predict_extra_loop_exits (edge exit_edge)
1461 {
1462   unsigned i;
1463   bool check_value_one;
1464   gimple phi_stmt;
1465   tree cmp_rhs, cmp_lhs;
1466   gimple cmp_stmt = last_stmt (exit_edge->src);
1467
1468   if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1469     return;
1470   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1471   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1472   if (!TREE_CONSTANT (cmp_rhs)
1473       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1474     return;
1475   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1476     return;
1477
1478   /* If check_value_one is true, only the phi_args with value '1' will lead
1479      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1480      loop exit.  */
1481   check_value_one = (((integer_onep (cmp_rhs))
1482                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1483                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1484
1485   phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1486   if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1487     return;
1488
1489   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1490     {
1491       edge e1;
1492       edge_iterator ei;
1493       tree val = gimple_phi_arg_def (phi_stmt, i);
1494       edge e = gimple_phi_arg_edge (phi_stmt, i);
1495
1496       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1497         continue;
1498       if ((check_value_one ^ integer_onep (val)) == 1)
1499         continue;
1500       if (EDGE_COUNT (e->src->succs) != 1)
1501         {
1502           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1503           continue;
1504         }
1505
1506       FOR_EACH_EDGE (e1, ei, e->src->preds)
1507         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1508     }
1509 }
1510
1511 /* Predict edge probabilities by exploiting loop structure.  */
1512
1513 static void
1514 predict_loops (void)
1515 {
1516   loop_iterator li;
1517   struct loop *loop;
1518
1519   /* Try to predict out blocks in a loop that are not part of a
1520      natural loop.  */
1521   FOR_EACH_LOOP (li, loop, 0)
1522     {
1523       basic_block bb, *bbs;
1524       unsigned j, n_exits;
1525       vec<edge> exits;
1526       struct tree_niter_desc niter_desc;
1527       edge ex;
1528       struct nb_iter_bound *nb_iter;
1529       enum tree_code loop_bound_code = ERROR_MARK;
1530       tree loop_bound_step = NULL;
1531       tree loop_bound_var = NULL;
1532       tree loop_iv_base = NULL;
1533       gimple stmt = NULL;
1534
1535       exits = get_loop_exit_edges (loop);
1536       n_exits = exits.length ();
1537       if (!n_exits)
1538         {
1539           exits.release ();
1540           continue;
1541         }
1542
1543       FOR_EACH_VEC_ELT (exits, j, ex)
1544         {
1545           tree niter = NULL;
1546           HOST_WIDE_INT nitercst;
1547           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1548           int probability;
1549           enum br_predictor predictor;
1550
1551           predict_extra_loop_exits (ex);
1552
1553           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1554             niter = niter_desc.niter;
1555           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1556             niter = loop_niter_by_eval (loop, ex);
1557
1558           if (TREE_CODE (niter) == INTEGER_CST)
1559             {
1560               if (host_integerp (niter, 1)
1561                   && max
1562                   && compare_tree_int (niter, max - 1) == -1)
1563                 nitercst = tree_low_cst (niter, 1) + 1;
1564               else
1565                 nitercst = max;
1566               predictor = PRED_LOOP_ITERATIONS;
1567             }
1568           /* If we have just one exit and we can derive some information about
1569              the number of iterations of the loop from the statements inside
1570              the loop, use it to predict this exit.  */
1571           else if (n_exits == 1)
1572             {
1573               nitercst = estimated_stmt_executions_int (loop);
1574               if (nitercst < 0)
1575                 continue;
1576               if (nitercst > max)
1577                 nitercst = max;
1578
1579               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1580             }
1581           else
1582             continue;
1583
1584           /* If the prediction for number of iterations is zero, do not
1585              predict the exit edges.  */
1586           if (nitercst == 0)
1587             continue;
1588
1589           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1590           predict_edge (ex, predictor, probability);
1591         }
1592       exits.release ();
1593
1594       /* Find information about loop bound variables.  */
1595       for (nb_iter = loop->bounds; nb_iter;
1596            nb_iter = nb_iter->next)
1597         if (nb_iter->stmt
1598             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1599           {
1600             stmt = nb_iter->stmt;
1601             break;
1602           }
1603       if (!stmt && last_stmt (loop->header)
1604           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1605         stmt = last_stmt (loop->header);
1606       if (stmt)
1607         is_comparison_with_loop_invariant_p (stmt, loop,
1608                                              &loop_bound_var,
1609                                              &loop_bound_code,
1610                                              &loop_bound_step,
1611                                              &loop_iv_base);
1612
1613       bbs = get_loop_body (loop);
1614
1615       for (j = 0; j < loop->num_nodes; j++)
1616         {
1617           int header_found = 0;
1618           edge e;
1619           edge_iterator ei;
1620
1621           bb = bbs[j];
1622
1623           /* Bypass loop heuristics on continue statement.  These
1624              statements construct loops via "non-loop" constructs
1625              in the source language and are better to be handled
1626              separately.  */
1627           if (predicted_by_p (bb, PRED_CONTINUE))
1628             continue;
1629
1630           /* Loop branch heuristics - predict an edge back to a
1631              loop's head as taken.  */
1632           if (bb == loop->latch)
1633             {
1634               e = find_edge (loop->latch, loop->header);
1635               if (e)
1636                 {
1637                   header_found = 1;
1638                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1639                 }
1640             }
1641
1642           /* Loop exit heuristics - predict an edge exiting the loop if the
1643              conditional has no loop header successors as not taken.  */
1644           if (!header_found
1645               /* If we already used more reliable loop exit predictors, do not
1646                  bother with PRED_LOOP_EXIT.  */
1647               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1648               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1649             {
1650               /* For loop with many exits we don't want to predict all exits
1651                  with the pretty large probability, because if all exits are
1652                  considered in row, the loop would be predicted to iterate
1653                  almost never.  The code to divide probability by number of
1654                  exits is very rough.  It should compute the number of exits
1655                  taken in each patch through function (not the overall number
1656                  of exits that might be a lot higher for loops with wide switch
1657                  statements in them) and compute n-th square root.
1658
1659                  We limit the minimal probability by 2% to avoid
1660                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1661                  as this was causing regression in perl benchmark containing such
1662                  a wide loop.  */
1663
1664               int probability = ((REG_BR_PROB_BASE
1665                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1666                                  / n_exits);
1667               if (probability < HITRATE (2))
1668                 probability = HITRATE (2);
1669               FOR_EACH_EDGE (e, ei, bb->succs)
1670                 if (e->dest->index < NUM_FIXED_BLOCKS
1671                     || !flow_bb_inside_loop_p (loop, e->dest))
1672                   predict_edge (e, PRED_LOOP_EXIT, probability);
1673             }
1674           if (loop_bound_var)
1675             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1676                                    loop_bound_code,
1677                                    tree_low_cst (loop_bound_step, 0));
1678         }
1679
1680       /* Free basic blocks from get_loop_body.  */
1681       free (bbs);
1682     }
1683 }
1684
1685 /* Attempt to predict probabilities of BB outgoing edges using local
1686    properties.  */
1687 static void
1688 bb_estimate_probability_locally (basic_block bb)
1689 {
1690   rtx last_insn = BB_END (bb);
1691   rtx cond;
1692
1693   if (! can_predict_insn_p (last_insn))
1694     return;
1695   cond = get_condition (last_insn, NULL, false, false);
1696   if (! cond)
1697     return;
1698
1699   /* Try "pointer heuristic."
1700      A comparison ptr == 0 is predicted as false.
1701      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1702   if (COMPARISON_P (cond)
1703       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1704           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1705     {
1706       if (GET_CODE (cond) == EQ)
1707         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1708       else if (GET_CODE (cond) == NE)
1709         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1710     }
1711   else
1712
1713   /* Try "opcode heuristic."
1714      EQ tests are usually false and NE tests are usually true. Also,
1715      most quantities are positive, so we can make the appropriate guesses
1716      about signed comparisons against zero.  */
1717     switch (GET_CODE (cond))
1718       {
1719       case CONST_INT:
1720         /* Unconditional branch.  */
1721         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1722                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1723         break;
1724
1725       case EQ:
1726       case UNEQ:
1727         /* Floating point comparisons appears to behave in a very
1728            unpredictable way because of special role of = tests in
1729            FP code.  */
1730         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1731           ;
1732         /* Comparisons with 0 are often used for booleans and there is
1733            nothing useful to predict about them.  */
1734         else if (XEXP (cond, 1) == const0_rtx
1735                  || XEXP (cond, 0) == const0_rtx)
1736           ;
1737         else
1738           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1739         break;
1740
1741       case NE:
1742       case LTGT:
1743         /* Floating point comparisons appears to behave in a very
1744            unpredictable way because of special role of = tests in
1745            FP code.  */
1746         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1747           ;
1748         /* Comparisons with 0 are often used for booleans and there is
1749            nothing useful to predict about them.  */
1750         else if (XEXP (cond, 1) == const0_rtx
1751                  || XEXP (cond, 0) == const0_rtx)
1752           ;
1753         else
1754           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1755         break;
1756
1757       case ORDERED:
1758         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1759         break;
1760
1761       case UNORDERED:
1762         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1763         break;
1764
1765       case LE:
1766       case LT:
1767         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1768             || XEXP (cond, 1) == constm1_rtx)
1769           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1770         break;
1771
1772       case GE:
1773       case GT:
1774         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1775             || XEXP (cond, 1) == constm1_rtx)
1776           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1777         break;
1778
1779       default:
1780         break;
1781       }
1782 }
1783
1784 /* Set edge->probability for each successor edge of BB.  */
1785 void
1786 guess_outgoing_edge_probabilities (basic_block bb)
1787 {
1788   bb_estimate_probability_locally (bb);
1789   combine_predictions_for_insn (BB_END (bb), bb);
1790 }
1791 \f
1792 static tree expr_expected_value (tree, bitmap);
1793
1794 /* Helper function for expr_expected_value.  */
1795
1796 static tree
1797 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1798                        tree op1, bitmap visited)
1799 {
1800   gimple def;
1801
1802   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1803     {
1804       if (TREE_CONSTANT (op0))
1805         return op0;
1806
1807       if (code != SSA_NAME)
1808         return NULL_TREE;
1809
1810       def = SSA_NAME_DEF_STMT (op0);
1811
1812       /* If we were already here, break the infinite cycle.  */
1813       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1814         return NULL;
1815
1816       if (gimple_code (def) == GIMPLE_PHI)
1817         {
1818           /* All the arguments of the PHI node must have the same constant
1819              length.  */
1820           int i, n = gimple_phi_num_args (def);
1821           tree val = NULL, new_val;
1822
1823           for (i = 0; i < n; i++)
1824             {
1825               tree arg = PHI_ARG_DEF (def, i);
1826
1827               /* If this PHI has itself as an argument, we cannot
1828                  determine the string length of this argument.  However,
1829                  if we can find an expected constant value for the other
1830                  PHI args then we can still be sure that this is
1831                  likely a constant.  So be optimistic and just
1832                  continue with the next argument.  */
1833               if (arg == PHI_RESULT (def))
1834                 continue;
1835
1836               new_val = expr_expected_value (arg, visited);
1837               if (!new_val)
1838                 return NULL;
1839               if (!val)
1840                 val = new_val;
1841               else if (!operand_equal_p (val, new_val, false))
1842                 return NULL;
1843             }
1844           return val;
1845         }
1846       if (is_gimple_assign (def))
1847         {
1848           if (gimple_assign_lhs (def) != op0)
1849             return NULL;
1850
1851           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1852                                         gimple_assign_rhs1 (def),
1853                                         gimple_assign_rhs_code (def),
1854                                         gimple_assign_rhs2 (def),
1855                                         visited);
1856         }
1857
1858       if (is_gimple_call (def))
1859         {
1860           tree decl = gimple_call_fndecl (def);
1861           if (!decl)
1862             return NULL;
1863           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1864             switch (DECL_FUNCTION_CODE (decl))
1865               {
1866               case BUILT_IN_EXPECT:
1867                 {
1868                   tree val;
1869                   if (gimple_call_num_args (def) != 2)
1870                     return NULL;
1871                   val = gimple_call_arg (def, 0);
1872                   if (TREE_CONSTANT (val))
1873                     return val;
1874                   return gimple_call_arg (def, 1);
1875                 }
1876
1877               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1878               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1879               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1880               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1881               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1882               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1883               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1884               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1885               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1886               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1887               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1888               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1889               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1890                 /* Assume that any given atomic operation has low contention,
1891                    and thus the compare-and-swap operation succeeds.  */
1892                 return boolean_true_node;
1893             }
1894         }
1895
1896       return NULL;
1897     }
1898
1899   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1900     {
1901       tree res;
1902       op0 = expr_expected_value (op0, visited);
1903       if (!op0)
1904         return NULL;
1905       op1 = expr_expected_value (op1, visited);
1906       if (!op1)
1907         return NULL;
1908       res = fold_build2 (code, type, op0, op1);
1909       if (TREE_CONSTANT (res))
1910         return res;
1911       return NULL;
1912     }
1913   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1914     {
1915       tree res;
1916       op0 = expr_expected_value (op0, visited);
1917       if (!op0)
1918         return NULL;
1919       res = fold_build1 (code, type, op0);
1920       if (TREE_CONSTANT (res))
1921         return res;
1922       return NULL;
1923     }
1924   return NULL;
1925 }
1926
1927 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1928    The function is used by builtin_expect branch predictor so the evidence
1929    must come from this construct and additional possible constant folding.
1930
1931    We may want to implement more involved value guess (such as value range
1932    propagation based prediction), but such tricks shall go to new
1933    implementation.  */
1934
1935 static tree
1936 expr_expected_value (tree expr, bitmap visited)
1937 {
1938   enum tree_code code;
1939   tree op0, op1;
1940
1941   if (TREE_CONSTANT (expr))
1942     return expr;
1943
1944   extract_ops_from_tree (expr, &code, &op0, &op1);
1945   return expr_expected_value_1 (TREE_TYPE (expr),
1946                                 op0, code, op1, visited);
1947 }
1948
1949 \f
1950 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1951    we no longer need.  */
1952 static unsigned int
1953 strip_predict_hints (void)
1954 {
1955   basic_block bb;
1956   gimple ass_stmt;
1957   tree var;
1958
1959   FOR_EACH_BB (bb)
1960     {
1961       gimple_stmt_iterator bi;
1962       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1963         {
1964           gimple stmt = gsi_stmt (bi);
1965
1966           if (gimple_code (stmt) == GIMPLE_PREDICT)
1967             {
1968               gsi_remove (&bi, true);
1969               continue;
1970             }
1971           else if (gimple_code (stmt) == GIMPLE_CALL)
1972             {
1973               tree fndecl = gimple_call_fndecl (stmt);
1974
1975               if (fndecl
1976                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1977                   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1978                   && gimple_call_num_args (stmt) == 2)
1979                 {
1980                   var = gimple_call_lhs (stmt);
1981                   if (var)
1982                     {
1983                       ass_stmt
1984                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1985                       gsi_replace (&bi, ass_stmt, true);
1986                     }
1987                   else
1988                     {
1989                       gsi_remove (&bi, true);
1990                       continue;
1991                     }
1992                 }
1993             }
1994           gsi_next (&bi);
1995         }
1996     }
1997   return 0;
1998 }
1999 \f
2000 /* Predict using opcode of the last statement in basic block.  */
2001 static void
2002 tree_predict_by_opcode (basic_block bb)
2003 {
2004   gimple stmt = last_stmt (bb);
2005   edge then_edge;
2006   tree op0, op1;
2007   tree type;
2008   tree val;
2009   enum tree_code cmp;
2010   bitmap visited;
2011   edge_iterator ei;
2012
2013   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2014     return;
2015   FOR_EACH_EDGE (then_edge, ei, bb->succs)
2016     if (then_edge->flags & EDGE_TRUE_VALUE)
2017       break;
2018   op0 = gimple_cond_lhs (stmt);
2019   op1 = gimple_cond_rhs (stmt);
2020   cmp = gimple_cond_code (stmt);
2021   type = TREE_TYPE (op0);
2022   visited = BITMAP_ALLOC (NULL);
2023   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
2024   BITMAP_FREE (visited);
2025   if (val)
2026     {
2027       int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2028
2029       gcc_assert (percent >= 0 && percent <= 100);
2030       if (integer_zerop (val))
2031         percent = 100 - percent;
2032       predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2033     }
2034   /* Try "pointer heuristic."
2035      A comparison ptr == 0 is predicted as false.
2036      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2037   if (POINTER_TYPE_P (type))
2038     {
2039       if (cmp == EQ_EXPR)
2040         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2041       else if (cmp == NE_EXPR)
2042         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2043     }
2044   else
2045
2046   /* Try "opcode heuristic."
2047      EQ tests are usually false and NE tests are usually true. Also,
2048      most quantities are positive, so we can make the appropriate guesses
2049      about signed comparisons against zero.  */
2050     switch (cmp)
2051       {
2052       case EQ_EXPR:
2053       case UNEQ_EXPR:
2054         /* Floating point comparisons appears to behave in a very
2055            unpredictable way because of special role of = tests in
2056            FP code.  */
2057         if (FLOAT_TYPE_P (type))
2058           ;
2059         /* Comparisons with 0 are often used for booleans and there is
2060            nothing useful to predict about them.  */
2061         else if (integer_zerop (op0) || integer_zerop (op1))
2062           ;
2063         else
2064           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2065         break;
2066
2067       case NE_EXPR:
2068       case LTGT_EXPR:
2069         /* Floating point comparisons appears to behave in a very
2070            unpredictable way because of special role of = tests in
2071            FP code.  */
2072         if (FLOAT_TYPE_P (type))
2073           ;
2074         /* Comparisons with 0 are often used for booleans and there is
2075            nothing useful to predict about them.  */
2076         else if (integer_zerop (op0)
2077                  || integer_zerop (op1))
2078           ;
2079         else
2080           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2081         break;
2082
2083       case ORDERED_EXPR:
2084         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2085         break;
2086
2087       case UNORDERED_EXPR:
2088         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2089         break;
2090
2091       case LE_EXPR:
2092       case LT_EXPR:
2093         if (integer_zerop (op1)
2094             || integer_onep (op1)
2095             || integer_all_onesp (op1)
2096             || real_zerop (op1)
2097             || real_onep (op1)
2098             || real_minus_onep (op1))
2099           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2100         break;
2101
2102       case GE_EXPR:
2103       case GT_EXPR:
2104         if (integer_zerop (op1)
2105             || integer_onep (op1)
2106             || integer_all_onesp (op1)
2107             || real_zerop (op1)
2108             || real_onep (op1)
2109             || real_minus_onep (op1))
2110           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2111         break;
2112
2113       default:
2114         break;
2115       }
2116 }
2117
2118 /* Try to guess whether the value of return means error code.  */
2119
2120 static enum br_predictor
2121 return_prediction (tree val, enum prediction *prediction)
2122 {
2123   /* VOID.  */
2124   if (!val)
2125     return PRED_NO_PREDICTION;
2126   /* Different heuristics for pointers and scalars.  */
2127   if (POINTER_TYPE_P (TREE_TYPE (val)))
2128     {
2129       /* NULL is usually not returned.  */
2130       if (integer_zerop (val))
2131         {
2132           *prediction = NOT_TAKEN;
2133           return PRED_NULL_RETURN;
2134         }
2135     }
2136   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2137     {
2138       /* Negative return values are often used to indicate
2139          errors.  */
2140       if (TREE_CODE (val) == INTEGER_CST
2141           && tree_int_cst_sgn (val) < 0)
2142         {
2143           *prediction = NOT_TAKEN;
2144           return PRED_NEGATIVE_RETURN;
2145         }
2146       /* Constant return values seems to be commonly taken.
2147          Zero/one often represent booleans so exclude them from the
2148          heuristics.  */
2149       if (TREE_CONSTANT (val)
2150           && (!integer_zerop (val) && !integer_onep (val)))
2151         {
2152           *prediction = TAKEN;
2153           return PRED_CONST_RETURN;
2154         }
2155     }
2156   return PRED_NO_PREDICTION;
2157 }
2158
2159 /* Find the basic block with return expression and look up for possible
2160    return value trying to apply RETURN_PREDICTION heuristics.  */
2161 static void
2162 apply_return_prediction (void)
2163 {
2164   gimple return_stmt = NULL;
2165   tree return_val;
2166   edge e;
2167   gimple phi;
2168   int phi_num_args, i;
2169   enum br_predictor pred;
2170   enum prediction direction;
2171   edge_iterator ei;
2172
2173   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2174     {
2175       return_stmt = last_stmt (e->src);
2176       if (return_stmt
2177           && gimple_code (return_stmt) == GIMPLE_RETURN)
2178         break;
2179     }
2180   if (!e)
2181     return;
2182   return_val = gimple_return_retval (return_stmt);
2183   if (!return_val)
2184     return;
2185   if (TREE_CODE (return_val) != SSA_NAME
2186       || !SSA_NAME_DEF_STMT (return_val)
2187       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2188     return;
2189   phi = SSA_NAME_DEF_STMT (return_val);
2190   phi_num_args = gimple_phi_num_args (phi);
2191   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2192
2193   /* Avoid the degenerate case where all return values form the function
2194      belongs to same category (ie they are all positive constants)
2195      so we can hardly say something about them.  */
2196   for (i = 1; i < phi_num_args; i++)
2197     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2198       break;
2199   if (i != phi_num_args)
2200     for (i = 0; i < phi_num_args; i++)
2201       {
2202         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2203         if (pred != PRED_NO_PREDICTION)
2204           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2205                                          direction);
2206       }
2207 }
2208
2209 /* Look for basic block that contains unlikely to happen events
2210    (such as noreturn calls) and mark all paths leading to execution
2211    of this basic blocks as unlikely.  */
2212
2213 static void
2214 tree_bb_level_predictions (void)
2215 {
2216   basic_block bb;
2217   bool has_return_edges = false;
2218   edge e;
2219   edge_iterator ei;
2220
2221   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2222     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2223       {
2224         has_return_edges = true;
2225         break;
2226       }
2227
2228   apply_return_prediction ();
2229
2230   FOR_EACH_BB (bb)
2231     {
2232       gimple_stmt_iterator gsi;
2233
2234       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2235         {
2236           gimple stmt = gsi_stmt (gsi);
2237           tree decl;
2238
2239           if (is_gimple_call (stmt))
2240             {
2241               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2242                   && has_return_edges)
2243                 predict_paths_leading_to (bb, PRED_NORETURN,
2244                                           NOT_TAKEN);
2245               decl = gimple_call_fndecl (stmt);
2246               if (decl
2247                   && lookup_attribute ("cold",
2248                                        DECL_ATTRIBUTES (decl)))
2249                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2250                                           NOT_TAKEN);
2251             }
2252           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2253             {
2254               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2255                                         gimple_predict_outcome (stmt));
2256               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2257                  hints to callers.  */
2258             }
2259         }
2260     }
2261 }
2262
2263 #ifdef ENABLE_CHECKING
2264
2265 /* Callback for pointer_map_traverse, asserts that the pointer map is
2266    empty.  */
2267
2268 static bool
2269 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2270                  void *data ATTRIBUTE_UNUSED)
2271 {
2272   gcc_assert (!*value);
2273   return false;
2274 }
2275 #endif
2276
2277 /* Predict branch probabilities and estimate profile for basic block BB.  */
2278
2279 static void
2280 tree_estimate_probability_bb (basic_block bb)
2281 {
2282   edge e;
2283   edge_iterator ei;
2284   gimple last;
2285
2286   FOR_EACH_EDGE (e, ei, bb->succs)
2287     {
2288       /* Predict edges to user labels with attributes.  */
2289       if (e->dest != EXIT_BLOCK_PTR)
2290         {
2291           gimple_stmt_iterator gi;
2292           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2293             {
2294               gimple stmt = gsi_stmt (gi);
2295               tree decl;
2296
2297               if (gimple_code (stmt) != GIMPLE_LABEL)
2298                 break;
2299               decl = gimple_label_label (stmt);
2300               if (DECL_ARTIFICIAL (decl))
2301                 continue;
2302
2303               /* Finally, we have a user-defined label.  */
2304               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2305                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2306               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2307                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2308             }
2309         }
2310
2311       /* Predict early returns to be probable, as we've already taken
2312          care for error returns and other cases are often used for
2313          fast paths through function.
2314
2315          Since we've already removed the return statements, we are
2316          looking for CFG like:
2317
2318          if (conditional)
2319          {
2320          ..
2321          goto return_block
2322          }
2323          some other blocks
2324          return_block:
2325          return_stmt.  */
2326       if (e->dest != bb->next_bb
2327           && e->dest != EXIT_BLOCK_PTR
2328           && single_succ_p (e->dest)
2329           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
2330           && (last = last_stmt (e->dest)) != NULL
2331           && gimple_code (last) == GIMPLE_RETURN)
2332         {
2333           edge e1;
2334           edge_iterator ei1;
2335
2336           if (single_succ_p (bb))
2337             {
2338               FOR_EACH_EDGE (e1, ei1, bb->preds)
2339                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2340                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2341                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2342                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2343             }
2344           else
2345             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2346                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2347                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2348               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2349         }
2350
2351       /* Look for block we are guarding (ie we dominate it,
2352          but it doesn't postdominate us).  */
2353       if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
2354           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2355           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2356         {
2357           gimple_stmt_iterator bi;
2358
2359           /* The call heuristic claims that a guarded function call
2360              is improbable.  This is because such calls are often used
2361              to signal exceptional situations such as printing error
2362              messages.  */
2363           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2364                gsi_next (&bi))
2365             {
2366               gimple stmt = gsi_stmt (bi);
2367               if (is_gimple_call (stmt)
2368                   /* Constant and pure calls are hardly used to signalize
2369                      something exceptional.  */
2370                   && gimple_has_side_effects (stmt))
2371                 {
2372                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2373                   break;
2374                 }
2375             }
2376         }
2377     }
2378   tree_predict_by_opcode (bb);
2379 }
2380
2381 /* Predict branch probabilities and estimate profile of the tree CFG.
2382    This function can be called from the loop optimizers to recompute
2383    the profile information.  */
2384
2385 void
2386 tree_estimate_probability (void)
2387 {
2388   basic_block bb;
2389
2390   add_noreturn_fake_exit_edges ();
2391   connect_infinite_loops_to_exit ();
2392   /* We use loop_niter_by_eval, which requires that the loops have
2393      preheaders.  */
2394   create_preheaders (CP_SIMPLE_PREHEADERS);
2395   calculate_dominance_info (CDI_POST_DOMINATORS);
2396
2397   bb_predictions = pointer_map_create ();
2398   tree_bb_level_predictions ();
2399   record_loop_exits ();
2400
2401   if (number_of_loops (cfun) > 1)
2402     predict_loops ();
2403
2404   FOR_EACH_BB (bb)
2405     tree_estimate_probability_bb (bb);
2406
2407   FOR_EACH_BB (bb)
2408     combine_predictions_for_bb (bb);
2409
2410 #ifdef ENABLE_CHECKING
2411   pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2412 #endif
2413   pointer_map_destroy (bb_predictions);
2414   bb_predictions = NULL;
2415
2416   estimate_bb_frequencies (false);
2417   free_dominance_info (CDI_POST_DOMINATORS);
2418   remove_fake_exit_edges ();
2419 }
2420
2421 /* Predict branch probabilities and estimate profile of the tree CFG.
2422    This is the driver function for PASS_PROFILE.  */
2423
2424 static unsigned int
2425 tree_estimate_probability_driver (void)
2426 {
2427   unsigned nb_loops;
2428
2429   loop_optimizer_init (LOOPS_NORMAL);
2430   if (dump_file && (dump_flags & TDF_DETAILS))
2431     flow_loops_dump (dump_file, NULL, 0);
2432
2433   mark_irreducible_loops ();
2434
2435   nb_loops = number_of_loops (cfun);
2436   if (nb_loops > 1)
2437     scev_initialize ();
2438
2439   tree_estimate_probability ();
2440
2441   if (nb_loops > 1)
2442     scev_finalize ();
2443
2444   loop_optimizer_finalize ();
2445   if (dump_file && (dump_flags & TDF_DETAILS))
2446     gimple_dump_cfg (dump_file, dump_flags);
2447   if (profile_status == PROFILE_ABSENT)
2448     profile_status = PROFILE_GUESSED;
2449   return 0;
2450 }
2451 \f
2452 /* Predict edges to successors of CUR whose sources are not postdominated by
2453    BB by PRED and recurse to all postdominators.  */
2454
2455 static void
2456 predict_paths_for_bb (basic_block cur, basic_block bb,
2457                       enum br_predictor pred,
2458                       enum prediction taken,
2459                       bitmap visited)
2460 {
2461   edge e;
2462   edge_iterator ei;
2463   basic_block son;
2464
2465   /* We are looking for all edges forming edge cut induced by
2466      set of all blocks postdominated by BB.  */
2467   FOR_EACH_EDGE (e, ei, cur->preds)
2468     if (e->src->index >= NUM_FIXED_BLOCKS
2469         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2470     {
2471       edge e2;
2472       edge_iterator ei2;
2473       bool found = false;
2474
2475       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2476       if (e->flags & (EDGE_EH | EDGE_FAKE))
2477         continue;
2478       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2479
2480       /* See if there is an edge from e->src that is not abnormal
2481          and does not lead to BB.  */
2482       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2483         if (e2 != e
2484             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2485             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2486           {
2487             found = true;
2488             break;
2489           }
2490
2491       /* If there is non-abnormal path leaving e->src, predict edge
2492          using predictor.  Otherwise we need to look for paths
2493          leading to e->src.
2494
2495          The second may lead to infinite loop in the case we are predicitng
2496          regions that are only reachable by abnormal edges.  We simply
2497          prevent visiting given BB twice.  */
2498       if (found)
2499         predict_edge_def (e, pred, taken);
2500       else if (bitmap_set_bit (visited, e->src->index))
2501         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2502     }
2503   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2504        son;
2505        son = next_dom_son (CDI_POST_DOMINATORS, son))
2506     predict_paths_for_bb (son, bb, pred, taken, visited);
2507 }
2508
2509 /* Sets branch probabilities according to PREDiction and
2510    FLAGS.  */
2511
2512 static void
2513 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2514                           enum prediction taken)
2515 {
2516   bitmap visited = BITMAP_ALLOC (NULL);
2517   predict_paths_for_bb (bb, bb, pred, taken, visited);
2518   BITMAP_FREE (visited);
2519 }
2520
2521 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2522
2523 static void
2524 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2525                                enum prediction taken)
2526 {
2527   bool has_nonloop_edge = false;
2528   edge_iterator ei;
2529   edge e2;
2530
2531   basic_block bb = e->src;
2532   FOR_EACH_EDGE (e2, ei, bb->succs)
2533     if (e2->dest != e->src && e2->dest != e->dest
2534         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2535         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2536       {
2537         has_nonloop_edge = true;
2538         break;
2539       }
2540   if (!has_nonloop_edge)
2541     {
2542       bitmap visited = BITMAP_ALLOC (NULL);
2543       predict_paths_for_bb (bb, bb, pred, taken, visited);
2544       BITMAP_FREE (visited);
2545     }
2546   else
2547     predict_edge_def (e, pred, taken);
2548 }
2549 \f
2550 /* This is used to carry information about basic blocks.  It is
2551    attached to the AUX field of the standard CFG block.  */
2552
2553 typedef struct block_info_def
2554 {
2555   /* Estimated frequency of execution of basic_block.  */
2556   sreal frequency;
2557
2558   /* To keep queue of basic blocks to process.  */
2559   basic_block next;
2560
2561   /* Number of predecessors we need to visit first.  */
2562   int npredecessors;
2563 } *block_info;
2564
2565 /* Similar information for edges.  */
2566 typedef struct edge_info_def
2567 {
2568   /* In case edge is a loopback edge, the probability edge will be reached
2569      in case header is.  Estimated number of iterations of the loop can be
2570      then computed as 1 / (1 - back_edge_prob).  */
2571   sreal back_edge_prob;
2572   /* True if the edge is a loopback edge in the natural loop.  */
2573   unsigned int back_edge:1;
2574 } *edge_info;
2575
2576 #define BLOCK_INFO(B)   ((block_info) (B)->aux)
2577 #define EDGE_INFO(E)    ((edge_info) (E)->aux)
2578
2579 /* Helper function for estimate_bb_frequencies.
2580    Propagate the frequencies in blocks marked in
2581    TOVISIT, starting in HEAD.  */
2582
2583 static void
2584 propagate_freq (basic_block head, bitmap tovisit)
2585 {
2586   basic_block bb;
2587   basic_block last;
2588   unsigned i;
2589   edge e;
2590   basic_block nextbb;
2591   bitmap_iterator bi;
2592
2593   /* For each basic block we need to visit count number of his predecessors
2594      we need to visit first.  */
2595   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2596     {
2597       edge_iterator ei;
2598       int count = 0;
2599
2600       bb = BASIC_BLOCK (i);
2601
2602       FOR_EACH_EDGE (e, ei, bb->preds)
2603         {
2604           bool visit = bitmap_bit_p (tovisit, e->src->index);
2605
2606           if (visit && !(e->flags & EDGE_DFS_BACK))
2607             count++;
2608           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2609             fprintf (dump_file,
2610                      "Irreducible region hit, ignoring edge to %i->%i\n",
2611                      e->src->index, bb->index);
2612         }
2613       BLOCK_INFO (bb)->npredecessors = count;
2614       /* When function never returns, we will never process exit block.  */
2615       if (!count && bb == EXIT_BLOCK_PTR)
2616         bb->count = bb->frequency = 0;
2617     }
2618
2619   memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2620   last = head;
2621   for (bb = head; bb; bb = nextbb)
2622     {
2623       edge_iterator ei;
2624       sreal cyclic_probability, frequency;
2625
2626       memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2627       memcpy (&frequency, &real_zero, sizeof (real_zero));
2628
2629       nextbb = BLOCK_INFO (bb)->next;
2630       BLOCK_INFO (bb)->next = NULL;
2631
2632       /* Compute frequency of basic block.  */
2633       if (bb != head)
2634         {
2635 #ifdef ENABLE_CHECKING
2636           FOR_EACH_EDGE (e, ei, bb->preds)
2637             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2638                         || (e->flags & EDGE_DFS_BACK));
2639 #endif
2640
2641           FOR_EACH_EDGE (e, ei, bb->preds)
2642             if (EDGE_INFO (e)->back_edge)
2643               {
2644                 sreal_add (&cyclic_probability, &cyclic_probability,
2645                            &EDGE_INFO (e)->back_edge_prob);
2646               }
2647             else if (!(e->flags & EDGE_DFS_BACK))
2648               {
2649                 sreal tmp;
2650
2651                 /*  frequency += (e->probability
2652                                   * BLOCK_INFO (e->src)->frequency /
2653                                   REG_BR_PROB_BASE);  */
2654
2655                 sreal_init (&tmp, e->probability, 0);
2656                 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2657                 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2658                 sreal_add (&frequency, &frequency, &tmp);
2659               }
2660
2661           if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2662             {
2663               memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2664                       sizeof (frequency));
2665             }
2666           else
2667             {
2668               if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2669                 {
2670                   memcpy (&cyclic_probability, &real_almost_one,
2671                           sizeof (real_almost_one));
2672                 }
2673
2674               /* BLOCK_INFO (bb)->frequency = frequency
2675                                               / (1 - cyclic_probability) */
2676
2677               sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2678               sreal_div (&BLOCK_INFO (bb)->frequency,
2679                          &frequency, &cyclic_probability);
2680             }
2681         }
2682
2683       bitmap_clear_bit (tovisit, bb->index);
2684
2685       e = find_edge (bb, head);
2686       if (e)
2687         {
2688           sreal tmp;
2689
2690           /* EDGE_INFO (e)->back_edge_prob
2691              = ((e->probability * BLOCK_INFO (bb)->frequency)
2692              / REG_BR_PROB_BASE); */
2693
2694           sreal_init (&tmp, e->probability, 0);
2695           sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2696           sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2697                      &tmp, &real_inv_br_prob_base);
2698         }
2699
2700       /* Propagate to successor blocks.  */
2701       FOR_EACH_EDGE (e, ei, bb->succs)
2702         if (!(e->flags & EDGE_DFS_BACK)
2703             && BLOCK_INFO (e->dest)->npredecessors)
2704           {
2705             BLOCK_INFO (e->dest)->npredecessors--;
2706             if (!BLOCK_INFO (e->dest)->npredecessors)
2707               {
2708                 if (!nextbb)
2709                   nextbb = e->dest;
2710                 else
2711                   BLOCK_INFO (last)->next = e->dest;
2712
2713                 last = e->dest;
2714               }
2715           }
2716     }
2717 }
2718
2719 /* Estimate frequencies in loops at same nest level.  */
2720
2721 static void
2722 estimate_loops_at_level (struct loop *first_loop)
2723 {
2724   struct loop *loop;
2725
2726   for (loop = first_loop; loop; loop = loop->next)
2727     {
2728       edge e;
2729       basic_block *bbs;
2730       unsigned i;
2731       bitmap tovisit = BITMAP_ALLOC (NULL);
2732
2733       estimate_loops_at_level (loop->inner);
2734
2735       /* Find current loop back edge and mark it.  */
2736       e = loop_latch_edge (loop);
2737       EDGE_INFO (e)->back_edge = 1;
2738
2739       bbs = get_loop_body (loop);
2740       for (i = 0; i < loop->num_nodes; i++)
2741         bitmap_set_bit (tovisit, bbs[i]->index);
2742       free (bbs);
2743       propagate_freq (loop->header, tovisit);
2744       BITMAP_FREE (tovisit);
2745     }
2746 }
2747
2748 /* Propagates frequencies through structure of loops.  */
2749
2750 static void
2751 estimate_loops (void)
2752 {
2753   bitmap tovisit = BITMAP_ALLOC (NULL);
2754   basic_block bb;
2755
2756   /* Start by estimating the frequencies in the loops.  */
2757   if (number_of_loops (cfun) > 1)
2758     estimate_loops_at_level (current_loops->tree_root->inner);
2759
2760   /* Now propagate the frequencies through all the blocks.  */
2761   FOR_ALL_BB (bb)
2762     {
2763       bitmap_set_bit (tovisit, bb->index);
2764     }
2765   propagate_freq (ENTRY_BLOCK_PTR, tovisit);
2766   BITMAP_FREE (tovisit);
2767 }
2768
2769 /* Drop the profile for NODE to guessed, and update its frequency based on
2770    whether it is expected to be hot given the CALL_COUNT.  */
2771
2772 static void
2773 drop_profile (struct cgraph_node *node, gcov_type call_count)
2774 {
2775   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2776   /* In the case where this was called by another function with a
2777      dropped profile, call_count will be 0. Since there are no
2778      non-zero call counts to this function, we don't know for sure
2779      whether it is hot, and therefore it will be marked normal below.  */
2780   bool hot = maybe_hot_count_p (NULL, call_count);
2781
2782   if (dump_file)
2783     fprintf (dump_file,
2784              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2785              node->name (), node->order,
2786              hot ? "Function is hot" : "Function is normal");
2787   /* We only expect to miss profiles for functions that are reached
2788      via non-zero call edges in cases where the function may have
2789      been linked from another module or library (COMDATs and extern
2790      templates). See the comments below for handle_missing_profiles.
2791      Also, only warn in cases where the missing counts exceed the
2792      number of training runs. In certain cases with an execv followed
2793      by a no-return call the profile for the no-return call is not
2794      dumped and there can be a mismatch.  */
2795   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2796       && call_count > profile_info->runs)
2797     {
2798       if (flag_profile_correction)
2799         {
2800           if (dump_file)
2801             fprintf (dump_file,
2802                      "Missing counts for called function %s/%i\n",
2803                      node->name (), node->order);
2804         }
2805       else
2806         warning (0, "Missing counts for called function %s/%i",
2807                  node->name (), node->order);
2808     }
2809
2810   profile_status_for_function (fn)
2811       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2812   node->frequency
2813       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2814 }
2815
2816 /* In the case of COMDAT routines, multiple object files will contain the same
2817    function and the linker will select one for the binary. In that case
2818    all the other copies from the profile instrument binary will be missing
2819    profile counts. Look for cases where this happened, due to non-zero
2820    call counts going to 0-count functions, and drop the profile to guessed
2821    so that we can use the estimated probabilities and avoid optimizing only
2822    for size.
2823
2824    The other case where the profile may be missing is when the routine
2825    is not going to be emitted to the object file, e.g. for "extern template"
2826    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2827    all other cases of non-zero calls to 0-count functions.  */
2828
2829 void
2830 handle_missing_profiles (void)
2831 {
2832   struct cgraph_node *node;
2833   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2834   vec<struct cgraph_node *> worklist;
2835   worklist.create (64);
2836
2837   /* See if 0 count function has non-0 count callers.  In this case we
2838      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2839   FOR_EACH_DEFINED_FUNCTION (node)
2840     {
2841       struct cgraph_edge *e;
2842       gcov_type call_count = 0;
2843       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2844
2845       if (node->count)
2846         continue;
2847       for (e = node->callers; e; e = e->next_caller)
2848         call_count += e->count;
2849       if (call_count
2850           && fn && fn->cfg
2851           && (call_count * unlikely_count_fraction >= profile_info->runs))
2852         {
2853           drop_profile (node, call_count);
2854           worklist.safe_push (node);
2855         }
2856     }
2857
2858   /* Propagate the profile dropping to other 0-count COMDATs that are
2859      potentially called by COMDATs we already dropped the profile on.  */
2860   while (worklist.length () > 0)
2861     {
2862       struct cgraph_edge *e;
2863
2864       node = worklist.pop ();
2865       for (e = node->callees; e; e = e->next_caller)
2866         {
2867           struct cgraph_node *callee = e->callee;
2868           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2869
2870           if (callee->count > 0)
2871             continue;
2872           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2873               && profile_status_for_function (fn) == PROFILE_READ)
2874             {
2875               drop_profile (node, 0);
2876               worklist.safe_push (callee);
2877             }
2878         }
2879     }
2880   worklist.release ();
2881 }
2882
2883 /* Convert counts measured by profile driven feedback to frequencies.
2884    Return nonzero iff there was any nonzero execution count.  */
2885
2886 int
2887 counts_to_freqs (void)
2888 {
2889   gcov_type count_max, true_count_max = 0;
2890   basic_block bb;
2891
2892   /* Don't overwrite the estimated frequencies when the profile for
2893      the function is missing.  We may drop this function PROFILE_GUESSED
2894      later in drop_profile ().  */
2895   if (!ENTRY_BLOCK_PTR->count)
2896     return 0;
2897
2898   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2899     true_count_max = MAX (bb->count, true_count_max);
2900
2901   count_max = MAX (true_count_max, 1);
2902   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2903     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2904
2905   return true_count_max;
2906 }
2907
2908 /* Return true if function is likely to be expensive, so there is no point to
2909    optimize performance of prologue, epilogue or do inlining at the expense
2910    of code size growth.  THRESHOLD is the limit of number of instructions
2911    function can execute at average to be still considered not expensive.  */
2912
2913 bool
2914 expensive_function_p (int threshold)
2915 {
2916   unsigned int sum = 0;
2917   basic_block bb;
2918   unsigned int limit;
2919
2920   /* We can not compute accurately for large thresholds due to scaled
2921      frequencies.  */
2922   gcc_assert (threshold <= BB_FREQ_MAX);
2923
2924   /* Frequencies are out of range.  This either means that function contains
2925      internal loop executing more than BB_FREQ_MAX times or profile feedback
2926      is available and function has not been executed at all.  */
2927   if (ENTRY_BLOCK_PTR->frequency == 0)
2928     return true;
2929
2930   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2931   limit = ENTRY_BLOCK_PTR->frequency * threshold;
2932   FOR_EACH_BB (bb)
2933     {
2934       rtx insn;
2935
2936       FOR_BB_INSNS (bb, insn)
2937         if (active_insn_p (insn))
2938           {
2939             sum += bb->frequency;
2940             if (sum > limit)
2941               return true;
2942         }
2943     }
2944
2945   return false;
2946 }
2947
2948 /* Estimate and propagate basic block frequencies using the given branch
2949    probabilities.  If FORCE is true, the frequencies are used to estimate
2950    the counts even when there are already non-zero profile counts.  */
2951
2952 void
2953 estimate_bb_frequencies (bool force)
2954 {
2955   basic_block bb;
2956   sreal freq_max;
2957
2958   if (force || profile_status != PROFILE_READ || !counts_to_freqs ())
2959     {
2960       static int real_values_initialized = 0;
2961
2962       if (!real_values_initialized)
2963         {
2964           real_values_initialized = 1;
2965           sreal_init (&real_zero, 0, 0);
2966           sreal_init (&real_one, 1, 0);
2967           sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2968           sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2969           sreal_init (&real_one_half, 1, -1);
2970           sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2971           sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2972         }
2973
2974       mark_dfs_back_edges ();
2975
2976       single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2977
2978       /* Set up block info for each basic block.  */
2979       alloc_aux_for_blocks (sizeof (struct block_info_def));
2980       alloc_aux_for_edges (sizeof (struct edge_info_def));
2981       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2982         {
2983           edge e;
2984           edge_iterator ei;
2985
2986           FOR_EACH_EDGE (e, ei, bb->succs)
2987             {
2988               sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2989               sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2990                          &EDGE_INFO (e)->back_edge_prob,
2991                          &real_inv_br_prob_base);
2992             }
2993         }
2994
2995       /* First compute frequencies locally for each loop from innermost
2996          to outermost to examine frequencies for back edges.  */
2997       estimate_loops ();
2998
2999       memcpy (&freq_max, &real_zero, sizeof (real_zero));
3000       FOR_EACH_BB (bb)
3001         if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
3002           memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
3003
3004       sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
3005       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
3006         {
3007           sreal tmp;
3008
3009           sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
3010           sreal_add (&tmp, &tmp, &real_one_half);
3011           bb->frequency = sreal_to_int (&tmp);
3012         }
3013
3014       free_aux_for_blocks ();
3015       free_aux_for_edges ();
3016     }
3017   compute_function_frequency ();
3018 }
3019
3020 /* Decide whether function is hot, cold or unlikely executed.  */
3021 void
3022 compute_function_frequency (void)
3023 {
3024   basic_block bb;
3025   struct cgraph_node *node = cgraph_get_node (current_function_decl);
3026
3027   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3028       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3029     node->only_called_at_startup = true;
3030   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3031     node->only_called_at_exit = true;
3032
3033   if (profile_status != PROFILE_READ)
3034     {
3035       int flags = flags_from_decl_or_type (current_function_decl);
3036       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3037           != NULL)
3038         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3039       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3040                != NULL)
3041         node->frequency = NODE_FREQUENCY_HOT;
3042       else if (flags & ECF_NORETURN)
3043         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3044       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3045         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3046       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3047                || DECL_STATIC_DESTRUCTOR (current_function_decl))
3048         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3049       return;
3050     }
3051
3052   /* Only first time try to drop function into unlikely executed.
3053      After inlining the roundoff errors may confuse us.
3054      Ipa-profile pass will drop functions only called from unlikely
3055      functions to unlikely and that is most of what we care about.  */
3056   if (!cfun->after_inlining)
3057     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3058   FOR_EACH_BB (bb)
3059     {
3060       if (maybe_hot_bb_p (cfun, bb))
3061         {
3062           node->frequency = NODE_FREQUENCY_HOT;
3063           return;
3064         }
3065       if (!probably_never_executed_bb_p (cfun, bb))
3066         node->frequency = NODE_FREQUENCY_NORMAL;
3067     }
3068 }
3069
3070 static bool
3071 gate_estimate_probability (void)
3072 {
3073   return flag_guess_branch_prob;
3074 }
3075
3076 /* Build PREDICT_EXPR.  */
3077 tree
3078 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3079 {
3080   tree t = build1 (PREDICT_EXPR, void_type_node,
3081                    build_int_cst (integer_type_node, predictor));
3082   SET_PREDICT_EXPR_OUTCOME (t, taken);
3083   return t;
3084 }
3085
3086 const char *
3087 predictor_name (enum br_predictor predictor)
3088 {
3089   return predictor_info[predictor].name;
3090 }
3091
3092 namespace {
3093
3094 const pass_data pass_data_profile =
3095 {
3096   GIMPLE_PASS, /* type */
3097   "profile_estimate", /* name */
3098   OPTGROUP_NONE, /* optinfo_flags */
3099   true, /* has_gate */
3100   true, /* has_execute */
3101   TV_BRANCH_PROB, /* tv_id */
3102   PROP_cfg, /* properties_required */
3103   0, /* properties_provided */
3104   0, /* properties_destroyed */
3105   0, /* todo_flags_start */
3106   TODO_verify_ssa, /* todo_flags_finish */
3107 };
3108
3109 class pass_profile : public gimple_opt_pass
3110 {
3111 public:
3112   pass_profile (gcc::context *ctxt)
3113     : gimple_opt_pass (pass_data_profile, ctxt)
3114   {}
3115
3116   /* opt_pass methods: */
3117   bool gate () { return gate_estimate_probability (); }
3118   unsigned int execute () { return tree_estimate_probability_driver (); }
3119
3120 }; // class pass_profile
3121
3122 } // anon namespace
3123
3124 gimple_opt_pass *
3125 make_pass_profile (gcc::context *ctxt)
3126 {
3127   return new pass_profile (ctxt);
3128 }
3129
3130 namespace {
3131
3132 const pass_data pass_data_strip_predict_hints =
3133 {
3134   GIMPLE_PASS, /* type */
3135   "*strip_predict_hints", /* name */
3136   OPTGROUP_NONE, /* optinfo_flags */
3137   false, /* has_gate */
3138   true, /* has_execute */
3139   TV_BRANCH_PROB, /* tv_id */
3140   PROP_cfg, /* properties_required */
3141   0, /* properties_provided */
3142   0, /* properties_destroyed */
3143   0, /* todo_flags_start */
3144   TODO_verify_ssa, /* todo_flags_finish */
3145 };
3146
3147 class pass_strip_predict_hints : public gimple_opt_pass
3148 {
3149 public:
3150   pass_strip_predict_hints (gcc::context *ctxt)
3151     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3152   {}
3153
3154   /* opt_pass methods: */
3155   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3156   unsigned int execute () { return strip_predict_hints (); }
3157
3158 }; // class pass_strip_predict_hints
3159
3160 } // anon namespace
3161
3162 gimple_opt_pass *
3163 make_pass_strip_predict_hints (gcc::context *ctxt)
3164 {
3165   return new pass_strip_predict_hints (ctxt);
3166 }
3167
3168 /* Rebuild function frequencies.  Passes are in general expected to
3169    maintain profile by hand, however in some cases this is not possible:
3170    for example when inlining several functions with loops freuqencies might run
3171    out of scale and thus needs to be recomputed.  */
3172
3173 void
3174 rebuild_frequencies (void)
3175 {
3176   timevar_push (TV_REBUILD_FREQUENCIES);
3177
3178   /* When the max bb count in the function is small, there is a higher
3179      chance that there were truncation errors in the integer scaling
3180      of counts by inlining and other optimizations. This could lead
3181      to incorrect classification of code as being cold when it isn't.
3182      In that case, force the estimation of bb counts/frequencies from the
3183      branch probabilities, rather than computing frequencies from counts,
3184      which may also lead to frequencies incorrectly reduced to 0. There
3185      is less precision in the probabilities, so we only do this for small
3186      max counts.  */
3187   gcov_type count_max = 0;
3188   basic_block bb;
3189   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
3190     count_max = MAX (bb->count, count_max);
3191
3192   if (profile_status == PROFILE_GUESSED
3193       || (profile_status == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3194     {
3195       loop_optimizer_init (0);
3196       add_noreturn_fake_exit_edges ();
3197       mark_irreducible_loops ();
3198       connect_infinite_loops_to_exit ();
3199       estimate_bb_frequencies (true);
3200       remove_fake_exit_edges ();
3201       loop_optimizer_finalize ();
3202     }
3203   else if (profile_status == PROFILE_READ)
3204     counts_to_freqs ();
3205   else
3206     gcc_unreachable ();
3207   timevar_pop (TV_REBUILD_FREQUENCIES);
3208 }