gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "hash-set.h"
  24 #include "machmode.h"
  25 #include "vec.h"
  26 #include "double-int.h"
  27 #include "input.h"
  28 #include "alias.h"
  29 #include "symtab.h"
  30 #include "options.h"
  31 #include "wide-int.h"
  32 #include "inchash.h"
  33 #include "tree.h"
  34 #include "fold-const.h"
  35 #include "flags.h"
  36 #include "predict.h"
  37 #include "tm.h"
  38 #include "hard-reg-set.h"
  39 #include "input.h"
  40 #include "function.h"
  41 #include "dominance.h"
  42 #include "cfg.h"
  43 #include "cfganal.h"
  44 #include "basic-block.h"
  45 #include "hash-table.h"
  46 #include "tree-ssa-alias.h"
  47 #include "internal-fn.h"
  48 #include "gimple-expr.h"
  49 #include "is-a.h"
  50 #include "gimple.h"
  51 #include "gimple-iterator.h"
  52 #include "gimple-ssa.h"
  53 #include "tree-phinodes.h"
  54 #include "tree-ssa.h"
  55 #include "tree-ssa-threadupdate.h"
  56 #include "ssa-iterators.h"
  57 #include "dumpfile.h"
  58 #include "cfgloop.h"
  59 #include "dbgcnt.h"
  60 #include "tree-cfg.h"
  61 #include "tree-pass.h"
  62
  63 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  64    one or more in-edges to B to instead reach the destination of an
  65    out-edge from B while preserving any side effects in B.
  66
  67    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  68    side effects of executing B.
  69
  70      1. Make a copy of B (including its outgoing edges and statements).  Call
  71         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  72
  73      2. Remove the control statement at the end of B' and all outgoing edges
  74         except B'->C.
  75
  76      3. Add a new argument to each PHI in C with the same value as the existing
  77         argument associated with edge B->C.  Associate the new PHI arguments
  78         with the edge B'->C.
  79
  80      4. For each PHI in B, find or create a PHI in B' with an identical
  81         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  82         value as the PHI in B associated with the edge A->B.  Associate
  83         the new argument in the PHI in B' with the edge A->B.
  84
  85      5. Change the edge A->B to A->B'.
  86
  87         5a. This automatically deletes any PHI arguments associated with the
  88             edge A->B in B.
  89
  90         5b. This automatically associates each new argument added in step 4
  91             with the edge A->B'.
  92
  93      6. Repeat for other incoming edges into B.
  94
  95      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  96
  97    Note that block duplication can be minimized by first collecting the
  98    set of unique destination blocks that the incoming edges should
  99    be threaded to.
 100
 101    We reduce the number of edges and statements we create by not copying all
 102    the outgoing edges and the control statement in step #1.  We instead create
 103    a template block without the outgoing edges and duplicate the template.
 104
 105    Another case this code handles is threading through a "joiner" block.  In
 106    this case, we do not know the destination of the joiner block, but one
 107    of the outgoing edges from the joiner block leads to a threadable path.  This
 108    case largely works as outlined above, except the duplicate of the joiner
 109    block still contains a full set of outgoing edges and its control statement.
 110    We just redirect one of its outgoing edges to our jump threading path.  */
 111
 112
 113 /* Steps #5 and #6 of the above algorithm are best implemented by walking
 114    all the incoming edges which thread to the same destination edge at
 115    the same time.  That avoids lots of table lookups to get information
 116    for the destination edge.
 117
 118    To realize that implementation we create a list of incoming edges
 119    which thread to the same outgoing edge.  Thus to implement steps
 120    #5 and #6 we traverse our hash table of outgoing edge information.
 121    For each entry we walk the list of incoming edges which thread to
 122    the current outgoing edge.  */
 123
 124 struct el
 125 {
 126   edge e;
 127   struct el *next;
 128 };
 129
 130 /* Main data structure recording information regarding B's duplicate
 131    blocks.  */
 132
 133 /* We need to efficiently record the unique thread destinations of this
 134    block and specific information associated with those destinations.  We
 135    may have many incoming edges threaded to the same outgoing edge.  This
 136    can be naturally implemented with a hash table.  */
 137
 138 struct redirection_data : typed_free_remove<redirection_data>
 139 {
 140   /* We support wiring up two block duplicates in a jump threading path.
 141
 142      One is a normal block copy where we remove the control statement
 143      and wire up its single remaining outgoing edge to the thread path.
 144
 145      The other is a joiner block where we leave the control statement
 146      in place, but wire one of the outgoing edges to a thread path.
 147
 148      In theory we could have multiple block duplicates in a jump
 149      threading path, but I haven't tried that.
 150
 151      The duplicate blocks appear in this array in the same order in
 152      which they appear in the jump thread path.  */
 153   basic_block dup_blocks[2];
 154
 155   /* The jump threading path.  */
 156   vec<jump_thread_edge *> *path;
 157
 158   /* A list of incoming edges which we want to thread to the
 159      same path.  */
 160   struct el *incoming_edges;
 161
 162   /* hash_table support.  */
 163   typedef redirection_data *value_type;
 164   typedef redirection_data *compare_type;
 165   static inline hashval_t hash (const redirection_data *);
 166   static inline int equal (const redirection_data *, const redirection_data *);
 167 };
 168
 169 /* Dump a jump threading path, including annotations about each
 170    edge in the path.  */
 171
 172 static void
 173 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
 174                        bool registering)
 175 {
 176   fprintf (dump_file,
 177            "  %s%s jump thread: (%d, %d) incoming edge; ",
 178            (registering ? "Registering" : "Cancelling"),
 179            (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
 180            path[0]->e->src->index, path[0]->e->dest->index);
 181
 182   for (unsigned int i = 1; i < path.length (); i++)
 183     {
 184       /* We can get paths with a NULL edge when the final destination
 185          of a jump thread turns out to be a constant address.  We dump
 186          those paths when debugging, so we have to be prepared for that
 187          possibility here.  */
 188       if (path[i]->e == NULL)
 189         continue;
 190
 191       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 192         fprintf (dump_file, " (%d, %d) joiner; ",
 193                  path[i]->e->src->index, path[i]->e->dest->index);
 194       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
 195        fprintf (dump_file, " (%d, %d) normal;",
 196                  path[i]->e->src->index, path[i]->e->dest->index);
 197       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
 198        fprintf (dump_file, " (%d, %d) nocopy;",
 199                  path[i]->e->src->index, path[i]->e->dest->index);
 200       if (path[0]->type == EDGE_FSM_THREAD)
 201         fprintf (dump_file, " (%d, %d) ",
 202                  path[i]->e->src->index, path[i]->e->dest->index);
 203     }
 204   fputc ('\n', dump_file);
 205 }
 206
 207 /* Simple hashing function.  For any given incoming edge E, we're going
 208    to be most concerned with the final destination of its jump thread
 209    path.  So hash on the block index of the final edge in the path.  */
 210
 211 inline hashval_t
 212 redirection_data::hash (const redirection_data *p)
 213 {
 214   vec<jump_thread_edge *> *path = p->path;
 215   return path->last ()->e->dest->index;
 216 }
 217
 218 /* Given two hash table entries, return true if they have the same
 219    jump threading path.  */
 220 inline int
 221 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
 222 {
 223   vec<jump_thread_edge *> *path1 = p1->path;
 224   vec<jump_thread_edge *> *path2 = p2->path;
 225
 226   if (path1->length () != path2->length ())
 227     return false;
 228
 229   for (unsigned int i = 1; i < path1->length (); i++)
 230     {
 231       if ((*path1)[i]->type != (*path2)[i]->type
 232           || (*path1)[i]->e != (*path2)[i]->e)
 233         return false;
 234     }
 235
 236   return true;
 237 }
 238
 239 /* Data structure of information to pass to hash table traversal routines.  */
 240 struct ssa_local_info_t
 241 {
 242   /* The current block we are working on.  */
 243   basic_block bb;
 244
 245   /* We only create a template block for the first duplicated block in a
 246      jump threading path as we may need many duplicates of that block.
 247
 248      The second duplicate block in a path is specific to that path.  Creating
 249      and sharing a template for that block is considerably more difficult.  */
 250   basic_block template_block;
 251
 252   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 253   bool jumps_threaded;
 254
 255   /* Blocks duplicated for the thread.  */
 256   bitmap duplicate_blocks;
 257 };
 258
 259 /* Passes which use the jump threading code register jump threading
 260    opportunities as they are discovered.  We keep the registered
 261    jump threading opportunities in this vector as edge pairs
 262    (original_edge, target_edge).  */
 263 static vec<vec<jump_thread_edge *> *> paths;
 264
 265 /* When we start updating the CFG for threading, data necessary for jump
 266    threading is attached to the AUX field for the incoming edge.  Use these
 267    macros to access the underlying structure attached to the AUX field.  */
 268 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
 269
 270 /* Jump threading statistics.  */
 271
 272 struct thread_stats_d
 273 {
 274   unsigned long num_threaded_edges;
 275 };
 276
 277 struct thread_stats_d thread_stats;
 278
 279
 280 /* Remove the last statement in block BB if it is a control statement
 281    Also remove all outgoing edges except the edge which reaches DEST_BB.
 282    If DEST_BB is NULL, then remove all outgoing edges.  */
 283
 284 static void
 285 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 286 {
 287   gimple_stmt_iterator gsi;
 288   edge e;
 289   edge_iterator ei;
 290
 291   gsi = gsi_last_bb (bb);
 292
 293   /* If the duplicate ends with a control statement, then remove it.
 294
 295      Note that if we are duplicating the template block rather than the
 296      original basic block, then the duplicate might not have any real
 297      statements in it.  */
 298   if (!gsi_end_p (gsi)
 299       && gsi_stmt (gsi)
 300       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 301           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 302           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 303     gsi_remove (&gsi, true);
 304
 305   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 306     {
 307       if (e->dest != dest_bb)
 308         remove_edge (e);
 309       else
 310         ei_next (&ei);
 311     }
 312 }
 313
 314 /* Create a duplicate of BB.  Record the duplicate block in an array
 315    indexed by COUNT stored in RD.  */
 316
 317 static void
 318 create_block_for_threading (basic_block bb,
 319                             struct redirection_data *rd,
 320                             unsigned int count,
 321                             bitmap *duplicate_blocks)
 322 {
 323   edge_iterator ei;
 324   edge e;
 325
 326   /* We can use the generic block duplication code and simply remove
 327      the stuff we do not need.  */
 328   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
 329
 330   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
 331     e->aux = NULL;
 332
 333   /* Zero out the profile, since the block is unreachable for now.  */
 334   rd->dup_blocks[count]->frequency = 0;
 335   rd->dup_blocks[count]->count = 0;
 336   if (duplicate_blocks)
 337     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
 338 }
 339
 340 /* Main data structure to hold information for duplicates of BB.  */
 341
 342 static hash_table<redirection_data> *redirection_data;
 343
 344 /* Given an outgoing edge E lookup and return its entry in our hash table.
 345
 346    If INSERT is true, then we insert the entry into the hash table if
 347    it is not already present.  INCOMING_EDGE is added to the list of incoming
 348    edges associated with E in the hash table.  */
 349
 350 static struct redirection_data *
 351 lookup_redirection_data (edge e, enum insert_option insert)
 352 {
 353   struct redirection_data **slot;
 354   struct redirection_data *elt;
 355   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 356
 357  /* Build a hash table element so we can see if E is already
 358      in the table.  */
 359   elt = XNEW (struct redirection_data);
 360   elt->path = path;
 361   elt->dup_blocks[0] = NULL;
 362   elt->dup_blocks[1] = NULL;
 363   elt->incoming_edges = NULL;
 364
 365   slot = redirection_data->find_slot (elt, insert);
 366
 367   /* This will only happen if INSERT is false and the entry is not
 368      in the hash table.  */
 369   if (slot == NULL)
 370     {
 371       free (elt);
 372       return NULL;
 373     }
 374
 375   /* This will only happen if E was not in the hash table and
 376      INSERT is true.  */
 377   if (*slot == NULL)
 378     {
 379       *slot = elt;
 380       elt->incoming_edges = XNEW (struct el);
 381       elt->incoming_edges->e = e;
 382       elt->incoming_edges->next = NULL;
 383       return elt;
 384     }
 385   /* E was in the hash table.  */
 386   else
 387     {
 388       /* Free ELT as we do not need it anymore, we will extract the
 389          relevant entry from the hash table itself.  */
 390       free (elt);
 391
 392       /* Get the entry stored in the hash table.  */
 393       elt = *slot;
 394
 395       /* If insertion was requested, then we need to add INCOMING_EDGE
 396          to the list of incoming edges associated with E.  */
 397       if (insert)
 398         {
 399           struct el *el = XNEW (struct el);
 400           el->next = elt->incoming_edges;
 401           el->e = e;
 402           elt->incoming_edges = el;
 403         }
 404
 405       return elt;
 406     }
 407 }
 408
 409 /* Similar to copy_phi_args, except that the PHI arg exists, it just
 410    does not have a value associated with it.  */
 411
 412 static void
 413 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
 414 {
 415   int src_idx = src_e->dest_idx;
 416   int tgt_idx = tgt_e->dest_idx;
 417
 418   /* Iterate over each PHI in e->dest.  */
 419   for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
 420                            gsi2 = gsi_start_phis (tgt_e->dest);
 421        !gsi_end_p (gsi);
 422        gsi_next (&gsi), gsi_next (&gsi2))
 423     {
 424       gphi *src_phi = gsi.phi ();
 425       gphi *dest_phi = gsi2.phi ();
 426       tree val = gimple_phi_arg_def (src_phi, src_idx);
 427       source_location locus = gimple_phi_arg_location (src_phi, src_idx);
 428
 429       SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
 430       gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
 431     }
 432 }
 433
 434 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
 435    to see if it has constant value in a flow sensitive manner.  Set
 436    LOCUS to location of the constant phi arg and return the value.
 437    Return DEF directly if either PATH or idx is ZERO.  */
 438
 439 static tree
 440 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
 441                          basic_block bb, int idx, source_location *locus)
 442 {
 443   tree arg;
 444   gphi *def_phi;
 445   basic_block def_bb;
 446
 447   if (path == NULL || idx == 0)
 448     return def;
 449
 450   def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
 451   if (!def_phi)
 452     return def;
 453
 454   def_bb = gimple_bb (def_phi);
 455   /* Don't propagate loop invariants into deeper loops.  */
 456   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
 457     return def;
 458
 459   /* Backtrack jump threading path from IDX to see if def has constant
 460      value.  */
 461   for (int j = idx - 1; j >= 0; j--)
 462     {
 463       edge e = (*path)[j]->e;
 464       if (e->dest == def_bb)
 465         {
 466           arg = gimple_phi_arg_def (def_phi, e->dest_idx);
 467           if (is_gimple_min_invariant (arg))
 468             {
 469               *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
 470               return arg;
 471             }
 472           break;
 473         }
 474     }
 475
 476   return def;
 477 }
 478
 479 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
 480    Try to backtrack jump threading PATH from node IDX to see if the arg
 481    has constant value, copy constant value instead of argument itself
 482    if yes.  */
 483
 484 static void
 485 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
 486                vec<jump_thread_edge *> *path, int idx)
 487 {
 488   gphi_iterator gsi;
 489   int src_indx = src_e->dest_idx;
 490
 491   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 492     {
 493       gphi *phi = gsi.phi ();
 494       tree def = gimple_phi_arg_def (phi, src_indx);
 495       source_location locus = gimple_phi_arg_location (phi, src_indx);
 496
 497       if (TREE_CODE (def) == SSA_NAME
 498           && !virtual_operand_p (gimple_phi_result (phi)))
 499         def = get_value_locus_in_path (def, path, bb, idx, &locus);
 500
 501       add_phi_arg (phi, def, tgt_e, locus);
 502     }
 503 }
 504
 505 /* We have recently made a copy of ORIG_BB, including its outgoing
 506    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 507    ORIG_BB has a new argument associated with edge from NEW_BB to the
 508    successor.  Initialize the PHI argument so that it is equal to the PHI
 509    argument associated with the edge from ORIG_BB to the successor.
 510    PATH and IDX are used to check if the new PHI argument has constant
 511    value in a flow sensitive manner.  */
 512
 513 static void
 514 update_destination_phis (basic_block orig_bb, basic_block new_bb,
 515                          vec<jump_thread_edge *> *path, int idx)
 516 {
 517   edge_iterator ei;
 518   edge e;
 519
 520   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 521     {
 522       edge e2 = find_edge (new_bb, e->dest);
 523       copy_phi_args (e->dest, e, e2, path, idx);
 524     }
 525 }
 526
 527 /* Given a duplicate block and its single destination (both stored
 528    in RD).  Create an edge between the duplicate and its single
 529    destination.
 530
 531    Add an additional argument to any PHI nodes at the single
 532    destination.  IDX is the start node in jump threading path
 533    we start to check to see if the new PHI argument has constant
 534    value along the jump threading path.  */
 535
 536 static void
 537 create_edge_and_update_destination_phis (struct redirection_data *rd,
 538                                          basic_block bb, int idx)
 539 {
 540   edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 541
 542   rescan_loop_exit (e, true, false);
 543   e->probability = REG_BR_PROB_BASE;
 544   e->count = bb->count;
 545
 546   /* We used to copy the thread path here.  That was added in 2007
 547      and dutifully updated through the representation changes in 2013.
 548
 549      In 2013 we added code to thread from an interior node through
 550      the backedge to another interior node.  That runs after the code
 551      to thread through loop headers from outside the loop.
 552
 553      The latter may delete edges in the CFG, including those
 554      which appeared in the jump threading path we copied here.  Thus
 555      we'd end up using a dangling pointer.
 556
 557      After reviewing the 2007/2011 code, I can't see how anything
 558      depended on copying the AUX field and clearly copying the jump
 559      threading path is problematical due to embedded edge pointers.
 560      It has been removed.  */
 561   e->aux = NULL;
 562
 563   /* If there are any PHI nodes at the destination of the outgoing edge
 564      from the duplicate block, then we will need to add a new argument
 565      to them.  The argument should have the same value as the argument
 566      associated with the outgoing edge stored in RD.  */
 567   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
 568 }
 569
 570 /* Look through PATH beginning at START and return TRUE if there are
 571    any additional blocks that need to be duplicated.  Otherwise,
 572    return FALSE.  */
 573 static bool
 574 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
 575                                  unsigned int start)
 576 {
 577   for (unsigned int i = start + 1; i < path->length (); i++)
 578     {
 579       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
 580           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
 581         return true;
 582     }
 583   return false;
 584 }
 585
 586
 587 /* Compute the amount of profile count/frequency coming into the jump threading
 588    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
 589    PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
 590    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
 591    identify blocks duplicated for jump threading, which have duplicated
 592    edges that need to be ignored in the analysis.  Return true if path contains
 593    a joiner, false otherwise.
 594
 595    In the non-joiner case, this is straightforward - all the counts/frequency
 596    flowing into the jump threading path should flow through the duplicated
 597    block and out of the duplicated path.
 598
 599    In the joiner case, it is very tricky.  Some of the counts flowing into
 600    the original path go offpath at the joiner.  The problem is that while
 601    we know how much total count goes off-path in the original control flow,
 602    we don't know how many of the counts corresponding to just the jump
 603    threading path go offpath at the joiner.
 604
 605    For example, assume we have the following control flow and identified
 606    jump threading paths:
 607
 608                 A     B     C
 609                  \    |    /
 610                Ea \   |Eb / Ec
 611                    \  |  /
 612                     v v v
 613                       J       <-- Joiner
 614                      / \
 615                 Eoff/   \Eon
 616                    /     \
 617                   v       v
 618                 Soff     Son  <--- Normal
 619                          /\
 620                       Ed/  \ Ee
 621                        /    \
 622                       v     v
 623                       D      E
 624
 625             Jump threading paths: A -> J -> Son -> D (path 1)
 626                                   C -> J -> Son -> E (path 2)
 627
 628    Note that the control flow could be more complicated:
 629    - Each jump threading path may have more than one incoming edge.  I.e. A and
 630    Ea could represent multiple incoming blocks/edges that are included in
 631    path 1.
 632    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
 633    before or after the "normal" copy block).  These are not duplicated onto
 634    the jump threading path, as they are single-successor.
 635    - Any of the blocks along the path may have other incoming edges that
 636    are not part of any jump threading path, but add profile counts along
 637    the path.
 638
 639    In the aboe example, after all jump threading is complete, we will
 640    end up with the following control flow:
 641
 642                 A          B            C
 643                 |          |            |
 644               Ea|          |Eb          |Ec
 645                 |          |            |
 646                 v          v            v
 647                Ja          J           Jc
 648                / \        / \Eon'     / \
 649           Eona/   \   ---/---\--------   \Eonc
 650              /     \ /  /     \           \
 651             v       v  v       v          v
 652            Sona     Soff      Son        Sonc
 653              \                 /\         /
 654               \___________    /  \  _____/
 655                           \  /    \/
 656                            vv      v
 657                             D      E
 658
 659    The main issue to notice here is that when we are processing path 1
 660    (A->J->Son->D) we need to figure out the outgoing edge weights to
 661    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
 662    sum of the incoming weights to D remain Ed.  The problem with simply
 663    assuming that Ja (and Jc when processing path 2) has the same outgoing
 664    probabilities to its successors as the original block J, is that after
 665    all paths are processed and other edges/counts removed (e.g. none
 666    of Ec will reach D after processing path 2), we may end up with not
 667    enough count flowing along duplicated edge Sona->D.
 668
 669    Therefore, in the case of a joiner, we keep track of all counts
 670    coming in along the current path, as well as from predecessors not
 671    on any jump threading path (Eb in the above example).  While we
 672    first assume that the duplicated Eona for Ja->Sona has the same
 673    probability as the original, we later compensate for other jump
 674    threading paths that may eliminate edges.  We do that by keep track
 675    of all counts coming into the original path that are not in a jump
 676    thread (Eb in the above example, but as noted earlier, there could
 677    be other predecessors incoming to the path at various points, such
 678    as at Son).  Call this cumulative non-path count coming into the path
 679    before D as Enonpath.  We then ensure that the count from Sona->D is as at
 680    least as big as (Ed - Enonpath), but no bigger than the minimum
 681    weight along the jump threading path.  The probabilities of both the
 682    original and duplicated joiner block J and Ja will be adjusted
 683    accordingly after the updates.  */
 684
 685 static bool
 686 compute_path_counts (struct redirection_data *rd,
 687                      ssa_local_info_t *local_info,
 688                      gcov_type *path_in_count_ptr,
 689                      gcov_type *path_out_count_ptr,
 690                      int *path_in_freq_ptr)
 691 {
 692   edge e = rd->incoming_edges->e;
 693   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 694   edge elast = path->last ()->e;
 695   gcov_type nonpath_count = 0;
 696   bool has_joiner = false;
 697   gcov_type path_in_count = 0;
 698   int path_in_freq = 0;
 699
 700   /* Start by accumulating incoming edge counts to the path's first bb
 701      into a couple buckets:
 702         path_in_count: total count of incoming edges that flow into the
 703                   current path.
 704         nonpath_count: total count of incoming edges that are not
 705                   flowing along *any* path.  These are the counts
 706                   that will still flow along the original path after
 707                   all path duplication is done by potentially multiple
 708                   calls to this routine.
 709      (any other incoming edge counts are for a different jump threading
 710      path that will be handled by a later call to this routine.)
 711      To make this easier, start by recording all incoming edges that flow into
 712      the current path in a bitmap.  We could add up the path's incoming edge
 713      counts here, but we still need to walk all the first bb's incoming edges
 714      below to add up the counts of the other edges not included in this jump
 715      threading path.  */
 716   struct el *next, *el;
 717   bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
 718   for (el = rd->incoming_edges; el; el = next)
 719     {
 720       next = el->next;
 721       bitmap_set_bit (in_edge_srcs, el->e->src->index);
 722     }
 723   edge ein;
 724   edge_iterator ei;
 725   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 726     {
 727       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
 728       /* Simply check the incoming edge src against the set captured above.  */
 729       if (ein_path
 730           && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
 731         {
 732           /* It is necessary but not sufficient that the last path edges
 733              are identical.  There may be different paths that share the
 734              same last path edge in the case where the last edge has a nocopy
 735              source block.  */
 736           gcc_assert (ein_path->last ()->e == elast);
 737           path_in_count += ein->count;
 738           path_in_freq += EDGE_FREQUENCY (ein);
 739         }
 740       else if (!ein_path)
 741         {
 742           /* Keep track of the incoming edges that are not on any jump-threading
 743              path.  These counts will still flow out of original path after all
 744              jump threading is complete.  */
 745             nonpath_count += ein->count;
 746         }
 747     }
 748
 749   /* This is needed due to insane incoming frequencies.  */
 750   if (path_in_freq > BB_FREQ_MAX)
 751     path_in_freq = BB_FREQ_MAX;
 752
 753   BITMAP_FREE (in_edge_srcs);
 754
 755   /* Now compute the fraction of the total count coming into the first
 756      path bb that is from the current threading path.  */
 757   gcov_type total_count = e->dest->count;
 758   /* Handle incoming profile insanities.  */
 759   if (total_count < path_in_count)
 760     path_in_count = total_count;
 761   int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
 762
 763   /* Walk the entire path to do some more computation in order to estimate
 764      how much of the path_in_count will flow out of the duplicated threading
 765      path.  In the non-joiner case this is straightforward (it should be
 766      the same as path_in_count, although we will handle incoming profile
 767      insanities by setting it equal to the minimum count along the path).
 768
 769      In the joiner case, we need to estimate how much of the path_in_count
 770      will stay on the threading path after the joiner's conditional branch.
 771      We don't really know for sure how much of the counts
 772      associated with this path go to each successor of the joiner, but we'll
 773      estimate based on the fraction of the total count coming into the path
 774      bb was from the threading paths (computed above in onpath_scale).
 775      Afterwards, we will need to do some fixup to account for other threading
 776      paths and possible profile insanities.
 777
 778      In order to estimate the joiner case's counts we also need to update
 779      nonpath_count with any additional counts coming into the path.  Other
 780      blocks along the path may have additional predecessors from outside
 781      the path.  */
 782   gcov_type path_out_count = path_in_count;
 783   gcov_type min_path_count = path_in_count;
 784   for (unsigned int i = 1; i < path->length (); i++)
 785     {
 786       edge epath = (*path)[i]->e;
 787       gcov_type cur_count = epath->count;
 788       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 789         {
 790           has_joiner = true;
 791           cur_count = apply_probability (cur_count, onpath_scale);
 792         }
 793       /* In the joiner case we need to update nonpath_count for any edges
 794          coming into the path that will contribute to the count flowing
 795          into the path successor.  */
 796       if (has_joiner && epath != elast)
 797       {
 798         /* Look for other incoming edges after joiner.  */
 799         FOR_EACH_EDGE (ein, ei, epath->dest->preds)
 800           {
 801             if (ein != epath
 802                 /* Ignore in edges from blocks we have duplicated for a
 803                    threading path, which have duplicated edge counts until
 804                    they are redirected by an invocation of this routine.  */
 805                 && !bitmap_bit_p (local_info->duplicate_blocks,
 806                                   ein->src->index))
 807               nonpath_count += ein->count;
 808           }
 809       }
 810       if (cur_count < path_out_count)
 811         path_out_count = cur_count;
 812       if (epath->count < min_path_count)
 813         min_path_count = epath->count;
 814     }
 815
 816   /* We computed path_out_count above assuming that this path targeted
 817      the joiner's on-path successor with the same likelihood as it
 818      reached the joiner.  However, other thread paths through the joiner
 819      may take a different path through the normal copy source block
 820      (i.e. they have a different elast), meaning that they do not
 821      contribute any counts to this path's elast.  As a result, it may
 822      turn out that this path must have more count flowing to the on-path
 823      successor of the joiner.  Essentially, all of this path's elast
 824      count must be contributed by this path and any nonpath counts
 825      (since any path through the joiner with a different elast will not
 826      include a copy of this elast in its duplicated path).
 827      So ensure that this path's path_out_count is at least the
 828      difference between elast->count and nonpath_count.  Otherwise the edge
 829      counts after threading will not be sane.  */
 830   if (has_joiner && path_out_count < elast->count - nonpath_count)
 831   {
 832     path_out_count = elast->count - nonpath_count;
 833     /* But neither can we go above the minimum count along the path
 834        we are duplicating.  This can be an issue due to profile
 835        insanities coming in to this pass.  */
 836     if (path_out_count > min_path_count)
 837       path_out_count = min_path_count;
 838   }
 839
 840   *path_in_count_ptr = path_in_count;
 841   *path_out_count_ptr = path_out_count;
 842   *path_in_freq_ptr = path_in_freq;
 843   return has_joiner;
 844 }
 845
 846
 847 /* Update the counts and frequencies for both an original path
 848    edge EPATH and its duplicate EDUP.  The duplicate source block
 849    will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
 850    and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
 851 static void
 852 update_profile (edge epath, edge edup, gcov_type path_in_count,
 853                 gcov_type path_out_count, int path_in_freq)
 854 {
 855
 856   /* First update the duplicated block's count / frequency.  */
 857   if (edup)
 858     {
 859       basic_block dup_block = edup->src;
 860       gcc_assert (dup_block->count == 0);
 861       gcc_assert (dup_block->frequency == 0);
 862       dup_block->count = path_in_count;
 863       dup_block->frequency = path_in_freq;
 864     }
 865
 866   /* Now update the original block's count and frequency in the
 867      opposite manner - remove the counts/freq that will flow
 868      into the duplicated block.  Handle underflow due to precision/
 869      rounding issues.  */
 870   epath->src->count -= path_in_count;
 871   if (epath->src->count < 0)
 872     epath->src->count = 0;
 873   epath->src->frequency -= path_in_freq;
 874   if (epath->src->frequency < 0)
 875     epath->src->frequency = 0;
 876
 877   /* Next update this path edge's original and duplicated counts.  We know
 878      that the duplicated path will have path_out_count flowing
 879      out of it (in the joiner case this is the count along the duplicated path
 880      out of the duplicated joiner).  This count can then be removed from the
 881      original path edge.  */
 882   if (edup)
 883     edup->count = path_out_count;
 884   epath->count -= path_out_count;
 885   gcc_assert (epath->count >= 0);
 886 }
 887
 888
 889 /* The duplicate and original joiner blocks may end up with different
 890    probabilities (different from both the original and from each other).
 891    Recompute the probabilities here once we have updated the edge
 892    counts and frequencies.  */
 893
 894 static void
 895 recompute_probabilities (basic_block bb)
 896 {
 897   edge esucc;
 898   edge_iterator ei;
 899   FOR_EACH_EDGE (esucc, ei, bb->succs)
 900     {
 901       if (!bb->count)
 902         continue;
 903
 904       /* Prevent overflow computation due to insane profiles.  */
 905       if (esucc->count < bb->count)
 906         esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
 907                                                  bb->count);
 908       else
 909         /* Can happen with missing/guessed probabilities, since we
 910            may determine that more is flowing along duplicated
 911            path than joiner succ probabilities allowed.
 912            Counts and freqs will be insane after jump threading,
 913            at least make sure probability is sane or we will
 914            get a flow verification error.
 915            Not much we can do to make counts/freqs sane without
 916            redoing the profile estimation.  */
 917         esucc->probability = REG_BR_PROB_BASE;
 918     }
 919 }
 920
 921
 922 /* Update the counts of the original and duplicated edges from a joiner
 923    that go off path, given that we have already determined that the
 924    duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
 925    outgoing count along the path PATH_OUT_COUNT.  The original (on-)path
 926    edge from joiner is EPATH.  */
 927
 928 static void
 929 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
 930                               gcov_type path_in_count,
 931                               gcov_type path_out_count)
 932 {
 933   /* Compute the count that currently flows off path from the joiner.
 934      In other words, the total count of joiner's out edges other than
 935      epath.  Compute this by walking the successors instead of
 936      subtracting epath's count from the joiner bb count, since there
 937      are sometimes slight insanities where the total out edge count is
 938      larger than the bb count (possibly due to rounding/truncation
 939      errors).  */
 940   gcov_type total_orig_off_path_count = 0;
 941   edge enonpath;
 942   edge_iterator ei;
 943   FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
 944     {
 945       if (enonpath == epath)
 946         continue;
 947       total_orig_off_path_count += enonpath->count;
 948     }
 949
 950   /* For the path that we are duplicating, the amount that will flow
 951      off path from the duplicated joiner is the delta between the
 952      path's cumulative in count and the portion of that count we
 953      estimated above as flowing from the joiner along the duplicated
 954      path.  */
 955   gcov_type total_dup_off_path_count = path_in_count - path_out_count;
 956
 957   /* Now do the actual updates of the off-path edges.  */
 958   FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
 959     {
 960       /* Look for edges going off of the threading path.  */
 961       if (enonpath == epath)
 962         continue;
 963
 964       /* Find the corresponding edge out of the duplicated joiner.  */
 965       edge enonpathdup = find_edge (dup_bb, enonpath->dest);
 966       gcc_assert (enonpathdup);
 967
 968       /* We can't use the original probability of the joiner's out
 969          edges, since the probabilities of the original branch
 970          and the duplicated branches may vary after all threading is
 971          complete.  But apportion the duplicated joiner's off-path
 972          total edge count computed earlier (total_dup_off_path_count)
 973          among the duplicated off-path edges based on their original
 974          ratio to the full off-path count (total_orig_off_path_count).
 975          */
 976       int scale = GCOV_COMPUTE_SCALE (enonpath->count,
 977                                       total_orig_off_path_count);
 978       /* Give the duplicated offpath edge a portion of the duplicated
 979          total.  */
 980       enonpathdup->count = apply_scale (scale,
 981                                         total_dup_off_path_count);
 982       /* Now update the original offpath edge count, handling underflow
 983          due to rounding errors.  */
 984       enonpath->count -= enonpathdup->count;
 985       if (enonpath->count < 0)
 986         enonpath->count = 0;
 987     }
 988 }
 989
 990
 991 /* Check if the paths through RD all have estimated frequencies but zero
 992    profile counts.  This is more accurate than checking the entry block
 993    for a zero profile count, since profile insanities sometimes creep in.  */
 994
 995 static bool
 996 estimated_freqs_path (struct redirection_data *rd)
 997 {
 998   edge e = rd->incoming_edges->e;
 999   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1000   edge ein;
1001   edge_iterator ei;
1002   bool non_zero_freq = false;
1003   FOR_EACH_EDGE (ein, ei, e->dest->preds)
1004     {
1005       if (ein->count)
1006         return false;
1007       non_zero_freq |= ein->src->frequency != 0;
1008     }
1009
1010   for (unsigned int i = 1; i < path->length (); i++)
1011     {
1012       edge epath = (*path)[i]->e;
1013       if (epath->src->count)
1014         return false;
1015       non_zero_freq |= epath->src->frequency != 0;
1016       edge esucc;
1017       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1018         {
1019           if (esucc->count)
1020             return false;
1021           non_zero_freq |= esucc->src->frequency != 0;
1022         }
1023     }
1024   return non_zero_freq;
1025 }
1026
1027
1028 /* Invoked for routines that have guessed frequencies and no profile
1029    counts to record the block and edge frequencies for paths through RD
1030    in the profile count fields of those blocks and edges.  This is because
1031    ssa_fix_duplicate_block_edges incrementally updates the block and
1032    edge counts as edges are redirected, and it is difficult to do that
1033    for edge frequencies which are computed on the fly from the source
1034    block frequency and probability.  When a block frequency is updated
1035    its outgoing edge frequencies are affected and become difficult to
1036    adjust.  */
1037
1038 static void
1039 freqs_to_counts_path (struct redirection_data *rd)
1040 {
1041   edge e = rd->incoming_edges->e;
1042   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1043   edge ein;
1044   edge_iterator ei;
1045   FOR_EACH_EDGE (ein, ei, e->dest->preds)
1046     {
1047       /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1048          errors applying the probability when the frequencies are very
1049          small.  */
1050       ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1051                                       ein->probability);
1052     }
1053
1054   for (unsigned int i = 1; i < path->length (); i++)
1055     {
1056       edge epath = (*path)[i]->e;
1057       edge esucc;
1058       /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1059          errors applying the edge probability when the frequencies are very
1060          small.  */
1061       epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1062       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1063         esucc->count = apply_probability (esucc->src->count,
1064                                           esucc->probability);
1065     }
1066 }
1067
1068
1069 /* For routines that have guessed frequencies and no profile counts, where we
1070    used freqs_to_counts_path to record block and edge frequencies for paths
1071    through RD, we clear the counts after completing all updates for RD.
1072    The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1073    but the block frequencies and edge probabilities were updated as well,
1074    so we can simply clear the count fields.  */
1075
1076 static void
1077 clear_counts_path (struct redirection_data *rd)
1078 {
1079   edge e = rd->incoming_edges->e;
1080   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1081   edge ein, esucc;
1082   edge_iterator ei;
1083   FOR_EACH_EDGE (ein, ei, e->dest->preds)
1084     ein->count = 0;
1085
1086   /* First clear counts along original path.  */
1087   for (unsigned int i = 1; i < path->length (); i++)
1088     {
1089       edge epath = (*path)[i]->e;
1090       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1091         esucc->count = 0;
1092       epath->src->count = 0;
1093     }
1094   /* Also need to clear the counts along duplicated path.  */
1095   for (unsigned int i = 0; i < 2; i++)
1096     {
1097       basic_block dup = rd->dup_blocks[i];
1098       if (!dup)
1099         continue;
1100       FOR_EACH_EDGE (esucc, ei, dup->succs)
1101         esucc->count = 0;
1102       dup->count = 0;
1103     }
1104 }
1105
1106 /* Wire up the outgoing edges from the duplicate blocks and
1107    update any PHIs as needed.  Also update the profile counts
1108    on the original and duplicate blocks and edges.  */
1109 void
1110 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1111                                ssa_local_info_t *local_info)
1112 {
1113   bool multi_incomings = (rd->incoming_edges->next != NULL);
1114   edge e = rd->incoming_edges->e;
1115   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1116   edge elast = path->last ()->e;
1117   gcov_type path_in_count = 0;
1118   gcov_type path_out_count = 0;
1119   int path_in_freq = 0;
1120
1121   /* This routine updates profile counts, frequencies, and probabilities
1122      incrementally. Since it is difficult to do the incremental updates
1123      using frequencies/probabilities alone, for routines without profile
1124      data we first take a snapshot of the existing block and edge frequencies
1125      by copying them into the empty profile count fields.  These counts are
1126      then used to do the incremental updates, and cleared at the end of this
1127      routine.  If the function is marked as having a profile, we still check
1128      to see if the paths through RD are using estimated frequencies because
1129      the routine had zero profile counts.  */
1130   bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1131                              || estimated_freqs_path (rd));
1132   if (do_freqs_to_counts)
1133     freqs_to_counts_path (rd);
1134
1135   /* First determine how much profile count to move from original
1136      path to the duplicate path.  This is tricky in the presence of
1137      a joiner (see comments for compute_path_counts), where some portion
1138      of the path's counts will flow off-path from the joiner.  In the
1139      non-joiner case the path_in_count and path_out_count should be the
1140      same.  */
1141   bool has_joiner = compute_path_counts (rd, local_info,
1142                                          &path_in_count, &path_out_count,
1143                                          &path_in_freq);
1144
1145   int cur_path_freq = path_in_freq;
1146   for (unsigned int count = 0, i = 1; i < path->length (); i++)
1147     {
1148       edge epath = (*path)[i]->e;
1149
1150       /* If we were threading through an joiner block, then we want
1151          to keep its control statement and redirect an outgoing edge.
1152          Else we want to remove the control statement & edges, then create
1153          a new outgoing edge.  In both cases we may need to update PHIs.  */
1154       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1155         {
1156           edge victim;
1157           edge e2;
1158
1159           gcc_assert (has_joiner);
1160
1161           /* This updates the PHIs at the destination of the duplicate
1162              block.  Pass 0 instead of i if we are threading a path which
1163              has multiple incoming edges.  */
1164           update_destination_phis (local_info->bb, rd->dup_blocks[count],
1165                                    path, multi_incomings ? 0 : i);
1166
1167           /* Find the edge from the duplicate block to the block we're
1168              threading through.  That's the edge we want to redirect.  */
1169           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1170
1171           /* If there are no remaining blocks on the path to duplicate,
1172              then redirect VICTIM to the final destination of the jump
1173              threading path.  */
1174           if (!any_remaining_duplicated_blocks (path, i))
1175             {
1176               e2 = redirect_edge_and_branch (victim, elast->dest);
1177               /* If we redirected the edge, then we need to copy PHI arguments
1178                  at the target.  If the edge already existed (e2 != victim
1179                  case), then the PHIs in the target already have the correct
1180                  arguments.  */
1181               if (e2 == victim)
1182                 copy_phi_args (e2->dest, elast, e2,
1183                                path, multi_incomings ? 0 : i);
1184             }
1185           else
1186             {
1187               /* Redirect VICTIM to the next duplicated block in the path.  */
1188               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1189
1190               /* We need to update the PHIs in the next duplicated block.  We
1191                  want the new PHI args to have the same value as they had
1192                  in the source of the next duplicate block.
1193
1194                  Thus, we need to know which edge we traversed into the
1195                  source of the duplicate.  Furthermore, we may have
1196                  traversed many edges to reach the source of the duplicate.
1197
1198                  Walk through the path starting at element I until we
1199                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
1200                  the edge from the prior element.  */
1201               for (unsigned int j = i + 1; j < path->length (); j++)
1202                 {
1203                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1204                     {
1205                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1206                       break;
1207                     }
1208                 }
1209             }
1210
1211           /* Update the counts and frequency of both the original block
1212              and path edge, and the duplicates.  The path duplicate's
1213              incoming count and frequency are the totals for all edges
1214              incoming to this jump threading path computed earlier.
1215              And we know that the duplicated path will have path_out_count
1216              flowing out of it (i.e. along the duplicated path out of the
1217              duplicated joiner).  */
1218           update_profile (epath, e2, path_in_count, path_out_count,
1219                           path_in_freq);
1220
1221           /* Next we need to update the counts of the original and duplicated
1222              edges from the joiner that go off path.  */
1223           update_joiner_offpath_counts (epath, e2->src, path_in_count,
1224                                         path_out_count);
1225
1226           /* Finally, we need to set the probabilities on the duplicated
1227              edges out of the duplicated joiner (e2->src).  The probabilities
1228              along the original path will all be updated below after we finish
1229              processing the whole path.  */
1230           recompute_probabilities (e2->src);
1231
1232           /* Record the frequency flowing to the downstream duplicated
1233              path blocks.  */
1234           cur_path_freq = EDGE_FREQUENCY (e2);
1235         }
1236       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1237         {
1238           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1239           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1240                                                    multi_incomings ? 0 : i);
1241           if (count == 1)
1242             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1243
1244           /* Update the counts and frequency of both the original block
1245              and path edge, and the duplicates.  Since we are now after
1246              any joiner that may have existed on the path, the count
1247              flowing along the duplicated threaded path is path_out_count.
1248              If we didn't have a joiner, then cur_path_freq was the sum
1249              of the total frequencies along all incoming edges to the
1250              thread path (path_in_freq).  If we had a joiner, it would have
1251              been updated at the end of that handling to the edge frequency
1252              along the duplicated joiner path edge.  */
1253           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1254                           path_out_count, path_out_count,
1255                           cur_path_freq);
1256         }
1257       else
1258         {
1259           /* No copy case.  In this case we don't have an equivalent block
1260              on the duplicated thread path to update, but we do need
1261              to remove the portion of the counts/freqs that were moved
1262              to the duplicated path from the counts/freqs flowing through
1263              this block on the original path.  Since all the no-copy edges
1264              are after any joiner, the removed count is the same as
1265              path_out_count.
1266
1267              If we didn't have a joiner, then cur_path_freq was the sum
1268              of the total frequencies along all incoming edges to the
1269              thread path (path_in_freq).  If we had a joiner, it would have
1270              been updated at the end of that handling to the edge frequency
1271              along the duplicated joiner path edge.  */
1272              update_profile (epath, NULL, path_out_count, path_out_count,
1273                              cur_path_freq);
1274         }
1275
1276       /* Increment the index into the duplicated path when we processed
1277          a duplicated block.  */
1278       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1279           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1280       {
1281           count++;
1282       }
1283     }
1284
1285   /* Now walk orig blocks and update their probabilities, since the
1286      counts and freqs should be updated properly by above loop.  */
1287   for (unsigned int i = 1; i < path->length (); i++)
1288     {
1289       edge epath = (*path)[i]->e;
1290       recompute_probabilities (epath->src);
1291     }
1292
1293   /* Done with all profile and frequency updates, clear counts if they
1294      were copied.  */
1295   if (do_freqs_to_counts)
1296     clear_counts_path (rd);
1297 }
1298
1299 /* Hash table traversal callback routine to create duplicate blocks.  */
1300
1301 int
1302 ssa_create_duplicates (struct redirection_data **slot,
1303                        ssa_local_info_t *local_info)
1304 {
1305   struct redirection_data *rd = *slot;
1306
1307   /* The second duplicated block in a jump threading path is specific
1308      to the path.  So it gets stored in RD rather than in LOCAL_DATA.
1309
1310      Each time we're called, we have to look through the path and see
1311      if a second block needs to be duplicated.
1312
1313      Note the search starts with the third edge on the path.  The first
1314      edge is the incoming edge, the second edge always has its source
1315      duplicated.  Thus we start our search with the third edge.  */
1316   vec<jump_thread_edge *> *path = rd->path;
1317   for (unsigned int i = 2; i < path->length (); i++)
1318     {
1319       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1320           || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1321         {
1322           create_block_for_threading ((*path)[i]->e->src, rd, 1,
1323                                       &local_info->duplicate_blocks);
1324           break;
1325         }
1326     }
1327
1328   /* Create a template block if we have not done so already.  Otherwise
1329      use the template to create a new block.  */
1330   if (local_info->template_block == NULL)
1331     {
1332       create_block_for_threading ((*path)[1]->e->src, rd, 0,
1333                                   &local_info->duplicate_blocks);
1334       local_info->template_block = rd->dup_blocks[0];
1335
1336       /* We do not create any outgoing edges for the template.  We will
1337          take care of that in a later traversal.  That way we do not
1338          create edges that are going to just be deleted.  */
1339     }
1340   else
1341     {
1342       create_block_for_threading (local_info->template_block, rd, 0,
1343                                   &local_info->duplicate_blocks);
1344
1345       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1346          block.   */
1347       ssa_fix_duplicate_block_edges (rd, local_info);
1348     }
1349
1350   /* Keep walking the hash table.  */
1351   return 1;
1352 }
1353
1354 /* We did not create any outgoing edges for the template block during
1355    block creation.  This hash table traversal callback creates the
1356    outgoing edge for the template block.  */
1357
1358 inline int
1359 ssa_fixup_template_block (struct redirection_data **slot,
1360                           ssa_local_info_t *local_info)
1361 {
1362   struct redirection_data *rd = *slot;
1363
1364   /* If this is the template block halt the traversal after updating
1365      it appropriately.
1366
1367      If we were threading through an joiner block, then we want
1368      to keep its control statement and redirect an outgoing edge.
1369      Else we want to remove the control statement & edges, then create
1370      a new outgoing edge.  In both cases we may need to update PHIs.  */
1371   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1372     {
1373       ssa_fix_duplicate_block_edges (rd, local_info);
1374       return 0;
1375     }
1376
1377   return 1;
1378 }
1379
1380 /* Hash table traversal callback to redirect each incoming edge
1381    associated with this hash table element to its new destination.  */
1382
1383 int
1384 ssa_redirect_edges (struct redirection_data **slot,
1385                     ssa_local_info_t *local_info)
1386 {
1387   struct redirection_data *rd = *slot;
1388   struct el *next, *el;
1389
1390   /* Walk over all the incoming edges associated associated with this
1391      hash table entry.  */
1392   for (el = rd->incoming_edges; el; el = next)
1393     {
1394       edge e = el->e;
1395       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1396
1397       /* Go ahead and free this element from the list.  Doing this now
1398          avoids the need for another list walk when we destroy the hash
1399          table.  */
1400       next = el->next;
1401       free (el);
1402
1403       thread_stats.num_threaded_edges++;
1404
1405       if (rd->dup_blocks[0])
1406         {
1407           edge e2;
1408
1409           if (dump_file && (dump_flags & TDF_DETAILS))
1410             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1411                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1412
1413           /* If we redirect a loop latch edge cancel its loop.  */
1414           if (e->src == e->src->loop_father->latch)
1415             mark_loop_for_removal (e->src->loop_father);
1416
1417           /* Redirect the incoming edge (possibly to the joiner block) to the
1418              appropriate duplicate block.  */
1419           e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1420           gcc_assert (e == e2);
1421           flush_pending_stmts (e2);
1422         }
1423
1424       /* Go ahead and clear E->aux.  It's not needed anymore and failure
1425          to clear it will cause all kinds of unpleasant problems later.  */
1426       delete_jump_thread_path (path);
1427       e->aux = NULL;
1428
1429     }
1430
1431   /* Indicate that we actually threaded one or more jumps.  */
1432   if (rd->incoming_edges)
1433     local_info->jumps_threaded = true;
1434
1435   return 1;
1436 }
1437
1438 /* Return true if this block has no executable statements other than
1439    a simple ctrl flow instruction.  When the number of outgoing edges
1440    is one, this is equivalent to a "forwarder" block.  */
1441
1442 static bool
1443 redirection_block_p (basic_block bb)
1444 {
1445   gimple_stmt_iterator gsi;
1446
1447   /* Advance to the first executable statement.  */
1448   gsi = gsi_start_bb (bb);
1449   while (!gsi_end_p (gsi)
1450          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1451              || is_gimple_debug (gsi_stmt (gsi))
1452              || gimple_nop_p (gsi_stmt (gsi))
1453              || gimple_clobber_p (gsi_stmt (gsi))))
1454     gsi_next (&gsi);
1455
1456   /* Check if this is an empty block.  */
1457   if (gsi_end_p (gsi))
1458     return true;
1459
1460   /* Test that we've reached the terminating control statement.  */
1461   return gsi_stmt (gsi)
1462          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1463              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1464              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1465 }
1466
1467 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1468    is reached via one or more specific incoming edges, we know which
1469    outgoing edge from BB will be traversed.
1470
1471    We want to redirect those incoming edges to the target of the
1472    appropriate outgoing edge.  Doing so avoids a conditional branch
1473    and may expose new optimization opportunities.  Note that we have
1474    to update dominator tree and SSA graph after such changes.
1475
1476    The key to keeping the SSA graph update manageable is to duplicate
1477    the side effects occurring in BB so that those side effects still
1478    occur on the paths which bypass BB after redirecting edges.
1479
1480    We accomplish this by creating duplicates of BB and arranging for
1481    the duplicates to unconditionally pass control to one specific
1482    successor of BB.  We then revector the incoming edges into BB to
1483    the appropriate duplicate of BB.
1484
1485    If NOLOOP_ONLY is true, we only perform the threading as long as it
1486    does not affect the structure of the loops in a nontrivial way.
1487
1488    If JOINERS is true, then thread through joiner blocks as well.  */
1489
1490 static bool
1491 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1492 {
1493   /* E is an incoming edge into BB that we may or may not want to
1494      redirect to a duplicate of BB.  */
1495   edge e, e2;
1496   edge_iterator ei;
1497   ssa_local_info_t local_info;
1498
1499   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1500
1501   /* To avoid scanning a linear array for the element we need we instead
1502      use a hash table.  For normal code there should be no noticeable
1503      difference.  However, if we have a block with a large number of
1504      incoming and outgoing edges such linear searches can get expensive.  */
1505   redirection_data
1506     = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1507
1508   /* Record each unique threaded destination into a hash table for
1509      efficient lookups.  */
1510   FOR_EACH_EDGE (e, ei, bb->preds)
1511     {
1512       if (e->aux == NULL)
1513         continue;
1514
1515       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1516
1517       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1518           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1519         continue;
1520
1521       e2 = path->last ()->e;
1522       if (!e2 || noloop_only)
1523         {
1524           /* If NOLOOP_ONLY is true, we only allow threading through the
1525              header of a loop to exit edges.  */
1526
1527           /* One case occurs when there was loop header buried in a jump
1528              threading path that crosses loop boundaries.  We do not try
1529              and thread this elsewhere, so just cancel the jump threading
1530              request by clearing the AUX field now.  */
1531           if ((bb->loop_father != e2->src->loop_father
1532                && !loop_exit_edge_p (e2->src->loop_father, e2))
1533               || (e2->src->loop_father != e2->dest->loop_father
1534                   && !loop_exit_edge_p (e2->src->loop_father, e2)))
1535             {
1536               /* Since this case is not handled by our special code
1537                  to thread through a loop header, we must explicitly
1538                  cancel the threading request here.  */
1539               delete_jump_thread_path (path);
1540               e->aux = NULL;
1541               continue;
1542             }
1543
1544           /* Another case occurs when trying to thread through our
1545              own loop header, possibly from inside the loop.  We will
1546              thread these later.  */
1547           unsigned int i;
1548           for (i = 1; i < path->length (); i++)
1549             {
1550               if ((*path)[i]->e->src == bb->loop_father->header
1551                   && (!loop_exit_edge_p (bb->loop_father, e2)
1552                       || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1553                 break;
1554             }
1555
1556           if (i != path->length ())
1557             continue;
1558         }
1559
1560       /* Insert the outgoing edge into the hash table if it is not
1561          already in the hash table.  */
1562       lookup_redirection_data (e, INSERT);
1563     }
1564
1565   /* We do not update dominance info.  */
1566   free_dominance_info (CDI_DOMINATORS);
1567
1568   /* We know we only thread through the loop header to loop exits.
1569      Let the basic block duplication hook know we are not creating
1570      a multiple entry loop.  */
1571   if (noloop_only
1572       && bb == bb->loop_father->header)
1573     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1574
1575   /* Now create duplicates of BB.
1576
1577      Note that for a block with a high outgoing degree we can waste
1578      a lot of time and memory creating and destroying useless edges.
1579
1580      So we first duplicate BB and remove the control structure at the
1581      tail of the duplicate as well as all outgoing edges from the
1582      duplicate.  We then use that duplicate block as a template for
1583      the rest of the duplicates.  */
1584   local_info.template_block = NULL;
1585   local_info.bb = bb;
1586   local_info.jumps_threaded = false;
1587   redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1588                             (&local_info);
1589
1590   /* The template does not have an outgoing edge.  Create that outgoing
1591      edge and update PHI nodes as the edge's target as necessary.
1592
1593      We do this after creating all the duplicates to avoid creating
1594      unnecessary edges.  */
1595   redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1596                             (&local_info);
1597
1598   /* The hash table traversals above created the duplicate blocks (and the
1599      statements within the duplicate blocks).  This loop creates PHI nodes for
1600      the duplicated blocks and redirects the incoming edges into BB to reach
1601      the duplicates of BB.  */
1602   redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1603                             (&local_info);
1604
1605   /* Done with this block.  Clear REDIRECTION_DATA.  */
1606   delete redirection_data;
1607   redirection_data = NULL;
1608
1609   if (noloop_only
1610       && bb == bb->loop_father->header)
1611     set_loop_copy (bb->loop_father, NULL);
1612
1613   BITMAP_FREE (local_info.duplicate_blocks);
1614   local_info.duplicate_blocks = NULL;
1615
1616   /* Indicate to our caller whether or not any jumps were threaded.  */
1617   return local_info.jumps_threaded;
1618 }
1619
1620 /* Wrapper for thread_block_1 so that we can first handle jump
1621    thread paths which do not involve copying joiner blocks, then
1622    handle jump thread paths which have joiner blocks.
1623
1624    By doing things this way we can be as aggressive as possible and
1625    not worry that copying a joiner block will create a jump threading
1626    opportunity.  */
1627
1628 static bool
1629 thread_block (basic_block bb, bool noloop_only)
1630 {
1631   bool retval;
1632   retval = thread_block_1 (bb, noloop_only, false);
1633   retval |= thread_block_1 (bb, noloop_only, true);
1634   return retval;
1635 }
1636
1637
1638 /* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
1639    copy of E->dest created during threading, or E->dest if it was not necessary
1640    to copy it (E is its single predecessor).  */
1641
1642 static basic_block
1643 thread_single_edge (edge e)
1644 {
1645   basic_block bb = e->dest;
1646   struct redirection_data rd;
1647   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1648   edge eto = (*path)[1]->e;
1649
1650   for (unsigned int i = 0; i < path->length (); i++)
1651     delete (*path)[i];
1652   delete path;
1653   e->aux = NULL;
1654
1655   thread_stats.num_threaded_edges++;
1656
1657   if (single_pred_p (bb))
1658     {
1659       /* If BB has just a single predecessor, we should only remove the
1660          control statements at its end, and successors except for ETO.  */
1661       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1662
1663       /* And fixup the flags on the single remaining edge.  */
1664       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1665       eto->flags |= EDGE_FALLTHRU;
1666
1667       return bb;
1668     }
1669
1670   /* Otherwise, we need to create a copy.  */
1671   if (e->dest == eto->src)
1672     update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1673
1674   vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1675   jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1676   npath->safe_push (x);
1677
1678   x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1679   npath->safe_push (x);
1680   rd.path = npath;
1681
1682   create_block_for_threading (bb, &rd, 0, NULL);
1683   remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1684   create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1685
1686   if (dump_file && (dump_flags & TDF_DETAILS))
1687     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1688              e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1689
1690   rd.dup_blocks[0]->count = e->count;
1691   rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1692   single_succ_edge (rd.dup_blocks[0])->count = e->count;
1693   redirect_edge_and_branch (e, rd.dup_blocks[0]);
1694   flush_pending_stmts (e);
1695
1696   return rd.dup_blocks[0];
1697 }
1698
1699 /* Callback for dfs_enumerate_from.  Returns true if BB is different
1700    from STOP and DBDS_CE_STOP.  */
1701
1702 static basic_block dbds_ce_stop;
1703 static bool
1704 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1705 {
1706   return (bb != (const_basic_block) stop
1707           && bb != dbds_ce_stop);
1708 }
1709
1710 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1711    returns the state.  */
1712
1713 enum bb_dom_status
1714 {
1715   /* BB does not dominate latch of the LOOP.  */
1716   DOMST_NONDOMINATING,
1717   /* The LOOP is broken (there is no path from the header to its latch.  */
1718   DOMST_LOOP_BROKEN,
1719   /* BB dominates the latch of the LOOP.  */
1720   DOMST_DOMINATING
1721 };
1722
1723 static enum bb_dom_status
1724 determine_bb_domination_status (struct loop *loop, basic_block bb)
1725 {
1726   basic_block *bblocks;
1727   unsigned nblocks, i;
1728   bool bb_reachable = false;
1729   edge_iterator ei;
1730   edge e;
1731
1732   /* This function assumes BB is a successor of LOOP->header.
1733      If that is not the case return DOMST_NONDOMINATING which
1734      is always safe.  */
1735     {
1736       bool ok = false;
1737
1738       FOR_EACH_EDGE (e, ei, bb->preds)
1739         {
1740           if (e->src == loop->header)
1741             {
1742               ok = true;
1743               break;
1744             }
1745         }
1746
1747       if (!ok)
1748         return DOMST_NONDOMINATING;
1749     }
1750
1751   if (bb == loop->latch)
1752     return DOMST_DOMINATING;
1753
1754   /* Check that BB dominates LOOP->latch, and that it is back-reachable
1755      from it.  */
1756
1757   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1758   dbds_ce_stop = loop->header;
1759   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1760                                 bblocks, loop->num_nodes, bb);
1761   for (i = 0; i < nblocks; i++)
1762     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1763       {
1764         if (e->src == loop->header)
1765           {
1766             free (bblocks);
1767             return DOMST_NONDOMINATING;
1768           }
1769         if (e->src == bb)
1770           bb_reachable = true;
1771       }
1772
1773   free (bblocks);
1774   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1775 }
1776
1777 /* Return true if BB is part of the new pre-header that is created
1778    when threading the latch to DATA.  */
1779
1780 static bool
1781 def_split_header_continue_p (const_basic_block bb, const void *data)
1782 {
1783   const_basic_block new_header = (const_basic_block) data;
1784   const struct loop *l;
1785
1786   if (bb == new_header
1787       || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1788     return false;
1789   for (l = bb->loop_father; l; l = loop_outer (l))
1790     if (l == new_header->loop_father)
1791       return true;
1792   return false;
1793 }
1794
1795 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
1796    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1797    to the inside of the loop.  */
1798
1799 static bool
1800 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1801 {
1802   basic_block header = loop->header;
1803   edge e, tgt_edge, latch = loop_latch_edge (loop);
1804   edge_iterator ei;
1805   basic_block tgt_bb, atgt_bb;
1806   enum bb_dom_status domst;
1807
1808   /* We have already threaded through headers to exits, so all the threading
1809      requests now are to the inside of the loop.  We need to avoid creating
1810      irreducible regions (i.e., loops with more than one entry block), and
1811      also loop with several latch edges, or new subloops of the loop (although
1812      there are cases where it might be appropriate, it is difficult to decide,
1813      and doing it wrongly may confuse other optimizers).
1814
1815      We could handle more general cases here.  However, the intention is to
1816      preserve some information about the loop, which is impossible if its
1817      structure changes significantly, in a way that is not well understood.
1818      Thus we only handle few important special cases, in which also updating
1819      of the loop-carried information should be feasible:
1820
1821      1) Propagation of latch edge to a block that dominates the latch block
1822         of a loop.  This aims to handle the following idiom:
1823
1824         first = 1;
1825         while (1)
1826           {
1827             if (first)
1828               initialize;
1829             first = 0;
1830             body;
1831           }
1832
1833         After threading the latch edge, this becomes
1834
1835         first = 1;
1836         if (first)
1837           initialize;
1838         while (1)
1839           {
1840             first = 0;
1841             body;
1842           }
1843
1844         The original header of the loop is moved out of it, and we may thread
1845         the remaining edges through it without further constraints.
1846
1847      2) All entry edges are propagated to a single basic block that dominates
1848         the latch block of the loop.  This aims to handle the following idiom
1849         (normally created for "for" loops):
1850
1851         i = 0;
1852         while (1)
1853           {
1854             if (i >= 100)
1855               break;
1856             body;
1857             i++;
1858           }
1859
1860         This becomes
1861
1862         i = 0;
1863         while (1)
1864           {
1865             body;
1866             i++;
1867             if (i >= 100)
1868               break;
1869           }
1870      */
1871
1872   /* Threading through the header won't improve the code if the header has just
1873      one successor.  */
1874   if (single_succ_p (header))
1875     goto fail;
1876
1877   /* If we threaded the latch using a joiner block, we cancel the
1878      threading opportunity out of an abundance of caution.  However,
1879      still allow threading from outside to inside the loop.  */
1880   if (latch->aux)
1881     {
1882       vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1883       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1884         {
1885           delete_jump_thread_path (path);
1886           latch->aux = NULL;
1887         }
1888     }
1889
1890   if (latch->aux)
1891     {
1892       vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1893       tgt_edge = (*path)[1]->e;
1894       tgt_bb = tgt_edge->dest;
1895     }
1896   else if (!may_peel_loop_headers
1897            && !redirection_block_p (loop->header))
1898     goto fail;
1899   else
1900     {
1901       tgt_bb = NULL;
1902       tgt_edge = NULL;
1903       FOR_EACH_EDGE (e, ei, header->preds)
1904         {
1905           if (!e->aux)
1906             {
1907               if (e == latch)
1908                 continue;
1909
1910               /* If latch is not threaded, and there is a header
1911                  edge that is not threaded, we would create loop
1912                  with multiple entries.  */
1913               goto fail;
1914             }
1915
1916           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1917
1918           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1919             goto fail;
1920           tgt_edge = (*path)[1]->e;
1921           atgt_bb = tgt_edge->dest;
1922           if (!tgt_bb)
1923             tgt_bb = atgt_bb;
1924           /* Two targets of threading would make us create loop
1925              with multiple entries.  */
1926           else if (tgt_bb != atgt_bb)
1927             goto fail;
1928         }
1929
1930       if (!tgt_bb)
1931         {
1932           /* There are no threading requests.  */
1933           return false;
1934         }
1935
1936       /* Redirecting to empty loop latch is useless.  */
1937       if (tgt_bb == loop->latch
1938           && empty_block_p (loop->latch))
1939         goto fail;
1940     }
1941
1942   /* The target block must dominate the loop latch, otherwise we would be
1943      creating a subloop.  */
1944   domst = determine_bb_domination_status (loop, tgt_bb);
1945   if (domst == DOMST_NONDOMINATING)
1946     goto fail;
1947   if (domst == DOMST_LOOP_BROKEN)
1948     {
1949       /* If the loop ceased to exist, mark it as such, and thread through its
1950          original header.  */
1951       mark_loop_for_removal (loop);
1952       return thread_block (header, false);
1953     }
1954
1955   if (tgt_bb->loop_father->header == tgt_bb)
1956     {
1957       /* If the target of the threading is a header of a subloop, we need
1958          to create a preheader for it, so that the headers of the two loops
1959          do not merge.  */
1960       if (EDGE_COUNT (tgt_bb->preds) > 2)
1961         {
1962           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1963           gcc_assert (tgt_bb != NULL);
1964         }
1965       else
1966         tgt_bb = split_edge (tgt_edge);
1967     }
1968
1969   if (latch->aux)
1970     {
1971       basic_block *bblocks;
1972       unsigned nblocks, i;
1973
1974       /* First handle the case latch edge is redirected.  We are copying
1975          the loop header but not creating a multiple entry loop.  Make the
1976          cfg manipulation code aware of that fact.  */
1977       set_loop_copy (loop, loop);
1978       loop->latch = thread_single_edge (latch);
1979       set_loop_copy (loop, NULL);
1980       gcc_assert (single_succ (loop->latch) == tgt_bb);
1981       loop->header = tgt_bb;
1982
1983       /* Remove the new pre-header blocks from our loop.  */
1984       bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1985       nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1986                                     bblocks, loop->num_nodes, tgt_bb);
1987       for (i = 0; i < nblocks; i++)
1988         if (bblocks[i]->loop_father == loop)
1989           {
1990             remove_bb_from_loops (bblocks[i]);
1991             add_bb_to_loop (bblocks[i], loop_outer (loop));
1992           }
1993       free (bblocks);
1994
1995       /* If the new header has multiple latches mark it so.  */
1996       FOR_EACH_EDGE (e, ei, loop->header->preds)
1997         if (e->src->loop_father == loop
1998             && e->src != loop->latch)
1999           {
2000             loop->latch = NULL;
2001             loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
2002           }
2003
2004       /* Cancel remaining threading requests that would make the
2005          loop a multiple entry loop.  */
2006       FOR_EACH_EDGE (e, ei, header->preds)
2007         {
2008           edge e2;
2009
2010           if (e->aux == NULL)
2011             continue;
2012
2013           vec<jump_thread_edge *> *path = THREAD_PATH (e);
2014           e2 = path->last ()->e;
2015
2016           if (e->src->loop_father != e2->dest->loop_father
2017               && e2->dest != loop->header)
2018             {
2019               delete_jump_thread_path (path);
2020               e->aux = NULL;
2021             }
2022         }
2023
2024       /* Thread the remaining edges through the former header.  */
2025       thread_block (header, false);
2026     }
2027   else
2028     {
2029       basic_block new_preheader;
2030
2031       /* Now consider the case entry edges are redirected to the new entry
2032          block.  Remember one entry edge, so that we can find the new
2033          preheader (its destination after threading).  */
2034       FOR_EACH_EDGE (e, ei, header->preds)
2035         {
2036           if (e->aux)
2037             break;
2038         }
2039
2040       /* The duplicate of the header is the new preheader of the loop.  Ensure
2041          that it is placed correctly in the loop hierarchy.  */
2042       set_loop_copy (loop, loop_outer (loop));
2043
2044       thread_block (header, false);
2045       set_loop_copy (loop, NULL);
2046       new_preheader = e->dest;
2047
2048       /* Create the new latch block.  This is always necessary, as the latch
2049          must have only a single successor, but the original header had at
2050          least two successors.  */
2051       loop->latch = NULL;
2052       mfb_kj_edge = single_succ_edge (new_preheader);
2053       loop->header = mfb_kj_edge->dest;
2054       latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2055       loop->header = latch->dest;
2056       loop->latch = latch->src;
2057     }
2058
2059   return true;
2060
2061 fail:
2062   /* We failed to thread anything.  Cancel the requests.  */
2063   FOR_EACH_EDGE (e, ei, header->preds)
2064     {
2065       vec<jump_thread_edge *> *path = THREAD_PATH (e);
2066
2067       if (path)
2068         {
2069           delete_jump_thread_path (path);
2070           e->aux = NULL;
2071         }
2072     }
2073   return false;
2074 }
2075
2076 /* E1 and E2 are edges into the same basic block.  Return TRUE if the
2077    PHI arguments associated with those edges are equal or there are no
2078    PHI arguments, otherwise return FALSE.  */
2079
2080 static bool
2081 phi_args_equal_on_edges (edge e1, edge e2)
2082 {
2083   gphi_iterator gsi;
2084   int indx1 = e1->dest_idx;
2085   int indx2 = e2->dest_idx;
2086
2087   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2088     {
2089       gphi *phi = gsi.phi ();
2090
2091       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2092                             gimple_phi_arg_def (phi, indx2), 0))
2093         return false;
2094     }
2095   return true;
2096 }
2097
2098 /* Walk through the registered jump threads and convert them into a
2099    form convenient for this pass.
2100
2101    Any block which has incoming edges threaded to outgoing edges
2102    will have its entry in THREADED_BLOCK set.
2103
2104    Any threaded edge will have its new outgoing edge stored in the
2105    original edge's AUX field.
2106
2107    This form avoids the need to walk all the edges in the CFG to
2108    discover blocks which need processing and avoids unnecessary
2109    hash table lookups to map from threaded edge to new target.  */
2110
2111 static void
2112 mark_threaded_blocks (bitmap threaded_blocks)
2113 {
2114   unsigned int i;
2115   bitmap_iterator bi;
2116   bitmap tmp = BITMAP_ALLOC (NULL);
2117   basic_block bb;
2118   edge e;
2119   edge_iterator ei;
2120
2121   /* It is possible to have jump threads in which one is a subpath
2122      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
2123      block and (B, C), (C, D) where no joiner block exists.
2124
2125      When this occurs ignore the jump thread request with the joiner
2126      block.  It's totally subsumed by the simpler jump thread request.
2127
2128      This results in less block copying, simpler CFGs.  More importantly,
2129      when we duplicate the joiner block, B, in this case we will create
2130      a new threading opportunity that we wouldn't be able to optimize
2131      until the next jump threading iteration.
2132
2133      So first convert the jump thread requests which do not require a
2134      joiner block.  */
2135   for (i = 0; i < paths.length (); i++)
2136     {
2137       vec<jump_thread_edge *> *path = paths[i];
2138
2139       if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2140         {
2141           edge e = (*path)[0]->e;
2142           e->aux = (void *)path;
2143           bitmap_set_bit (tmp, e->dest->index);
2144         }
2145     }
2146
2147   /* Now iterate again, converting cases where we want to thread
2148      through a joiner block, but only if no other edge on the path
2149      already has a jump thread attached to it.  We do this in two passes,
2150      to avoid situations where the order in the paths vec can hide overlapping
2151      threads (the path is recorded on the incoming edge, so we would miss
2152      cases where the second path starts at a downstream edge on the same
2153      path).  First record all joiner paths, deleting any in the unexpected
2154      case where there is already a path for that incoming edge.  */
2155   for (i = 0; i < paths.length (); i++)
2156     {
2157       vec<jump_thread_edge *> *path = paths[i];
2158
2159       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2160         {
2161           /* Attach the path to the starting edge if none is yet recorded.  */
2162           if ((*path)[0]->e->aux == NULL)
2163             (*path)[0]->e->aux = path;
2164           else if (dump_file && (dump_flags & TDF_DETAILS))
2165             dump_jump_thread_path (dump_file, *path, false);
2166         }
2167     }
2168   /* Second, look for paths that have any other jump thread attached to
2169      them, and either finish converting them or cancel them.  */
2170   for (i = 0; i < paths.length (); i++)
2171     {
2172       vec<jump_thread_edge *> *path = paths[i];
2173       edge e = (*path)[0]->e;
2174
2175       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2176         {
2177           unsigned int j;
2178           for (j = 1; j < path->length (); j++)
2179             if ((*path)[j]->e->aux != NULL)
2180               break;
2181
2182           /* If we iterated through the entire path without exiting the loop,
2183              then we are good to go, record it.  */
2184           if (j == path->length ())
2185             bitmap_set_bit (tmp, e->dest->index);
2186           else
2187             {
2188               e->aux = NULL;
2189               if (dump_file && (dump_flags & TDF_DETAILS))
2190                 dump_jump_thread_path (dump_file, *path, false);
2191             }
2192         }
2193     }
2194
2195   /* If optimizing for size, only thread through block if we don't have
2196      to duplicate it or it's an otherwise empty redirection block.  */
2197   if (optimize_function_for_size_p (cfun))
2198     {
2199       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2200         {
2201           bb = BASIC_BLOCK_FOR_FN (cfun, i);
2202           if (EDGE_COUNT (bb->preds) > 1
2203               && !redirection_block_p (bb))
2204             {
2205               FOR_EACH_EDGE (e, ei, bb->preds)
2206                 {
2207                   if (e->aux)
2208                     {
2209                       vec<jump_thread_edge *> *path = THREAD_PATH (e);
2210                       delete_jump_thread_path (path);
2211                       e->aux = NULL;
2212                     }
2213                 }
2214             }
2215           else
2216             bitmap_set_bit (threaded_blocks, i);
2217         }
2218     }
2219   else
2220     bitmap_copy (threaded_blocks, tmp);
2221
2222   /* Look for jump threading paths which cross multiple loop headers.
2223
2224      The code to thread through loop headers will change the CFG in ways
2225      that break assumptions made by the loop optimization code.
2226
2227      We don't want to blindly cancel the requests.  We can instead do better
2228      by trimming off the end of the jump thread path.  */
2229   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2230     {
2231       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2232       FOR_EACH_EDGE (e, ei, bb->preds)
2233         {
2234           if (e->aux)
2235             {
2236               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2237
2238               for (unsigned int i = 0, crossed_headers = 0;
2239                    i < path->length ();
2240                    i++)
2241                 {
2242                   basic_block dest = (*path)[i]->e->dest;
2243                   crossed_headers += (dest == dest->loop_father->header);
2244                   if (crossed_headers > 1)
2245                     {
2246                       /* Trim from entry I onwards.  */
2247                       for (unsigned int j = i; j < path->length (); j++)
2248                         delete (*path)[j];
2249                       path->truncate (i);
2250
2251                       /* Now that we've truncated the path, make sure
2252                          what's left is still valid.   We need at least
2253                          two edges on the path and the last edge can not
2254                          be a joiner.  This should never happen, but let's
2255                          be safe.  */
2256                       if (path->length () < 2
2257                           || (path->last ()->type
2258                               == EDGE_COPY_SRC_JOINER_BLOCK))
2259                         {
2260                           delete_jump_thread_path (path);
2261                           e->aux = NULL;
2262                         }
2263                       break;
2264                     }
2265                 }
2266             }
2267         }
2268     }
2269
2270   /* If we have a joiner block (J) which has two successors S1 and S2 and
2271      we are threading though S1 and the final destination of the thread
2272      is S2, then we must verify that any PHI nodes in S2 have the same
2273      PHI arguments for the edge J->S2 and J->S1->...->S2.
2274
2275      We used to detect this prior to registering the jump thread, but
2276      that prohibits propagation of edge equivalences into non-dominated
2277      PHI nodes as the equivalency test might occur before propagation.
2278
2279      This must also occur after we truncate any jump threading paths
2280      as this scenario may only show up after truncation.
2281
2282      This works for now, but will need improvement as part of the FSA
2283      optimization.
2284
2285      Note since we've moved the thread request data to the edges,
2286      we have to iterate on those rather than the threaded_edges vector.  */
2287   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2288     {
2289       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2290       FOR_EACH_EDGE (e, ei, bb->preds)
2291         {
2292           if (e->aux)
2293             {
2294               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2295               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2296
2297               if (have_joiner)
2298                 {
2299                   basic_block joiner = e->dest;
2300                   edge final_edge = path->last ()->e;
2301                   basic_block final_dest = final_edge->dest;
2302                   edge e2 = find_edge (joiner, final_dest);
2303
2304                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2305                     {
2306                       delete_jump_thread_path (path);
2307                       e->aux = NULL;
2308                     }
2309                 }
2310             }
2311         }
2312     }
2313
2314   BITMAP_FREE (tmp);
2315 }
2316
2317
2318 /* Return TRUE if BB ends with a switch statement or a computed goto.
2319    Otherwise return false.  */
2320 static bool
2321 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2322 {
2323   gimple stmt = last_stmt (bb);
2324   if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2325     return true;
2326   if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2327       && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2328     return true;
2329   return false;
2330 }
2331
2332 /* Verify that the REGION is a valid jump thread.  A jump thread is a special
2333    case of SEME Single Entry Multiple Exits region in which all nodes in the
2334    REGION have exactly one incoming edge.  The only exception is the first block
2335    that may not have been connected to the rest of the cfg yet.  */
2336
2337 DEBUG_FUNCTION void
2338 verify_jump_thread (basic_block *region, unsigned n_region)
2339 {
2340   for (unsigned i = 0; i < n_region; i++)
2341     gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2342 }
2343
2344 /* Return true when BB is one of the first N items in BBS.  */
2345
2346 static inline bool
2347 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2348 {
2349   for (int i = 0; i < n; i++)
2350     if (bb == bbs[i])
2351       return true;
2352
2353   return false;
2354 }
2355
2356 /* Duplicates a jump-thread path of N_REGION basic blocks.
2357    The ENTRY edge is redirected to the duplicate of the region.
2358
2359    Remove the last conditional statement in the last basic block in the REGION,
2360    and create a single fallthru edge pointing to the same destination as the
2361    EXIT edge.
2362
2363    The new basic blocks are stored to REGION_COPY in the same order as they had
2364    in REGION, provided that REGION_COPY is not NULL.
2365
2366    Returns false if it is unable to copy the region, true otherwise.  */
2367
2368 static bool
2369 duplicate_thread_path (edge entry, edge exit,
2370                        basic_block *region, unsigned n_region,
2371                        basic_block *region_copy)
2372 {
2373   unsigned i;
2374   bool free_region_copy = false;
2375   struct loop *loop = entry->dest->loop_father;
2376   edge exit_copy;
2377   edge redirected;
2378   int total_freq = 0, entry_freq = 0;
2379   gcov_type total_count = 0, entry_count = 0;
2380
2381   if (!can_copy_bbs_p (region, n_region))
2382     return false;
2383
2384   /* Some sanity checking.  Note that we do not check for all possible
2385      missuses of the functions.  I.e. if you ask to copy something weird,
2386      it will work, but the state of structures probably will not be
2387      correct.  */
2388   for (i = 0; i < n_region; i++)
2389     {
2390       /* We do not handle subloops, i.e. all the blocks must belong to the
2391          same loop.  */
2392       if (region[i]->loop_father != loop)
2393         return false;
2394     }
2395
2396   initialize_original_copy_tables ();
2397
2398   set_loop_copy (loop, loop);
2399
2400   if (!region_copy)
2401     {
2402       region_copy = XNEWVEC (basic_block, n_region);
2403       free_region_copy = true;
2404     }
2405
2406   if (entry->dest->count)
2407     {
2408       total_count = entry->dest->count;
2409       entry_count = entry->count;
2410       /* Fix up corner cases, to avoid division by zero or creation of negative
2411          frequencies.  */
2412       if (entry_count > total_count)
2413         entry_count = total_count;
2414     }
2415   else
2416     {
2417       total_freq = entry->dest->frequency;
2418       entry_freq = EDGE_FREQUENCY (entry);
2419       /* Fix up corner cases, to avoid division by zero or creation of negative
2420          frequencies.  */
2421       if (total_freq == 0)
2422         total_freq = 1;
2423       else if (entry_freq > total_freq)
2424         entry_freq = total_freq;
2425     }
2426
2427   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2428             split_edge_bb_loc (entry), false);
2429
2430   /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
2431      following code ensures that all the edges exiting the jump-thread path are
2432      redirected back to the original code: these edges are exceptions
2433      invalidating the property that is propagated by executing all the blocks of
2434      the jump-thread path in order.  */
2435
2436   for (i = 0; i < n_region; i++)
2437     {
2438       edge e;
2439       edge_iterator ei;
2440       basic_block bb = region_copy[i];
2441
2442       if (single_succ_p (bb))
2443         {
2444           /* Make sure the successor is the next node in the path.  */
2445           gcc_assert (i + 1 == n_region
2446                       || region_copy[i + 1] == single_succ_edge (bb)->dest);
2447           continue;
2448         }
2449
2450       /* Special case the last block on the path: make sure that it does not
2451          jump back on the copied path.  */
2452       if (i + 1 == n_region)
2453         {
2454           FOR_EACH_EDGE (e, ei, bb->succs)
2455             if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2456               {
2457                 basic_block orig = get_bb_original (e->dest);
2458                 if (orig)
2459                   redirect_edge_and_branch_force (e, orig);
2460               }
2461           continue;
2462         }
2463
2464       /* Redirect all other edges jumping to non-adjacent blocks back to the
2465          original code.  */
2466       FOR_EACH_EDGE (e, ei, bb->succs)
2467         if (region_copy[i + 1] != e->dest)
2468           {
2469             basic_block orig = get_bb_original (e->dest);
2470             if (orig)
2471               redirect_edge_and_branch_force (e, orig);
2472           }
2473     }
2474
2475   if (total_count)
2476     {
2477       scale_bbs_frequencies_gcov_type (region, n_region,
2478                                        total_count - entry_count,
2479                                        total_count);
2480       scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2481                                        total_count);
2482     }
2483   else
2484     {
2485       scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2486                                  total_freq);
2487       scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2488     }
2489
2490 #ifdef ENABLE_CHECKING
2491   verify_jump_thread (region_copy, n_region);
2492 #endif
2493
2494   /* Remove the last branch in the jump thread path.  */
2495   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2496   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2497
2498   if (e) {
2499     rescan_loop_exit (e, true, false);
2500     e->probability = REG_BR_PROB_BASE;
2501     e->count = region_copy[n_region - 1]->count;
2502   }
2503
2504   /* Redirect the entry and add the phi node arguments.  */
2505   if (entry->dest == loop->header)
2506     mark_loop_for_removal (loop);
2507   redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2508   gcc_assert (redirected != NULL);
2509   flush_pending_stmts (entry);
2510
2511   /* Add the other PHI node arguments.  */
2512   add_phi_args_after_copy (region_copy, n_region, NULL);
2513
2514   if (free_region_copy)
2515     free (region_copy);
2516
2517   free_original_copy_tables ();
2518   return true;
2519 }
2520
2521 /* Return true when PATH is a valid jump-thread path.  */
2522
2523 static bool
2524 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2525 {
2526   unsigned len = path->length ();
2527
2528   /* Check that the path is connected.  */
2529   for (unsigned int j = 0; j < len - 1; j++)
2530     if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2531       return false;
2532
2533   return true;
2534 }
2535
2536 /* Walk through all blocks and thread incoming edges to the appropriate
2537    outgoing edge for each edge pair recorded in THREADED_EDGES.
2538
2539    It is the caller's responsibility to fix the dominance information
2540    and rewrite duplicated SSA_NAMEs back into SSA form.
2541
2542    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2543    loop headers if it does not simplify the loop.
2544
2545    Returns true if one or more edges were threaded, false otherwise.  */
2546
2547 bool
2548 thread_through_all_blocks (bool may_peel_loop_headers)
2549 {
2550   bool retval = false;
2551   unsigned int i;
2552   bitmap_iterator bi;
2553   bitmap threaded_blocks;
2554   struct loop *loop;
2555
2556   if (!paths.exists ())
2557     return false;
2558
2559   threaded_blocks = BITMAP_ALLOC (NULL);
2560   memset (&thread_stats, 0, sizeof (thread_stats));
2561
2562   /* Jump-thread all FSM threads before other jump-threads.  */
2563   for (i = 0; i < paths.length ();)
2564     {
2565       vec<jump_thread_edge *> *path = paths[i];
2566       edge entry = (*path)[0]->e;
2567
2568       /* Only code-generate FSM jump-threads in this loop.  */
2569       if ((*path)[0]->type != EDGE_FSM_THREAD)
2570         {
2571           i++;
2572           continue;
2573         }
2574
2575       /* Do not jump-thread twice from the same block.  */
2576       if (bitmap_bit_p (threaded_blocks, entry->src->index)
2577           /* Verify that the jump thread path is still valid: a
2578              previous jump-thread may have changed the CFG, and
2579              invalidated the current path.  */
2580           || !valid_jump_thread_path (path))
2581         {
2582           /* Remove invalid FSM jump-thread paths.  */
2583           delete_jump_thread_path (path);
2584           paths.unordered_remove (i);
2585           continue;
2586         }
2587
2588       unsigned len = path->length ();
2589       edge exit = (*path)[len - 1]->e;
2590       basic_block *region = XNEWVEC (basic_block, len - 1);
2591
2592       for (unsigned int j = 0; j < len - 1; j++)
2593         region[j] = (*path)[j]->e->dest;
2594
2595       if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2596         {
2597           /* We do not update dominance info.  */
2598           free_dominance_info (CDI_DOMINATORS);
2599           bitmap_set_bit (threaded_blocks, entry->src->index);
2600           retval = true;
2601         }
2602
2603       delete_jump_thread_path (path);
2604       paths.unordered_remove (i);
2605     }
2606
2607   /* Remove from PATHS all the jump-threads starting with an edge already
2608      jump-threaded.  */
2609   for (i = 0; i < paths.length ();)
2610     {
2611       vec<jump_thread_edge *> *path = paths[i];
2612       edge entry = (*path)[0]->e;
2613
2614       /* Do not jump-thread twice from the same block.  */
2615       if (bitmap_bit_p (threaded_blocks, entry->src->index))
2616         {
2617           delete_jump_thread_path (path);
2618           paths.unordered_remove (i);
2619         }
2620       else
2621         i++;
2622     }
2623
2624   bitmap_clear (threaded_blocks);
2625
2626   mark_threaded_blocks (threaded_blocks);
2627
2628   initialize_original_copy_tables ();
2629
2630   /* First perform the threading requests that do not affect
2631      loop structure.  */
2632   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2633     {
2634       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2635
2636       if (EDGE_COUNT (bb->preds) > 0)
2637         retval |= thread_block (bb, true);
2638     }
2639
2640   /* Then perform the threading through loop headers.  We start with the
2641      innermost loop, so that the changes in cfg we perform won't affect
2642      further threading.  */
2643   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2644     {
2645       if (!loop->header
2646           || !bitmap_bit_p (threaded_blocks, loop->header->index))
2647         continue;
2648
2649       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2650     }
2651
2652   /* Any jump threading paths that are still attached to edges at this
2653      point must be one of two cases.
2654
2655      First, we could have a jump threading path which went from outside
2656      a loop to inside a loop that was ignored because a prior jump thread
2657      across a backedge was realized (which indirectly causes the loop
2658      above to ignore the latter thread).  We can detect these because the
2659      loop structures will be different and we do not currently try to
2660      optimize this case.
2661
2662      Second, we could be threading across a backedge to a point within the
2663      same loop.  This occurrs for the FSA/FSM optimization and we would
2664      like to optimize it.  However, we have to be very careful as this
2665      may completely scramble the loop structures, with the result being
2666      irreducible loops causing us to throw away our loop structure.
2667
2668      As a compromise for the latter case, if the thread path ends in
2669      a block where the last statement is a multiway branch, then go
2670      ahead and thread it, else ignore it.  */
2671   basic_block bb;
2672   edge e;
2673   FOR_EACH_BB_FN (bb, cfun)
2674     {
2675       /* If we do end up threading here, we can remove elements from
2676          BB->preds.  Thus we can not use the FOR_EACH_EDGE iterator.  */
2677       for (edge_iterator ei = ei_start (bb->preds);
2678            (e = ei_safe_edge (ei));)
2679         if (e->aux)
2680           {
2681             vec<jump_thread_edge *> *path = THREAD_PATH (e);
2682
2683             /* Case 1, threading from outside to inside the loop
2684                after we'd already threaded through the header.  */
2685             if ((*path)[0]->e->dest->loop_father
2686                 != path->last ()->e->src->loop_father)
2687               {
2688                 delete_jump_thread_path (path);
2689                 e->aux = NULL;
2690                 ei_next (&ei);
2691               }
2692            else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2693               {
2694                 /* The code to thread through loop headers may have
2695                    split a block with jump threads attached to it.
2696
2697                    We can identify this with a disjoint jump threading
2698                    path.  If found, just remove it.  */
2699                 for (unsigned int i = 0; i < path->length () - 1; i++)
2700                   if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2701                     {
2702                       delete_jump_thread_path (path);
2703                       e->aux = NULL;
2704                       ei_next (&ei);
2705                       break;
2706                     }
2707
2708                 /* Our path is still valid, thread it.  */
2709                 if (e->aux)
2710                   {
2711                     if (thread_block ((*path)[0]->e->dest, false))
2712                       e->aux = NULL;
2713                     else
2714                       {
2715                         delete_jump_thread_path (path);
2716                         e->aux = NULL;
2717                         ei_next (&ei);
2718                       }
2719                   }
2720               }
2721            else
2722               {
2723                 delete_jump_thread_path (path);
2724                 e->aux = NULL;
2725                 ei_next (&ei);
2726               }
2727           }
2728         else
2729           ei_next (&ei);
2730     }
2731
2732   statistics_counter_event (cfun, "Jumps threaded",
2733                             thread_stats.num_threaded_edges);
2734
2735   free_original_copy_tables ();
2736
2737   BITMAP_FREE (threaded_blocks);
2738   threaded_blocks = NULL;
2739   paths.release ();
2740
2741   if (retval)
2742     loops_state_set (LOOPS_NEED_FIXUP);
2743
2744   return retval;
2745 }
2746
2747 /* Delete the jump threading path PATH.  We have to explcitly delete
2748    each entry in the vector, then the container.  */
2749
2750 void
2751 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2752 {
2753   for (unsigned int i = 0; i < path->length (); i++)
2754     delete (*path)[i];
2755   path->release();
2756   delete path;
2757 }
2758
2759 /* Register a jump threading opportunity.  We queue up all the jump
2760    threading opportunities discovered by a pass and update the CFG
2761    and SSA form all at once.
2762
2763    E is the edge we can thread, E2 is the new target edge, i.e., we
2764    are effectively recording that E->dest can be changed to E2->dest
2765    after fixing the SSA graph.  */
2766
2767 void
2768 register_jump_thread (vec<jump_thread_edge *> *path)
2769 {
2770   if (!dbg_cnt (registered_jump_thread))
2771     {
2772       delete_jump_thread_path (path);
2773       return;
2774     }
2775
2776   /* First make sure there are no NULL outgoing edges on the jump threading
2777      path.  That can happen for jumping to a constant address.  */
2778   for (unsigned int i = 0; i < path->length (); i++)
2779     if ((*path)[i]->e == NULL)
2780       {
2781         if (dump_file && (dump_flags & TDF_DETAILS))
2782           {
2783             fprintf (dump_file,
2784                      "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
2785             dump_jump_thread_path (dump_file, *path, false);
2786           }
2787
2788         delete_jump_thread_path (path);
2789         return;
2790       }
2791
2792   if (dump_file && (dump_flags & TDF_DETAILS))
2793     dump_jump_thread_path (dump_file, *path, true);
2794
2795   if (!paths.exists ())
2796     paths.create (5);
2797
2798   paths.safe_push (path);
2799 }