gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tree.h"
  24 #include "flags.h"
  25 #include "basic-block.h"
  26 #include "function.h"
  27 #include "gimple.h"
  28 #include "gimple-ssa.h"
  29 #include "tree-phinodes.h"
  30 #include "tree-ssa.h"
  31 #include "tree-ssa-threadupdate.h"
  32 #include "dumpfile.h"
  33 #include "cfgloop.h"
  34 #include "hash-table.h"
  35 #include "dbgcnt.h"
  36
  37 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  38    one or more in-edges to B to instead reach the destination of an
  39    out-edge from B while preserving any side effects in B.
  40
  41    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  42    side effects of executing B.
  43
  44      1. Make a copy of B (including its outgoing edges and statements).  Call
  45         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  46
  47      2. Remove the control statement at the end of B' and all outgoing edges
  48         except B'->C.
  49
  50      3. Add a new argument to each PHI in C with the same value as the existing
  51         argument associated with edge B->C.  Associate the new PHI arguments
  52         with the edge B'->C.
  53
  54      4. For each PHI in B, find or create a PHI in B' with an identical
  55         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  56         value as the PHI in B associated with the edge A->B.  Associate
  57         the new argument in the PHI in B' with the edge A->B.
  58
  59      5. Change the edge A->B to A->B'.
  60
  61         5a. This automatically deletes any PHI arguments associated with the
  62             edge A->B in B.
  63
  64         5b. This automatically associates each new argument added in step 4
  65             with the edge A->B'.
  66
  67      6. Repeat for other incoming edges into B.
  68
  69      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  70
  71    Note that block duplication can be minimized by first collecting the
  72    set of unique destination blocks that the incoming edges should
  73    be threaded to.
  74
  75    Block duplication can be further minimized by using B instead of
  76    creating B' for one destination if all edges into B are going to be
  77    threaded to a successor of B.  We had code to do this at one time, but
  78    I'm not convinced it is correct with the changes to avoid mucking up
  79    the loop structure (which may cancel threading requests, thus a block
  80    which we thought was going to become unreachable may still be reachable).
  81    This code was also going to get ugly with the introduction of the ability
  82    for a single jump thread request to bypass multiple blocks.
  83
  84    We further reduce the number of edges and statements we create by
  85    not copying all the outgoing edges and the control statement in
  86    step #1.  We instead create a template block without the outgoing
  87    edges and duplicate the template.  */
  88
  89
  90 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  91    all the incoming edges which thread to the same destination edge at
  92    the same time.  That avoids lots of table lookups to get information
  93    for the destination edge.
  94
  95    To realize that implementation we create a list of incoming edges
  96    which thread to the same outgoing edge.  Thus to implement steps
  97    #5 and #6 we traverse our hash table of outgoing edge information.
  98    For each entry we walk the list of incoming edges which thread to
  99    the current outgoing edge.  */
 100
 101 struct el
 102 {
 103   edge e;
 104   struct el *next;
 105 };
 106
 107 /* Main data structure recording information regarding B's duplicate
 108    blocks.  */
 109
 110 /* We need to efficiently record the unique thread destinations of this
 111    block and specific information associated with those destinations.  We
 112    may have many incoming edges threaded to the same outgoing edge.  This
 113    can be naturally implemented with a hash table.  */
 114
 115 struct redirection_data : typed_free_remove<redirection_data>
 116 {
 117   /* A duplicate of B with the trailing control statement removed and which
 118      targets a single successor of B.  */
 119   basic_block dup_block;
 120
 121   /* The jump threading path.  */
 122   vec<jump_thread_edge *> *path;
 123
 124   /* A list of incoming edges which we want to thread to the
 125      same path.  */
 126   struct el *incoming_edges;
 127
 128   /* hash_table support.  */
 129   typedef redirection_data value_type;
 130   typedef redirection_data compare_type;
 131   static inline hashval_t hash (const value_type *);
 132   static inline int equal (const value_type *, const compare_type *);
 133 };
 134
 135 /* Simple hashing function.  For any given incoming edge E, we're going
 136    to be most concerned with the final destination of its jump thread
 137    path.  So hash on the block index of the final edge in the path.  */
 138
 139 inline hashval_t
 140 redirection_data::hash (const value_type *p)
 141 {
 142   vec<jump_thread_edge *> *path = p->path;
 143   return path->last ()->e->dest->index;
 144 }
 145
 146 /* Given two hash table entries, return true if they have the same
 147    jump threading path.  */
 148 inline int
 149 redirection_data::equal (const value_type *p1, const compare_type *p2)
 150 {
 151   vec<jump_thread_edge *> *path1 = p1->path;
 152   vec<jump_thread_edge *> *path2 = p2->path;
 153
 154   if (path1->length () != path2->length ())
 155     return false;
 156
 157   for (unsigned int i = 1; i < path1->length (); i++)
 158     {
 159       if ((*path1)[i]->type != (*path2)[i]->type
 160           || (*path1)[i]->e != (*path2)[i]->e)
 161         return false;
 162     }
 163
 164   return true;
 165 }
 166
 167 /* Data structure of information to pass to hash table traversal routines.  */
 168 struct ssa_local_info_t
 169 {
 170   /* The current block we are working on.  */
 171   basic_block bb;
 172
 173   /* A template copy of BB with no outgoing edges or control statement that
 174      we use for creating copies.  */
 175   basic_block template_block;
 176
 177   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 178   bool jumps_threaded;
 179 };
 180
 181 /* Passes which use the jump threading code register jump threading
 182    opportunities as they are discovered.  We keep the registered
 183    jump threading opportunities in this vector as edge pairs
 184    (original_edge, target_edge).  */
 185 static vec<vec<jump_thread_edge *> *> paths;
 186
 187 /* When we start updating the CFG for threading, data necessary for jump
 188    threading is attached to the AUX field for the incoming edge.  Use these
 189    macros to access the underlying structure attached to the AUX field.  */
 190 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
 191
 192 /* Jump threading statistics.  */
 193
 194 struct thread_stats_d
 195 {
 196   unsigned long num_threaded_edges;
 197 };
 198
 199 struct thread_stats_d thread_stats;
 200
 201
 202 /* Remove the last statement in block BB if it is a control statement
 203    Also remove all outgoing edges except the edge which reaches DEST_BB.
 204    If DEST_BB is NULL, then remove all outgoing edges.  */
 205
 206 static void
 207 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 208 {
 209   gimple_stmt_iterator gsi;
 210   edge e;
 211   edge_iterator ei;
 212
 213   gsi = gsi_last_bb (bb);
 214
 215   /* If the duplicate ends with a control statement, then remove it.
 216
 217      Note that if we are duplicating the template block rather than the
 218      original basic block, then the duplicate might not have any real
 219      statements in it.  */
 220   if (!gsi_end_p (gsi)
 221       && gsi_stmt (gsi)
 222       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 223           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 224           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 225     gsi_remove (&gsi, true);
 226
 227   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 228     {
 229       if (e->dest != dest_bb)
 230         remove_edge (e);
 231       else
 232         ei_next (&ei);
 233     }
 234 }
 235
 236 /* Create a duplicate of BB.  Record the duplicate block in RD.  */
 237
 238 static void
 239 create_block_for_threading (basic_block bb, struct redirection_data *rd)
 240 {
 241   edge_iterator ei;
 242   edge e;
 243
 244   /* We can use the generic block duplication code and simply remove
 245      the stuff we do not need.  */
 246   rd->dup_block = duplicate_block (bb, NULL, NULL);
 247
 248   FOR_EACH_EDGE (e, ei, rd->dup_block->succs)
 249     e->aux = NULL;
 250
 251   /* Zero out the profile, since the block is unreachable for now.  */
 252   rd->dup_block->frequency = 0;
 253   rd->dup_block->count = 0;
 254 }
 255
 256 /* Main data structure to hold information for duplicates of BB.  */
 257
 258 static hash_table <redirection_data> redirection_data;
 259
 260 /* Given an outgoing edge E lookup and return its entry in our hash table.
 261
 262    If INSERT is true, then we insert the entry into the hash table if
 263    it is not already present.  INCOMING_EDGE is added to the list of incoming
 264    edges associated with E in the hash table.  */
 265
 266 static struct redirection_data *
 267 lookup_redirection_data (edge e, enum insert_option insert)
 268 {
 269   struct redirection_data **slot;
 270   struct redirection_data *elt;
 271   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 272
 273  /* Build a hash table element so we can see if E is already
 274      in the table.  */
 275   elt = XNEW (struct redirection_data);
 276   elt->path = path;
 277   elt->dup_block = NULL;
 278   elt->incoming_edges = NULL;
 279
 280   slot = redirection_data.find_slot (elt, insert);
 281
 282   /* This will only happen if INSERT is false and the entry is not
 283      in the hash table.  */
 284   if (slot == NULL)
 285     {
 286       free (elt);
 287       return NULL;
 288     }
 289
 290   /* This will only happen if E was not in the hash table and
 291      INSERT is true.  */
 292   if (*slot == NULL)
 293     {
 294       *slot = elt;
 295       elt->incoming_edges = XNEW (struct el);
 296       elt->incoming_edges->e = e;
 297       elt->incoming_edges->next = NULL;
 298       return elt;
 299     }
 300   /* E was in the hash table.  */
 301   else
 302     {
 303       /* Free ELT as we do not need it anymore, we will extract the
 304          relevant entry from the hash table itself.  */
 305       free (elt);
 306
 307       /* Get the entry stored in the hash table.  */
 308       elt = *slot;
 309
 310       /* If insertion was requested, then we need to add INCOMING_EDGE
 311          to the list of incoming edges associated with E.  */
 312       if (insert)
 313         {
 314           struct el *el = XNEW (struct el);
 315           el->next = elt->incoming_edges;
 316           el->e = e;
 317           elt->incoming_edges = el;
 318         }
 319
 320       return elt;
 321     }
 322 }
 323
 324 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.  */
 325
 326 static void
 327 copy_phi_args (basic_block bb, edge src_e, edge tgt_e)
 328 {
 329   gimple_stmt_iterator gsi;
 330   int src_indx = src_e->dest_idx;
 331
 332   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 333     {
 334       gimple phi = gsi_stmt (gsi);
 335       source_location locus = gimple_phi_arg_location (phi, src_indx);
 336       add_phi_arg (phi, gimple_phi_arg_def (phi, src_indx), tgt_e, locus);
 337     }
 338 }
 339
 340 /* We have recently made a copy of ORIG_BB, including its outgoing
 341    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 342    ORIG_BB has a new argument associated with edge from NEW_BB to the
 343    successor.  Initialize the PHI argument so that it is equal to the PHI
 344    argument associated with the edge from ORIG_BB to the successor.  */
 345
 346 static void
 347 update_destination_phis (basic_block orig_bb, basic_block new_bb)
 348 {
 349   edge_iterator ei;
 350   edge e;
 351
 352   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 353     {
 354       edge e2 = find_edge (new_bb, e->dest);
 355       copy_phi_args (e->dest, e, e2);
 356     }
 357 }
 358
 359 /* Given a duplicate block and its single destination (both stored
 360    in RD).  Create an edge between the duplicate and its single
 361    destination.
 362
 363    Add an additional argument to any PHI nodes at the single
 364    destination.  */
 365
 366 static void
 367 create_edge_and_update_destination_phis (struct redirection_data *rd,
 368                                          basic_block bb)
 369 {
 370   edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 371
 372   rescan_loop_exit (e, true, false);
 373   e->probability = REG_BR_PROB_BASE;
 374   e->count = bb->count;
 375
 376   /* We have to copy path -- which means creating a new vector as well
 377      as all the jump_thread_edge entries.  */
 378   if (rd->path->last ()->e->aux)
 379     {
 380       vec<jump_thread_edge *> *path = THREAD_PATH (rd->path->last ()->e);
 381       vec<jump_thread_edge *> *copy = new vec<jump_thread_edge *> ();
 382
 383       /* Sadly, the elements of the vector are pointers and need to
 384          be copied as well.  */
 385       for (unsigned int i = 0; i < path->length (); i++)
 386         {
 387           jump_thread_edge *x
 388             = new jump_thread_edge ((*path)[i]->e, (*path)[i]->type);
 389           copy->safe_push (x);
 390         }
 391      e->aux = (void *)copy;
 392     }
 393   else
 394     {
 395       e->aux = NULL;
 396     }
 397
 398   /* If there are any PHI nodes at the destination of the outgoing edge
 399      from the duplicate block, then we will need to add a new argument
 400      to them.  The argument should have the same value as the argument
 401      associated with the outgoing edge stored in RD.  */
 402   copy_phi_args (e->dest, rd->path->last ()->e, e);
 403 }
 404
 405 /* Wire up the outgoing edges from the duplicate block and
 406    update any PHIs as needed.  */
 407 void
 408 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
 409                                ssa_local_info_t *local_info)
 410 {
 411   edge e = rd->incoming_edges->e;
 412   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 413
 414   /* If we were threading through an joiner block, then we want
 415      to keep its control statement and redirect an outgoing edge.
 416      Else we want to remove the control statement & edges, then create
 417      a new outgoing edge.  In both cases we may need to update PHIs.  */
 418   if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 419     {
 420       edge victim;
 421       edge e2;
 422
 423       /* This updates the PHIs at the destination of the duplicate
 424          block.  */
 425       update_destination_phis (local_info->bb, rd->dup_block);
 426
 427       /* Find the edge from the duplicate block to the block we're
 428          threading through.  That's the edge we want to redirect.  */
 429       victim = find_edge (rd->dup_block, (*path)[1]->e->dest);
 430       e2 = redirect_edge_and_branch (victim, path->last ()->e->dest);
 431       e2->count = path->last ()->e->count;
 432
 433       /* If we redirected the edge, then we need to copy PHI arguments
 434          at the target.  If the edge already existed (e2 != victim case),
 435          then the PHIs in the target already have the correct arguments.  */
 436       if (e2 == victim)
 437         copy_phi_args (e2->dest, path->last ()->e, e2);
 438     }
 439   else
 440     {
 441       remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
 442       create_edge_and_update_destination_phis (rd, rd->dup_block);
 443     }
 444 }
 445 /* Hash table traversal callback routine to create duplicate blocks.  */
 446
 447 int
 448 ssa_create_duplicates (struct redirection_data **slot,
 449                        ssa_local_info_t *local_info)
 450 {
 451   struct redirection_data *rd = *slot;
 452
 453   /* Create a template block if we have not done so already.  Otherwise
 454      use the template to create a new block.  */
 455   if (local_info->template_block == NULL)
 456     {
 457       create_block_for_threading (local_info->bb, rd);
 458       local_info->template_block = rd->dup_block;
 459
 460       /* We do not create any outgoing edges for the template.  We will
 461          take care of that in a later traversal.  That way we do not
 462          create edges that are going to just be deleted.  */
 463     }
 464   else
 465     {
 466       create_block_for_threading (local_info->template_block, rd);
 467
 468       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
 469          block.   */
 470       ssa_fix_duplicate_block_edges (rd, local_info);
 471     }
 472
 473   /* Keep walking the hash table.  */
 474   return 1;
 475 }
 476
 477 /* We did not create any outgoing edges for the template block during
 478    block creation.  This hash table traversal callback creates the
 479    outgoing edge for the template block.  */
 480
 481 inline int
 482 ssa_fixup_template_block (struct redirection_data **slot,
 483                           ssa_local_info_t *local_info)
 484 {
 485   struct redirection_data *rd = *slot;
 486
 487   /* If this is the template block halt the traversal after updating
 488      it appropriately.
 489
 490      If we were threading through an joiner block, then we want
 491      to keep its control statement and redirect an outgoing edge.
 492      Else we want to remove the control statement & edges, then create
 493      a new outgoing edge.  In both cases we may need to update PHIs.  */
 494   if (rd->dup_block && rd->dup_block == local_info->template_block)
 495     {
 496       ssa_fix_duplicate_block_edges (rd, local_info);
 497       return 0;
 498     }
 499
 500   return 1;
 501 }
 502
 503 /* Hash table traversal callback to redirect each incoming edge
 504    associated with this hash table element to its new destination.  */
 505
 506 int
 507 ssa_redirect_edges (struct redirection_data **slot,
 508                     ssa_local_info_t *local_info)
 509 {
 510   struct redirection_data *rd = *slot;
 511   struct el *next, *el;
 512
 513   /* Walk over all the incoming edges associated associated with this
 514      hash table entry.  */
 515   for (el = rd->incoming_edges; el; el = next)
 516     {
 517       edge e = el->e;
 518       vec<jump_thread_edge *> *path = THREAD_PATH (e);
 519
 520       /* Go ahead and free this element from the list.  Doing this now
 521          avoids the need for another list walk when we destroy the hash
 522          table.  */
 523       next = el->next;
 524       free (el);
 525
 526       thread_stats.num_threaded_edges++;
 527
 528       if (rd->dup_block)
 529         {
 530           edge e2;
 531
 532           if (dump_file && (dump_flags & TDF_DETAILS))
 533             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 534                      e->src->index, e->dest->index, rd->dup_block->index);
 535
 536           rd->dup_block->count += e->count;
 537
 538           /* Excessive jump threading may make frequencies large enough so
 539              the computation overflows.  */
 540           if (rd->dup_block->frequency < BB_FREQ_MAX * 2)
 541             rd->dup_block->frequency += EDGE_FREQUENCY (e);
 542
 543           /* In the case of threading through a joiner block, the outgoing
 544              edges from the duplicate block were updated when they were
 545              redirected during ssa_fix_duplicate_block_edges.  */
 546           if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
 547             EDGE_SUCC (rd->dup_block, 0)->count += e->count;
 548
 549           /* Redirect the incoming edge (possibly to the joiner block) to the
 550              appropriate duplicate block.  */
 551           e2 = redirect_edge_and_branch (e, rd->dup_block);
 552           gcc_assert (e == e2);
 553           flush_pending_stmts (e2);
 554         }
 555
 556       /* Go ahead and clear E->aux.  It's not needed anymore and failure
 557          to clear it will cause all kinds of unpleasant problems later.  */
 558       delete_jump_thread_path (path);
 559       e->aux = NULL;
 560
 561     }
 562
 563   /* Indicate that we actually threaded one or more jumps.  */
 564   if (rd->incoming_edges)
 565     local_info->jumps_threaded = true;
 566
 567   return 1;
 568 }
 569
 570 /* Return true if this block has no executable statements other than
 571    a simple ctrl flow instruction.  When the number of outgoing edges
 572    is one, this is equivalent to a "forwarder" block.  */
 573
 574 static bool
 575 redirection_block_p (basic_block bb)
 576 {
 577   gimple_stmt_iterator gsi;
 578
 579   /* Advance to the first executable statement.  */
 580   gsi = gsi_start_bb (bb);
 581   while (!gsi_end_p (gsi)
 582          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
 583              || is_gimple_debug (gsi_stmt (gsi))
 584              || gimple_nop_p (gsi_stmt (gsi))))
 585     gsi_next (&gsi);
 586
 587   /* Check if this is an empty block.  */
 588   if (gsi_end_p (gsi))
 589     return true;
 590
 591   /* Test that we've reached the terminating control statement.  */
 592   return gsi_stmt (gsi)
 593          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 594              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 595              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
 596 }
 597
 598 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
 599    is reached via one or more specific incoming edges, we know which
 600    outgoing edge from BB will be traversed.
 601
 602    We want to redirect those incoming edges to the target of the
 603    appropriate outgoing edge.  Doing so avoids a conditional branch
 604    and may expose new optimization opportunities.  Note that we have
 605    to update dominator tree and SSA graph after such changes.
 606
 607    The key to keeping the SSA graph update manageable is to duplicate
 608    the side effects occurring in BB so that those side effects still
 609    occur on the paths which bypass BB after redirecting edges.
 610
 611    We accomplish this by creating duplicates of BB and arranging for
 612    the duplicates to unconditionally pass control to one specific
 613    successor of BB.  We then revector the incoming edges into BB to
 614    the appropriate duplicate of BB.
 615
 616    If NOLOOP_ONLY is true, we only perform the threading as long as it
 617    does not affect the structure of the loops in a nontrivial way.
 618
 619    If JOINERS is true, then thread through joiner blocks as well.  */
 620
 621 static bool
 622 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
 623 {
 624   /* E is an incoming edge into BB that we may or may not want to
 625      redirect to a duplicate of BB.  */
 626   edge e, e2;
 627   edge_iterator ei;
 628   ssa_local_info_t local_info;
 629   struct loop *loop = bb->loop_father;
 630
 631   /* To avoid scanning a linear array for the element we need we instead
 632      use a hash table.  For normal code there should be no noticeable
 633      difference.  However, if we have a block with a large number of
 634      incoming and outgoing edges such linear searches can get expensive.  */
 635   redirection_data.create (EDGE_COUNT (bb->succs));
 636
 637   /* If we thread the latch of the loop to its exit, the loop ceases to
 638      exist.  Make sure we do not restrict ourselves in order to preserve
 639      this loop.  */
 640   if (loop->header == bb)
 641     {
 642       e = loop_latch_edge (loop);
 643       vec<jump_thread_edge *> *path = THREAD_PATH (e);
 644
 645       if (path
 646           && (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && joiners)
 647               || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && !joiners)))
 648         {
 649           for (unsigned int i = 1; i < path->length (); i++)
 650             {
 651               edge e2 = (*path)[i]->e;
 652
 653               if (loop_exit_edge_p (loop, e2))
 654                 {
 655                   loop->header = NULL;
 656                   loop->latch = NULL;
 657                   loops_state_set (LOOPS_NEED_FIXUP);
 658                 }
 659             }
 660         }
 661     }
 662
 663   /* Record each unique threaded destination into a hash table for
 664      efficient lookups.  */
 665   FOR_EACH_EDGE (e, ei, bb->preds)
 666     {
 667       if (e->aux == NULL)
 668         continue;
 669
 670       vec<jump_thread_edge *> *path = THREAD_PATH (e);
 671
 672       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
 673           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
 674         continue;
 675
 676       e2 = path->last ()->e;
 677       if (!e2 || noloop_only)
 678         {
 679           /* If NOLOOP_ONLY is true, we only allow threading through the
 680              header of a loop to exit edges.
 681
 682              There are two cases to consider.  The first when BB is the
 683              loop header.  We will attempt to thread this elsewhere, so
 684              we can just continue here.  */
 685
 686           if (bb == bb->loop_father->header
 687               && (!loop_exit_edge_p (bb->loop_father, e2)
 688                   || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
 689             continue;
 690
 691
 692           /* The second occurs when there was loop header buried in a jump
 693              threading path.  We do not try and thread this elsewhere, so
 694              just cancel the jump threading request by clearing the AUX
 695              field now.  */
 696           if ((bb->loop_father != e2->src->loop_father
 697                && !loop_exit_edge_p (e2->src->loop_father, e2))
 698               || (e2->src->loop_father != e2->dest->loop_father
 699                   && !loop_exit_edge_p (e2->src->loop_father, e2)))
 700             {
 701               /* Since this case is not handled by our special code
 702                  to thread through a loop header, we must explicitly
 703                  cancel the threading request here.  */
 704               delete_jump_thread_path (path);
 705               e->aux = NULL;
 706               continue;
 707             }
 708         }
 709
 710       if (e->dest == e2->src)
 711         update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
 712                                          e->count, (*THREAD_PATH (e))[1]->e);
 713
 714       /* Insert the outgoing edge into the hash table if it is not
 715          already in the hash table.  */
 716       lookup_redirection_data (e, INSERT);
 717     }
 718
 719   /* We do not update dominance info.  */
 720   free_dominance_info (CDI_DOMINATORS);
 721
 722   /* We know we only thread through the loop header to loop exits.
 723      Let the basic block duplication hook know we are not creating
 724      a multiple entry loop.  */
 725   if (noloop_only
 726       && bb == bb->loop_father->header)
 727     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
 728
 729   /* Now create duplicates of BB.
 730
 731      Note that for a block with a high outgoing degree we can waste
 732      a lot of time and memory creating and destroying useless edges.
 733
 734      So we first duplicate BB and remove the control structure at the
 735      tail of the duplicate as well as all outgoing edges from the
 736      duplicate.  We then use that duplicate block as a template for
 737      the rest of the duplicates.  */
 738   local_info.template_block = NULL;
 739   local_info.bb = bb;
 740   local_info.jumps_threaded = false;
 741   redirection_data.traverse <ssa_local_info_t *, ssa_create_duplicates>
 742                             (&local_info);
 743
 744   /* The template does not have an outgoing edge.  Create that outgoing
 745      edge and update PHI nodes as the edge's target as necessary.
 746
 747      We do this after creating all the duplicates to avoid creating
 748      unnecessary edges.  */
 749   redirection_data.traverse <ssa_local_info_t *, ssa_fixup_template_block>
 750                             (&local_info);
 751
 752   /* The hash table traversals above created the duplicate blocks (and the
 753      statements within the duplicate blocks).  This loop creates PHI nodes for
 754      the duplicated blocks and redirects the incoming edges into BB to reach
 755      the duplicates of BB.  */
 756   redirection_data.traverse <ssa_local_info_t *, ssa_redirect_edges>
 757                             (&local_info);
 758
 759   /* Done with this block.  Clear REDIRECTION_DATA.  */
 760   redirection_data.dispose ();
 761
 762   if (noloop_only
 763       && bb == bb->loop_father->header)
 764     set_loop_copy (bb->loop_father, NULL);
 765
 766   /* Indicate to our caller whether or not any jumps were threaded.  */
 767   return local_info.jumps_threaded;
 768 }
 769
 770 /* Wrapper for thread_block_1 so that we can first handle jump
 771    thread paths which do not involve copying joiner blocks, then
 772    handle jump thread paths which have joiner blocks.
 773
 774    By doing things this way we can be as aggressive as possible and
 775    not worry that copying a joiner block will create a jump threading
 776    opportunity.  */
 777
 778 static bool
 779 thread_block (basic_block bb, bool noloop_only)
 780 {
 781   bool retval;
 782   retval = thread_block_1 (bb, noloop_only, false);
 783   retval |= thread_block_1 (bb, noloop_only, true);
 784   return retval;
 785 }
 786
 787
 788 /* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
 789    copy of E->dest created during threading, or E->dest if it was not necessary
 790    to copy it (E is its single predecessor).  */
 791
 792 static basic_block
 793 thread_single_edge (edge e)
 794 {
 795   basic_block bb = e->dest;
 796   struct redirection_data rd;
 797   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 798   edge eto = (*path)[1]->e;
 799
 800   for (unsigned int i = 0; i < path->length (); i++)
 801     delete (*path)[i];
 802   delete path;
 803   e->aux = NULL;
 804
 805   thread_stats.num_threaded_edges++;
 806
 807   if (single_pred_p (bb))
 808     {
 809       /* If BB has just a single predecessor, we should only remove the
 810          control statements at its end, and successors except for ETO.  */
 811       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
 812
 813       /* And fixup the flags on the single remaining edge.  */
 814       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
 815       eto->flags |= EDGE_FALLTHRU;
 816
 817       return bb;
 818     }
 819
 820   /* Otherwise, we need to create a copy.  */
 821   if (e->dest == eto->src)
 822     update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
 823
 824   vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
 825   jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
 826   npath->safe_push (x);
 827
 828   x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
 829   npath->safe_push (x);
 830   rd.path = npath;
 831
 832   create_block_for_threading (bb, &rd);
 833   remove_ctrl_stmt_and_useless_edges (rd.dup_block, NULL);
 834   create_edge_and_update_destination_phis (&rd, rd.dup_block);
 835
 836   if (dump_file && (dump_flags & TDF_DETAILS))
 837     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 838              e->src->index, e->dest->index, rd.dup_block->index);
 839
 840   rd.dup_block->count = e->count;
 841   rd.dup_block->frequency = EDGE_FREQUENCY (e);
 842   single_succ_edge (rd.dup_block)->count = e->count;
 843   redirect_edge_and_branch (e, rd.dup_block);
 844   flush_pending_stmts (e);
 845
 846   return rd.dup_block;
 847 }
 848
 849 /* Callback for dfs_enumerate_from.  Returns true if BB is different
 850    from STOP and DBDS_CE_STOP.  */
 851
 852 static basic_block dbds_ce_stop;
 853 static bool
 854 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
 855 {
 856   return (bb != (const_basic_block) stop
 857           && bb != dbds_ce_stop);
 858 }
 859
 860 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
 861    returns the state.  */
 862
 863 enum bb_dom_status
 864 {
 865   /* BB does not dominate latch of the LOOP.  */
 866   DOMST_NONDOMINATING,
 867   /* The LOOP is broken (there is no path from the header to its latch.  */
 868   DOMST_LOOP_BROKEN,
 869   /* BB dominates the latch of the LOOP.  */
 870   DOMST_DOMINATING
 871 };
 872
 873 static enum bb_dom_status
 874 determine_bb_domination_status (struct loop *loop, basic_block bb)
 875 {
 876   basic_block *bblocks;
 877   unsigned nblocks, i;
 878   bool bb_reachable = false;
 879   edge_iterator ei;
 880   edge e;
 881
 882   /* This function assumes BB is a successor of LOOP->header.
 883      If that is not the case return DOMST_NONDOMINATING which
 884      is always safe.  */
 885     {
 886       bool ok = false;
 887
 888       FOR_EACH_EDGE (e, ei, bb->preds)
 889         {
 890           if (e->src == loop->header)
 891             {
 892               ok = true;
 893               break;
 894             }
 895         }
 896
 897       if (!ok)
 898         return DOMST_NONDOMINATING;
 899     }
 900
 901   if (bb == loop->latch)
 902     return DOMST_DOMINATING;
 903
 904   /* Check that BB dominates LOOP->latch, and that it is back-reachable
 905      from it.  */
 906
 907   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
 908   dbds_ce_stop = loop->header;
 909   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
 910                                 bblocks, loop->num_nodes, bb);
 911   for (i = 0; i < nblocks; i++)
 912     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
 913       {
 914         if (e->src == loop->header)
 915           {
 916             free (bblocks);
 917             return DOMST_NONDOMINATING;
 918           }
 919         if (e->src == bb)
 920           bb_reachable = true;
 921       }
 922
 923   free (bblocks);
 924   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
 925 }
 926
 927 /* Return true if BB is part of the new pre-header that is created
 928    when threading the latch to DATA.  */
 929
 930 static bool
 931 def_split_header_continue_p (const_basic_block bb, const void *data)
 932 {
 933   const_basic_block new_header = (const_basic_block) data;
 934   const struct loop *l;
 935
 936   if (bb == new_header
 937       || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
 938     return false;
 939   for (l = bb->loop_father; l; l = loop_outer (l))
 940     if (l == new_header->loop_father)
 941       return true;
 942   return false;
 943 }
 944
 945 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
 946    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
 947    to the inside of the loop.  */
 948
 949 static bool
 950 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
 951 {
 952   basic_block header = loop->header;
 953   edge e, tgt_edge, latch = loop_latch_edge (loop);
 954   edge_iterator ei;
 955   basic_block tgt_bb, atgt_bb;
 956   enum bb_dom_status domst;
 957
 958   /* We have already threaded through headers to exits, so all the threading
 959      requests now are to the inside of the loop.  We need to avoid creating
 960      irreducible regions (i.e., loops with more than one entry block), and
 961      also loop with several latch edges, or new subloops of the loop (although
 962      there are cases where it might be appropriate, it is difficult to decide,
 963      and doing it wrongly may confuse other optimizers).
 964
 965      We could handle more general cases here.  However, the intention is to
 966      preserve some information about the loop, which is impossible if its
 967      structure changes significantly, in a way that is not well understood.
 968      Thus we only handle few important special cases, in which also updating
 969      of the loop-carried information should be feasible:
 970
 971      1) Propagation of latch edge to a block that dominates the latch block
 972         of a loop.  This aims to handle the following idiom:
 973
 974         first = 1;
 975         while (1)
 976           {
 977             if (first)
 978               initialize;
 979             first = 0;
 980             body;
 981           }
 982
 983         After threading the latch edge, this becomes
 984
 985         first = 1;
 986         if (first)
 987           initialize;
 988         while (1)
 989           {
 990             first = 0;
 991             body;
 992           }
 993
 994         The original header of the loop is moved out of it, and we may thread
 995         the remaining edges through it without further constraints.
 996
 997      2) All entry edges are propagated to a single basic block that dominates
 998         the latch block of the loop.  This aims to handle the following idiom
 999         (normally created for "for" loops):
1000
1001         i = 0;
1002         while (1)
1003           {
1004             if (i >= 100)
1005               break;
1006             body;
1007             i++;
1008           }
1009
1010         This becomes
1011
1012         i = 0;
1013         while (1)
1014           {
1015             body;
1016             i++;
1017             if (i >= 100)
1018               break;
1019           }
1020      */
1021
1022   /* Threading through the header won't improve the code if the header has just
1023      one successor.  */
1024   if (single_succ_p (header))
1025     goto fail;
1026
1027   if (latch->aux)
1028     {
1029       vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1030       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1031         goto fail;
1032       tgt_edge = (*path)[1]->e;
1033       tgt_bb = tgt_edge->dest;
1034     }
1035   else if (!may_peel_loop_headers
1036            && !redirection_block_p (loop->header))
1037     goto fail;
1038   else
1039     {
1040       tgt_bb = NULL;
1041       tgt_edge = NULL;
1042       FOR_EACH_EDGE (e, ei, header->preds)
1043         {
1044           if (!e->aux)
1045             {
1046               if (e == latch)
1047                 continue;
1048
1049               /* If latch is not threaded, and there is a header
1050                  edge that is not threaded, we would create loop
1051                  with multiple entries.  */
1052               goto fail;
1053             }
1054
1055           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1056
1057           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1058             goto fail;
1059           tgt_edge = (*path)[1]->e;
1060           atgt_bb = tgt_edge->dest;
1061           if (!tgt_bb)
1062             tgt_bb = atgt_bb;
1063           /* Two targets of threading would make us create loop
1064              with multiple entries.  */
1065           else if (tgt_bb != atgt_bb)
1066             goto fail;
1067         }
1068
1069       if (!tgt_bb)
1070         {
1071           /* There are no threading requests.  */
1072           return false;
1073         }
1074
1075       /* Redirecting to empty loop latch is useless.  */
1076       if (tgt_bb == loop->latch
1077           && empty_block_p (loop->latch))
1078         goto fail;
1079     }
1080
1081   /* The target block must dominate the loop latch, otherwise we would be
1082      creating a subloop.  */
1083   domst = determine_bb_domination_status (loop, tgt_bb);
1084   if (domst == DOMST_NONDOMINATING)
1085     goto fail;
1086   if (domst == DOMST_LOOP_BROKEN)
1087     {
1088       /* If the loop ceased to exist, mark it as such, and thread through its
1089          original header.  */
1090       loop->header = NULL;
1091       loop->latch = NULL;
1092       loops_state_set (LOOPS_NEED_FIXUP);
1093       return thread_block (header, false);
1094     }
1095
1096   if (tgt_bb->loop_father->header == tgt_bb)
1097     {
1098       /* If the target of the threading is a header of a subloop, we need
1099          to create a preheader for it, so that the headers of the two loops
1100          do not merge.  */
1101       if (EDGE_COUNT (tgt_bb->preds) > 2)
1102         {
1103           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1104           gcc_assert (tgt_bb != NULL);
1105         }
1106       else
1107         tgt_bb = split_edge (tgt_edge);
1108     }
1109
1110   if (latch->aux)
1111     {
1112       basic_block *bblocks;
1113       unsigned nblocks, i;
1114
1115       /* First handle the case latch edge is redirected.  We are copying
1116          the loop header but not creating a multiple entry loop.  Make the
1117          cfg manipulation code aware of that fact.  */
1118       set_loop_copy (loop, loop);
1119       loop->latch = thread_single_edge (latch);
1120       set_loop_copy (loop, NULL);
1121       gcc_assert (single_succ (loop->latch) == tgt_bb);
1122       loop->header = tgt_bb;
1123
1124       /* Remove the new pre-header blocks from our loop.  */
1125       bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1126       nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1127                                     bblocks, loop->num_nodes, tgt_bb);
1128       for (i = 0; i < nblocks; i++)
1129         if (bblocks[i]->loop_father == loop)
1130           {
1131             remove_bb_from_loops (bblocks[i]);
1132             add_bb_to_loop (bblocks[i], loop_outer (loop));
1133           }
1134       free (bblocks);
1135
1136       /* If the new header has multiple latches mark it so.  */
1137       FOR_EACH_EDGE (e, ei, loop->header->preds)
1138         if (e->src->loop_father == loop
1139             && e->src != loop->latch)
1140           {
1141             loop->latch = NULL;
1142             loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1143           }
1144
1145       /* Cancel remaining threading requests that would make the
1146          loop a multiple entry loop.  */
1147       FOR_EACH_EDGE (e, ei, header->preds)
1148         {
1149           edge e2;
1150
1151           if (e->aux == NULL)
1152             continue;
1153
1154           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1155           e2 = path->last ()->e;
1156
1157           if (e->src->loop_father != e2->dest->loop_father
1158               && e2->dest != loop->header)
1159             {
1160               delete_jump_thread_path (path);
1161               e->aux = NULL;
1162             }
1163         }
1164
1165       /* Thread the remaining edges through the former header.  */
1166       thread_block (header, false);
1167     }
1168   else
1169     {
1170       basic_block new_preheader;
1171
1172       /* Now consider the case entry edges are redirected to the new entry
1173          block.  Remember one entry edge, so that we can find the new
1174          preheader (its destination after threading).  */
1175       FOR_EACH_EDGE (e, ei, header->preds)
1176         {
1177           if (e->aux)
1178             break;
1179         }
1180
1181       /* The duplicate of the header is the new preheader of the loop.  Ensure
1182          that it is placed correctly in the loop hierarchy.  */
1183       set_loop_copy (loop, loop_outer (loop));
1184
1185       thread_block (header, false);
1186       set_loop_copy (loop, NULL);
1187       new_preheader = e->dest;
1188
1189       /* Create the new latch block.  This is always necessary, as the latch
1190          must have only a single successor, but the original header had at
1191          least two successors.  */
1192       loop->latch = NULL;
1193       mfb_kj_edge = single_succ_edge (new_preheader);
1194       loop->header = mfb_kj_edge->dest;
1195       latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1196       loop->header = latch->dest;
1197       loop->latch = latch->src;
1198     }
1199
1200   return true;
1201
1202 fail:
1203   /* We failed to thread anything.  Cancel the requests.  */
1204   FOR_EACH_EDGE (e, ei, header->preds)
1205     {
1206       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1207
1208       if (path)
1209         {
1210           delete_jump_thread_path (path);
1211           e->aux = NULL;
1212         }
1213     }
1214   return false;
1215 }
1216
1217 /* E1 and E2 are edges into the same basic block.  Return TRUE if the
1218    PHI arguments associated with those edges are equal or there are no
1219    PHI arguments, otherwise return FALSE.  */
1220
1221 static bool
1222 phi_args_equal_on_edges (edge e1, edge e2)
1223 {
1224   gimple_stmt_iterator gsi;
1225   int indx1 = e1->dest_idx;
1226   int indx2 = e2->dest_idx;
1227
1228   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1229     {
1230       gimple phi = gsi_stmt (gsi);
1231
1232       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1233                             gimple_phi_arg_def (phi, indx2), 0))
1234         return false;
1235     }
1236   return true;
1237 }
1238
1239 /* Walk through the registered jump threads and convert them into a
1240    form convenient for this pass.
1241
1242    Any block which has incoming edges threaded to outgoing edges
1243    will have its entry in THREADED_BLOCK set.
1244
1245    Any threaded edge will have its new outgoing edge stored in the
1246    original edge's AUX field.
1247
1248    This form avoids the need to walk all the edges in the CFG to
1249    discover blocks which need processing and avoids unnecessary
1250    hash table lookups to map from threaded edge to new target.  */
1251
1252 static void
1253 mark_threaded_blocks (bitmap threaded_blocks)
1254 {
1255   unsigned int i;
1256   bitmap_iterator bi;
1257   bitmap tmp = BITMAP_ALLOC (NULL);
1258   basic_block bb;
1259   edge e;
1260   edge_iterator ei;
1261
1262   /* Move the jump threading requests from PATHS to each edge
1263      which starts a jump thread path.  */
1264   for (i = 0; i < paths.length (); i++)
1265     {
1266       vec<jump_thread_edge *> *path = paths[i];
1267       edge e = (*path)[0]->e;
1268       e->aux = (void *)path;
1269       bitmap_set_bit (tmp, e->dest->index);
1270     }
1271
1272
1273
1274   /* If optimizing for size, only thread through block if we don't have
1275      to duplicate it or it's an otherwise empty redirection block.  */
1276   if (optimize_function_for_size_p (cfun))
1277     {
1278       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1279         {
1280           bb = BASIC_BLOCK (i);
1281           if (EDGE_COUNT (bb->preds) > 1
1282               && !redirection_block_p (bb))
1283             {
1284               FOR_EACH_EDGE (e, ei, bb->preds)
1285                 {
1286                   if (e->aux)
1287                     {
1288                       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1289                       delete_jump_thread_path (path);
1290                       e->aux = NULL;
1291                     }
1292                 }
1293             }
1294           else
1295             bitmap_set_bit (threaded_blocks, i);
1296         }
1297     }
1298   else
1299     bitmap_copy (threaded_blocks, tmp);
1300
1301   /* Look for jump threading paths which cross multiple loop headers.
1302
1303      The code to thread through loop headers will change the CFG in ways
1304      that break assumptions made by the loop optimization code.
1305
1306      We don't want to blindly cancel the requests.  We can instead do better
1307      by trimming off the end of the jump thread path.  */
1308   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1309     {
1310       basic_block bb = BASIC_BLOCK (i);
1311       FOR_EACH_EDGE (e, ei, bb->preds)
1312         {
1313           if (e->aux)
1314             {
1315               vec<jump_thread_edge *> *path = THREAD_PATH (e);
1316
1317               /* Basically we're looking for a situation where we can see
1318                  3 or more loop structures on a jump threading path.  */
1319
1320               struct loop *first_father = (*path)[0]->e->src->loop_father;
1321               struct loop *second_father = NULL;
1322               for (unsigned int i = 0; i < path->length (); i++)
1323                 {
1324                   /* See if this is a loop father we have not seen before.  */
1325                   if ((*path)[i]->e->dest->loop_father != first_father
1326                       && (*path)[i]->e->dest->loop_father != second_father)
1327                     {
1328                       /* We've already seen two loop fathers, so we
1329                          need to trim this jump threading path.  */
1330                       if (second_father != NULL)
1331                         {
1332                           /* Trim from entry I onwards.  */
1333                           for (unsigned int j = i; j < path->length (); j++)
1334                             delete (*path)[j];
1335                           path->truncate (i);
1336
1337                           /* Now that we've truncated the path, make sure
1338                              what's left is still valid.   We need at least
1339                              two edges on the path and the last edge can not
1340                              be a joiner.  This should never happen, but let's
1341                              be safe.  */
1342                           if (path->length () < 2
1343                               || (path->last ()->type
1344                                   == EDGE_COPY_SRC_JOINER_BLOCK))
1345                             {
1346                               delete_jump_thread_path (path);
1347                               e->aux = NULL;
1348                             }
1349                           break;
1350                         }
1351                       else
1352                         {
1353                           second_father = (*path)[i]->e->dest->loop_father;
1354                         }
1355                     }
1356                 }
1357             }
1358         }
1359     }
1360
1361   /* If we have a joiner block (J) which has two successors S1 and S2 and
1362      we are threading though S1 and the final destination of the thread
1363      is S2, then we must verify that any PHI nodes in S2 have the same
1364      PHI arguments for the edge J->S2 and J->S1->...->S2.
1365
1366      We used to detect this prior to registering the jump thread, but
1367      that prohibits propagation of edge equivalences into non-dominated
1368      PHI nodes as the equivalency test might occur before propagation.
1369
1370      This must also occur after we truncate any jump threading paths
1371      as this scenario may only show up after truncation.
1372
1373      This works for now, but will need improvement as part of the FSA
1374      optimization.
1375
1376      Note since we've moved the thread request data to the edges,
1377      we have to iterate on those rather than the threaded_edges vector.  */
1378   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1379     {
1380       bb = BASIC_BLOCK (i);
1381       FOR_EACH_EDGE (e, ei, bb->preds)
1382         {
1383           if (e->aux)
1384             {
1385               vec<jump_thread_edge *> *path = THREAD_PATH (e);
1386               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
1387
1388               if (have_joiner)
1389                 {
1390                   basic_block joiner = e->dest;
1391                   edge final_edge = path->last ()->e;
1392                   basic_block final_dest = final_edge->dest;
1393                   edge e2 = find_edge (joiner, final_dest);
1394
1395                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
1396                     {
1397                       delete_jump_thread_path (path);
1398                       e->aux = NULL;
1399                     }
1400                 }
1401             }
1402         }
1403     }
1404
1405   BITMAP_FREE (tmp);
1406 }
1407
1408
1409 /* Walk through all blocks and thread incoming edges to the appropriate
1410    outgoing edge for each edge pair recorded in THREADED_EDGES.
1411
1412    It is the caller's responsibility to fix the dominance information
1413    and rewrite duplicated SSA_NAMEs back into SSA form.
1414
1415    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
1416    loop headers if it does not simplify the loop.
1417
1418    Returns true if one or more edges were threaded, false otherwise.  */
1419
1420 bool
1421 thread_through_all_blocks (bool may_peel_loop_headers)
1422 {
1423   bool retval = false;
1424   unsigned int i;
1425   bitmap_iterator bi;
1426   bitmap threaded_blocks;
1427   struct loop *loop;
1428   loop_iterator li;
1429
1430   /* We must know about loops in order to preserve them.  */
1431   gcc_assert (current_loops != NULL);
1432
1433   if (!paths.exists ())
1434     return false;
1435
1436   threaded_blocks = BITMAP_ALLOC (NULL);
1437   memset (&thread_stats, 0, sizeof (thread_stats));
1438
1439   mark_threaded_blocks (threaded_blocks);
1440
1441   initialize_original_copy_tables ();
1442
1443   /* First perform the threading requests that do not affect
1444      loop structure.  */
1445   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
1446     {
1447       basic_block bb = BASIC_BLOCK (i);
1448
1449       if (EDGE_COUNT (bb->preds) > 0)
1450         retval |= thread_block (bb, true);
1451     }
1452
1453   /* Then perform the threading through loop headers.  We start with the
1454      innermost loop, so that the changes in cfg we perform won't affect
1455      further threading.  */
1456   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1457     {
1458       if (!loop->header
1459           || !bitmap_bit_p (threaded_blocks, loop->header->index))
1460         continue;
1461
1462       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
1463     }
1464
1465   /* Assume we had a jump thread path which went from the latch to the exit
1466      and a path which goes from outside to inside the same loop.
1467
1468      If the latch to exit was handled first, we will thread it and clear
1469      loop->header.
1470
1471      The second path will be ignored by thread_block because we're going
1472      through a loop header.  It will also be ignored by the loop above
1473      because loop->header is NULL.
1474
1475      This results in the second path never being threaded.  The failure
1476      mode is a dangling AUX field.
1477
1478      This is inherently a bit of a pain to fix, so we just walk all the
1479      blocks and all the incoming edges to those blocks and clear their
1480      AUX fields.  */
1481   basic_block bb;
1482   edge_iterator ei;
1483   edge e;
1484   FOR_EACH_BB (bb)
1485     {
1486       FOR_EACH_EDGE (e, ei, bb->preds)
1487         if (e->aux)
1488           {
1489             vec<jump_thread_edge *> *path = THREAD_PATH (e);
1490
1491             delete_jump_thread_path (path);
1492             e->aux = NULL;
1493           }
1494     }
1495
1496   statistics_counter_event (cfun, "Jumps threaded",
1497                             thread_stats.num_threaded_edges);
1498
1499   free_original_copy_tables ();
1500
1501   BITMAP_FREE (threaded_blocks);
1502   threaded_blocks = NULL;
1503   paths.release ();
1504
1505   if (retval)
1506     loops_state_set (LOOPS_NEED_FIXUP);
1507
1508   return retval;
1509 }
1510
1511 /* Delete the jump threading path PATH.  We have to explcitly delete
1512    each entry in the vector, then the container.  */
1513
1514 void
1515 delete_jump_thread_path (vec<jump_thread_edge *> *path)
1516 {
1517   for (unsigned int i = 0; i < path->length (); i++)
1518     delete (*path)[i];
1519   path->release();
1520 }
1521
1522 /* Dump a jump threading path, including annotations about each
1523    edge in the path.  */
1524
1525 static void
1526 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path)
1527 {
1528   fprintf (dump_file,
1529            "  Registering jump thread: (%d, %d) incoming edge; ",
1530            path[0]->e->src->index, path[0]->e->dest->index);
1531
1532   for (unsigned int i = 1; i < path.length (); i++)
1533     {
1534       /* We can get paths with a NULL edge when the final destination
1535          of a jump thread turns out to be a constant address.  We dump
1536          those paths when debugging, so we have to be prepared for that
1537          possibility here.  */
1538       if (path[i]->e == NULL)
1539         continue;
1540
1541       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1542         fprintf (dump_file, " (%d, %d) joiner; ",
1543                  path[i]->e->src->index, path[i]->e->dest->index);
1544       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
1545        fprintf (dump_file, " (%d, %d) normal;",
1546                  path[i]->e->src->index, path[i]->e->dest->index);
1547       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
1548        fprintf (dump_file, " (%d, %d) nocopy;",
1549                  path[i]->e->src->index, path[i]->e->dest->index);
1550     }
1551   fputc ('\n', dump_file);
1552 }
1553
1554 /* Register a jump threading opportunity.  We queue up all the jump
1555    threading opportunities discovered by a pass and update the CFG
1556    and SSA form all at once.
1557
1558    E is the edge we can thread, E2 is the new target edge, i.e., we
1559    are effectively recording that E->dest can be changed to E2->dest
1560    after fixing the SSA graph.  */
1561
1562 void
1563 register_jump_thread (vec<jump_thread_edge *> *path)
1564 {
1565   if (!dbg_cnt (registered_jump_thread))
1566     {
1567       delete_jump_thread_path (path);
1568       return;
1569     }
1570
1571   /* First make sure there are no NULL outgoing edges on the jump threading
1572      path.  That can happen for jumping to a constant address.  */
1573   for (unsigned int i = 0; i < path->length (); i++)
1574     if ((*path)[i]->e == NULL)
1575       {
1576         if (dump_file && (dump_flags & TDF_DETAILS))
1577           {
1578             fprintf (dump_file,
1579                      "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
1580             dump_jump_thread_path (dump_file, *path);
1581           }
1582
1583         delete_jump_thread_path (path);
1584         return;
1585       }
1586
1587   if (dump_file && (dump_flags & TDF_DETAILS))
1588     dump_jump_thread_path (dump_file, *path);
1589
1590   if (!paths.exists ())
1591     paths.create (5);
1592
1593   paths.safe_push (path);
1594 }