1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
28 #include "basic-block.h"
30 #include "cfglayout.h"
34 static void duplicate_subloops (struct loop *, struct loop *);
35 static void copy_loops_to (struct loop **, int,
37 static void loop_redirect_edge (edge, basic_block);
38 static void remove_bbs (basic_block *, int);
39 static bool rpe_enum_p (basic_block, void *);
40 static int find_path (edge, basic_block **);
41 static bool alp_enum_p (basic_block, void *);
42 static void fix_loop_placements (struct loop *, bool *);
43 static bool fix_bb_placement (basic_block);
44 static void fix_bb_placements (basic_block, bool *);
45 static void place_new_loop (struct loop *);
46 static basic_block create_preheader (struct loop *, int);
47 static void unloop (struct loop *, bool *);
49 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
51 /* Checks whether basic block BB is dominated by DATA. */
53 rpe_enum_p (basic_block bb, void *data)
55 return dominated_by_p (CDI_DOMINATORS, bb, data);
58 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
61 remove_bbs (basic_block *bbs, int nbbs)
65 for (i = 0; i < nbbs; i++)
66 delete_basic_block (bbs[i]);
69 /* Find path -- i.e. the basic blocks dominated by edge E and put them
70 into array BBS, that will be allocated large enough to contain them.
71 E->dest must have exactly one predecessor for this to work (it is
72 easy to achieve and we do not put it here because we do not want to
73 alter anything by this function). The number of basic blocks in the
76 find_path (edge e, basic_block **bbs)
78 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
80 /* Find bbs in the path. */
81 *bbs = XCNEWVEC (basic_block, n_basic_blocks);
82 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
83 n_basic_blocks, e->dest);
86 /* Fix placement of basic block BB inside loop hierarchy --
87 Let L be a loop to that BB belongs. Then every successor of BB must either
88 1) belong to some superloop of loop L, or
89 2) be a header of loop K such that K->outer is superloop of L
90 Returns true if we had to move BB into other loop to enforce this condition,
91 false if the placement of BB was already correct (provided that placements
92 of its successors are correct). */
94 fix_bb_placement (basic_block bb)
98 struct loop *loop = current_loops->tree_root, *act;
100 FOR_EACH_EDGE (e, ei, bb->succs)
102 if (e->dest == EXIT_BLOCK_PTR)
105 act = e->dest->loop_father;
106 if (act->header == e->dest)
109 if (flow_loop_nested_p (loop, act))
113 if (loop == bb->loop_father)
116 remove_bb_from_loops (bb);
117 add_bb_to_loop (bb, loop);
122 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
123 of LOOP to that leads at least one exit edge of LOOP, and set it
124 as the immediate superloop of LOOP. Return true if the immediate superloop
128 fix_loop_placement (struct loop *loop)
132 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
133 struct loop *father = current_loops->tree_root, *act;
136 for (i = 0; VEC_iterate (edge, exits, i, e); i++)
138 act = find_common_loop (loop, e->dest->loop_father);
139 if (flow_loop_nested_p (father, act))
143 if (father != loop->outer)
145 for (act = loop->outer; act != father; act = act->outer)
146 act->num_nodes -= loop->num_nodes;
147 flow_loop_tree_node_remove (loop);
148 flow_loop_tree_node_add (father, loop);
150 /* The exit edges of LOOP no longer exits its original immediate
151 superloops; remove them from the appropriate exit lists. */
152 for (i = 0; VEC_iterate (edge, exits, i, e); i++)
153 rescan_loop_exit (e, false, false);
158 VEC_free (edge, heap, exits);
162 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
163 enforce condition condition stated in description of fix_bb_placement. We
164 start from basic block FROM that had some of its successors removed, so that
165 his placement no longer has to be correct, and iteratively fix placement of
166 its predecessors that may change if placement of FROM changed. Also fix
167 placement of subloops of FROM->loop_father, that might also be altered due
168 to this change; the condition for them is similar, except that instead of
169 successors we consider edges coming out of the loops.
171 If the changes may invalidate the information about irreducible regions,
172 IRRED_INVALIDATED is set to true. */
175 fix_bb_placements (basic_block from,
176 bool *irred_invalidated)
179 basic_block *queue, *qtop, *qbeg, *qend;
180 struct loop *base_loop;
183 /* We pass through blocks back-reachable from FROM, testing whether some
184 of their successors moved to outer loop. It may be necessary to
185 iterate several times, but it is finite, as we stop unless we move
186 the basic block up the loop structure. The whole story is a bit
187 more complicated due to presence of subloops, those are moved using
188 fix_loop_placement. */
190 base_loop = from->loop_father;
191 if (base_loop == current_loops->tree_root)
194 in_queue = sbitmap_alloc (last_basic_block);
195 sbitmap_zero (in_queue);
196 SET_BIT (in_queue, from->index);
197 /* Prevent us from going out of the base_loop. */
198 SET_BIT (in_queue, base_loop->header->index);
200 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
201 qtop = queue + base_loop->num_nodes + 1;
213 RESET_BIT (in_queue, from->index);
215 if (from->loop_father->header == from)
217 /* Subloop header, maybe move the loop upward. */
218 if (!fix_loop_placement (from->loop_father))
223 /* Ordinary basic block. */
224 if (!fix_bb_placement (from))
228 FOR_EACH_EDGE (e, ei, from->succs)
230 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
231 *irred_invalidated = true;
234 /* Something has changed, insert predecessors into queue. */
235 FOR_EACH_EDGE (e, ei, from->preds)
237 basic_block pred = e->src;
240 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
241 *irred_invalidated = true;
243 if (TEST_BIT (in_queue, pred->index))
246 /* If it is subloop, then it either was not moved, or
247 the path up the loop tree from base_loop do not contain
249 nca = find_common_loop (pred->loop_father, base_loop);
250 if (pred->loop_father != base_loop
252 || nca != pred->loop_father))
253 pred = pred->loop_father->header;
254 else if (!flow_loop_nested_p (from->loop_father, pred->loop_father))
256 /* No point in processing it. */
260 if (TEST_BIT (in_queue, pred->index))
263 /* Schedule the basic block. */
268 SET_BIT (in_queue, pred->index);
275 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
276 and update loop structures and dominators. Return true if we were able
277 to remove the path, false otherwise (and nothing is affected then). */
282 basic_block *rem_bbs, *bord_bbs, *dom_bbs, from, bb;
283 int i, nrem, n_bord_bbs, n_dom_bbs, nreml;
285 bool irred_invalidated = false;
286 struct loop **deleted_loop;
288 if (!can_remove_branch_p (e))
291 /* Keep track of whether we need to update information about irreducible
292 regions. This is the case if the removed area is a part of the
293 irreducible region, or if the set of basic blocks that belong to a loop
294 that is inside an irreducible region is changed, or if such a loop is
296 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
297 irred_invalidated = true;
299 /* We need to check whether basic blocks are dominated by the edge
300 e, but we only have basic block dominators. This is easy to
301 fix -- when e->dest has exactly one predecessor, this corresponds
302 to blocks dominated by e->dest, if not, split the edge. */
303 if (!single_pred_p (e->dest))
304 e = single_pred_edge (split_edge (e));
306 /* It may happen that by removing path we remove one or more loops
307 we belong to. In this case first unloop the loops, then proceed
308 normally. We may assume that e->dest is not a header of any loop,
309 as it now has exactly one predecessor. */
310 while (e->src->loop_father->outer
311 && dominated_by_p (CDI_DOMINATORS,
312 e->src->loop_father->latch, e->dest))
313 unloop (e->src->loop_father, &irred_invalidated);
315 /* Identify the path. */
316 nrem = find_path (e, &rem_bbs);
319 bord_bbs = XCNEWVEC (basic_block, n_basic_blocks);
320 seen = sbitmap_alloc (last_basic_block);
323 /* Find "border" hexes -- i.e. those with predecessor in removed path. */
324 for (i = 0; i < nrem; i++)
325 SET_BIT (seen, rem_bbs[i]->index);
326 for (i = 0; i < nrem; i++)
330 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
331 if (ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index))
333 SET_BIT (seen, ae->dest->index);
334 bord_bbs[n_bord_bbs++] = ae->dest;
336 if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
337 irred_invalidated = true;
341 /* Remove the path. */
344 dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
346 /* Cancel loops contained in the path. */
347 deleted_loop = XNEWVEC (struct loop *, nrem);
349 for (i = 0; i < nrem; i++)
350 if (rem_bbs[i]->loop_father->header == rem_bbs[i])
351 deleted_loop[nreml++] = rem_bbs[i]->loop_father;
353 remove_bbs (rem_bbs, nrem);
356 for (i = 0; i < nreml; i++)
357 cancel_loop_tree (deleted_loop[i]);
360 /* Find blocks whose dominators may be affected. */
363 for (i = 0; i < n_bord_bbs; i++)
367 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
368 if (TEST_BIT (seen, bb->index))
370 SET_BIT (seen, bb->index);
372 for (ldom = first_dom_son (CDI_DOMINATORS, bb);
374 ldom = next_dom_son (CDI_DOMINATORS, ldom))
375 if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
376 dom_bbs[n_dom_bbs++] = ldom;
381 /* Recount dominators. */
382 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
386 /* Fix placements of basic blocks inside loops and the placement of
387 loops in the loop tree. */
388 fix_bb_placements (from, &irred_invalidated);
389 fix_loop_placements (from->loop_father, &irred_invalidated);
391 if (irred_invalidated
392 && (current_loops->state & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS) != 0)
393 mark_irreducible_loops ();
398 /* Predicate for enumeration in add_loop. */
400 alp_enum_p (basic_block bb, void *alp_header)
402 return bb != (basic_block) alp_header;
405 /* Given LOOP structure with filled header and latch, find the body of the
406 corresponding loop and add it to loops tree. Insert the LOOP as a son of
410 add_loop (struct loop *loop, struct loop *outer)
415 /* Add it to loop structure. */
416 place_new_loop (loop);
417 flow_loop_tree_node_add (outer, loop);
419 /* Find its nodes. */
420 bbs = XCNEWVEC (basic_block, n_basic_blocks);
421 n = dfs_enumerate_from (loop->latch, 1, alp_enum_p,
422 bbs, n_basic_blocks, loop->header);
424 for (i = 0; i < n; i++)
426 remove_bb_from_loops (bbs[i]);
427 add_bb_to_loop (bbs[i], loop);
429 remove_bb_from_loops (loop->header);
430 add_bb_to_loop (loop->header, loop);
435 /* Multiply all frequencies in LOOP by NUM/DEN. */
437 scale_loop_frequencies (struct loop *loop, int num, int den)
441 bbs = get_loop_body (loop);
442 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
446 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
447 latch to header and update loop tree and dominators
448 accordingly. Everything between them plus LATCH_EDGE destination must
449 be dominated by HEADER_EDGE destination, and back-reachable from
450 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
451 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
452 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
453 Returns the newly created loop. Frequencies and counts in the new loop
454 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */
457 loopify (edge latch_edge, edge header_edge,
458 basic_block switch_bb, edge true_edge, edge false_edge,
459 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
461 basic_block succ_bb = latch_edge->dest;
462 basic_block pred_bb = header_edge->src;
463 basic_block *dom_bbs, *body;
464 unsigned n_dom_bbs, i;
466 struct loop *loop = alloc_loop ();
467 struct loop *outer = succ_bb->loop_father->outer;
473 loop->header = header_edge->dest;
474 loop->latch = latch_edge->src;
476 freq = EDGE_FREQUENCY (header_edge);
477 cnt = header_edge->count;
479 /* Redirect edges. */
480 loop_redirect_edge (latch_edge, loop->header);
481 loop_redirect_edge (true_edge, succ_bb);
483 /* During loop versioning, one of the switch_bb edge is already properly
484 set. Do not redirect it again unless redirect_all_edges is true. */
485 if (redirect_all_edges)
487 loop_redirect_edge (header_edge, switch_bb);
488 loop_redirect_edge (false_edge, loop->header);
490 /* Update dominators. */
491 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
492 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
495 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
497 /* Compute new loop. */
498 add_loop (loop, outer);
500 /* Add switch_bb to appropriate loop. */
501 if (switch_bb->loop_father)
502 remove_bb_from_loops (switch_bb);
503 add_bb_to_loop (switch_bb, outer);
505 /* Fix frequencies. */
506 if (redirect_all_edges)
508 switch_bb->frequency = freq;
509 switch_bb->count = cnt;
510 FOR_EACH_EDGE (e, ei, switch_bb->succs)
512 e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE;
515 scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
516 scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
518 /* Update dominators of blocks outside of LOOP. */
519 dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
521 seen = sbitmap_alloc (last_basic_block);
523 body = get_loop_body (loop);
525 for (i = 0; i < loop->num_nodes; i++)
526 SET_BIT (seen, body[i]->index);
528 for (i = 0; i < loop->num_nodes; i++)
532 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
534 ldom = next_dom_son (CDI_DOMINATORS, ldom))
535 if (!TEST_BIT (seen, ldom->index))
537 SET_BIT (seen, ldom->index);
538 dom_bbs[n_dom_bbs++] = ldom;
542 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
551 /* Remove the latch edge of a LOOP and update loops to indicate that
552 the LOOP was removed. After this function, original loop latch will
553 have no successor, which caller is expected to fix somehow.
555 If this may cause the information about irreducible regions to become
556 invalid, IRRED_INVALIDATED is set to true. */
559 unloop (struct loop *loop, bool *irred_invalidated)
564 basic_block latch = loop->latch;
567 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
568 *irred_invalidated = true;
570 /* This is relatively straightforward. The dominators are unchanged, as
571 loop header dominates loop latch, so the only thing we have to care of
572 is the placement of loops and basic blocks inside the loop tree. We
573 move them all to the loop->outer, and then let fix_bb_placements do
576 body = get_loop_body (loop);
578 for (i = 0; i < n; i++)
579 if (body[i]->loop_father == loop)
581 remove_bb_from_loops (body[i]);
582 add_bb_to_loop (body[i], loop->outer);
589 flow_loop_tree_node_remove (ploop);
590 flow_loop_tree_node_add (loop->outer, ploop);
593 /* Remove the loop and free its data. */
596 remove_edge (single_succ_edge (latch));
598 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
599 there is an irreducible region inside the cancelled loop, the flags will
601 fix_bb_placements (latch, &dummy);
604 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
605 condition stated in description of fix_loop_placement holds for them.
606 It is used in case when we removed some edges coming out of LOOP, which
607 may cause the right placement of LOOP inside loop tree to change.
609 IRRED_INVALIDATED is set to true if a change in the loop structures might
610 invalidate the information about irreducible regions. */
613 fix_loop_placements (struct loop *loop, bool *irred_invalidated)
620 if (!fix_loop_placement (loop))
623 /* Changing the placement of a loop in the loop tree may alter the
624 validity of condition 2) of the description of fix_bb_placement
625 for its preheader, because the successor is the header and belongs
626 to the loop. So call fix_bb_placements to fix up the placement
627 of the preheader and (possibly) of its predecessors. */
628 fix_bb_placements (loop_preheader_edge (loop)->src,
634 /* Creates place for a new LOOP in loops structure. */
636 place_new_loop (struct loop *loop)
638 loop->num = number_of_loops ();
639 VEC_safe_push (loop_p, heap, current_loops->larray, loop);
642 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
643 created loop into loops structure. */
645 duplicate_loop (struct loop *loop, struct loop *target)
648 cloop = alloc_loop ();
649 place_new_loop (cloop);
651 /* Mark the new loop as copy of LOOP. */
654 /* Add it to target. */
655 flow_loop_tree_node_add (target, cloop);
660 /* Copies structure of subloops of LOOP into TARGET loop, placing
661 newly created loops into loop tree. */
663 duplicate_subloops (struct loop *loop, struct loop *target)
665 struct loop *aloop, *cloop;
667 for (aloop = loop->inner; aloop; aloop = aloop->next)
669 cloop = duplicate_loop (aloop, target);
670 duplicate_subloops (aloop, cloop);
674 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
675 into TARGET loop, placing newly created loops into loop tree. */
677 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
682 for (i = 0; i < n; i++)
684 aloop = duplicate_loop (copied_loops[i], target);
685 duplicate_subloops (copied_loops[i], aloop);
689 /* Redirects edge E to basic block DEST. */
691 loop_redirect_edge (edge e, basic_block dest)
696 redirect_edge_and_branch_force (e, dest);
699 /* Check whether LOOP's body can be duplicated. */
701 can_duplicate_loop_p (struct loop *loop)
704 basic_block *bbs = get_loop_body (loop);
706 ret = can_copy_bbs_p (bbs, loop->num_nodes);
712 /* Sets probability and count of edge E to zero. The probability and count
713 is redistributed evenly to the remaining edges coming from E->src. */
716 set_zero_probability (edge e)
718 basic_block bb = e->src;
720 edge ae, last = NULL;
721 unsigned n = EDGE_COUNT (bb->succs);
722 gcov_type cnt = e->count, cnt1;
723 unsigned prob = e->probability, prob1;
726 cnt1 = cnt / (n - 1);
727 prob1 = prob / (n - 1);
729 FOR_EACH_EDGE (ae, ei, bb->succs)
734 ae->probability += prob1;
739 /* Move the rest to one of the edges. */
740 last->probability += prob % (n - 1);
741 last->count += cnt % (n - 1);
747 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
748 loop structure and dominators. E's destination must be LOOP header for
749 this to work, i.e. it must be entry or latch edge of this loop; these are
750 unique, as the loops must have preheaders for this function to work
751 correctly (in case E is latch, the function unrolls the loop, if E is entry
752 edge, it peels the loop). Store edges created by copying ORIG edge from
753 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
754 original LOOP body, the other copies are numbered in order given by control
755 flow through them) into TO_REMOVE array. Returns false if duplication is
759 duplicate_loop_to_header_edge (struct loop *loop, edge e,
760 unsigned int ndupl, sbitmap wont_exit,
761 edge orig, VEC (edge, heap) **to_remove,
764 struct loop *target, *aloop;
765 struct loop **orig_loops;
766 unsigned n_orig_loops;
767 basic_block header = loop->header, latch = loop->latch;
768 basic_block *new_bbs, *bbs, *first_active;
769 basic_block new_bb, bb, first_active_latch = NULL;
771 edge spec_edges[2], new_spec_edges[2];
775 int is_latch = (latch == e->src);
776 int scale_act = 0, *scale_step = NULL, scale_main = 0;
777 int scale_after_exit = 0;
778 int p, freq_in, freq_le, freq_out_orig;
779 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
780 int add_irreducible_flag;
781 basic_block place_after;
782 bitmap bbs_to_scale = NULL;
785 gcc_assert (e->dest == loop->header);
786 gcc_assert (ndupl > 0);
790 /* Orig must be edge out of the loop. */
791 gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
792 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
796 bbs = get_loop_body_in_dom_order (loop);
797 gcc_assert (bbs[0] == loop->header);
798 gcc_assert (bbs[n - 1] == loop->latch);
800 /* Check whether duplication is possible. */
801 if (!can_copy_bbs_p (bbs, loop->num_nodes))
806 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
808 /* In case we are doing loop peeling and the loop is in the middle of
809 irreducible region, the peeled copies will be inside it too. */
810 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
811 gcc_assert (!is_latch || !add_irreducible_flag);
813 /* Find edge from latch. */
814 latch_edge = loop_latch_edge (loop);
816 if (flags & DLTHE_FLAG_UPDATE_FREQ)
818 /* Calculate coefficients by that we have to scale frequencies
819 of duplicated loop bodies. */
820 freq_in = header->frequency;
821 freq_le = EDGE_FREQUENCY (latch_edge);
824 if (freq_in < freq_le)
826 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
827 if (freq_out_orig > freq_in - freq_le)
828 freq_out_orig = freq_in - freq_le;
829 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
830 prob_pass_wont_exit =
831 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
834 && REG_BR_PROB_BASE - orig->probability != 0)
836 /* The blocks that are dominated by a removed exit edge ORIG have
837 frequencies scaled by this. */
838 scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE,
839 REG_BR_PROB_BASE - orig->probability);
840 bbs_to_scale = BITMAP_ALLOC (NULL);
841 for (i = 0; i < n; i++)
843 if (bbs[i] != orig->src
844 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
845 bitmap_set_bit (bbs_to_scale, i);
849 scale_step = XNEWVEC (int, ndupl);
851 for (i = 1; i <= ndupl; i++)
852 scale_step[i - 1] = TEST_BIT (wont_exit, i)
853 ? prob_pass_wont_exit
856 /* Complete peeling is special as the probability of exit in last
858 if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
860 int wanted_freq = EDGE_FREQUENCY (e);
862 if (wanted_freq > freq_in)
863 wanted_freq = freq_in;
865 gcc_assert (!is_latch);
866 /* First copy has frequency of incoming edge. Each subsequent
867 frequency should be reduced by prob_pass_wont_exit. Caller
868 should've managed the flags so all except for original loop
869 has won't exist set. */
870 scale_act = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
871 /* Now simulate the duplication adjustments and compute header
872 frequency of the last copy. */
873 for (i = 0; i < ndupl; i++)
874 wanted_freq = RDIV (wanted_freq * scale_step[i], REG_BR_PROB_BASE);
875 scale_main = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
879 prob_pass_main = TEST_BIT (wont_exit, 0)
880 ? prob_pass_wont_exit
883 scale_main = REG_BR_PROB_BASE;
884 for (i = 0; i < ndupl; i++)
887 p = RDIV (p * scale_step[i], REG_BR_PROB_BASE);
889 scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main);
890 scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE);
894 scale_main = REG_BR_PROB_BASE;
895 for (i = 0; i < ndupl; i++)
896 scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE);
897 scale_act = REG_BR_PROB_BASE - prob_pass_thru;
899 for (i = 0; i < ndupl; i++)
900 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
901 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
902 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE);
905 /* Loop the new bbs will belong to. */
906 target = e->src->loop_father;
908 /* Original loops. */
910 for (aloop = loop->inner; aloop; aloop = aloop->next)
912 orig_loops = XCNEWVEC (struct loop *, n_orig_loops);
913 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
914 orig_loops[i] = aloop;
918 first_active = XNEWVEC (basic_block, n);
921 memcpy (first_active, bbs, n * sizeof (basic_block));
922 first_active_latch = latch;
925 spec_edges[SE_ORIG] = orig;
926 spec_edges[SE_LATCH] = latch_edge;
928 place_after = e->src;
929 for (j = 0; j < ndupl; j++)
932 copy_loops_to (orig_loops, n_orig_loops, target);
935 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
937 place_after = new_spec_edges[SE_LATCH]->src;
939 if (flags & DLTHE_RECORD_COPY_NUMBER)
940 for (i = 0; i < n; i++)
942 gcc_assert (!new_bbs[i]->aux);
943 new_bbs[i]->aux = (void *)(size_t)(j + 1);
946 /* Note whether the blocks and edges belong to an irreducible loop. */
947 if (add_irreducible_flag)
949 for (i = 0; i < n; i++)
950 new_bbs[i]->flags |= BB_DUPLICATED;
951 for (i = 0; i < n; i++)
955 if (new_bb->loop_father == target)
956 new_bb->flags |= BB_IRREDUCIBLE_LOOP;
958 FOR_EACH_EDGE (ae, ei, new_bb->succs)
959 if ((ae->dest->flags & BB_DUPLICATED)
960 && (ae->src->loop_father == target
961 || ae->dest->loop_father == target))
962 ae->flags |= EDGE_IRREDUCIBLE_LOOP;
964 for (i = 0; i < n; i++)
965 new_bbs[i]->flags &= ~BB_DUPLICATED;
968 /* Redirect the special edges. */
971 redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
972 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
974 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
975 latch = loop->latch = new_bbs[n - 1];
976 e = latch_edge = new_spec_edges[SE_LATCH];
980 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
982 redirect_edge_and_branch_force (e, new_bbs[0]);
983 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
984 e = new_spec_edges[SE_LATCH];
987 /* Record exit edge in this copy. */
988 if (orig && TEST_BIT (wont_exit, j + 1))
991 VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]);
992 set_zero_probability (new_spec_edges[SE_ORIG]);
994 /* Scale the frequencies of the blocks dominated by the exit. */
997 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
999 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
1005 /* Record the first copy in the control flow order if it is not
1006 the original loop (i.e. in case of peeling). */
1007 if (!first_active_latch)
1009 memcpy (first_active, new_bbs, n * sizeof (basic_block));
1010 first_active_latch = new_bbs[n - 1];
1013 /* Set counts and frequencies. */
1014 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1016 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
1017 scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE);
1023 /* Record the exit edge in the original loop body, and update the frequencies. */
1024 if (orig && TEST_BIT (wont_exit, 0))
1027 VEC_safe_push (edge, heap, *to_remove, orig);
1028 set_zero_probability (orig);
1030 /* Scale the frequencies of the blocks dominated by the exit. */
1033 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1035 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
1041 /* Update the original loop. */
1043 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1044 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1046 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
1050 /* Update dominators of outer blocks if affected. */
1051 for (i = 0; i < n; i++)
1053 basic_block dominated, dom_bb, *dom_bbs;
1059 n_dom_bbs = get_dominated_by (CDI_DOMINATORS, bb, &dom_bbs);
1060 for (j = 0; j < n_dom_bbs; j++)
1062 dominated = dom_bbs[j];
1063 if (flow_bb_inside_loop_p (loop, dominated))
1065 dom_bb = nearest_common_dominator (
1066 CDI_DOMINATORS, first_active[i], first_active_latch);
1067 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1071 free (first_active);
1074 BITMAP_FREE (bbs_to_scale);
1079 /* A callback for make_forwarder block, to redirect all edges except for
1080 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1081 whether to redirect it. */
1083 static edge mfb_kj_edge;
1085 mfb_keep_just (edge e)
1087 return e != mfb_kj_edge;
1090 /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1091 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1092 entry; otherwise we also force preheader block to have only one successor.
1093 The function also updates dominators. */
1096 create_preheader (struct loop *loop, int flags)
1102 bool latch_edge_was_fallthru;
1103 edge one_succ_pred = 0;
1106 FOR_EACH_EDGE (e, ei, loop->header->preds)
1108 if (e->src == loop->latch)
1110 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1112 if (single_succ_p (e->src))
1115 gcc_assert (nentry);
1118 /* Get an edge that is different from the one from loop->latch
1120 e = EDGE_PRED (loop->header,
1121 EDGE_PRED (loop->header, 0)->src == loop->latch);
1123 if (!(flags & CP_SIMPLE_PREHEADERS) || single_succ_p (e->src))
1127 mfb_kj_edge = loop_latch_edge (loop);
1128 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1129 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1130 dummy = fallthru->src;
1131 loop->header = fallthru->dest;
1133 /* Try to be clever in placing the newly created preheader. The idea is to
1134 avoid breaking any "fallthruness" relationship between blocks.
1136 The preheader was created just before the header and all incoming edges
1137 to the header were redirected to the preheader, except the latch edge.
1138 So the only problematic case is when this latch edge was a fallthru
1139 edge: it is not anymore after the preheader creation so we have broken
1140 the fallthruness. We're therefore going to look for a better place. */
1141 if (latch_edge_was_fallthru)
1146 e = EDGE_PRED (dummy, 0);
1148 move_block_after (dummy, e->src);
1153 dummy->flags |= BB_IRREDUCIBLE_LOOP;
1154 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1158 fprintf (dump_file, "Created preheader block for loop %i\n",
1164 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1167 create_preheaders (int flags)
1172 FOR_EACH_LOOP (li, loop, 0)
1173 create_preheader (loop, flags);
1174 current_loops->state |= LOOPS_HAVE_PREHEADERS;
1177 /* Forces all loop latches to have only single successor. */
1180 force_single_succ_latches (void)
1186 FOR_EACH_LOOP (li, loop, 0)
1188 if (loop->latch != loop->header && single_succ_p (loop->latch))
1191 e = find_edge (loop->latch, loop->header);
1195 current_loops->state |= LOOPS_HAVE_SIMPLE_LATCHES;
1198 /* This function is called from loop_version. It splits the entry edge
1199 of the loop we want to version, adds the versioning condition, and
1200 adjust the edges to the two versions of the loop appropriately.
1201 e is an incoming edge. Returns the basic block containing the
1204 --- edge e ---- > [second_head]
1206 Split it and insert new conditional expression and adjust edges.
1208 --- edge e ---> [cond expr] ---> [first_head]
1210 +---------> [second_head]
1212 THEN_PROB is the probability of then branch of the condition. */
1215 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1216 edge e, void *cond_expr, unsigned then_prob)
1218 basic_block new_head = NULL;
1221 gcc_assert (e->dest == second_head);
1223 /* Split edge 'e'. This will create a new basic block, where we can
1224 insert conditional expr. */
1225 new_head = split_edge (e);
1227 lv_add_condition_to_bb (first_head, second_head, new_head,
1230 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1231 e = single_succ_edge (new_head);
1232 e1 = make_edge (new_head, first_head,
1233 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1234 e1->probability = then_prob;
1235 e->probability = REG_BR_PROB_BASE - then_prob;
1236 e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE);
1237 e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE);
1239 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1240 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1242 /* Adjust loop header phi nodes. */
1243 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1248 /* Main entry point for Loop Versioning transformation.
1250 This transformation given a condition and a loop, creates
1251 -if (condition) { loop_copy1 } else { loop_copy2 },
1252 where loop_copy1 is the loop transformed in one way, and loop_copy2
1253 is the loop transformed in another way (or unchanged). 'condition'
1254 may be a run time test for things that were not resolved by static
1255 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1257 THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1258 is the ratio by that the frequencies in the original loop should
1259 be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1260 new loop should be scaled.
1262 If PLACE_AFTER is true, we place the new loop after LOOP in the
1263 instruction stream, otherwise it is placed before LOOP. */
1266 loop_version (struct loop *loop,
1267 void *cond_expr, basic_block *condition_bb,
1268 unsigned then_prob, unsigned then_scale, unsigned else_scale,
1271 basic_block first_head, second_head;
1272 edge entry, latch_edge, true_edge, false_edge;
1275 basic_block cond_bb;
1277 /* CHECKME: Loop versioning does not handle nested loop at this point. */
1281 /* Record entry and latch edges for the loop */
1282 entry = loop_preheader_edge (loop);
1283 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1284 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1286 /* Note down head of loop as first_head. */
1287 first_head = entry->dest;
1289 /* Duplicate loop. */
1290 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1291 NULL, NULL, NULL, 0))
1294 /* After duplication entry edge now points to new loop head block.
1295 Note down new head as second_head. */
1296 second_head = entry->dest;
1298 /* Split loop entry edge and insert new block with cond expr. */
1299 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1300 entry, cond_expr, then_prob);
1302 *condition_bb = cond_bb;
1306 entry->flags |= irred_flag;
1310 latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1312 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1313 nloop = loopify (latch_edge,
1314 single_pred_edge (get_bb_copy (loop->header)),
1315 cond_bb, true_edge, false_edge,
1316 false /* Do not redirect all edges. */,
1317 then_scale, else_scale);
1319 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1320 lv_flush_pending_stmts (latch_edge);
1322 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1323 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1324 lv_flush_pending_stmts (false_edge);
1325 /* Adjust irreducible flag. */
1328 cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1329 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1330 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1331 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1336 basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1339 after = loop->latch;
1341 for (i = 0; i < nloop->num_nodes; i++)
1343 move_block_after (bbs[i], after);
1349 /* At this point condition_bb is loop predheader with two successors,
1350 first_head and second_head. Make sure that loop predheader has only
1352 split_edge (loop_preheader_edge (loop));
1353 split_edge (loop_preheader_edge (nloop));
1358 /* The structure of loops might have changed. Some loops might get removed
1359 (and their headers and latches were set to NULL), loop exists might get
1360 removed (thus the loop nesting may be wrong), and some blocks and edges
1361 were changed (so the information about bb --> loop mapping does not have
1362 to be correct). But still for the remaining loops the header dominates
1363 the latch, and loops did not get new subloobs (new loops might possibly
1364 get created, but we are not interested in them). Fix up the mess.
1366 If CHANGED_BBS is not NULL, basic blocks whose loop has changed are
1370 fix_loop_structure (bitmap changed_bbs)
1373 struct loop *loop, *ploop;
1376 /* Remove the old bb -> loop mapping. */
1379 bb->aux = (void *) (size_t) bb->loop_father->depth;
1380 bb->loop_father = current_loops->tree_root;
1383 /* Remove the dead loops from structures. */
1384 current_loops->tree_root->num_nodes = n_basic_blocks;
1385 FOR_EACH_LOOP (li, loop, 0)
1387 loop->num_nodes = 0;
1393 ploop = loop->inner;
1394 flow_loop_tree_node_remove (ploop);
1395 flow_loop_tree_node_add (loop->outer, ploop);
1398 /* Remove the loop and free its data. */
1402 /* Rescan the bodies of loops, starting from the outermost. */
1403 FOR_EACH_LOOP (li, loop, 0)
1405 loop->num_nodes = flow_loop_nodes_find (loop->header, loop);
1408 /* Now fix the loop nesting. */
1409 FOR_EACH_LOOP (li, loop, 0)
1411 bb = loop_preheader_edge (loop)->src;
1412 if (bb->loop_father != loop->outer)
1414 flow_loop_tree_node_remove (loop);
1415 flow_loop_tree_node_add (bb->loop_father, loop);
1419 /* Mark the blocks whose loop has changed. */
1423 && (void *) (size_t) bb->loop_father->depth != bb->aux)
1424 bitmap_set_bit (changed_bbs, bb->index);
1429 if (current_loops->state & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)
1430 mark_irreducible_loops ();
1432 if (current_loops->state & LOOPS_HAVE_RECORDED_EXITS)
1434 release_recorded_exits ();
1435 record_loop_exits ();