/* Instruction scheduling pass.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com)

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GNU CC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
/* This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region", and the second runs only
   basic block scheduling.

   Interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  The identification of motion type
   and the check for validity of speculative motions requires
   construction and analysis of the function's control flow graph.

   The main entry point for this pass is schedule_insns(), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting,
   control-flow-graph is constructed, regions are computed (after
   reload, each region is of one block), (2) region level: control
   flow graph attributes required for interblock scheduling are
   computed (dominators, reachability, etc.), data dependences and
   priorities are computed, and (3) block level: insns in the block
   are actually scheduled.  */
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "sched-int.h"

#ifdef INSN_SCHEDULING
/* Some accessor macros for h_i_d members only used within this file.  */
#define INSN_REF_COUNT(INSN)    (h_i_d[INSN_UID (INSN)].ref_count)
#define FED_BY_SPEC_LOAD(insn)  (h_i_d[INSN_UID (insn)].fed_by_spec_load)
#define IS_LOAD_INSN(insn)      (h_i_d[INSN_UID (insn)].is_load_insn)

#define MAX_RGN_BLOCKS 10
#define MAX_RGN_INSNS 100
/* nr_inter/spec counts interblock/speculative motion for the function.  */
static int nr_inter, nr_spec;

/* Control flow graph edges are kept in circular lists.  */
static haifa_edge *edge_table;

#define NEXT_IN(edge) (edge_table[edge].next_in)
#define NEXT_OUT(edge) (edge_table[edge].next_out)
#define FROM_BLOCK(edge) (edge_table[edge].from_block)
#define TO_BLOCK(edge) (edge_table[edge].to_block)

/* Number of edges in the control flow graph.  (In fact, larger than
   that by 1, since edge 0 is unused.)  */
static int nr_edges;

/* Circular list of incoming/outgoing edges of a block.  */
static int *in_edges;
static int *out_edges;

#define IN_EDGES(block) (in_edges[block])
#define OUT_EDGES(block) (out_edges[block])
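
/* A minimal sketch (not compiled into the pass) of the walk idiom used
   throughout this file for these circular lists; `b' is a hypothetical
   block number with at least one out edge.  */
#if 0
  int fst_edge, nxt_edge;

  fst_edge = nxt_edge = OUT_EDGES (b);
  do
    {
      /* FROM_BLOCK (nxt_edge) == b; TO_BLOCK (nxt_edge) is one of
         its successors.  */
      nxt_edge = NEXT_OUT (nxt_edge);
    }
  while (fst_edge != nxt_edge);
#endif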
static int is_cfg_nonregular PARAMS ((void));
static int build_control_flow PARAMS ((struct edge_list *));
static void new_edge PARAMS ((int, int));

/* A region is the main entity for interblock scheduling: insns
   are allowed to move between blocks in the same region, along
   control flow graph edges, in the 'up' direction.  */
typedef struct
{
  int rgn_nr_blocks;            /* Number of blocks in region.  */
  int rgn_blocks;               /* Blocks in the region (actually an
                                   index into rgn_bb_table).  */
}
region;
/* Number of regions in the procedure.  */
static int nr_regions;

/* Table of region descriptions.  */
static region *rgn_table;

/* Array of lists of regions' blocks.  */
static int *rgn_bb_table;

/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
static int *block_to_bb;

/* The number of the region containing a block.  */
static int *containing_rgn;

#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
#define BLOCK_TO_BB(block) (block_to_bb[block])
#define CONTAINING_RGN(block) (containing_rgn[block])
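
/* A minimal sketch (not compiled into the pass) of how these tables fit
   together: enumerating the blocks of a hypothetical region `rgn' in
   their topological (bb) order.  */
#if 0
  int bb;

  for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
    {
      int block = rgn_bb_table[RGN_BLOCKS (rgn) + bb];

      /* Here BLOCK_TO_BB (block) == bb and
         CONTAINING_RGN (block) == rgn.  */
    }
#endif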
void debug_regions PARAMS ((void));
static void find_single_block_region PARAMS ((void));
static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
static int too_large PARAMS ((int, int *, int *));

extern void debug_live PARAMS ((int, int));

/* Blocks of the current region being scheduled.  */
static int current_nr_blocks;
static int current_blocks;

/* The mapping from bb to block.  */
#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])

/* Bit vectors and bitset operations are needed for computations on
   the control flow graph.  */
typedef unsigned HOST_WIDE_INT *bitset;
typedef struct
{
  int *first_member;            /* Pointer to the list start in bitlst_table.  */
  int nr_members;               /* The number of members of the bit list.  */
}
bitlst;
static int bitlst_table_last;
static int bitlst_table_size;
static int *bitlst_table;

static char bitset_member PARAMS ((bitset, int, int));
static void extract_bitlst PARAMS ((bitset, int, int, bitlst *));

/* Target info declarations.

   The block currently being scheduled is referred to as the "target" block,
   while other blocks in the region from which insns can be moved to the
   target are called "source" blocks.  The candidate structure holds info
   about such sources: are they valid?  Speculative?  Etc.  */
typedef bitlst bblst;
typedef struct
{
  char is_valid;
  char is_speculative;
  int src_prob;
  bblst split_bbs;
  bblst update_bbs;
}
candidate;

static candidate *candidate_table;
/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static int *bblst_table, bblst_size, bblst_last;

#define IS_VALID(src) ( candidate_table[src].is_valid )
#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
#define SRC_PROB(src) ( candidate_table[src].src_prob )

/* The bb being currently scheduled.  */
static int target_bb;
typedef bitlst edgelst;

/* Target info functions.  */
static void split_edges PARAMS ((int, int, edgelst *));
static void compute_trg_info PARAMS ((int));
void debug_candidate PARAMS ((int));
void debug_candidates PARAMS ((int));

/* Bit-set of bbs, where bit 'i' stands for bb 'i'.  */
typedef bitset bbset;

/* Number of words of the bbset.  */
static int bbset_size;

/* Dominators array: dom[i] contains the bbset of dominators of
   bb i in the region.  */
static bbset *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)

/* Is bb_src dominated by bb_trg?  */
#define IS_DOMINATED(bb_src, bb_trg) \
( bitset_member (dom[bb_src], bb_trg, bbset_size) )
/* Probability: Prob[i] is a float in [0, 1] which is the probability
   of bb i relative to the region entry.  */
static float *prob;

/* The probability of bb_src, relative to bb_trg.  Note, that while the
   'prob[bb]' is a float in [0, 1], this macro returns an integer
   in [0, 100].  */
#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
                                              prob[bb_trg])))
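
/* For example, if prob[bb_src] is 0.4 and prob[bb_trg] is 0.8, the
   source block executes on half of the target's executions and
   GET_SRC_PROB (bb_src, bb_trg) yields 50.  */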
/* Bit-set of edges, where bit i stands for edge i.  */
typedef bitset edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static int *rgn_edges;

/* Number of words in an edgeset.  */
static int edgeset_size;

/* Number of bits in an edgeset.  */
static int edgeset_bitsize;

/* Mapping from each edge in the graph to its number in the rgn.  */
static int *edge_to_bit;
#define EDGE_TO_BIT(edge) (edge_to_bit[edge])

/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  This is actually
   the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;

static void compute_dom_prob_ps PARAMS ((int));

#define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
/* Parameters affecting the decision of rank_for_schedule().
   ??? Nope.  But MIN_PROBABILITY is used in compute_trg_info.  */
#define MIN_DIFF_PRIORITY 2
#define MIN_PROBABILITY 40
#define MIN_PROB_DIFF 10

/* Speculative scheduling functions.  */
static int check_live_1 PARAMS ((int, rtx));
static void update_live_1 PARAMS ((int, rtx));
static int check_live PARAMS ((rtx, int));
static void update_live PARAMS ((rtx, int));
static void set_spec_fed PARAMS ((rtx));
static int is_pfree PARAMS ((rtx, int, int));
static int find_conditional_protection PARAMS ((rtx, int));
static int is_conditionally_protected PARAMS ((rtx, int, int));
static int may_trap_exp PARAMS ((rtx, int));
static int haifa_classify_insn PARAMS ((rtx));
static int is_prisky PARAMS ((rtx, int, int));
static int is_exception_free PARAMS ((rtx, int, int));

static void add_branch_dependences PARAMS ((rtx, rtx));
static void compute_block_backward_dependences PARAMS ((int));
void debug_dependencies PARAMS ((void));

static void init_regions PARAMS ((void));
static void schedule_region PARAMS ((int));
static void propagate_deps PARAMS ((int, struct deps *, int));
static void free_pending_lists PARAMS ((void));
/* Functions for construction of the control flow graph.  */

/* Return 1 if the control flow graph should not be constructed,
   0 otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, or if we
   have nonlocal gotos.  */

static int
is_cfg_nonregular ()
{
  int b;
  rtx insn;
  RTX_CODE code;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If this function has a computed jump, then we consider the cfg
     not well structured.  */
  if (current_function_has_computed_jump)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that flow.c
     computes an accurate cfg for EH.  */
  if (exception_handler_labels)
    return 1;

  /* If we have non-jumping insns which refer to labels, then we consider
     the cfg not well structured.  */
  /* Check for labels referred to other than by jumps.  */
  for (b = 0; b < n_basic_blocks; b++)
    for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
      {
        code = GET_CODE (insn);
        if (GET_RTX_CLASS (code) == 'i')
          {
            rtx note;

            for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
              if (REG_NOTE_KIND (note) == REG_LABEL)
                return 1;
          }

        if (insn == BLOCK_END (b))
          break;
      }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}
/* Build the control flow graph and set nr_edges.

   Instead of trying to build a cfg ourselves, we rely on flow to
   do it for us.  Stamp out useless code (and bug) duplication.

   Return nonzero if an irregularity in the cfg is found which would
   prevent cross block scheduling.  */

static int
build_control_flow (edge_list)
     struct edge_list *edge_list;
{
  int i, unreachable, num_edges;

  /* This already accounts for entry/exit edges.  */
  num_edges = NUM_EDGES (edge_list);

  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block b = BASIC_BLOCK (i);

      if (b->pred == NULL
          || (b->pred->src == b
              && b->pred->pred_next == NULL))
        unreachable = 1;
    }

  /* ??? We can kill these soon.  */
  in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));

  nr_edges = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR
          && e->src != ENTRY_BLOCK_PTR)
        new_edge (e->src->index, e->dest->index);
    }

  /* Increment by 1, since edge 0 is unused.  */
  nr_edges++;

  return unreachable;
}
/* Record an edge in the control flow graph from SOURCE to TARGET.

   In theory, this is redundant with the s_succs computed above, but
   we have not converted all of haifa to use information from the
   integrated cfg.  */

static void
new_edge (source, target)
     int source, target;
{
  int e, next_edge;
  int curr_edge, fst_edge;

  /* Check for duplicates.  */
  fst_edge = curr_edge = OUT_EDGES (source);
  while (curr_edge)
    {
      if (FROM_BLOCK (curr_edge) == source
          && TO_BLOCK (curr_edge) == target)
        return;

      curr_edge = NEXT_OUT (curr_edge);

      if (fst_edge == curr_edge)
        break;
    }

  e = ++nr_edges;

  FROM_BLOCK (e) = source;
  TO_BLOCK (e) = target;

  if (OUT_EDGES (source))
    {
      next_edge = NEXT_OUT (OUT_EDGES (source));
      NEXT_OUT (OUT_EDGES (source)) = e;
      NEXT_OUT (e) = next_edge;
    }
  else
    {
      OUT_EDGES (source) = e;
      NEXT_OUT (e) = e;
    }

  if (IN_EDGES (target))
    {
      next_edge = NEXT_IN (IN_EDGES (target));
      NEXT_IN (IN_EDGES (target)) = e;
      NEXT_IN (e) = next_edge;
    }
  else
    {
      IN_EDGES (target) = e;
      NEXT_IN (e) = e;
    }
}
/* BITSET macros for operations on the control flow graph.  */

/* Compute bitwise union of two bitsets.  */
#define BITSET_UNION(set1, set2, len)                           \
do { register bitset tp = set1, sp = set2;                      \
     register int i;                                            \
     for (i = 0; i < len; i++)                                  \
       *(tp++) |= *(sp++); } while (0)

/* Compute bitwise intersection of two bitsets.  */
#define BITSET_INTER(set1, set2, len)                           \
do { register bitset tp = set1, sp = set2;                      \
     register int i;                                            \
     for (i = 0; i < len; i++)                                  \
       *(tp++) &= *(sp++); } while (0)

/* Compute bitwise difference of two bitsets.  */
#define BITSET_DIFFER(set1, set2, len)                          \
do { register bitset tp = set1, sp = set2;                      \
     register int i;                                            \
     for (i = 0; i < len; i++)                                  \
       *(tp++) &= ~*(sp++); } while (0)

/* Invert every bit of bitset 'set'.  */
#define BITSET_INVERT(set, len)                                 \
do { register bitset tmpset = set;                              \
     register int i;                                            \
     for (i = 0; i < len; i++, tmpset++)                        \
       *tmpset = ~*tmpset; } while (0)

/* Turn on the index'th bit in bitset set.  */
#define BITSET_ADD(set, index, len)                             \
{                                                               \
  if (index >= HOST_BITS_PER_WIDE_INT * len)                    \
    abort ();                                                   \
  else                                                          \
    set[index/HOST_BITS_PER_WIDE_INT] |=                        \
      1 << (index % HOST_BITS_PER_WIDE_INT);                    \
}

/* Turn off the index'th bit in set.  */
#define BITSET_REMOVE(set, index, len)                          \
{                                                               \
  if (index >= HOST_BITS_PER_WIDE_INT * len)                    \
    abort ();                                                   \
  else                                                          \
    set[index/HOST_BITS_PER_WIDE_INT] &=                        \
      ~(1 << (index%HOST_BITS_PER_WIDE_INT));                   \
}

/* Check if the index'th bit in bitset set is on.  */

static char
bitset_member (set, index, len)
     bitset set;
     int index, len;
{
  if (index >= HOST_BITS_PER_WIDE_INT * len)
    abort ();
  return (set[index / HOST_BITS_PER_WIDE_INT] &
          1 << (index % HOST_BITS_PER_WIDE_INT)) ? 1 : 0;
}
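
/* A short sketch (not compiled into the pass) of how the operations
   above compose; it mirrors the dominator computation performed below
   in compute_dom_prob_ps.  `pred_bb' is a hypothetical predecessor.  */
#if 0
  BITSET_INVERT (dom[bb], bbset_size);              /* Start from all-ones.  */
  BITSET_INTER (dom[bb], dom[pred_bb], bbset_size); /* Meet over preds.  */
  BITSET_ADD (dom[bb], bb, bbset_size);             /* A bb dominates itself.  */

  if (bitset_member (dom[bb_src], bb_trg, bbset_size))
    ; /* bb_trg dominates bb_src.  */
#endif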
/* Translate a bit-set SET to a list BL of the bit-set members.  */

static void
extract_bitlst (set, len, bitlen, bl)
     bitset set;
     int len;
     int bitlen;
     bitlst *bl;
{
  int i, j, offset;
  unsigned HOST_WIDE_INT word;

  /* bitlst table space is reused in each call to extract_bitlst.  */
  bitlst_table_last = 0;
  bl->first_member = &bitlst_table[bitlst_table_last];
  bl->nr_members = 0;

  /* Iterate over each word in the bitset.  */
  for (i = 0; i < len; i++)
    {
      word = set[i];
      offset = i * HOST_BITS_PER_WIDE_INT;

      /* Iterate over each bit in the word, but do not
         go beyond the end of the defined bits.  */
      for (j = 0; offset < bitlen && word; j++)
        {
          if (word & 1)
            {
              bitlst_table[bitlst_table_last++] = offset;
              (bl->nr_members)++;
            }
          word >>= 1;
          ++offset;
        }
    }
}
/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

void
debug_regions ()
{
  int rgn, bb;

  fprintf (sched_dump, "\n;;   ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
               rgn_table[rgn].rgn_nr_blocks);
      fprintf (sched_dump, ";;\tbb/block: ");

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
        {
          current_blocks = RGN_BLOCKS (rgn);

          if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
            abort ();

          fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
        }

      fprintf (sched_dump, "\n\n");
    }
}
/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region ()
{
  int i;

  for (i = 0; i < n_basic_blocks; i++)
    {
      rgn_bb_table[i] = i;
      RGN_NR_BLOCKS (i) = 1;
      RGN_BLOCKS (i) = i;
      CONTAINING_RGN (i) = i;
      BLOCK_TO_BB (i) = 0;
    }
  nr_regions = n_basic_blocks;
}
/* Update number of blocks and the estimate for number of insns
   in the region.  Return 1 if the region is "too large" for interblock
   scheduling (compile time considerations), otherwise return 0.  */

static int
too_large (block, num_bbs, num_insns)
     int block, *num_bbs, *num_insns;
{
  (*num_bbs)++;
  (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
                   INSN_LUID (BLOCK_HEAD (block)));
  if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
    return 1;
  else
    return 0;
}
/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the innermost
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)                         \
{                                                               \
  if (max_hdr[blk] == -1)                                       \
    max_hdr[blk] = hdr;                                         \
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])                  \
    RESET_BIT (inner, hdr);                                     \
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])                  \
    {                                                           \
      RESET_BIT (inner, max_hdr[blk]);                          \
      max_hdr[blk] = hdr;                                       \
    }                                                           \
}
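
/* For example, if BLK sits in two nested natural loops headed by H1
   (outer) and H2 (inner, so dfs_nr[H1] < dfs_nr[H2]), the update with
   hdr == H1 records max_hdr[BLK] = H1, and the update with hdr == H2
   then clears H1's bit in `inner' and records max_hdr[BLK] = H2, the
   innermost header.  */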
/* Find regions for interblock scheduling.

   A region for scheduling can be:

     * A loop-free procedure, or

     * A reducible inner loop, or

     * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick)
   variables:

     nr_regions
     rgn_table
     rgn_bb_table
     block_to_bb
     containing_rgn

   We use dominator relationships to avoid making regions out of
   non-reducible loops.

   This procedure needs to be converted to work on pred/succ lists instead
   of edge tables.  That would simplify it somewhat.  */
static void
find_rgns (edge_list, dom)
     struct edge_list *edge_list;
     sbitmap *dom;
{
  int *max_hdr, *dfs_nr, *stack, *degree;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0, current_edge = out_edges[0];
  int num_bbs, num_insns, unreachable;
  int too_large_failure;

  /* Note if an edge has been passed.  */
  sbitmap passed;

  /* Note if a block is a natural loop header.  */
  sbitmap header;

  /* Note if a block is a natural inner loop header.  */
  sbitmap inner;

  /* Note if a block is in the block queue.  */
  sbitmap in_queue;

  /* Note if a block is in the block stack.  */
  sbitmap in_stack;

  int num_edges = NUM_EDGES (edge_list);

  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively, these will
     be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
  dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
  stack = (int *) xmalloc (nr_edges * sizeof (int));

  inner = sbitmap_alloc (n_basic_blocks);
  sbitmap_ones (inner);

  header = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (header);

  passed = sbitmap_alloc (nr_edges);
  sbitmap_zero (passed);

  in_queue = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_queue);

  in_stack = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_stack);
  for (i = 0; i < n_basic_blocks; i++)
    max_hdr[i] = -1;

  /* DFS traversal to find inner loops in the cfg.  */

      if (current_edge == 0 || TEST_BIT (passed, current_edge))
        {
          /* We have reached a leaf node or a node that was already
             processed.  Pop edges off the stack until we find
             an edge that has not yet been processed.  */
          while (sp >= 0
                 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
            {
              /* Pop entry off the stack.  */
              current_edge = stack[sp--];
              node = FROM_BLOCK (current_edge);
              child = TO_BLOCK (current_edge);
              RESET_BIT (in_stack, child);
              if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
                UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
              current_edge = NEXT_OUT (current_edge);
            }

          /* See if we have finished the DFS tree traversal.  */
          if (sp < 0 && TEST_BIT (passed, current_edge))
          /* Nope, continue the traversal with the popped node.  */

      /* Process a node.  */
      node = FROM_BLOCK (current_edge);
      child = TO_BLOCK (current_edge);
      SET_BIT (in_stack, node);
      dfs_nr[node] = ++count;

      /* If the successor is in the stack, then we've found a loop.
         Mark the loop, if it is not a natural loop, then it will
         be rejected during the second traversal.  */
      if (TEST_BIT (in_stack, child))
        {
          SET_BIT (header, child);
          UPDATE_LOOP_RELATIONS (node, child);
          SET_BIT (passed, current_edge);
          current_edge = NEXT_OUT (current_edge);
        }

      /* If the child was already visited, then there is no need to visit
         it again.  Just update the loop relationships and restart
         with a new edge.  */
      if (dfs_nr[child])
        {
          if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
            UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
          SET_BIT (passed, current_edge);
          current_edge = NEXT_OUT (current_edge);
        }

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_BIT (passed, current_edge);
      current_edge = OUT_EDGES (child);
      /* This is temporary until haifa is converted to use rth's new
         cfg routines which have true entry/exit blocks and the
         appropriate edges from/to those blocks.

         Generally we update dfs_nr for a node when we process its
         out edge.  However, if the node has no out edge then we will
         not set dfs_nr for that node.  This can confuse the scheduler
         into thinking that we have unreachable blocks, which in turn
         disables cross block scheduling.

         So, if we have a node with no out edges, go ahead and mark it
         as reachable now.  */
      if (current_edge == 0)
        dfs_nr[child] = ++count;

  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    if (dfs_nr[i] == 0)
      {
        unreachable = 1;
        break;
      }
  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  for (i = 0; i < n_basic_blocks; i++)
    degree[i] = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR)
        degree[e->dest->index]++;
    }

  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      int *queue;
      /* Second traversal: find reducible inner loops and topologically sort
         the blocks of each region.  */

      queue = (int *) xmalloc (n_basic_blocks * sizeof (int));

      /* Find blocks which are inner loop headers.  We still have non-reducible
         loops to consider at this point.  */
      for (i = 0; i < n_basic_blocks; i++)
        {
          if (TEST_BIT (header, i) && TEST_BIT (inner, i))
            {
              edge e;
              int j;

              /* Now check that the loop is reducible.  We do this separately
                 from finding inner loops so that we do not find a reducible
                 loop which contains an inner non-reducible loop.

                 A simple way to find reducible/natural loops is to verify
                 that each block in the loop is dominated by the loop
                 header.

                 If there exists a block that is not dominated by the loop
                 header, then the block is reachable from outside the loop
                 and thus the loop is not a natural loop.  */
              for (j = 0; j < n_basic_blocks; j++)
                {
                  /* First identify blocks in the loop, except for the loop
                     entry block.  */
                  if (i == max_hdr[j] && i != j)
                    {
                      /* Now verify that the block is dominated by the loop
                         header.  */
                      if (!TEST_BIT (dom[j], i))
                        break;
                    }
                }

              /* If we exited the loop early, then I is the header of
                 a non-reducible loop and we should quit processing it
                 now.  */
              if (j != n_basic_blocks)
                continue;

              /* I is a header of an inner loop, or block 0 in a subroutine
                 with no loops at all.  */
              head = tail = -1;
              too_large_failure = 0;
              loop_head = max_hdr[i];

              /* Decrease degree of all I's successors for topological
                 ordering.  */
              for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
                if (e->dest != EXIT_BLOCK_PTR)
                  --degree[e->dest->index];

              /* Estimate # insns, and count # blocks in the region.  */
              num_bbs = 1;
              num_insns = (INSN_LUID (BLOCK_END (i))
                           - INSN_LUID (BLOCK_HEAD (i)));

              /* Find all loop latches (blocks with back edges to the loop
                 header), or all the leaf blocks if the cfg has no loops.

                 Place those blocks into the queue.  */

                  for (j = 0; j < n_basic_blocks; j++)
                    /* Leaf nodes have only a single successor which must
                       be EXIT_BLOCK.  */
                    if (BASIC_BLOCK (j)->succ
                        && BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
                        && BASIC_BLOCK (j)->succ->succ_next == NULL)
                      {
                        queue[++tail] = j;
                        SET_BIT (in_queue, j);

                        if (too_large (j, &num_bbs, &num_insns))
                          {
                            too_large_failure = 1;
                            break;
                          }
                      }

              for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
                {
                  if (e->src == ENTRY_BLOCK_PTR)
                    continue;

                  node = e->src->index;

                  if (max_hdr[node] == loop_head && node != i)
                    {
                      /* This is a loop latch.  */
                      queue[++tail] = node;
                      SET_BIT (in_queue, node);

                      if (too_large (node, &num_bbs, &num_insns))
                        {
                          too_large_failure = 1;
                          break;
                        }
                    }
                }
              /* Now add all the blocks in the loop to the queue.

                 We know the loop is a natural loop; however the algorithm
                 above will not always mark certain blocks as being in the
                 loop.  Consider:
                    node    children
                     a        b,c
                     b        c
                     c        a,d
                     d        b

                 The algorithm in the DFS traversal may not mark B & D as part
                 of the loop (i.e. they will not have max_hdr set to A).

                 We know they cannot be loop latches (else they would have
                 had max_hdr set since they'd have a backedge to a dominator
                 block).  So we don't need them on the initial queue.

                 We know they are part of the loop because they are dominated
                 by the loop header and can be reached by a backwards walk of
                 the edges starting with nodes on the initial queue.

                 It is safe and desirable to include those nodes in the
                 loop/scheduling region.  To do so we would need to decrease
                 the degree of a node if it is the target of a backedge
                 within the loop itself as the node is placed in the queue.

                 We do not do this because I'm not sure that the actual
                 scheduling code will properly handle this case.  ?!? */
              while (head < tail && !too_large_failure)
                {
                  edge e;
                  child = queue[++head];

                  for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
                    {
                      node = e->src->index;

                      /* See discussion above about nodes not marked as in
                         this loop during the initial DFS traversal.  */
                      if (e->src == ENTRY_BLOCK_PTR
                          || max_hdr[node] != loop_head)
                        {
                          tail = -1;
                          break;
                        }
                      else if (!TEST_BIT (in_queue, node) && node != i)
                        {
                          queue[++tail] = node;
                          SET_BIT (in_queue, node);

                          if (too_large (node, &num_bbs, &num_insns))
                            {
                              too_large_failure = 1;
                              break;
                            }
                        }
                    }
                }
              if (tail >= 0 && !too_large_failure)
                {
                  /* Place the loop header into list of region blocks.  */
                  rgn_bb_table[idx] = i;
                  RGN_NR_BLOCKS (nr_regions) = num_bbs;
                  RGN_BLOCKS (nr_regions) = idx++;
                  CONTAINING_RGN (i) = nr_regions;
                  BLOCK_TO_BB (i) = count = 0;

                  /* Remove blocks from queue[] when their in degree
                     becomes zero.  Repeat until no blocks are left on the
                     list.  This produces a topological list of blocks in
                     the region.  */

                      child = queue[head];
                      if (degree[child] == 0)
                        {
                          edge e;

                          rgn_bb_table[idx++] = child;
                          BLOCK_TO_BB (child) = ++count;
                          CONTAINING_RGN (child) = nr_regions;
                          queue[head] = queue[tail--];

                          for (e = BASIC_BLOCK (child)->succ;
                               e;
                               e = e->succ_next)
                            if (e->dest != EXIT_BLOCK_PTR)
                              --degree[e->dest->index];
                        }
  /* Any block that did not end up in a region is placed into a region
     by itself.  */
  for (i = 0; i < n_basic_blocks; i++)
    if (degree[i] >= 0)
      {
        rgn_bb_table[idx] = i;
        RGN_NR_BLOCKS (nr_regions) = 1;
        RGN_BLOCKS (nr_regions) = idx++;
        CONTAINING_RGN (i) = nr_regions++;
        BLOCK_TO_BB (i) = 0;
      }
/* Functions for region scheduling information.  */

/* Compute dominators, probability, and potential-split-edges of bb.
   Assume that these values were already computed for bb's predecessors.  */
static void
compute_dom_prob_ps (bb)
     int bb;
{
  int nxt_in_edge, fst_in_edge, pred;
  int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;

  if (IS_RGN_ENTRY (bb))
    {
      BITSET_ADD (dom[bb], 0, bbset_size);
      prob[bb] = 1.0;
      return;
    }

  fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));

  /* Initialize dom[bb] to '111..1'.  */
  BITSET_INVERT (dom[bb], bbset_size);

  do
    {
      pred = FROM_BLOCK (nxt_in_edge);
      BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);

      BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
                    edgeset_size);

      BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);

      nr_out_edges = 1;
      nr_rgn_out_edges = 0;
      fst_out_edge = OUT_EDGES (pred);
      nxt_out_edge = NEXT_OUT (fst_out_edge);
      BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
                    edgeset_size);

      BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);

      /* The successor doesn't belong in the region?  */
      if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
          CONTAINING_RGN (BB_TO_BLOCK (bb)))
        ++nr_rgn_out_edges;

      while (fst_out_edge != nxt_out_edge)
        {
          ++nr_out_edges;
          /* The successor doesn't belong in the region?  */
          if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
              CONTAINING_RGN (BB_TO_BLOCK (bb)))
            ++nr_rgn_out_edges;
          BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
          nxt_out_edge = NEXT_OUT (nxt_out_edge);
        }

      /* Now nr_rgn_out_edges is the number of region-exit edges from
         pred, and nr_out_edges will be the number of pred out edges
         not leaving the region.  */
      nr_out_edges -= nr_rgn_out_edges;
      if (nr_rgn_out_edges > 0)
        prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
      else
        prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;

      nxt_in_edge = NEXT_IN (nxt_in_edge);
    }
  while (fst_in_edge != nxt_in_edge);

  BITSET_ADD (dom[bb], bb, bbset_size);
  BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);

  if (sched_verbose >= 2)
    fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
             (int) (100.0 * prob[bb]));
}
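
/* A worked example of the probability update above: if a predecessor
   with prob == 0.8 has two out edges, one of which leaves the region,
   then nr_out_edges ends up as 1 and the remaining in-region edge
   contributes 0.9 * 0.8 / 1 == 0.72 to prob[bb]; the 0.9 factor
   discounts predecessors whose control flow can escape the region.  */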
/* Functions for target info.  */

/* Compute in BL the list of split-edges of bb_src relative to bb_trg.
   Note that bb_trg dominates bb_src.  */

static void
split_edges (bb_src, bb_trg, bl)
     int bb_src;
     int bb_trg;
     edgelst *bl;
{
  int es = edgeset_size;
  edgeset src = (edgeset) xcalloc (es, sizeof (HOST_WIDE_INT));

  while (es--)
    src[es] = (pot_split[bb_src])[es];
  BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
  extract_bitlst (src, edgeset_size, edgeset_bitsize, bl);
  free (src);
}
/* Find the valid candidate-source-blocks for the target block TRG, compute
   their probability, and check if they are speculative or not.
   For speculative sources, compute their update-blocks and split-blocks.  */

static void
compute_trg_info (trg)
     int trg;
{
  register candidate *sp;
  edgelst el;
  int check_block, update_idx;
  int i, j, k, fst_edge, nxt_edge;

  /* Define some of the fields for the target bb as well.  */
  sp = candidate_table + trg;
  sp->is_valid = 1;
  sp->is_speculative = 0;
  sp->src_prob = 100;

  for (i = trg + 1; i < current_nr_blocks; i++)
    {
      sp = candidate_table + i;

      sp->is_valid = IS_DOMINATED (i, trg);
      if (sp->is_valid)
        {
          sp->src_prob = GET_SRC_PROB (i, trg);
          sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
        }

      if (sp->is_valid)
        {
          split_edges (i, trg, &el);
          sp->is_speculative = (el.nr_members) ? 1 : 0;
          if (sp->is_speculative && !flag_schedule_speculative)
            sp->is_valid = 0;
        }

      if (sp->is_valid)
        {
          char *update_blocks;

          /* Compute split blocks and store them in bblst_table.
             The TO block of every split edge is a split block.  */
          sp->split_bbs.first_member = &bblst_table[bblst_last];
          sp->split_bbs.nr_members = el.nr_members;
          for (j = 0; j < el.nr_members; bblst_last++, j++)
            bblst_table[bblst_last] =
              TO_BLOCK (rgn_edges[el.first_member[j]]);
          sp->update_bbs.first_member = &bblst_table[bblst_last];

          /* Compute update blocks and store them in bblst_table.
             For every split edge, look at the FROM block, and check
             all out edges.  For each out edge that is not a split edge,
             add the TO block to the update block list.  This list can end
             up with a lot of duplicates.  We need to weed them out to avoid
             overrunning the end of the bblst_table.  */
          update_blocks = (char *) alloca (n_basic_blocks);
          memset (update_blocks, 0, n_basic_blocks);

          update_idx = 0;
          for (j = 0; j < el.nr_members; j++)
            {
              check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
              fst_edge = nxt_edge = OUT_EDGES (check_block);
              do
                {
                  if (! update_blocks[TO_BLOCK (nxt_edge)])
                    {
                      for (k = 0; k < el.nr_members; k++)
                        if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
                          break;

                      if (k >= el.nr_members)
                        {
                          bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
                          update_blocks[TO_BLOCK (nxt_edge)] = 1;
                          update_idx++;
                        }
                    }

                  nxt_edge = NEXT_OUT (nxt_edge);
                }
              while (fst_edge != nxt_edge);
            }
          sp->update_bbs.nr_members = update_idx;

          /* Make sure we didn't overrun the end of bblst_table.  */
          if (bblst_last > bblst_size)
            abort ();
        }
      else
        {
          sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;

          sp->is_speculative = 0;
          sp->src_prob = 0;
        }
    }
}
/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidate (i)
     int i;
{
  if (!candidate_table[i].is_valid)
    return;

  if (candidate_table[i].is_speculative)
    {
      int j;
      fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);

      fprintf (sched_dump, "split path: ");
      for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
        {
          int b = candidate_table[i].split_bbs.first_member[j];

          fprintf (sched_dump, " %d ", b);
        }
      fprintf (sched_dump, "\n");

      fprintf (sched_dump, "update path: ");
      for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
        {
          int b = candidate_table[i].update_bbs.first_member[j];

          fprintf (sched_dump, " %d ", b);
        }
      fprintf (sched_dump, "\n");
    }
  else
    fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
}

/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidates (trg)
     int trg;
{
  int i;

  fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
           BB_TO_BLOCK (trg), trg);
  for (i = trg + 1; i < current_nr_blocks; i++)
    debug_candidate (i);
}
/* Functions for speculative scheduling.  */

/* Return 0 if x is a set of a register alive in the beginning of one
   of the split-blocks of src, otherwise return 1.  */

static int
check_live_1 (src, x)
     int src;
     rtx x;
{
  register int i;
  register int regno;
  register rtx reg = SET_DEST (x);

  if (reg == 0)
    return 1;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
         || GET_CODE (reg) == SIGN_EXTRACT
         || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL
      && GET_MODE (reg) == BLKmode)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
        if (check_live_1 (src, XVECEXP (reg, 0, i)))
          return 1;

      return 0;
    }

  if (GET_CODE (reg) != REG)
    return 1;

  regno = REGNO (reg);

  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
    {
      /* Global registers are assumed live.  */
      return 0;
    }
  else
    {
      if (regno < FIRST_PSEUDO_REGISTER)
        {
          /* Check for hard registers.  */
          int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
          while (--j >= 0)
            {
              for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
                {
                  int b = candidate_table[src].split_bbs.first_member[i];

                  if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
                                       regno + j))
                    return 0;
                }
            }
        }
      else
        {
          /* Check for pseudo registers.  */
          for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
            {
              int b = candidate_table[src].split_bbs.first_member[i];

              if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
                return 0;
            }
        }
    }

  return 1;
}
/* If x is a set of a register R, mark that R is alive in the beginning
   of every update-block of src.  */

static void
update_live_1 (src, x)
     int src;
     rtx x;
{
  register int i;
  register int regno;
  register rtx reg = SET_DEST (x);

  if (reg == 0)
    return;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
         || GET_CODE (reg) == SIGN_EXTRACT
         || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL
      && GET_MODE (reg) == BLKmode)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
        update_live_1 (src, XVECEXP (reg, 0, i));

      return;
    }

  if (GET_CODE (reg) != REG)
    return;

  /* Global registers are always live, so the code below does not apply
     to them.  */

  regno = REGNO (reg);

  if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
    {
      if (regno < FIRST_PSEUDO_REGISTER)
        {
          int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
          while (--j >= 0)
            {
              for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
                {
                  int b = candidate_table[src].update_bbs.first_member[i];

                  SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
                                     regno + j);
                }
            }
        }
      else
        {
          for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
            {
              int b = candidate_table[src].update_bbs.first_member[i];

              SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
            }
        }
    }
}
/* Return 1 if insn can be speculatively moved from block src to trg,
   otherwise return 0.  Called before first insertion of insn to
   ready-list or before the scheduling.  */

static int
check_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return check_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
        if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
             || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
            && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
          return 0;

      return 1;
    }

  return 1;
}

/* Update the live registers info after insn was moved speculatively from
   block src to trg.  */

static void
update_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    update_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
        if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
            || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
          update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
    }
}
/* Exception Free Loads:

   We define five classes of speculative loads: IFREE, IRISKY,
   PFREE, PRISKY, and MFREE.

   IFREE loads are loads that are proved to be exception-free, just
   by examining the load insn.  Examples for such loads are loads
   from TOC and loads of global data.

   IRISKY loads are loads that are proved to be exception-risky,
   just by examining the load insn.  Examples for such loads are
   volatile loads and loads from shared memory.

   PFREE loads are loads for which we can prove, by examining other
   insns, that they are exception-free.  Currently, this class consists
   of loads for which we are able to find a "similar load", either in
   the target block, or, if only one split-block exists, in that split
   block.  Load2 is similar to load1 if both have same single base
   register.  We identify only part of the similar loads, by finding
   an insn upon which both load1 and load2 have a DEF-USE dependence.

   PRISKY loads are loads for which we can prove, by examining other
   insns, that they are exception-risky.  Currently we have two proofs for
   such loads.  The first proof detects loads that are probably guarded by a
   test on the memory address.  This proof is based on the
   backward and forward data dependence information for the region.
   Let load-insn be the examined load.
   Load-insn is PRISKY iff ALL the following hold:

   - insn1 is not in the same block as load-insn
   - there is a DEF-USE dependence chain (insn1, ..., load-insn)
   - test-insn is either a compare or a branch, not in the same block
     as load-insn
   - load-insn is reachable from test-insn
   - there is a DEF-USE dependence chain (insn1, ..., test-insn)

   This proof might fail when the compare and the load are fed
   by an insn not in the region.  To solve this, we will add to this
   group all loads that have no input DEF-USE dependence.

   The second proof detects loads that are directly or indirectly
   fed by a speculative load.  This proof is affected by the
   scheduling process.  We will use the flag fed_by_spec_load.
   Initially, all insns have this flag reset.  After a speculative
   motion of an insn, if insn is either a load, or marked as
   fed_by_spec_load, we will also mark as fed_by_spec_load every
   insn1 for which a DEF-USE dependence (insn, insn1) exists.  A
   load which is fed_by_spec_load is also PRISKY.

   MFREE (maybe-free) loads are all the remaining loads.  They may be
   exception-free, but we cannot prove it.

   Now, all loads in IFREE and PFREE classes are considered
   exception-free, while all loads in IRISKY and PRISKY classes are
   considered exception-risky.  As for loads in the MFREE class,
   these are considered either exception-free or exception-risky,
   depending on whether we are pessimistic or optimistic.  We have
   to take the pessimistic approach to assure the safety of
   speculative scheduling, but we can take the optimistic approach
   by invoking the -fsched-spec-load-dangerous option.  */
enum INSN_TRAP_CLASS
{
  TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
  PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
};

#define WORST_CLASS(class1, class2) \
((class1 > class2) ? class1 : class2)

/* Non-zero if block bb_to is equal to, or reachable from block bb_from.  */
#define IS_REACHABLE(bb_from, bb_to)                                    \
(bb_from == bb_to                                                       \
 || IS_RGN_ENTRY (bb_from)                                              \
 || (bitset_member (ancestor_edges[bb_to],                              \
                    EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))),     \
                    edgeset_size)))

/* Non-zero iff the address is composed of at most one register.  */
#define CONST_BASED_ADDRESS_P(x)                        \
  (GET_CODE (x) == REG                                  \
   || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS   \
        || (GET_CODE (x) == LO_SUM))                    \
       && (GET_CODE (XEXP (x, 0)) == CONST_INT          \
           || GET_CODE (XEXP (x, 1)) == CONST_INT)))
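
/* For example, both (reg 3) and (plus (reg 3) (const_int 8)) satisfy
   CONST_BASED_ADDRESS_P, while (plus (reg 3) (reg 4)) does not.  */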
/* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */

static void
set_spec_fed (load_insn)
     rtx load_insn;
{
  rtx link;

  for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
    if (GET_MODE (link) == VOIDmode)
      FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
}                               /* set_spec_fed */

/* On the path from the insn to load_insn_bb, find a conditional
   branch depending on insn, that guards the speculative load.  */

static int
find_conditional_protection (insn, load_insn_bb)
     rtx insn;
     int load_insn_bb;
{
  rtx link;

  /* Iterate through DEF-USE forward dependences.  */
  for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
    {
      rtx next = XEXP (link, 0);
      if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
           CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
          && IS_REACHABLE (INSN_BB (next), load_insn_bb)
          && load_insn_bb != INSN_BB (next)
          && GET_MODE (link) == VOIDmode
          && (GET_CODE (next) == JUMP_INSN
              || find_conditional_protection (next, load_insn_bb)))
        return 1;
    }
  return 0;
}                               /* find_conditional_protection */
/* Returns 1 if the same insn1 that participates in the computation
   of load_insn's address is feeding a conditional branch that is
   guarding on load_insn.  This is true if we find the two DEF-USE
   dependences:
   insn1 -> ... -> conditional-branch
   insn1 -> ... -> load_insn,
   and if a flow path exists:
   insn1 -> ... -> conditional-branch -> ... -> load_insn,
   and if insn1 is on the path
   region-entry -> ... -> bb_trg -> ... load_insn.

   Locate insn1 by climbing on LOG_LINKS from load_insn.
   Locate the branch by following INSN_DEPEND from insn1.  */

static int
is_conditionally_protected (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx link;

  for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
    {
      rtx insn1 = XEXP (link, 0);

      /* Must be a DEF-USE dependence upon non-branch.  */
      if (GET_MODE (link) != VOIDmode
          || GET_CODE (insn1) == JUMP_INSN)
        continue;

      /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn.  */
      if (INSN_BB (insn1) == bb_src
          || (CONTAINING_RGN (BLOCK_NUM (insn1))
              != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
          || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
              && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
        continue;

      /* Now search for the conditional-branch.  */
      if (find_conditional_protection (insn1, bb_src))
        return 1;

      /* Recursive step: search another insn1, "above" current insn1.  */
      return is_conditionally_protected (insn1, bb_src, bb_trg);
    }

  /* The chain does not exist.  */
  return 0;
}                               /* is_conditionally_protected */
/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
   load_insn can move speculatively from bb_src to bb_trg.  All the
   following must hold:

   (1) both loads have 1 base register (PFREE_CANDIDATEs).
   (2) load_insn and load1 have a def-use dependence upon
   the same insn 'insn1'.
   (3) either load2 is in bb_trg, or:
   - there's only one split-block, and
   - load1 is on the escape path, and

   From all these we can conclude that the two loads access memory
   addresses that differ at most by a constant, and hence if moving
   load_insn would cause an exception, it would have been caused by
   load2 anyhow.  */

static int
is_pfree (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx back_link;
  register candidate *candp = candidate_table + bb_src;

  if (candp->split_bbs.nr_members != 1)
    /* Must have exactly one escape block.  */
    return 0;

  for (back_link = LOG_LINKS (load_insn);
       back_link; back_link = XEXP (back_link, 1))
    {
      rtx insn1 = XEXP (back_link, 0);

      if (GET_MODE (back_link) == VOIDmode)
        {
          /* Found a DEF-USE dependence (insn1, load_insn).  */
          rtx fore_link;

          for (fore_link = INSN_DEPEND (insn1);
               fore_link; fore_link = XEXP (fore_link, 1))
            {
              rtx insn2 = XEXP (fore_link, 0);
              if (GET_MODE (fore_link) == VOIDmode)
                {
                  /* Found a DEF-USE dependence (insn1, insn2).  */
                  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
                    /* insn2 not guaranteed to be a 1 base reg load.  */
                    continue;

                  if (INSN_BB (insn2) == bb_trg)
                    /* insn2 is the similar load, in the target block.  */
                    return 1;

                  if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
                    /* insn2 is a similar load, in a split-block.  */
                    return 1;
                }
            }
        }
    }

  /* Couldn't find a similar load.  */
  return 0;
}                               /* is_pfree */
/* Returns a class that insn with GET_DEST(insn)=x may belong to,
   as found by analyzing insn's expression.  */

static int
may_trap_exp (x, is_store)
     rtx x;
     int is_store;
{
  enum rtx_code code;

  if (x == 0)
    return TRAP_FREE;
  code = GET_CODE (x);
  if (is_store)
    {
      if (code == MEM)
        return TRAP_RISKY;
      else
        return TRAP_FREE;
    }
  if (code == MEM)
    {
      /* The insn uses memory:  a volatile load.  */
      if (MEM_VOLATILE_P (x))
        return IRISKY;
      /* An exception-free load.  */
      if (!may_trap_p (x))
        return IFREE;
      /* A load with 1 base register, to be further checked.  */
      if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
        return PFREE_CANDIDATE;
      /* No info on the load, to be further checked.  */
      return PRISKY_CANDIDATE;
    }
  else
    {
      const char *fmt;
      int i, insn_class = TRAP_FREE;

      /* Neither store nor load, check if it may cause a trap.  */
      if (may_trap_p (x))
        return TRAP_RISKY;
      /* Recursive step: walk the insn...  */
      fmt = GET_RTX_FORMAT (code);
      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
        {
          if (fmt[i] == 'e')
            {
              int tmp_class = may_trap_exp (XEXP (x, i), is_store);
              insn_class = WORST_CLASS (insn_class, tmp_class);
            }
          else if (fmt[i] == 'E')
            {
              int j;
              for (j = 0; j < XVECLEN (x, i); j++)
                {
                  int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
                  insn_class = WORST_CLASS (insn_class, tmp_class);
                  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
                    break;
                }
            }
          if (insn_class == TRAP_RISKY || insn_class == IRISKY)
            break;
        }
      return insn_class;
    }
}
/* Classifies insn for the purpose of verifying that it can be
   moved speculatively, by examining its pattern, returning:
   TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
   TRAP_FREE: non-load insn.
   IFREE: load from a globally safe location.
   IRISKY: volatile load.
   PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
   being either PFREE or PRISKY.  */

static int
haifa_classify_insn (insn)
     rtx insn;
{
  rtx pat = PATTERN (insn);
  int tmp_class = TRAP_FREE;
  int insn_class = TRAP_FREE;
  enum rtx_code code;

  if (GET_CODE (pat) == PARALLEL)
    {
      int i, len = XVECLEN (pat, 0);

      for (i = len - 1; i >= 0; i--)
        {
          code = GET_CODE (XVECEXP (pat, 0, i));
          switch (code)
            {
            case CLOBBER:
              /* Test if it is a 'store'.  */
              tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
              break;
            case SET:
              /* Test if it is a store.  */
              tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
              if (tmp_class == TRAP_RISKY)
                break;
              /* Test if it is a load.  */
              tmp_class =
                WORST_CLASS (tmp_class,
                             may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
              break;
            case TRAP_IF:
              tmp_class = TRAP_RISKY;
              break;
            default:
              ;
            }
          insn_class = WORST_CLASS (insn_class, tmp_class);
          if (insn_class == TRAP_RISKY || insn_class == IRISKY)
            break;
        }
    }
  else
    {
      code = GET_CODE (pat);
      switch (code)
        {
        case CLOBBER:
          /* Test if it is a 'store'.  */
          tmp_class = may_trap_exp (XEXP (pat, 0), 1);
          break;
        case SET:
          /* Test if it is a store.  */
          tmp_class = may_trap_exp (SET_DEST (pat), 1);
          if (tmp_class == TRAP_RISKY)
            break;
          /* Test if it is a load.  */
          tmp_class =
            WORST_CLASS (tmp_class,
                         may_trap_exp (SET_SRC (pat), 0));
          break;
        case TRAP_IF:
          tmp_class = TRAP_RISKY;
          break;
        default:
          ;
        }
      insn_class = tmp_class;
    }

  return insn_class;
}
/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
   a load moved speculatively, or if load_insn is protected by
   a compare on load_insn's address).  */

static int
is_prisky (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  if (FED_BY_SPEC_LOAD (load_insn))
    return 1;

  if (LOG_LINKS (load_insn) == NULL)
    /* Dependence may 'hide' out of the region.  */
    return 1;

  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
    return 1;

  return 0;
}
/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
   Return 1 if insn is exception-free (and the motion is valid)
   and 0 otherwise.  */

static int
is_exception_free (insn, bb_src, bb_trg)
     rtx insn;
     int bb_src, bb_trg;
{
  int insn_class = haifa_classify_insn (insn);

  /* Handle non-load insns.  */
  switch (insn_class)
    {
    case TRAP_FREE:
      return 1;
    case TRAP_RISKY:
      return 0;
    default:;
    }

  /* Handle loads.  */
  if (!flag_schedule_speculative_load)
    return 0;
  IS_LOAD_INSN (insn) = 1;
  switch (insn_class)
    {
    case IFREE:
      return 1;
    case IRISKY:
      return 0;
    case PFREE_CANDIDATE:
      if (is_pfree (insn, bb_src, bb_trg))
        return 1;
      /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
    case PRISKY_CANDIDATE:
      if (!flag_schedule_speculative_load_dangerous
          || is_prisky (insn, bb_src, bb_trg))
        return 0;
      break;
    default:;
    }

  return flag_schedule_speculative_load_dangerous;
}
/* The number of insns from the current block scheduled so far.  */
static int sched_target_n_insns;
/* The number of insns from the current block to be scheduled in total.  */
static int target_n_insns;
/* The number of insns from the entire region scheduled so far.  */
static int sched_n_insns;
/* Nonzero if the last scheduled insn was a jump.  */
static int last_was_jump;

/* Implementations of the sched_info functions for region scheduling.  */
static void init_ready_list PARAMS ((struct ready_list *));
static int can_schedule_ready_p PARAMS ((rtx));
static int new_ready PARAMS ((rtx));
static int schedule_more_p PARAMS ((void));
static const char *rgn_print_insn PARAMS ((rtx, int));
static int rgn_rank PARAMS ((rtx, rtx));

/* Return nonzero if there are more insns that should be scheduled.  */

static int
schedule_more_p ()
{
  return ! last_was_jump && sched_target_n_insns < target_n_insns;
}
/* Add all insns that are initially ready to the ready list READY.  Called
   once before scheduling a set of insns.  */

static void
init_ready_list (ready)
     struct ready_list *ready;
{
  rtx prev_head = current_sched_info->prev_head;
  rtx next_tail = current_sched_info->next_tail;
  int bb_src;
  rtx insn;

  target_n_insns = 0;
  sched_target_n_insns = 0;
  sched_n_insns = 0;
  last_was_jump = 0;

  /* Print debugging information.  */
  if (sched_verbose >= 5)
    debug_dependencies ();

  /* Prepare current target block info.  */
  if (current_nr_blocks > 1)
    {
      candidate_table = (candidate *) xmalloc (current_nr_blocks
                                               * sizeof (candidate));

      /* bblst_table holds split blocks and update blocks for each block after
         the current one in the region.  split blocks and update blocks are
         the TO blocks of region edges, so there can be at most rgn_nr_edges
         of them.  */
      bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
      bblst_table = (int *) xmalloc (bblst_size * sizeof (int));

      bitlst_table_last = 0;
      bitlst_table_size = rgn_nr_edges;
      bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));

      compute_trg_info (target_bb);
    }

  /* Initialize ready list with all 'ready' insns in target block.
     Count number of insns in the target block being scheduled.  */
  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
    {
      rtx next;

      if (! INSN_P (insn))
        continue;
      next = NEXT_INSN (insn);

      if (INSN_DEP_COUNT (insn) == 0
          && (SCHED_GROUP_P (next) == 0 || ! INSN_P (next)))
        ready_add (ready, insn);
      if (!(SCHED_GROUP_P (insn)))
        target_n_insns++;
    }

  /* Add to ready list all 'ready' insns in valid source blocks.
     For speculative insns, check-live, exception-free, and
     issue-delay.  */
  for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
    if (IS_VALID (bb_src))
      {
        rtx head, tail;
        rtx src_head, src_next_tail;

        get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
        src_next_tail = NEXT_INSN (tail);
        src_head = head;

        for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
          {
            if (! INSN_P (insn))
              continue;

            if (!CANT_MOVE (insn)
                && (!IS_SPECULATIVE_INSN (insn)
                    || (insn_issue_delay (insn) <= 3
                        && check_live (insn, bb_src)
                        && is_exception_free (insn, bb_src, target_bb))))
              {
                rtx next;

                /* Note that we haven't squirreled away the notes for
                   blocks other than the current.  So if this is a
                   speculative insn, NEXT might otherwise be a note.  */
                next = next_nonnote_insn (insn);
                if (INSN_DEP_COUNT (insn) == 0
                    && (! next
                        || SCHED_GROUP_P (next) == 0
                        || ! INSN_P (next)))
                  ready_add (ready, insn);
              }
          }
      }
}
/* Called after taking INSN from the ready list.  Returns nonzero if this
   insn can be scheduled, zero if we should silently discard it.  */

static int
can_schedule_ready_p (insn)
     rtx insn;
{
  if (GET_CODE (insn) == JUMP_INSN)
    last_was_jump = 1;

  /* An interblock motion?  */
  if (INSN_BB (insn) != target_bb)
    {
      rtx temp;
      basic_block b1;

      if (IS_SPECULATIVE_INSN (insn))
        {
          if (!check_live (insn, INSN_BB (insn)))
            return 0;
          update_live (insn, INSN_BB (insn));

          /* For speculative load, mark insns fed by it.  */
          if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
            set_spec_fed (insn);

          nr_spec++;
        }
      nr_inter++;

      /* Find the beginning of the scheduling group.  */
      /* ??? Ought to update basic block here, but later bits of
         schedule_block assume the original insn block is
         still intact.  */

      temp = insn;
      while (SCHED_GROUP_P (temp))
        temp = PREV_INSN (temp);

      /* Update source block boundaries.  */
      b1 = BLOCK_FOR_INSN (temp);
      if (temp == b1->head && insn == b1->end)
        {
          /* We moved all the insns in the basic block.
             Emit a note after the last insn and update the
             begin/end boundaries to point to the note.  */
          rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
          b1->head = note;
          b1->end = note;
        }
      else if (insn == b1->end)
        {
          /* We took insns from the end of the basic block,
             so update the end of block boundary so that it
             points to the first insn we did not move.  */
          b1->end = PREV_INSN (temp);
        }
      else if (temp == b1->head)
        {
          /* We took insns from the start of the basic block,
             so update the start of block boundary so that
             it points to the first insn we did not move.  */
          b1->head = NEXT_INSN (insn);
        }
    }
  else
    {
      /* In block motion.  */
      sched_target_n_insns++;
    }
  sched_n_insns++;

  return 1;
}
/* Called after INSN has all its dependencies resolved.  Return nonzero
   if it should be moved to the ready list or the queue, or zero if we
   should silently discard it.  */

static int
new_ready (next)
     rtx next;
{
  /* For speculative insns, before inserting to ready/queue,
     check live, exception-free, and issue-delay.  */
  if (INSN_BB (next) != target_bb
      && (!IS_VALID (INSN_BB (next))
          || CANT_MOVE (next)
          || (IS_SPECULATIVE_INSN (next)
              && (insn_issue_delay (next) > 3
                  || !check_live (next, INSN_BB (next))
                  || !is_exception_free (next, INSN_BB (next), target_bb)))))
    return 0;

  return 1;
}
/* Return a string that contains the insn uid and optionally anything else
   necessary to identify this insn in an output.  It's valid to use a
   static buffer for this.  The ALIGNED parameter should cause the string
   to be formatted so that multiple output lines will line up nicely.  */

static const char *
rgn_print_insn (insn, aligned)
     rtx insn;
     int aligned;
{
  static char tmp[80];

  if (aligned)
    sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
  else
    {
      if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
        sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
      else
        sprintf (tmp, "%d", INSN_UID (insn));
    }
  return tmp;
}
2273 /* Compare priority of two insns. Return a positive number if the second
2274 insn is to be preferred for scheduling, and a negative one if the first
2275 is to be preferred. Zero if they are equally good. */
2278 rgn_rank (insn1, insn2)
2281 /* Some comparisons make sense in interblock scheduling only. */
2282 if (INSN_BB (insn1) != INSN_BB (insn2))
2284 int spec_val, prob_val;
2286 /* Prefer an inblock motion over an interblock motion. */
2287 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2289 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2292 /* Prefer a useful motion over a speculative one. */
2293 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2297 /* Prefer a more probable (speculative) insn. */
2298 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
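/* A compiled-out sketch of the three-level preference implemented
   above, over a hypothetical simplified insn record.  As in rgn_rank,
   a positive result prefers B, a negative one prefers A, and zero
   means no preference; the example_* names are not this file's own.  */
#if 0
struct example_candidate
{
  int in_target_bb;		/* Nonzero for an inblock motion.  */
  int speculative;		/* Nonzero for a speculative motion.  */
  int probability;		/* Estimated execution probability.  */
};

static int
example_rank (const struct example_candidate *a,
	      const struct example_candidate *b)
{
  int spec_val;

  /* 1: prefer an inblock motion over an interblock one.  */
  if (b->in_target_bb && !a->in_target_bb)
    return 1;
  if (a->in_target_bb && !b->in_target_bb)
    return -1;

  /* 2: prefer a useful motion over a speculative one.  */
  spec_val = a->speculative - b->speculative;
  if (spec_val)
    return spec_val;

  /* 3: prefer the more probable speculative insn.  */
  return b->probability - a->probability;
}
#endif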
2305 /* Used in schedule_insns to initialize current_sched_info for scheduling
2306 regions (or single basic blocks). */
2308 static struct sched_info region_sched_info =
2311 can_schedule_ready_p,
2322 /* Add dependences so that branches are scheduled to run last in their
2326 add_branch_dependences (head, tail)
2331 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
2332 to remain in order at the end of the block by adding dependencies and
2333 giving the last a high priority. There may be notes present, and
2334 prev_head may also be a note.
2336 Branches must obviously remain at the end. Calls should remain at the
2337 end since moving them results in worse register allocation. Uses remain
2338 at the end to ensure proper register allocation. cc0 setters remain
2339 at the end because they can't be moved away from their cc0 user. */
2342 while (GET_CODE (insn) == CALL_INSN
2343 || GET_CODE (insn) == JUMP_INSN
2344 || (GET_CODE (insn) == INSN
2345 && (GET_CODE (PATTERN (insn)) == USE
2346 || GET_CODE (PATTERN (insn)) == CLOBBER
2348 || sets_cc0_p (PATTERN (insn))
2351 || GET_CODE (insn) == NOTE)
2353 if (GET_CODE (insn) != NOTE)
2356 && !find_insn_list (insn, LOG_LINKS (last)))
2358 add_dependence (last, insn, REG_DEP_ANTI);
2359 INSN_REF_COUNT (insn)++;
2362 CANT_MOVE (insn) = 1;
2365 /* Skip over insns that are part of a group.
2366 Make each insn explicitly depend on the previous insn.
2367 This ensures that only the group header will ever enter
2368 the ready queue (and, when scheduled, will automatically
2369 schedule the SCHED_GROUP_P block). */
2370 while (SCHED_GROUP_P (insn))
2372 rtx temp = prev_nonnote_insn (insn);
2373 add_dependence (insn, temp, REG_DEP_ANTI);
2378 /* Don't overrun the bounds of the basic block. */
2382 insn = PREV_INSN (insn);
2385 /* Make sure these insns are scheduled last in their block. */
2388 while (insn != head)
2390 insn = prev_nonnote_insn (insn);
2392 if (INSN_REF_COUNT (insn) != 0)
2395 add_dependence (last, insn, REG_DEP_ANTI);
2396 INSN_REF_COUNT (insn) = 1;
2398 /* Skip over insns that are part of a group. */
2399 while (SCHED_GROUP_P (insn))
2400 insn = prev_nonnote_insn (insn);
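/* A compiled-out sketch of the pinning performed above: walking back
   from the block tail, each insn of the trailing branch/call/USE
   group is forced to stay after its predecessor in the group and
   inside the block.  The flags stand in for real anti-dependences and
   CANT_MOVE; the example_* names are hypothetical.  */
#if 0
struct example_tail_insn
{
  int pinned;			/* Branch, call, USE, CLOBBER, ...  */
  int dep_on_next;		/* Stands in for an anti-dependence.  */
  int cant_move;
};

static void
example_pin_tail (struct example_tail_insn *insn, int n)
{
  int i;

  for (i = n - 1; i >= 0 && insn[i].pinned; i--)
    {
      if (i < n - 1)
	insn[i].dep_on_next = 1;	/* Keep it before its successor.  */
      insn[i].cant_move = 1;		/* And inside this block.  */
    }
}
#endif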
2404 /* Data structures for the computation of data dependences in a region. We
2405 keep one `deps' structure for every basic block. Before analyzing the
2406 data dependences for a bb, its variables are initialized as a function of
2407 the variables of its predecessors. When the analysis for a bb completes,
2408 we save the contents to the corresponding bb_deps[bb] variable. */
2410 static struct deps *bb_deps;
2412 /* After computing the dependencies for block BB, propagate the dependencies
2413 found in TMP_DEPS to the successors of the block. MAX_REG is the number
2416 propagate_deps (bb, tmp_deps, max_reg)
2418 struct deps *tmp_deps;
2421 int b = BB_TO_BLOCK (bb);
2424 rtx link_insn, link_mem;
2427 /* These lists should point to the right place, for correct
2429 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
2430 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
2431 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
2432 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
2434 /* bb's structures are inherited by its successors. */
2435 first_edge = e = OUT_EDGES (b);
2442 int b_succ = TO_BLOCK (e);
2443 int bb_succ = BLOCK_TO_BB (b_succ);
2444 struct deps *succ_deps = bb_deps + bb_succ;
2446 /* Only bbs "below" bb, in the same region, are interesting. */
2447 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2454 for (reg = 0; reg < max_reg; reg++)
2456 /* reg-last-uses lists are inherited by bb_succ. */
2457 for (u = tmp_deps->reg_last_uses[reg]; u; u = XEXP (u, 1))
2459 if (find_insn_list (XEXP (u, 0),
2460 succ_deps->reg_last_uses[reg]))
2463 succ_deps->reg_last_uses[reg]
2464 = alloc_INSN_LIST (XEXP (u, 0),
2465 succ_deps->reg_last_uses[reg]);
2468 /* reg-last-defs lists are inherited by bb_succ. */
2469 for (u = tmp_deps->reg_last_sets[reg]; u; u = XEXP (u, 1))
2471 if (find_insn_list (XEXP (u, 0),
2472 succ_deps->reg_last_sets[reg]))
2475 succ_deps->reg_last_sets[reg]
2476 = alloc_INSN_LIST (XEXP (u, 0),
2477 succ_deps->reg_last_sets[reg]);
2480 for (u = tmp_deps->reg_last_clobbers[reg]; u; u = XEXP (u, 1))
2482 if (find_insn_list (XEXP (u, 0),
2483 succ_deps->reg_last_clobbers[reg]))
2486 succ_deps->reg_last_clobbers[reg]
2487 = alloc_INSN_LIST (XEXP (u, 0),
2488 succ_deps->reg_last_clobbers[reg]);
2492 /* Mem read/write lists are inherited by bb_succ. */
2493 link_insn = tmp_deps->pending_read_insns;
2494 link_mem = tmp_deps->pending_read_mems;
2497 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2499 succ_deps->pending_read_insns,
2500 succ_deps->pending_read_mems)))
2501 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
2502 &succ_deps->pending_read_mems,
2503 XEXP (link_insn, 0), XEXP (link_mem, 0));
2504 link_insn = XEXP (link_insn, 1);
2505 link_mem = XEXP (link_mem, 1);
2508 link_insn = tmp_deps->pending_write_insns;
2509 link_mem = tmp_deps->pending_write_mems;
2512 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2514 succ_deps->pending_write_insns,
2515 succ_deps->pending_write_mems)))
2516 add_insn_mem_dependence (succ_deps,
2517 &succ_deps->pending_write_insns,
2518 &succ_deps->pending_write_mems,
2519 XEXP (link_insn, 0), XEXP (link_mem, 0));
2521 link_insn = XEXP (link_insn, 1);
2522 link_mem = XEXP (link_mem, 1);
2525 /* last_function_call is inherited by bb_succ. */
2526 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
2528 if (find_insn_list (XEXP (u, 0),
2529 succ_deps->last_function_call))
2532 succ_deps->last_function_call
2533 = alloc_INSN_LIST (XEXP (u, 0),
2534 succ_deps->last_function_call);
2537 /* last_pending_memory_flush is inherited by bb_succ. */
2538 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
2540 if (find_insn_list (XEXP (u, 0),
2541 succ_deps->last_pending_memory_flush))
2544 succ_deps->last_pending_memory_flush
2545 = alloc_INSN_LIST (XEXP (u, 0),
2546 succ_deps->last_pending_memory_flush);
2549 /* sched_before_next_call is inherited by bb_succ. */
2550 x = LOG_LINKS (tmp_deps->sched_before_next_call);
2551 for (; x; x = XEXP (x, 1))
2552 add_dependence (succ_deps->sched_before_next_call,
2553 XEXP (x, 0), REG_DEP_ANTI);
2557 while (e != first_edge);
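/* Every inheritance step above follows one union-with-dedup pattern:
   walk the predecessor's list and cons each element onto the
   successor's list unless it is already there.  A compiled-out,
   self-contained sketch over a hypothetical singly-linked list; the
   example_* names stand in for find_insn_list/alloc_INSN_LIST.  */
#if 0
#include <stdlib.h>

struct example_list { void *insn; struct example_list *next; };

static int
example_member (void *insn, struct example_list *l)
{
  for (; l; l = l->next)
    if (l->insn == insn)
      return 1;
  return 0;
}

/* DST = Union (DST, SRC), without duplicates; returns the new DST.  */
static struct example_list *
example_union (struct example_list *src, struct example_list *dst)
{
  for (; src; src = src->next)
    if (!example_member (src->insn, dst))
      {
	struct example_list *n = malloc (sizeof *n);

	n->insn = src->insn;
	n->next = dst;
	dst = n;
      }
  return dst;
}
#endif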
2560 /* Compute backward dependences inside bb. In a multiple blocks region:
2561 (1) a bb is analyzed after its predecessors, and (2) the lists in
2562 effect at the end of bb (after analyzing for bb) are inherited by
2565 Specifically for reg-reg data dependences, the block insns are
2566 scanned by sched_analyze () top-to-bottom. Two lists are
2567 maintained by sched_analyze (): reg_last_sets[] for register DEFs,
2568 and reg_last_uses[] for register USEs.
2570 When analysis is completed for bb, we update for its successors:
2571 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2572 ; - USES[succ] = Union (USES [succ], USES [bb])
2574 The mechanism for computing mem-mem data dependence is very
2575 similar, and the result is interblock dependences in the region. */
2578 compute_block_backward_dependences (bb)
2582 int max_reg = max_reg_num ();
2583 struct deps tmp_deps;
2585 tmp_deps = bb_deps[bb];
2587 /* Do the analysis for this block. */
2588 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2589 sched_analyze (&tmp_deps, head, tail);
2590 add_branch_dependences (head, tail);
2592 if (current_nr_blocks > 1)
2593 propagate_deps (bb, &tmp_deps, max_reg);
2595 /* Free up the INSN_LISTs. */
2596 free_deps (&tmp_deps);
2598 /* Assert that we won't need bb_reg_last_* for this block anymore.
2599 The vectors we're zeroing out have just been freed by the call to
2601 bb_deps[bb].reg_last_uses = 0;
2602 bb_deps[bb].reg_last_sets = 0;
2603 bb_deps[bb].reg_last_clobbers = 0;
2605 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2606 them to the unused_*_list variables, so that they can be reused. */
2609 free_pending_lists ()
2613 for (bb = 0; bb < current_nr_blocks; bb++)
2615 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2616 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2617 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2618 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2622 /* Print dependences for debugging, callable from debugger. */
2625 debug_dependencies ()
2629 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2630 for (bb = 0; bb < current_nr_blocks; bb++)
2638 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2639 next_tail = NEXT_INSN (tail);
2640 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2641 BB_TO_BLOCK (bb), bb);
2643 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2644 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2645 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2646 "----", "----", "--", "---", "----", "----", "--------", "-----");
2647 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2652 if (! INSN_P (insn))
2655 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2656 if (GET_CODE (insn) == NOTE)
2658 n = NOTE_LINE_NUMBER (insn);
2660 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2662 fprintf (sched_dump, "line %d, file %s\n", n,
2663 NOTE_SOURCE_FILE (insn));
2666 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2670 unit = insn_unit (insn);
2672 || function_units[unit].blockage_range_function == 0) ? 0 :
2673 function_units[unit].blockage_range_function (insn);
2674 fprintf (sched_dump,
2675 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2676 (SCHED_GROUP_P (insn) ? "+" : " "),
2680 INSN_DEP_COUNT (insn),
2681 INSN_PRIORITY (insn),
2682 insn_cost (insn, 0, 0),
2683 (int) MIN_BLOCKAGE_COST (range),
2684 (int) MAX_BLOCKAGE_COST (range));
2685 insn_print_units (insn);
2686 fprintf (sched_dump, "\t: ");
2687 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2688 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2689 fprintf (sched_dump, "\n");
2693 fprintf (sched_dump, "\n");
2696 /* Schedule a region. A region is either an inner loop, a loop-free
2697 subroutine, or a single basic block. Each bb in the region is
2698 scheduled after its flow predecessors. */
2701 schedule_region (rgn)
2705 int rgn_n_insns = 0;
2706 int sched_rgn_n_insns = 0;
2708 /* Set variables for the current region. */
2709 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2710 current_blocks = RGN_BLOCKS (rgn);
2712 init_deps_global ();
2714 /* Initializations for region data dependence analysis. */
2715 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2716 for (bb = 0; bb < current_nr_blocks; bb++)
2717 init_deps (bb_deps + bb);
2719 /* Compute LOG_LINKS. */
2720 for (bb = 0; bb < current_nr_blocks; bb++)
2721 compute_block_backward_dependences (bb);
2723 /* Compute INSN_DEPEND. */
2724 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2727 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2729 compute_forward_dependences (head, tail);
2732 /* Set priorities. */
2733 for (bb = 0; bb < current_nr_blocks; bb++)
2736 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2738 rgn_n_insns += set_priorities (head, tail);
2741 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2742 if (current_nr_blocks > 1)
2746 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2748 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
2749 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
2750 for (i = 0; i < current_nr_blocks; i++)
2751 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
2755 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2756 for (i = 1; i < nr_edges; i++)
2757 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2758 EDGE_TO_BIT (i) = rgn_nr_edges++;
2759 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2762 for (i = 1; i < nr_edges; i++)
2763 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2764 rgn_edges[rgn_nr_edges++] = i;
2767 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
2768 edgeset_bitsize = rgn_nr_edges;
2769 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2771 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2772 for (i = 0; i < current_nr_blocks; i++)
2775 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2777 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2780 /* Compute probabilities, dominators, split_edges. */
2781 for (bb = 0; bb < current_nr_blocks; bb++)
2782 compute_dom_prob_ps (bb);
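/* A worked example of the set sizing used above, assuming (purely for
   illustration) that HOST_BITS_PER_WIDE_INT is 64: a region of 10
   blocks gets 10/64 + 1 == 1 word per dominator bbset, and a region
   with 100 edges gets 100/64 + 1 == 2 words per split-edge edgeset.
   The "+ 1" rounds up, over-allocating one word when the count is an
   exact multiple of the word size, which is harmless.  */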
2785 /* Now we can schedule all blocks. */
2786 for (bb = 0; bb < current_nr_blocks; bb++)
2789 int b = BB_TO_BLOCK (bb);
2791 get_block_head_tail (b, &head, &tail);
2793 if (no_real_insns_p (head, tail))
2796 current_sched_info->prev_head = PREV_INSN (head);
2797 current_sched_info->next_tail = NEXT_INSN (tail);
2799 if (write_symbols != NO_DEBUG)
2801 save_line_notes (b, head, tail);
2802 rm_line_notes (head, tail);
2805 /* rm_other_notes only removes notes which are _inside_ the
2806 block---that is, it won't remove notes before the first real insn
2807 or after the last real insn of the block. So if the first insn
2808 has a REG_SAVE_NOTE which would otherwise be emitted before the
2809 insn, it is redundant with the note before the start of the
2810 block, and so we have to take it out.
2812 FIXME: Probably the same thing should be done with REG_SAVE_NOTEs
2813 referencing NOTE_INSN_SETJMP at the end of the block. */
2818 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2819 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2821 if (INTVAL (XEXP (note, 0)) != NOTE_INSN_SETJMP)
2823 remove_note (head, note);
2824 note = XEXP (note, 1);
2825 remove_note (head, note);
2828 note = XEXP (note, 1);
2832 /* Remove remaining note insns from the block, save them in
2833 note_list. These notes are restored at the end of
2834 schedule_block (). */
2835 rm_other_notes (head, tail);
2839 current_sched_info->queue_must_finish_empty
2840 = current_nr_blocks > 1 && !flag_schedule_interblock;
2842 schedule_block (b, rgn_n_insns);
2843 sched_rgn_n_insns += sched_n_insns;
2845 /* Update target block boundaries. */
2846 if (head == BLOCK_HEAD (b))
2847 BLOCK_HEAD (b) = current_sched_info->head;
2848 if (tail == BLOCK_END (b))
2849 BLOCK_END (b) = current_sched_info->tail;
2852 if (current_nr_blocks > 1)
2854 free (candidate_table);
2856 free (bitlst_table);
2860 /* Sanity check: verify that all region insns were scheduled. */
2861 if (sched_rgn_n_insns != rgn_n_insns)
2864 /* Restore line notes. */
2865 if (write_symbols != NO_DEBUG)
2867 for (bb = 0; bb < current_nr_blocks; bb++)
2870 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2871 restore_line_notes (BB_TO_BLOCK (bb), head, tail);
2875 /* Done with this region. */
2876 free_pending_lists ();
2878 finish_deps_global ();
2882 if (current_nr_blocks > 1)
2887 for (i = 0; i < current_nr_blocks; ++i)
2890 free (pot_split[i]);
2891 free (ancestor_edges[i]);
2897 free (ancestor_edges);
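/* A compiled-out sketch of the per-region driver order implemented
   above, with the real calls replaced by hypothetical example_*
   stubs: backward dependences are computed in topological
   (predecessor-first) order so each bb inherits its predecessors'
   lists, forward dependences in the reverse order, and only then are
   the blocks scheduled one by one.  */
#if 0
static void example_backward_deps (int bb);	/* LOG_LINKS.  */
static void example_forward_deps (int bb);	/* INSN_DEPEND.  */
static void example_set_priorities (int bb);
static void example_schedule_block (int bb);

static void
example_schedule_region (int nr_bbs)
{
  int bb;

  for (bb = 0; bb < nr_bbs; bb++)
    example_backward_deps (bb);
  for (bb = nr_bbs - 1; bb >= 0; bb--)
    example_forward_deps (bb);
  for (bb = 0; bb < nr_bbs; bb++)
    example_set_priorities (bb);
  for (bb = 0; bb < nr_bbs; bb++)
    example_schedule_block (bb);
}
#endif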
2901 /* Indexed by region, holds the number of death notes found in that region.
2902 Used for consistency checks. */
2903 static int *deaths_in_region;
2905 /* Initialize data structures for region scheduling. */
2914 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2915 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2916 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2917 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2919 blocks = sbitmap_alloc (n_basic_blocks);
2921 /* Compute regions for scheduling. */
2922 if (reload_completed
2923 || n_basic_blocks == 1
2924 || !flag_schedule_interblock)
2926 find_single_block_region ();
2930 /* Verify that a 'good' control flow graph can be built. */
2931 if (is_cfg_nonregular ())
2933 find_single_block_region ();
2938 struct edge_list *edge_list;
2940 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
2942 /* The scheduler runs after flow; therefore, we can't blindly call
2943 back into find_basic_blocks since doing so could invalidate the
2944 info in global_live_at_start.
2946 Consider a block consisting entirely of dead stores; after life
2947 analysis it would be a block of NOTE_INSN_DELETED notes. If
2948 we call find_basic_blocks again, then the block would be removed
2949 entirely and invalidate our register live information.
2951 We could (should?) recompute register live information. Doing
2952 so may even be beneficial. */
2953 edge_list = create_edge_list ();
2955 /* Compute the dominators and post dominators. */
2956 calculate_dominance_info (NULL, dom, CDI_DOMINATORS);
2958 /* build_control_flow will return nonzero if it detects unreachable
2959 blocks or any other irregularity with the cfg which prevents
2960 cross block scheduling. */
2961 if (build_control_flow (edge_list) != 0)
2962 find_single_block_region ();
2964 find_rgns (edge_list, dom);
2966 if (sched_verbose >= 3)
2969 /* We are done with flow's edge list. */
2970 free_edge_list (edge_list);
2972 /* For now. This will move as more and more of haifa is converted
2973 to using the cfg code in flow.c. */
2978 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2980 /* Remove all death notes from the subroutine. */
2981 for (rgn = 0; rgn < nr_regions; rgn++)
2985 sbitmap_zero (blocks);
2986 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2987 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2989 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2992 sbitmap_free (blocks);
2995 /* The one entry point in this file. DUMP_FILE is the dump file for
2999 schedule_insns (dump_file)
3002 sbitmap large_region_blocks, blocks;
3004 int any_large_regions;
3006 /* Taking care of this degenerate case makes the rest of
3007 this code simpler. */
3008 if (n_basic_blocks == 0)
3014 sched_init (dump_file);
3018 current_sched_info = &region_sched_info;
3020 /* Schedule every region in the subroutine. */
3021 for (rgn = 0; rgn < nr_regions; rgn++)
3022 schedule_region (rgn);
3024 /* Update life analysis for the subroutine. Do single block regions
3025 first so that we can verify that live_at_start didn't change. Then
3026 do all other blocks. */
3027 /* ??? There is an outside possibility that update_life_info, or more
3028 to the point propagate_block, could get called with non-zero flags
3029 more than once for one basic block. This would be kinda bad if it
3030 were to happen, since REG_INFO would be accumulated twice for the
3031 block, and we'd have twice the REG_DEAD notes.
3033 I'm fairly certain that this _shouldn't_ happen, since I don't think
3034 that live_at_start should change at region heads. Not sure what the
3035 best way to test for this kind of thing would be. */
3037 allocate_reg_life_data ();
3038 compute_bb_for_insn (get_max_uid ());
3040 any_large_regions = 0;
3041 large_region_blocks = sbitmap_alloc (n_basic_blocks);
3042 sbitmap_ones (large_region_blocks);
3044 blocks = sbitmap_alloc (n_basic_blocks);
3046 for (rgn = 0; rgn < nr_regions; rgn++)
3047 if (RGN_NR_BLOCKS (rgn) > 1)
3048 any_large_regions = 1;
3051 sbitmap_zero (blocks);
3052 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3053 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3055 /* Don't update reg info after reload, since that affects
3056 regs_ever_live, which should not change after reload. */
3057 update_life_info (blocks, UPDATE_LIFE_LOCAL,
3058 (reload_completed ? PROP_DEATH_NOTES
3059 : PROP_DEATH_NOTES | PROP_REG_INFO));
3061 #ifndef HAVE_conditional_execution
3062 /* ??? REG_DEAD notes only exist for unconditional deaths. We need
3063 a count of the conditional plus unconditional deaths for this to
3065 /* In the single block case, the count of registers that died should
3066 not have changed during the schedule. */
3067 if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
3072 if (any_large_regions)
3074 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
3075 PROP_DEATH_NOTES | PROP_REG_INFO);
3078 /* Reposition the prologue and epilogue notes in case we moved the
3079 prologue/epilogue insns. */
3080 if (reload_completed)
3081 reposition_prologue_and_epilogue_notes (get_insns ());
3083 /* Delete redundant line notes. */
3084 if (write_symbols != NO_DEBUG)
3085 rm_redundant_line_notes ();
3089 if (reload_completed == 0 && flag_schedule_interblock)
3091 fprintf (sched_dump,
3092 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3100 fprintf (sched_dump, "\n\n");
3105 free (rgn_bb_table);
3107 free (containing_rgn);
3128 sbitmap_free (blocks);
3129 sbitmap_free (large_region_blocks);
3131 free (deaths_in_region);
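/* A compiled-out sketch of the consistency check above for
   single-block regions: the number of death notes per region is
   recorded before scheduling and verified to be unchanged afterward.
   The counting hook is a hypothetical stand-in for
   count_or_remove_death_notes.  */
#if 0
#include <assert.h>

static int example_count_death_notes (int rgn);	/* Hypothetical.  */

static void
example_check_deaths (int nr_regions, const int *deaths_before)
{
  int rgn;

  for (rgn = 0; rgn < nr_regions; rgn++)
    assert (example_count_death_notes (rgn) == deaths_before[rgn]);
}
#endif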