1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 GNU CC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
58 #include "insn-config.h"
59 #include "insn-attr.h"
63 #include "sched-int.h"
65 #ifdef INSN_SCHEDULING
66 /* Some accessor macros for h_i_d members only used within this file. */
67 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
68 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
69 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
71 #define MAX_RGN_BLOCKS 10
72 #define MAX_RGN_INSNS 100
74 /* nr_inter/spec counts interblock/speculative motion for the function. */
75 static int nr_inter, nr_spec;
77 /* Control flow graph edges are kept in circular lists. */
86 static haifa_edge *edge_table;
88 #define NEXT_IN(edge) (edge_table[edge].next_in)
89 #define NEXT_OUT(edge) (edge_table[edge].next_out)
90 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
91 #define TO_BLOCK(edge) (edge_table[edge].to_block)
93 /* Number of edges in the control flow graph. (In fact, larger than
94 that by 1, since edge 0 is unused.) */
97 /* Circular list of incoming/outgoing edges of a block. */
99 static int *out_edges;
101 #define IN_EDGES(block) (in_edges[block])
102 #define OUT_EDGES(block) (out_edges[block])
104 static int is_cfg_nonregular PARAMS ((void));
105 static int build_control_flow PARAMS ((struct edge_list *));
106 static void new_edge PARAMS ((int, int));
108 /* A region is the main entity for interblock scheduling: insns
109 are allowed to move between blocks in the same region, along
110 control flow graph edges, in the 'up' direction. */
113 int rgn_nr_blocks; /* Number of blocks in region. */
114 int rgn_blocks; /* cblocks in the region (actually index in rgn_bb_table). */
118 /* Number of regions in the procedure. */
119 static int nr_regions;
121 /* Table of region descriptions. */
122 static region *rgn_table;
124 /* Array of lists of regions' blocks. */
125 static int *rgn_bb_table;
127 /* Topological order of blocks in the region (if b2 is reachable from
128 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
129 always referred to by either block or b, while its topological
130 order name (in the region) is referred to by bb. */
131 static int *block_to_bb;
133 /* The number of the region containing a block. */
134 static int *containing_rgn;
136 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
137 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
138 #define BLOCK_TO_BB(block) (block_to_bb[block])
139 #define CONTAINING_RGN(block) (containing_rgn[block])
141 void debug_regions PARAMS ((void));
142 static void find_single_block_region PARAMS ((void));
143 static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
144 static int too_large PARAMS ((int, int *, int *));
146 extern void debug_live PARAMS ((int, int));
148 /* Blocks of the current region being scheduled. */
149 static int current_nr_blocks;
150 static int current_blocks;
152 /* The mapping from bb to block. */
153 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
155 /* Bit vectors and bitset operations are needed for computations on
156 the control flow graph. */
158 typedef unsigned HOST_WIDE_INT *bitset;
161 int *first_member; /* Pointer to the list start in bitlst_table. */
162 int nr_members; /* The number of members of the bit list. */
166 static int bitlst_table_last;
167 static int bitlst_table_size;
168 static int *bitlst_table;
170 static char bitset_member PARAMS ((bitset, int, int));
171 static void extract_bitlst PARAMS ((bitset, int, int, bitlst *));
173 /* Target info declarations.
175 The block currently being scheduled is referred to as the "target" block,
176 while other blocks in the region from which insns can be moved to the
177 target are called "source" blocks. The candidate structure holds info
178 about such sources: are they valid? Speculative? Etc. */
179 typedef bitlst bblst;
190 static candidate *candidate_table;
192 /* A speculative motion requires checking live information on the path
193 from 'source' to 'target'. The split blocks are those to be checked.
194 After a speculative motion, live information should be modified in
197 Lists of split and update blocks for each candidate of the current
198 target are in array bblst_table. */
199 static int *bblst_table, bblst_size, bblst_last;
201 #define IS_VALID(src) ( candidate_table[src].is_valid )
202 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
203 #define SRC_PROB(src) ( candidate_table[src].src_prob )
205 /* The bb being currently scheduled. */
206 static int target_bb;
209 typedef bitlst edgelst;
211 /* Target info functions. */
212 static void split_edges PARAMS ((int, int, edgelst *));
213 static void compute_trg_info PARAMS ((int));
214 void debug_candidate PARAMS ((int));
215 void debug_candidates PARAMS ((int));
217 /* Bit-set of bbs, where bit 'i' stands for bb 'i'. */
218 typedef bitset bbset;
220 /* Number of words of the bbset. */
221 static int bbset_size;
223 /* Dominators array: dom[i] contains the bbset of dominators of
224 bb i in the region. */
227 /* bb 0 is the only region entry. */
228 #define IS_RGN_ENTRY(bb) (!bb)
230 /* Is bb_src dominated by bb_trg. */
231 #define IS_DOMINATED(bb_src, bb_trg) \
232 ( bitset_member (dom[bb_src], bb_trg, bbset_size) )
234 /* Probability: Prob[i] is a float in [0, 1] which is the probability
235 of bb i relative to the region entry. */
238 /* The probability of bb_src, relative to bb_trg. Note, that while the
239 'prob[bb]' is a float in [0, 1], this macro returns an integer
241 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
244 /* Bit-set of edges, where bit i stands for edge i. */
245 typedef bitset edgeset;
247 /* Number of edges in the region. */
248 static int rgn_nr_edges;
250 /* Array of size rgn_nr_edges. */
251 static int *rgn_edges;
253 /* Number of words in an edgeset. */
254 static int edgeset_size;
256 /* Number of bits in an edgeset. */
257 static int edgeset_bitsize;
259 /* Mapping from each edge in the graph to its number in the rgn. */
260 static int *edge_to_bit;
261 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
263 /* The split edges of a source bb is different for each target
264 bb. In order to compute this efficiently, the 'potential-split edges'
265 are computed for each bb prior to scheduling a region. This is actually
266 the split edges of each bb relative to the region entry.
268 pot_split[bb] is the set of potential split edges of bb. */
269 static edgeset *pot_split;
271 /* For every bb, a set of its ancestor edges. */
272 static edgeset *ancestor_edges;
274 static void compute_dom_prob_ps PARAMS ((int));
276 #define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
277 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
278 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
279 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
281 /* Parameters affecting the decision of rank_for_schedule().
282 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
283 #define MIN_DIFF_PRIORITY 2
284 #define MIN_PROBABILITY 40
285 #define MIN_PROB_DIFF 10
287 /* Speculative scheduling functions. */
288 static int check_live_1 PARAMS ((int, rtx));
289 static void update_live_1 PARAMS ((int, rtx));
290 static int check_live PARAMS ((rtx, int));
291 static void update_live PARAMS ((rtx, int));
292 static void set_spec_fed PARAMS ((rtx));
293 static int is_pfree PARAMS ((rtx, int, int));
294 static int find_conditional_protection PARAMS ((rtx, int));
295 static int is_conditionally_protected PARAMS ((rtx, int, int));
296 static int may_trap_exp PARAMS ((rtx, int));
297 static int haifa_classify_insn PARAMS ((rtx));
298 static int is_prisky PARAMS ((rtx, int, int));
299 static int is_exception_free PARAMS ((rtx, int, int));
301 static void add_branch_dependences PARAMS ((rtx, rtx));
302 static void compute_block_backward_dependences PARAMS ((int));
303 void debug_dependencies PARAMS ((void));
305 static void init_regions PARAMS ((void));
306 static void schedule_region PARAMS ((int));
307 static void propagate_deps PARAMS ((int, struct deps *));
308 static void free_pending_lists PARAMS ((void));
310 /* Functions for construction of the control flow graph. */
312 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
314 We decide not to build the control flow graph if there is possibly more
315 than one entry to the function, if computed branches exist, or if we
316 have nonlocal gotos. */
/* NOTE(review): this extraction is missing lines (the function header,
   local declarations, and the `return 1;` statements after each test)
   — compare against the upstream sched-rgn.c before relying on it.  */
325 /* If we have a label that could be the target of a nonlocal goto, then
326 the cfg is not well structured. */
327 if (nonlocal_goto_handler_labels)
330 /* If we have any forced labels, then the cfg is not well structured. */
334 /* If this function has a computed jump, then we consider the cfg
335 not well structured. */
336 if (current_function_has_computed_jump)
339 /* If we have exception handlers, then we consider the cfg not well
340 structured. ?!? We should be able to handle this now that flow.c
341 computes an accurate cfg for EH. */
342 if (exception_handler_labels)
345 /* If we have non-jumping insns which refer to labels, then we consider
346 the cfg not well structured. */
347 /* Check for labels referred to other than by jumps. */
348 for (b = 0; b < n_basic_blocks; b++)
349 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
351 code = GET_CODE (insn);
/* A non-jump insn with a REG_LABEL note means a label is referenced
   other than as a jump target, so the cfg is considered irregular.  */
352 if (GET_RTX_CLASS (code) == 'i' && code != JUMP_INSN)
354 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
357 && ! (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
358 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
/* Stop the inner walk at the last insn of block B.  */
363 if (insn == BLOCK_END (b))
367 /* All the tests passed. Consider the cfg well structured. */
371 /* Build the control flow graph and set nr_edges.
373 Instead of trying to build a cfg ourselves, we rely on flow to
374 do it for us. Stamp out useless code (and bug) duplication.
376 Return nonzero if an irregularity in the cfg is found which would
377 prevent cross block scheduling. */
/* NOTE(review): lines are missing from this extraction (the `static int`
   return type, braces, the `unreachable` accumulation, and the final
   `return unreachable;`) — verify against upstream sched-rgn.c.  */
380 build_control_flow (edge_list)
381 struct edge_list *edge_list;
383 int i, unreachable, num_edges;
385 /* This already accounts for entry/exit edges. */
386 num_edges = NUM_EDGES (edge_list);
388 /* Unreachable loops with more than one basic block are detected
389 during the DFS traversal in find_rgns.
391 Unreachable loops with a single block are detected here. This
392 test is redundant with the one in find_rgns, but it's much
393 cheaper to go ahead and catch the trivial case here. */
395 for (i = 0; i < n_basic_blocks; i++)
397 basic_block b = BASIC_BLOCK (i);
/* A block whose only predecessor is itself (a self-loop) is
   unreachable from the entry block.  */
400 || (b->pred->src == b
401 && b->pred->pred_next == NULL))
405 /* ??? We can kill these soon. */
406 in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
407 out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
408 edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
/* Mirror flow's edge list into the local haifa edge table, skipping
   edges touching the artificial entry/exit blocks.  */
411 for (i = 0; i < num_edges; i++)
413 edge e = INDEX_EDGE (edge_list, i);
415 if (e->dest != EXIT_BLOCK_PTR
416 && e->src != ENTRY_BLOCK_PTR)
417 new_edge (e->src->index, e->dest->index);
420 /* Increment by 1, since edge 0 is unused. */
426 /* Record an edge in the control flow graph from SOURCE to TARGET.
428 In theory, this is redundant with the s_succs computed above, but
429 we have not converted all of haifa to use information from the
/* NOTE(review): missing lines in this extraction (return type, parameter
   declarations, braces, allocation of edge index `e`, and early-return
   on a duplicate) — verify against upstream sched-rgn.c.  */
433 new_edge (source, target)
437 int curr_edge, fst_edge;
439 /* Check for duplicates. */
440 fst_edge = curr_edge = OUT_EDGES (source);
443 if (FROM_BLOCK (curr_edge) == source
444 && TO_BLOCK (curr_edge) == target)
449 curr_edge = NEXT_OUT (curr_edge);
/* The out-edge list is circular; arriving back at the first edge
   means the whole list has been scanned.  */
451 if (fst_edge == curr_edge)
457 FROM_BLOCK (e) = source;
458 TO_BLOCK (e) = target;
/* Splice the new edge into SOURCE's circular out-edge list, or start
   a new singleton list if SOURCE had none.  */
460 if (OUT_EDGES (source))
462 next_edge = NEXT_OUT (OUT_EDGES (source));
463 NEXT_OUT (OUT_EDGES (source)) = e;
464 NEXT_OUT (e) = next_edge;
468 OUT_EDGES (source) = e;
/* Likewise for TARGET's circular in-edge list.  */
472 if (IN_EDGES (target))
474 next_edge = NEXT_IN (IN_EDGES (target));
475 NEXT_IN (IN_EDGES (target)) = e;
476 NEXT_IN (e) = next_edge;
480 IN_EDGES (target) = e;
485 /* BITSET macros for operations on the control flow graph. */
487 /* Compute bitwise union of two bitsets. */
488 #define BITSET_UNION(set1, set2, len) \
489 do { register bitset tp = set1, sp = set2; \
491 for (i = 0; i < len; i++) \
492 *(tp++) |= *(sp++); } while (0)
494 /* Compute bitwise intersection of two bitsets. */
495 #define BITSET_INTER(set1, set2, len) \
496 do { register bitset tp = set1, sp = set2; \
498 for (i = 0; i < len; i++) \
499 *(tp++) &= *(sp++); } while (0)
501 /* Compute bitwise difference of two bitsets. */
502 #define BITSET_DIFFER(set1, set2, len) \
503 do { register bitset tp = set1, sp = set2; \
505 for (i = 0; i < len; i++) \
506 *(tp++) &= ~*(sp++); } while (0)
508 /* Inverts every bit of bitset 'set'. */
509 #define BITSET_INVERT(set, len) \
510 do { register bitset tmpset = set; \
512 for (i = 0; i < len; i++, tmpset++) \
513 *tmpset = ~*tmpset; } while (0)
515 /* Turn on the index'th bit in bitset set. */
/* NOTE(review): BITSET_ADD and BITSET_REMOVE appear truncated in this
   extraction — the do{...}while(0) wrapper and the abort() taken when
   index is out of range are missing.  Verify against upstream.  */
516 #define BITSET_ADD(set, index, len) \
518 if (index >= HOST_BITS_PER_WIDE_INT * len) \
521 set[index/HOST_BITS_PER_WIDE_INT] |= \
522 ((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT); \
525 /* Turn off the index'th bit in set. */
526 #define BITSET_REMOVE(set, index, len) \
528 if (index >= HOST_BITS_PER_WIDE_INT * len) \
531 set[index/HOST_BITS_PER_WIDE_INT] &= \
532 ~(((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT)); \
535 /* Check if the index'th bit in bitset set is on.  Returns nonzero
   when set; LEN is the bitset length in HOST_WIDE_INT words. */
/* NOTE(review): return type, parameter declarations, braces and the
   out-of-range abort() are missing from this extraction.  */
538 bitset_member (set, index, len)
542 if (index >= HOST_BITS_PER_WIDE_INT * len)
544 return ((set[index / HOST_BITS_PER_WIDE_INT] &
545 ((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT))
549 /* Translate a bit-set SET to a list BL of the bit-set members.
   LEN is the number of words in SET, BITLEN the number of defined bits. */
/* NOTE(review): return type, parameter declarations, braces, the word
   extraction/shift inside the inner loop, and the final nr_members
   assignment are missing from this extraction.  */
552 extract_bitlst (set, len, bitlen, bl)
559 unsigned HOST_WIDE_INT word;
561 /* bblst table space is reused in each call to extract_bitlst. */
562 bitlst_table_last = 0;
/* The output list aliases the shared bitlst_table buffer.  */
564 bl->first_member = &bitlst_table[bitlst_table_last];
567 /* Iterate over each word in the bitset. */
568 for (i = 0; i < len; i++)
571 offset = i * HOST_BITS_PER_WIDE_INT;
573 /* Iterate over each bit in the word, but do not
574 go beyond the end of the defined bits. */
575 for (j = 0; offset < bitlen && word; j++)
579 bitlst_table[bitlst_table_last++] = offset;
589 /* Functions for the construction of regions. */
591 /* Print the regions, for debugging purposes. Callable from debugger. */
/* NOTE(review): the function header `void debug_regions ()`, locals
   and braces are missing from this extraction.  */
598 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
599 for (rgn = 0; rgn < nr_regions; rgn++)
601 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
602 rgn_table[rgn].rgn_nr_blocks);
603 fprintf (sched_dump, ";;\tbb/block: ");
605 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
607 current_blocks = RGN_BLOCKS (rgn);
/* Sanity check: the bb<->block mapping must be its own inverse.  */
609 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
612 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
615 fprintf (sched_dump, "\n\n");
619 /* Build a single block region for each basic block in the function.
620 This allows for using the same code for interblock and basic block
/* NOTE(review): the `static void` return type, braces, and the
   rgn_bb_table / RGN_BLOCKS / BLOCK_TO_BB assignments inside the loop
   are missing from this extraction.  */
624 find_single_block_region ()
628 for (i = 0; i < n_basic_blocks; i++)
631 RGN_NR_BLOCKS (i) = 1;
633 CONTAINING_RGN (i) = i;
/* One region per block: region count equals the block count.  */
636 nr_regions = n_basic_blocks;
639 /* Update number of blocks and the estimate for number of insns
640 in the region. Return 1 if the region is "too large" for interblock
641 scheduling (compile time considerations), otherwise return 0. */
/* NOTE(review): the return type, braces, the (*num_bbs)++ increment and
   the return statements are missing from this extraction.  */
644 too_large (block, num_bbs, num_insns)
645 int block, *num_bbs, *num_insns;
/* LUID difference of a block's first/last insn approximates its size.  */
648 (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
649 INSN_LUID (BLOCK_HEAD (block)));
650 if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
656 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
657 is still an inner loop. Put in max_hdr[blk] the header of the most inner
658 loop containing blk. */
/* NOTE(review): the do{...}while(0) wrapper lines of this macro are
   missing from this extraction.  DFS numbers order headers: a header
   with a smaller dfs_nr encloses one with a larger dfs_nr.  */
659 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
661 if (max_hdr[blk] == -1) \
662 max_hdr[blk] = hdr; \
663 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
664 RESET_BIT (inner, hdr); \
665 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
667 RESET_BIT (inner,max_hdr[blk]); \
668 max_hdr[blk] = hdr; \
672 /* Find regions for interblock scheduling.
674 A region for scheduling can be:
676 * A loop-free procedure, or
678 * A reducible inner loop, or
680 * A basic block not contained in any other region.
682 ?!? In theory we could build other regions based on extended basic
683 blocks or reverse extended basic blocks. Is it worth the trouble?
685 Loop blocks that form a region are put into the region's block list
686 in topological order.
688 This procedure stores its results into the following global (ick) variables
696 We use dominator relationships to avoid making regions out of non-reducible
699 This procedure needs to be converted to work on pred/succ lists instead
700 of edge tables. That would simplify it somewhat. */
/* NOTE(review): many lines of this function are missing from this
   extraction (the static void header, the DOM parameter declaration,
   braces, several sbitmap declarations, the `unreachable` accumulation,
   the queue/head/tail initialization, and the free()s at the end) —
   verify against the upstream sched-rgn.c before relying on it.  */
703 find_rgns (edge_list, dom)
704 struct edge_list *edge_list;
707 int *max_hdr, *dfs_nr, *stack, *degree;
709 int node, child, loop_head, i, head, tail;
710 int count = 0, sp, idx = 0, current_edge = out_edges[0];
711 int num_bbs, num_insns, unreachable;
712 int too_large_failure;
714 /* Note if an edge has been passed. */
717 /* Note if a block is a natural loop header. */
720 /* Note if a block is a natural inner loop header. */
723 /* Note if a block is in the block queue. */
726 /* Note if a block is in the block queue. */
729 int num_edges = NUM_EDGES (edge_list);
731 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
732 and a mapping from block to its loop header (if the block is contained
735 Store results in HEADER, INNER, and MAX_HDR respectively, these will
736 be used as inputs to the second traversal.
738 STACK, SP and DFS_NR are only used during the first traversal. */
740 /* Allocate and initialize variables for the first traversal. */
741 max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
742 dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
743 stack = (int *) xmalloc (nr_edges * sizeof (int));
745 inner = sbitmap_alloc (n_basic_blocks);
746 sbitmap_ones (inner);
748 header = sbitmap_alloc (n_basic_blocks);
749 sbitmap_zero (header);
751 passed = sbitmap_alloc (nr_edges);
752 sbitmap_zero (passed);
754 in_queue = sbitmap_alloc (n_basic_blocks);
755 sbitmap_zero (in_queue);
757 in_stack = sbitmap_alloc (n_basic_blocks);
758 sbitmap_zero (in_stack);
760 for (i = 0; i < n_basic_blocks; i++)
763 /* DFS traversal to find inner loops in the cfg. */
768 if (current_edge == 0 || TEST_BIT (passed, current_edge))
770 /* We have reached a leaf node or a node that was already
771 processed. Pop edges off the stack until we find
772 an edge that has not yet been processed. */
774 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
776 /* Pop entry off the stack. */
777 current_edge = stack[sp--];
778 node = FROM_BLOCK (current_edge);
779 child = TO_BLOCK (current_edge);
780 RESET_BIT (in_stack, child);
781 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
782 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
783 current_edge = NEXT_OUT (current_edge);
786 /* See if we have finished the DFS tree traversal. */
787 if (sp < 0 && TEST_BIT (passed, current_edge))
790 /* Nope, continue the traversal with the popped node. */
794 /* Process a node. */
795 node = FROM_BLOCK (current_edge);
796 child = TO_BLOCK (current_edge);
797 SET_BIT (in_stack, node);
798 dfs_nr[node] = ++count;
800 /* If the successor is in the stack, then we've found a loop.
801 Mark the loop, if it is not a natural loop, then it will
802 be rejected during the second traversal. */
803 if (TEST_BIT (in_stack, child))
806 SET_BIT (header, child);
807 UPDATE_LOOP_RELATIONS (node, child);
808 SET_BIT (passed, current_edge);
809 current_edge = NEXT_OUT (current_edge);
813 /* If the child was already visited, then there is no need to visit
814 it again. Just update the loop relationships and restart
818 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
819 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
820 SET_BIT (passed, current_edge);
821 current_edge = NEXT_OUT (current_edge);
825 /* Push an entry on the stack and continue DFS traversal. */
826 stack[++sp] = current_edge;
827 SET_BIT (passed, current_edge);
828 current_edge = OUT_EDGES (child);
830 /* This is temporary until haifa is converted to use rth's new
831 cfg routines which have true entry/exit blocks and the
832 appropriate edges from/to those blocks.
834 Generally we update dfs_nr for a node when we process its
835 out edge. However, if the node has no out edge then we will
836 not set dfs_nr for that node. This can confuse the scheduler
837 into thinking that we have unreachable blocks, which in turn
838 disables cross block scheduling.
840 So, if we have a node with no out edges, go ahead and mark it
842 if (current_edge == 0)
843 dfs_nr[child] = ++count;
846 /* Another check for unreachable blocks. The earlier test in
847 is_cfg_nonregular only finds unreachable blocks that do not
850 The DFS traversal will mark every block that is reachable from
851 the entry node by placing a nonzero value in dfs_nr. Thus if
852 dfs_nr is zero for any block, then it must be unreachable. */
854 for (i = 0; i < n_basic_blocks; i++)
861 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
862 to hold degree counts. */
865 for (i = 0; i < n_basic_blocks; i++)
867 for (i = 0; i < num_edges; i++)
869 edge e = INDEX_EDGE (edge_list, i);
871 if (e->dest != EXIT_BLOCK_PTR)
872 degree[e->dest->index]++;
875 /* Do not perform region scheduling if there are any unreachable
884 /* Second traversal: find reducible inner loops and topologically sort
885 block of each region. */
887 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
889 /* Find blocks which are inner loop headers. We still have non-reducible
890 loops to consider at this point. */
891 for (i = 0; i < n_basic_blocks; i++)
893 if (TEST_BIT (header, i) && TEST_BIT (inner, i))
898 /* Now check that the loop is reducible. We do this separate
899 from finding inner loops so that we do not find a reducible
900 loop which contains an inner non-reducible loop.
902 A simple way to find reducible/natural loops is to verify
903 that each block in the loop is dominated by the loop
906 If there exists a block that is not dominated by the loop
907 header, then the block is reachable from outside the loop
908 and thus the loop is not a natural loop. */
909 for (j = 0; j < n_basic_blocks; j++)
911 /* First identify blocks in the loop, except for the loop
913 if (i == max_hdr[j] && i != j)
915 /* Now verify that the block is dominated by the loop
917 if (!TEST_BIT (dom[j], i))
922 /* If we exited the loop early, then I is the header of
923 a non-reducible loop and we should quit processing it
925 if (j != n_basic_blocks)
928 /* I is a header of an inner loop, or block 0 in a subroutine
929 with no loops at all. */
931 too_large_failure = 0;
932 loop_head = max_hdr[i];
934 /* Decrease degree of all I's successors for topological
936 for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
937 if (e->dest != EXIT_BLOCK_PTR)
938 --degree[e->dest->index];
940 /* Estimate # insns, and count # blocks in the region. */
942 num_insns = (INSN_LUID (BLOCK_END (i))
943 - INSN_LUID (BLOCK_HEAD (i)));
945 /* Find all loop latches (blocks with back edges to the loop
946 header) or all the leaf blocks if the cfg has no loops.
948 Place those blocks into the queue. */
951 for (j = 0; j < n_basic_blocks; j++)
952 /* Leaf nodes have only a single successor which must
954 if (BASIC_BLOCK (j)->succ
955 && BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
956 && BASIC_BLOCK (j)->succ->succ_next == NULL)
959 SET_BIT (in_queue, j);
961 if (too_large (j, &num_bbs, &num_insns))
963 too_large_failure = 1;
972 for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
974 if (e->src == ENTRY_BLOCK_PTR)
977 node = e->src->index;
979 if (max_hdr[node] == loop_head && node != i)
981 /* This is a loop latch. */
982 queue[++tail] = node;
983 SET_BIT (in_queue, node);
985 if (too_large (node, &num_bbs, &num_insns))
987 too_large_failure = 1;
994 /* Now add all the blocks in the loop to the queue.
996 We know the loop is a natural loop; however the algorithm
997 above will not always mark certain blocks as being in the
1005 The algorithm in the DFS traversal may not mark B & D as part
1006 of the loop (ie they will not have max_hdr set to A).
1008 We know they can not be loop latches (else they would have
1009 had max_hdr set since they'd have a backedge to a dominator
1010 block). So we don't need them on the initial queue.
1012 We know they are part of the loop because they are dominated
1013 by the loop header and can be reached by a backwards walk of
1014 the edges starting with nodes on the initial queue.
1016 It is safe and desirable to include those nodes in the
1017 loop/scheduling region. To do so we would need to decrease
1018 the degree of a node if it is the target of a backedge
1019 within the loop itself as the node is placed in the queue.
1021 We do not do this because I'm not sure that the actual
1022 scheduling code will properly handle this case. ?!? */
1024 while (head < tail && !too_large_failure)
1027 child = queue[++head];
1029 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
1031 node = e->src->index;
1033 /* See discussion above about nodes not marked as in
1034 this loop during the initial DFS traversal. */
1035 if (e->src == ENTRY_BLOCK_PTR
1036 || max_hdr[node] != loop_head)
1041 else if (!TEST_BIT (in_queue, node) && node != i)
1043 queue[++tail] = node;
1044 SET_BIT (in_queue, node);
1046 if (too_large (node, &num_bbs, &num_insns))
1048 too_large_failure = 1;
1055 if (tail >= 0 && !too_large_failure)
1057 /* Place the loop header into list of region blocks. */
1059 rgn_bb_table[idx] = i;
1060 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1061 RGN_BLOCKS (nr_regions) = idx++;
1062 CONTAINING_RGN (i) = nr_regions;
1063 BLOCK_TO_BB (i) = count = 0;
1065 /* Remove blocks from queue[] when their in degree
1066 becomes zero. Repeat until no blocks are left on the
1067 list. This produces a topological list of blocks in
1073 child = queue[head];
1074 if (degree[child] == 0)
1079 rgn_bb_table[idx++] = child;
1080 BLOCK_TO_BB (child) = ++count;
1081 CONTAINING_RGN (child) = nr_regions;
1082 queue[head] = queue[tail--];
1084 for (e = BASIC_BLOCK (child)->succ;
1087 if (e->dest != EXIT_BLOCK_PTR)
1088 --degree[e->dest->index];
1100 /* Any block that did not end up in a region is placed into a region
1102 for (i = 0; i < n_basic_blocks; i++)
1105 rgn_bb_table[idx] = i;
1106 RGN_NR_BLOCKS (nr_regions) = 1;
1107 RGN_BLOCKS (nr_regions) = idx++;
1108 CONTAINING_RGN (i) = nr_regions++;
1109 BLOCK_TO_BB (i) = 0;
1122 /* Functions for regions scheduling information. */
1124 /* Compute dominators, probability, and potential-split-edges of bb.
1125 Assume that these values were already computed for bb's predecessors. */
/* NOTE(review): lines are missing from this extraction (the static void
   header, braces, the `prob[bb] = 1.0` entry case, the do/while over
   incoming edges, and the nr_out_edges counting loop) — verify against
   the upstream sched-rgn.c.  */
1128 compute_dom_prob_ps (bb)
1131 int nxt_in_edge, fst_in_edge, pred;
1132 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
/* The region entry dominates itself and has no region predecessors.  */
1135 if (IS_RGN_ENTRY (bb))
1137 BITSET_ADD (dom[bb], 0, bbset_size);
1142 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1144 /* Initialize dom[bb] to '111..1'. */
1145 BITSET_INVERT (dom[bb], bbset_size);
/* dom[bb] is the intersection of the dominator sets of all region
   predecessors (plus bb itself, added below).  */
1149 pred = FROM_BLOCK (nxt_in_edge);
1150 BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);
1152 BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
1155 BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);
1158 nr_rgn_out_edges = 0;
1159 fst_out_edge = OUT_EDGES (pred);
1160 nxt_out_edge = NEXT_OUT (fst_out_edge);
1161 BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
1164 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);
1166 /* The successor doesn't belong in the region? */
1167 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1168 CONTAINING_RGN (BB_TO_BLOCK (bb)))
/* Walk pred's circular out-edge list, counting region-exit edges and
   accumulating potential split edges.  */
1171 while (fst_out_edge != nxt_out_edge)
1174 /* The successor doesn't belong in the region? */
1175 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1176 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1178 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
1179 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1183 /* Now nr_rgn_out_edges is the number of region-exit edges from
1184 pred, and nr_out_edges will be the number of pred out edges
1185 not leaving the region. */
1186 nr_out_edges -= nr_rgn_out_edges;
/* Edges leaving the region are assumed to take 10% of pred's
   probability mass; split the rest evenly among in-region edges.  */
1187 if (nr_rgn_out_edges > 0)
1188 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1190 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1191 nxt_in_edge = NEXT_IN (nxt_in_edge);
1193 while (fst_in_edge != nxt_in_edge);
1195 BITSET_ADD (dom[bb], bb, bbset_size);
/* Potential split edges exclude bb's own ancestor edges.  */
1196 BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);
1198 if (sched_verbose >= 2)
1199 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1200 (int) (100.0 * prob[bb]));
1203 /* Functions for target info. */
1205 /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1206 Note that bb_trg dominates bb_src. */
/* NOTE(review): the static void header, parameter declarations, braces,
   the copy loop over `src`, and the trailing free(src) are missing from
   this extraction.  */
1209 split_edges (bb_src, bb_trg, bl)
1214 int es = edgeset_size;
1215 edgeset src = (edgeset) xcalloc (es, sizeof (HOST_WIDE_INT));
1218 src[es] = (pot_split[bb_src])[es];
/* Split edges of bb_src w.r.t. bb_trg = pot_split[src] \ pot_split[trg].  */
1219 BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
1220 extract_bitlst (src, edgeset_size, edgeset_bitsize, bl);
1224 /* Find the valid candidate-source-blocks for the target block TRG, compute
1225 their probability, and check if they are speculative or not.
1226 For speculative sources, compute their update-blocks and split-blocks. */
/* Fill candidate_table entries for all blocks after target block TRG:
   validity, source probability, speculativeness, and (for speculative
   candidates) split-blocks and update-blocks stored in bblst_table.
   NOTE(review): listing is elided; braces, some else-branches and
   declarations are missing between the numbered lines.  */
1229 compute_trg_info (trg)
1232 register candidate *sp;
1234 int check_block, update_idx;
1235 int i, j, k, fst_edge, nxt_edge;
1237 /* Define some of the fields for the target bb as well. */
1238 sp = candidate_table + trg;
1240 sp->is_speculative = 0;
1243 for (i = trg + 1; i < current_nr_blocks; i++)
1245 sp = candidate_table + i;
/* A candidate is valid only if dominated by trg...  */
1247 sp->is_valid = IS_DOMINATED (i, trg);
/* ...and only if its probability relative to trg is high enough.  */
1250 sp->src_prob = GET_SRC_PROB (i, trg);
1251 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1256 split_edges (i, trg, &el);
1257 sp->is_speculative = (el.nr_members) ? 1 : 0;
1258 if (sp->is_speculative && !flag_schedule_speculative)
1264 char *update_blocks;
1266 /* Compute split blocks and store them in bblst_table.
1267 The TO block of every split edge is a split block. */
1268 sp->split_bbs.first_member = &bblst_table[bblst_last];
1269 sp->split_bbs.nr_members = el.nr_members;
1270 for (j = 0; j < el.nr_members; bblst_last++, j++)
1271 bblst_table[bblst_last] =
1272 TO_BLOCK (rgn_edges[el.first_member[j]]);
1273 sp->update_bbs.first_member = &bblst_table[bblst_last];
1275 /* Compute update blocks and store them in bblst_table.
1276 For every split edge, look at the FROM block, and check
1277 all out edges. For each out edge that is not a split edge,
1278 add the TO block to the update block list. This list can end
1279 up with a lot of duplicates. We need to weed them out to avoid
1280 overrunning the end of the bblst_table. */
/* update_blocks is a seen-flag per basic block, used to dedup.  */
1281 update_blocks = (char *) alloca (n_basic_blocks);
1282 memset (update_blocks, 0, n_basic_blocks);
1285 for (j = 0; j < el.nr_members; j++)
1287 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1288 fst_edge = nxt_edge = OUT_EDGES (check_block);
1291 if (! update_blocks[TO_BLOCK (nxt_edge)])
/* Linear scan: is this out-edge one of the split edges?  */
1293 for (k = 0; k < el.nr_members; k++)
1294 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1297 if (k >= el.nr_members)
1299 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1300 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1305 nxt_edge = NEXT_OUT (nxt_edge);
1307 while (fst_edge != nxt_edge);
1309 sp->update_bbs.nr_members = update_idx;
1311 /* Make sure we didn't overrun the end of bblst_table. */
1312 if (bblst_last > bblst_size)
/* Fallthrough case (elided branch): candidate rejected, clear it.  */
1317 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1319 sp->is_speculative = 0;
1325 /* Print candidates info, for debugging purposes. Callable from debugger. */
/* NOTE(review): the function header (presumably debug_candidate (i))
   is elided from this listing; only the body lines below are visible.
   Dumps one candidate_table[i] entry to sched_dump.  */
1331 if (!candidate_table[i].is_valid)
1334 if (candidate_table[i].is_speculative)
1337 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1339 fprintf (sched_dump, "split path: ");
1340 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1342 int b = candidate_table[i].split_bbs.first_member[j];
1344 fprintf (sched_dump, " %d ", b);
1346 fprintf (sched_dump, "\n");
1348 fprintf (sched_dump, "update path: ");
1349 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1351 int b = candidate_table[i].update_bbs.first_member[j];
1353 fprintf (sched_dump, " %d ", b);
1355 fprintf (sched_dump, "\n");
/* Non-speculative (elided else-branch): candidate is "equivalent".  */
1359 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1363 /* Print candidates info, for debugging purposes. Callable from debugger. */
/* Dump every candidate after target block TRG via debug_candidate.  */
1366 debug_candidates (trg)
1371 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1372 BB_TO_BLOCK (trg), trg);
1373 for (i = trg + 1; i < current_nr_blocks; i++)
1374 debug_candidate (i);
1377 /* Functions for speculative scheduling. */
1379 /* Return 0 if x is a set of a register alive in the beginning of one
1380 of the split-blocks of src, otherwise return 1. */
/* NOTE(review): elided listing -- return statements inside the
   branches below are not visible; declarations of i/regno are
   missing here as well.  */
1383 check_live_1 (src, x)
1389 register rtx reg = SET_DEST (x);
/* Strip wrappers to reach the underlying register.  */
1394 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1395 || GET_CODE (reg) == SIGN_EXTRACT
1396 || GET_CODE (reg) == STRICT_LOW_PART)
1397 reg = XEXP (reg, 0);
/* BLKmode PARALLEL: recurse on each destination element.  */
1399 if (GET_CODE (reg) == PARALLEL && GET_MODE (reg) == BLKmode)
1403 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1405 rtx dest = XVECEXP (reg, 0, i);
1407 if (GET_CODE (dest) == EXPR_LIST)
1408 dest = XEXP (dest, 0);
1410 if (check_live_1 (src, dest))
1417 if (GET_CODE (reg) != REG)
1420 regno = REGNO (reg);
1422 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1424 /* Global registers are assumed live. */
1429 if (regno < FIRST_PSEUDO_REGISTER)
1431 /* Check for hard registers. */
1432 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1435 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1437 int b = candidate_table[src].split_bbs.first_member[i];
1439 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1449 /* Check for pseudo registers. */
1450 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1452 int b = candidate_table[src].split_bbs.first_member[i];
1454 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1465 /* If x is a set of a register R, mark that R is alive in the beginning
1466 of every update-block of src. */
/* Mirror of check_live_1 above, but sets liveness instead of testing
   it.  NOTE(review): elided listing; early returns and declarations
   are not visible between the numbered lines.  */
1469 update_live_1 (src, x)
1475 register rtx reg = SET_DEST (x);
/* Strip wrappers to reach the underlying register.  */
1480 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1481 || GET_CODE (reg) == SIGN_EXTRACT
1482 || GET_CODE (reg) == STRICT_LOW_PART)
1483 reg = XEXP (reg, 0);
1485 if (GET_CODE (reg) == PARALLEL && GET_MODE (reg) == BLKmode)
1489 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1491 rtx dest = XVECEXP (reg, 0, i);
1493 if (GET_CODE (dest) == EXPR_LIST)
1494 dest = XEXP (dest, 0);
1496 update_live_1 (src, dest);
1502 if (GET_CODE (reg) != REG)
1505 /* Global registers are always live, so the code below does not apply
/* (elided continuation of the comment and a return)  */
1508 regno = REGNO (reg);
1510 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1512 if (regno < FIRST_PSEUDO_REGISTER)
/* Hard register: mark every constituent hard reg live.  */
1514 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1517 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1519 int b = candidate_table[src].update_bbs.first_member[i];
1521 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
/* Pseudo register (elided else-branch).  */
1528 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1530 int b = candidate_table[src].update_bbs.first_member[i];
1532 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1538 /* Return 1 if insn can be speculatively moved from block src to trg,
1539 otherwise return 0. Called before first insertion of insn to
1540 ready-list or before the scheduling. */
/* Dispatch over the insn pattern shape, delegating to check_live_1
   for each SET/CLOBBER found.  */
1543 check_live (insn, src)
1547 /* Find the registers set by instruction. */
1548 if (GET_CODE (PATTERN (insn)) == SET
1549 || GET_CODE (PATTERN (insn)) == CLOBBER)
1550 return check_live_1 (src, PATTERN (insn));
1551 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
/* Every SET/CLOBBER element must pass check_live_1.  */
1554 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1555 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1556 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1557 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1566 /* Update the live registers info after insn was moved speculatively from
1567 block src to trg. */
/* Same pattern dispatch as check_live above, but calls update_live_1
   to record the new liveness instead of testing it.  */
1570 update_live (insn, src)
1574 /* Find the registers set by instruction. */
1575 if (GET_CODE (PATTERN (insn)) == SET
1576 || GET_CODE (PATTERN (insn)) == CLOBBER)
1577 update_live_1 (src, PATTERN (insn));
1578 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1581 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1582 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1583 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1584 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1588 /* Exception Free Loads:
1590 We define five classes of speculative loads: IFREE, IRISKY,
1591 PFREE, PRISKY, and MFREE.
1593 IFREE loads are loads that are proved to be exception-free, just
1594 by examining the load insn. Examples for such loads are loads
1595 from TOC and loads of global data.
1597 IRISKY loads are loads that are proved to be exception-risky,
1598 just by examining the load insn. Examples for such loads are
1599 volatile loads and loads from shared memory.
1601 PFREE loads are loads for which we can prove, by examining other
1602 insns, that they are exception-free. Currently, this class consists
1603 of loads for which we are able to find a "similar load", either in
1604 the target block, or, if only one split-block exists, in that split
1605 block. Load2 is similar to load1 if both have same single base
1606 register. We identify only part of the similar loads, by finding
1607 an insn upon which both load1 and load2 have a DEF-USE dependence.
1609 PRISKY loads are loads for which we can prove, by examining other
1610 insns, that they are exception-risky. Currently we have two proofs for
1611 such loads. The first proof detects loads that are probably guarded by a
1612 test on the memory address. This proof is based on the
1613 backward and forward data dependence information for the region.
1614 Let load-insn be the examined load.
1615 Load-insn is PRISKY iff ALL the following hold:
1617 - insn1 is not in the same block as load-insn
1618 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
1619 - test-insn is either a compare or a branch, not in the same block
1621 - load-insn is reachable from test-insn
1622 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
1624 This proof might fail when the compare and the load are fed
1625 by an insn not in the region. To solve this, we will add to this
1626 group all loads that have no input DEF-USE dependence.
1628 The second proof detects loads that are directly or indirectly
1629 fed by a speculative load. This proof is affected by the
1630 scheduling process. We will use the flag fed_by_spec_load.
1631 Initially, all insns have this flag reset. After a speculative
1632 motion of an insn, if insn is either a load, or marked as
1633 fed_by_spec_load, we will also mark as fed_by_spec_load every
1634 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
1635 load which is fed_by_spec_load is also PRISKY.
1637 MFREE (maybe-free) loads are all the remaining loads. They may be
1638 exception-free, but we cannot prove it.
1640 Now, all loads in IFREE and PFREE classes are considered
1641 exception-free, while all loads in IRISKY and PRISKY classes are
1642 considered exception-risky. As for loads in the MFREE class,
1643 these are considered either exception-free or exception-risky,
1644 depending on whether we are pessimistic or optimistic. We have
1645 to take the pessimistic approach to assure the safety of
1646 speculative scheduling, but we can take the optimistic approach
1647 by invoking the -fsched_spec_load_dangerous option. */
/* Trap classes, ordered from safest to riskiest; WORST_CLASS below
   relies on this numeric ordering.  See the long comment above for
   the meaning of each class.  NOTE(review): the enum's closing brace
   is elided from this listing.  */
1649 enum INSN_TRAP_CLASS
1651 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
1652 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
/* Return the worse (larger-valued) of two INSN_TRAP_CLASS values.
   Arguments and the whole expansion are fully parenthesized so that
   operand expressions with lower-precedence operators (e.g.
   WORST_CLASS (a | b, c)) expand correctly; the original
   (class1 > class2) form bound `>' inside such operands.  */
#define WORST_CLASS(class1, class2) \
(((class1) > (class2)) ? (class1) : (class2))
1658 /* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
/* NOTE(review): the first line of this macro's expansion (the
   bb_from == bb_to test, presumably) is elided from this listing.  */
1659 #define IS_REACHABLE(bb_from, bb_to) \
1661 || IS_RGN_ENTRY (bb_from) \
1662 || (bitset_member (ancestor_edges[bb_to], \
1663 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \
1666 /* Non-zero iff the address is comprised from at most 1 register. */
/* Accepts a bare REG, or PLUS/MINUS/LO_SUM where at least one operand
   is a CONST_INT (so at most one register participates).  */
1667 #define CONST_BASED_ADDRESS_P(x) \
1668 (GET_CODE (x) == REG \
1669 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
1670 || (GET_CODE (x) == LO_SUM)) \
1671 && (GET_CODE (XEXP (x, 0)) == CONST_INT \
1672 || GET_CODE (XEXP (x, 1)) == CONST_INT)))
1674 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
/* Walks the forward (INSN_DEPEND) list; VOIDmode links are the
   DEF-USE dependences, per the convention used throughout this file.  */
1677 set_spec_fed (load_insn)
1682 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1683 if (GET_MODE (link) == VOIDmode)
1684 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1685 } /* set_spec_fed */
1687 /* On the path from the insn to load_insn_bb, find a conditional
1688 branch depending on insn, that guards the speculative load. */
/* Recursive search along DEF-USE forward dependences; succeeds when a
   JUMP_INSN in another block of the same region is reached.
   NOTE(review): the return statements are elided from this listing.  */
1691 find_conditional_protection (insn, load_insn_bb)
1697 /* Iterate through DEF-USE forward dependences. */
1698 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1700 rtx next = XEXP (link, 0);
1701 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1702 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1703 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1704 && load_insn_bb != INSN_BB (next)
1705 && GET_MODE (link) == VOIDmode
1706 && (GET_CODE (next) == JUMP_INSN
1707 || find_conditional_protection (next, load_insn_bb)))
1711 } /* find_conditional_protection */
1713 /* Returns 1 if the same insn1 that participates in the computation
1714 of load_insn's address is feeding a conditional branch that is
1715 guarding on load_insn. This is true if we find the two DEF-USE
1717 insn1 -> ... -> conditional-branch
1718 insn1 -> ... -> load_insn,
1719 and if a flow path exist:
1720 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1721 and if insn1 is on the path
1722 region-entry -> ... -> bb_trg -> ... load_insn.
1724 Locate insn1 by climbing on LOG_LINKS from load_insn.
1725 Locate the branch by following INSN_DEPEND from insn1. */
/* See the comment above: climb LOG_LINKS (backward dependences) from
   LOAD_INSN looking for an insn1 that also feeds a guarding branch
   (found via find_conditional_protection).  */
1728 is_conditionally_protected (load_insn, bb_src, bb_trg)
1734 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1736 rtx insn1 = XEXP (link, 0);
1738 /* Must be a DEF-USE dependence upon non-branch. */
1739 if (GET_MODE (link) != VOIDmode
1740 || GET_CODE (insn1) == JUMP_INSN)
1743 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1744 if (INSN_BB (insn1) == bb_src
1745 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1746 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1747 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1748 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1751 /* Now search for the conditional-branch. */
1752 if (find_conditional_protection (insn1, bb_src))
1755 /* Recursive step: search another insn1, "above" current insn1. */
1756 return is_conditionally_protected (insn1, bb_src, bb_trg);
1759 /* The chain does not exist. */
1761 } /* is_conditionally_protected */
1763 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1764 load_insn can move speculatively from bb_src to bb_trg. All the
1765 following must hold:
1767 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1768 (2) load_insn and load1 have a def-use dependence upon
1769 the same insn 'insn1'.
1770 (3) either load2 is in bb_trg, or:
1771 - there's only one split-block, and
1772 - load1 is on the escape path, and
1774 From all these we can conclude that the two loads access memory
1775 addresses that differ at most by a constant, and hence if moving
1776 load_insn would cause an exception, it would have been caused by
/* See the comment above: look for a "similar load" insn2 reached via
   insn1 through a backward (LOG_LINKS) then forward (INSN_DEPEND)
   DEF-USE dependence.  NOTE(review): the return statements are elided
   from this listing.  */
1780 is_pfree (load_insn, bb_src, bb_trg)
1785 register candidate *candp = candidate_table + bb_src;
1787 if (candp->split_bbs.nr_members != 1)
1788 /* Must have exactly one escape block. */
1791 for (back_link = LOG_LINKS (load_insn);
1792 back_link; back_link = XEXP (back_link, 1))
1794 rtx insn1 = XEXP (back_link, 0);
1796 if (GET_MODE (back_link) == VOIDmode)
1798 /* Found a DEF-USE dependence (insn1, load_insn). */
1801 for (fore_link = INSN_DEPEND (insn1);
1802 fore_link; fore_link = XEXP (fore_link, 1))
1804 rtx insn2 = XEXP (fore_link, 0);
1805 if (GET_MODE (fore_link) == VOIDmode)
1807 /* Found a DEF-USE dependence (insn1, insn2). */
1808 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1809 /* insn2 not guaranteed to be a 1 base reg load. */
1812 if (INSN_BB (insn2) == bb_trg)
1813 /* insn2 is the similar load, in the target block. */
1816 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1817 /* insn2 is a similar load, in a split-block. */
1824 /* Couldn't find a similar load. */
1828 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
1828 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
1829 as found by analyzing insn's expression. */
/* Classify expression X (IS_STORE nonzero when X is a destination)
   into an INSN_TRAP_CLASS by walking its sub-rtxes and combining with
   WORST_CLASS.  NOTE(review): listing is elided -- the MEM case
   header, several returns, and the 'e'-format branch test are not
   visible between the numbered lines.  */
1832 may_trap_exp (x, is_store)
1840 code = GET_CODE (x);
1850 /* The insn uses memory: a volatile load. */
1851 if (MEM_VOLATILE_P (x))
1853 /* An exception-free load. */
1854 if (!may_trap_p (x))
1856 /* A load with 1 base register, to be further checked. */
1857 if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
1858 return PFREE_CANDIDATE;
1859 /* No info on the load, to be further checked. */
1860 return PRISKY_CANDIDATE;
1865 int i, insn_class = TRAP_FREE;
1867 /* Neither store nor load, check if it may cause a trap. */
1870 /* Recursive step: walk the insn... */
1871 fmt = GET_RTX_FORMAT (code);
1872 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1876 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
1877 insn_class = WORST_CLASS (insn_class, tmp_class);
1879 else if (fmt[i] == 'E')
1882 for (j = 0; j < XVECLEN (x, i); j++)
1884 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
1885 insn_class = WORST_CLASS (insn_class, tmp_class);
/* Stop early once the worst possible class is reached.  */
1886 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1890 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1897 /* Classifies insn for the purpose of verifying that it can be
1898 moved speculatively, by examining it's patterns, returning:
1899 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
1900 TRAP_FREE: non-load insn.
1901 IFREE: load from a globally safe location.
1902 IRISKY: volatile load.
1903 PFREE_CANDIDATE, PRISKY_CANDIDATE: load that need to be checked for
1904 being either PFREE or PRISKY. */
/* Classify INSN per the comment above by examining its pattern:
   PARALLEL patterns combine the class of each element with
   WORST_CLASS; single patterns are handled directly.
   NOTE(review): listing is elided -- the switch statements around the
   CLOBBER/SET/COND_EXEC/TRAP_IF cases and the returns are not visible.  */
1907 haifa_classify_insn (insn)
1910 rtx pat = PATTERN (insn);
1911 int tmp_class = TRAP_FREE;
1912 int insn_class = TRAP_FREE;
1915 if (GET_CODE (pat) == PARALLEL)
1917 int i, len = XVECLEN (pat, 0);
1919 for (i = len - 1; i >= 0; i--)
1921 code = GET_CODE (XVECEXP (pat, 0, i));
1925 /* Test if it is a 'store'. */
1926 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
1929 /* Test if it is a store. */
1930 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
1931 if (tmp_class == TRAP_RISKY)
1933 /* Test if it is a load. */
1935 = WORST_CLASS (tmp_class,
1936 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)),
/* (elided case, presumably TRAP_IF) -- unconditionally risky.  */
1941 tmp_class = TRAP_RISKY;
1946 insn_class = WORST_CLASS (insn_class, tmp_class);
1947 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
/* Single (non-PARALLEL) pattern.  */
1953 code = GET_CODE (pat);
1957 /* Test if it is a 'store'. */
1958 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
1961 /* Test if it is a store. */
1962 tmp_class = may_trap_exp (SET_DEST (pat), 1);
1963 if (tmp_class == TRAP_RISKY)
1965 /* Test if it is a load. */
1967 WORST_CLASS (tmp_class,
1968 may_trap_exp (SET_SRC (pat), 0));
1972 tmp_class = TRAP_RISKY;
1976 insn_class = tmp_class;
1982 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1983 a load moved speculatively, or if load_insn is protected by
1984 a compare on load_insn's address). */
/* See the comment above.  A load is PRISKY when it is fed by a
   speculative load, has no backward dependences at all (the
   dependence may hide outside the region), or is guarded by a
   conditional branch on its address.  */
1987 is_prisky (load_insn, bb_src, bb_trg)
1991 if (FED_BY_SPEC_LOAD (load_insn))
1994 if (LOG_LINKS (load_insn) == NULL)
1995 /* Dependence may 'hide' out of the region. */
1998 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2004 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2005 Return 1 if insn is exception-free (and the motion is valid)
/* Decide whether INSN may be speculatively moved from BB_SRC to
   BB_TRG, based on its haifa_classify_insn class and the -fsched-spec*
   flags.  NOTE(review): listing is elided -- the switch header and
   several case labels/returns are not visible.  */
2009 is_exception_free (insn, bb_src, bb_trg)
2013 int insn_class = haifa_classify_insn (insn);
2015 /* Handle non-load insns. */
2026 if (!flag_schedule_speculative_load)
2028 IS_LOAD_INSN (insn) = 1;
2035 case PFREE_CANDIDATE:
2036 if (is_pfree (insn, bb_src, bb_trg))
2038 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2039 case PRISKY_CANDIDATE:
2040 if (!flag_schedule_speculative_load_dangerous
2041 || is_prisky (insn, bb_src, bb_trg))
2047 return flag_schedule_speculative_load_dangerous;
2050 /* The number of insns from the current block scheduled so far. */
2051 static int sched_target_n_insns;
2052 /* The number of insns from the current block to be scheduled in total. */
2053 static int target_n_insns;
2054 /* The number of insns from the entire region scheduled so far. */
2055 static int sched_n_insns;
2056 /* Nonzero if the last scheduled insn was a jump. */
2057 static int last_was_jump;
2059 /* Implementations of the sched_info functions for region scheduling. */
2060 static void init_ready_list PARAMS ((struct ready_list *));
2061 static int can_schedule_ready_p PARAMS ((rtx));
2062 static int new_ready PARAMS ((rtx));
2063 static int schedule_more_p PARAMS ((void));
2064 static const char *rgn_print_insn PARAMS ((rtx, int));
2065 static int rgn_rank PARAMS ((rtx, rtx));
2066 static int contributes_to_priority PARAMS ((rtx, rtx));
2067 static void compute_jump_reg_dependencies PARAMS ((rtx, regset));
2069 /* Return nonzero if there are more insns that should be scheduled. */
/* NOTE(review): the function header (schedule_more_p, per the
   prototype above) is elided; only the return expression is visible.
   Scheduling continues until a jump is scheduled or all target-block
   insns are placed.  */
2074 return ! last_was_jump && sched_target_n_insns < target_n_insns;
2077 /* Add all insns that are initially ready to the ready list READY. Called
2078 once before scheduling a set of insns. */
/* Seed READY with every insn that is initially schedulable: all
   dependence-free insns of the target block, plus valid (and, if
   speculative, live/exception-free) insns from later source blocks.
   Also allocates the per-target candidate/bblst/bitlst tables.
   NOTE(review): listing is elided -- counter initializations, loop
   braces and some conditions are not visible between numbered lines.  */
2081 init_ready_list (ready)
2082 struct ready_list *ready;
2084 rtx prev_head = current_sched_info->prev_head;
2085 rtx next_tail = current_sched_info->next_tail;
2090 sched_target_n_insns = 0;
2094 /* Print debugging information. */
2095 if (sched_verbose >= 5)
2096 debug_dependencies ();
2098 /* Prepare current target block info. */
2099 if (current_nr_blocks > 1)
2101 candidate_table = (candidate *) xmalloc (current_nr_blocks
2102 * sizeof (candidate));
2105 /* bblst_table holds split blocks and update blocks for each block after
2106 the current one in the region. split blocks and update blocks are
2107 the TO blocks of region edges, so there can be at most rgn_nr_edges
2109 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
2110 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
2112 bitlst_table_last = 0;
2113 bitlst_table_size = rgn_nr_edges;
2114 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2116 compute_trg_info (target_bb);
2119 /* Initialize ready list with all 'ready' insns in target block.
2120 Count number of insns in the target block being scheduled. */
2121 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2125 if (! INSN_P (insn))
2127 next = NEXT_INSN (insn);
/* Only the head of a sched group enters the ready list.  */
2129 if (INSN_DEP_COUNT (insn) == 0
2130 && (SCHED_GROUP_P (next) == 0 || ! INSN_P (next)))
2131 ready_add (ready, insn);
2132 if (!(SCHED_GROUP_P (insn)))
2136 /* Add to ready list all 'ready' insns in valid source blocks.
2137 For speculative insns, check-live, exception-free, and
2139 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2140 if (IS_VALID (bb_src))
2146 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
2147 src_next_tail = NEXT_INSN (tail);
2150 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2152 if (! INSN_P (insn))
2155 if (!CANT_MOVE (insn)
2156 && (!IS_SPECULATIVE_INSN (insn)
2157 || (insn_issue_delay (insn) <= 3
2158 && check_live (insn, bb_src)
2159 && is_exception_free (insn, bb_src, target_bb))))
2163 /* Note that we haven't squirreled away the notes for
2164 blocks other than the current. So if this is a
2165 speculative insn, NEXT might otherwise be a note. */
2166 next = next_nonnote_insn (insn);
2167 if (INSN_DEP_COUNT (insn) == 0
2169 || SCHED_GROUP_P (next) == 0
2170 || ! INSN_P (next)))
2171 ready_add (ready, insn);
2177 /* Called after taking INSN from the ready list. Returns nonzero if this
2178 insn can be scheduled, zero if we should silently discard it. */
/* Hook called after INSN is taken from the ready list.  For an
   interblock motion, updates liveness, marks spec-fed insns, and
   repairs the source block's head/end boundaries; for an in-block
   insn, just counts it.  NOTE(review): listing is elided -- returns,
   some assignments (e.g. to last_was_jump, temp, b1) and braces are
   not visible between the numbered lines.  */
2181 can_schedule_ready_p (insn)
2184 if (GET_CODE (insn) == JUMP_INSN)
2187 /* An interblock motion? */
2188 if (INSN_BB (insn) != target_bb)
2193 if (IS_SPECULATIVE_INSN (insn))
/* Liveness may have changed since the insn entered the ready
   list; discard it if the move is no longer safe.  */
2195 if (!check_live (insn, INSN_BB (insn)))
2197 update_live (insn, INSN_BB (insn));
2199 /* For speculative load, mark insns fed by it. */
2200 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2201 set_spec_fed (insn);
2207 /* Find the beginning of the scheduling group. */
2208 /* ??? Ought to update basic block here, but later bits of
2209 schedule_block assumes the original insn block is
2213 while (SCHED_GROUP_P (temp))
2214 temp = PREV_INSN (temp);
2216 /* Update source block boundaries. */
2217 b1 = BLOCK_FOR_INSN (temp);
2218 if (temp == b1->head && insn == b1->end)
2220 /* We moved all the insns in the basic block.
2221 Emit a note after the last insn and update the
2222 begin/end boundaries to point to the note. */
2223 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
2227 else if (insn == b1->end)
2229 /* We took insns from the end of the basic block,
2230 so update the end of block boundary so that it
2231 points to the first insn we did not move. */
2232 b1->end = PREV_INSN (temp);
2234 else if (temp == b1->head)
2236 /* We took insns from the start of the basic block,
2237 so update the start of block boundary so that
2238 it points to the first insn we did not move. */
2239 b1->head = NEXT_INSN (insn);
2244 /* In block motion. */
2245 sched_target_n_insns++;
2252 /* Called after INSN has all its dependencies resolved. Return nonzero
2253 if it should be moved to the ready list or the queue, or zero if we
2254 should silently discard it. */
/* NOTE(review): the function header (new_ready (next), per the
   prototype above) is elided; only the validity test is visible.
   Re-checks an interblock candidate when its dependences resolve.  */
2259 /* For speculative insns, before inserting to ready/queue,
2260 check live, exception-free, and issue-delay. */
2261 if (INSN_BB (next) != target_bb
2262 && (!IS_VALID (INSN_BB (next))
2264 || (IS_SPECULATIVE_INSN (next)
2265 && (insn_issue_delay (next) > 3
2266 || !check_live (next, INSN_BB (next))
2267 || !is_exception_free (next, INSN_BB (next), target_bb)))))
2272 /* Return a string that contains the insn uid and optionally anything else
2273 necessary to identify this insn in an output. It's valid to use a
2274 static buffer for this. The ALIGNED parameter should cause the string
2275 to be formatted so that multiple output lines will line up nicely. */
/* Format INSN for scheduler dumps; ALIGNED selects a fixed-width
   "b<bb>: i<uid>" form, otherwise "<uid>/b<bb>" for interblock insns
   or just "<uid>".  Returns a static buffer (per the comment above).  */
2278 rgn_print_insn (insn, aligned)
2282 static char tmp[80];
2285 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2288 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2289 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2291 sprintf (tmp, "%d", INSN_UID (insn));
2296 /* Compare priority of two insns. Return a positive number if the second
2297 insn is to be preferred for scheduling, and a negative one if the first
2298 is to be preferred. Zero if they are equally good. */
/* Region-specific tie-breaking for the ready list, applied only when
   the insns come from different blocks: prefer in-block over
   interblock, useful over speculative, and higher-probability
   speculative insns.  NOTE(review): the returns between the tests are
   elided from this listing.  */
2301 rgn_rank (insn1, insn2)
2304 /* Some comparison make sense in interblock scheduling only. */
2305 if (INSN_BB (insn1) != INSN_BB (insn2))
2307 int spec_val, prob_val;
2309 /* Prefer an inblock motion on an interblock motion. */
2310 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2312 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2315 /* Prefer a useful motion on a speculative one. */
2316 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2320 /* Prefer a more probable (speculative) insn. */
2321 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2328 /* NEXT is an instruction that depends on INSN (a backward dependence);
2329 return nonzero if we should include this dependence in priority
/* Only dependences within a single block contribute to priority
   computation (see the comment above).  */
2333 contributes_to_priority (next, insn)
2336 return BLOCK_NUM (next) == BLOCK_NUM (insn);
2339 /* INSN is a JUMP_INSN. Store the set of registers that must be considered
2340 to be set by this jump in SET. */
/* sched_info hook; intentionally empty for region scheduling (jumps
   are handled in add_branch_dependences below).  */
2343 compute_jump_reg_dependencies (insn, set)
2344 rtx insn ATTRIBUTE_UNUSED;
2345 regset set ATTRIBUTE_UNUSED;
2347 /* Nothing to do here, since we postprocess jumps in
2348 add_branch_dependences. */
2351 /* Used in schedule_insns to initialize current_sched_info for scheduling
2352 regions (or single basic blocks). */
/* Hook table consumed by the generic scheduler (schedule_insns).
   NOTE(review): most initializer entries (init_ready_list,
   schedule_more_p, new_ready, rgn_print_insn, rgn_rank, trailing
   fields) are elided from this listing.  */
2354 static struct sched_info region_sched_info =
2357 can_schedule_ready_p,
2362 contributes_to_priority,
2363 compute_jump_reg_dependencies,
2370 /* Add dependences so that branches are scheduled to run last in their
/* Pin branches, calls, USEs, CLOBBERs and cc0 setters to the end of
   the block [HEAD, TAIL] by adding anti-dependences from the last
   such insn to everything before it.  NOTE(review): listing is elided
   -- initialization of `last', some conditions and braces are not
   visible between the numbered lines.  */
2374 add_branch_dependences (head, tail)
2379 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
2380 to remain in order at the end of the block by adding dependencies and
2381 giving the last a high priority. There may be notes present, and
2382 prev_head may also be a note.
2384 Branches must obviously remain at the end. Calls should remain at the
2385 end since moving them results in worse register allocation. Uses remain
2386 at the end to ensure proper register allocation. cc0 setters remain
2387 at the end because they can't be moved away from their cc0 user. */
/* Phase 1: walk backwards from TAIL over the trailing run of
   branch/call/use/clobber/cc0-setter insns and notes.  */
2390 while (GET_CODE (insn) == CALL_INSN
2391 || GET_CODE (insn) == JUMP_INSN
2392 || (GET_CODE (insn) == INSN
2393 && (GET_CODE (PATTERN (insn)) == USE
2394 || GET_CODE (PATTERN (insn)) == CLOBBER
2396 || sets_cc0_p (PATTERN (insn))
2399 || GET_CODE (insn) == NOTE)
2401 if (GET_CODE (insn) != NOTE)
2404 && !find_insn_list (insn, LOG_LINKS (last)))
2406 add_dependence (last, insn, REG_DEP_ANTI);
2407 INSN_REF_COUNT (insn)++;
2410 CANT_MOVE (insn) = 1;
2413 /* Skip over insns that are part of a group.
2414 Make each insn explicitly depend on the previous insn.
2415 This ensures that only the group header will ever enter
2416 the ready queue (and, when scheduled, will automatically
2417 schedule the SCHED_GROUP_P block). */
2418 while (SCHED_GROUP_P (insn))
2420 rtx temp = prev_nonnote_insn (insn);
2421 add_dependence (insn, temp, REG_DEP_ANTI);
2426 /* Don't overrun the bounds of the basic block. */
2430 insn = PREV_INSN (insn);
2433 /* Make sure these insns are scheduled last in their block. */
/* Phase 2: make every earlier insn in the block an anti-dependence
   predecessor of `last'.  */
2436 while (insn != head)
2438 insn = prev_nonnote_insn (insn);
2440 if (INSN_REF_COUNT (insn) != 0)
2443 add_dependence (last, insn, REG_DEP_ANTI);
2444 INSN_REF_COUNT (insn) = 1;
2446 /* Skip over insns that are part of a group. */
2447 while (SCHED_GROUP_P (insn))
2448 insn = prev_nonnote_insn (insn);
2452 /* Data structures for the computation of data dependences in a regions. We
2453 keep one `deps' structure for every basic block. Before analyzing the
2454 data dependences for a bb, its variables are initialized as a function of
2455 the variables of its predecessors. When the analysis for a bb completes,
2456 we save the contents to the corresponding bb_deps[bb] variable. */
2458 static struct deps *bb_deps;
2460 /* After computing the dependencies for block BB, propagate the dependencies
2461 found in TMP_DEPS to the successors of the block. */
/* Propagate the dependence context TMP_DEPS of block BB to every
   successor block in the same region that follows BB in the region
   order: register last-def/use/clobber lists, pending memory
   read/write lists, last_function_call, last_pending_memory_flush and
   sched_before_next_call are all merged (with duplicate suppression)
   into the successor's deps.  NOTE(review): listing is elided --
   declarations (e, first_edge, u, x, reg), do-loop headers, braces
   and the `continue' in the region test are not visible.  */
2463 propagate_deps (bb, tmp_deps)
2465 struct deps *tmp_deps;
2467 int b = BB_TO_BLOCK (bb);
2470 rtx link_insn, link_mem;
2473 /* These lists should point to the right place, for correct
2475 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
2476 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
2477 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
2478 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
2480 /* bb's structures are inherited by its successors. */
2481 first_edge = e = OUT_EDGES (b);
2488 int b_succ = TO_BLOCK (e);
2489 int bb_succ = BLOCK_TO_BB (b_succ);
2490 struct deps *succ_deps = bb_deps + bb_succ;
2492 /* Only bbs "below" bb, in the same region, are interesting. */
2493 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2500 /* The reg_last lists are inherited by bb_succ. */
2501 EXECUTE_IF_SET_IN_REG_SET (&tmp_deps->reg_last_in_use, 0, reg,
2503 struct deps_reg *tmp_deps_reg = &tmp_deps->reg_last[reg];
2504 struct deps_reg *succ_deps_reg = &succ_deps->reg_last[reg];
2506 for (u = tmp_deps_reg->uses; u; u = XEXP (u, 1))
2507 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->uses))
2509 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->uses);
2511 for (u = tmp_deps_reg->sets; u; u = XEXP (u, 1))
2512 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->sets))
2514 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->sets);
2516 for (u = tmp_deps_reg->clobbers; u; u = XEXP (u, 1))
2517 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->clobbers))
2518 succ_deps_reg->clobbers
2519 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->clobbers);
2521 IOR_REG_SET (&succ_deps->reg_last_in_use, &tmp_deps->reg_last_in_use);
2523 /* Mem read/write lists are inherited by bb_succ. */
/* The two lists run in lockstep: link_insn holds the insns,
   link_mem the corresponding MEMs.  */
2524 link_insn = tmp_deps->pending_read_insns;
2525 link_mem = tmp_deps->pending_read_mems;
2528 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2530 succ_deps->pending_read_insns,
2531 succ_deps->pending_read_mems)))
2532 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
2533 &succ_deps->pending_read_mems,
2534 XEXP (link_insn, 0), XEXP (link_mem, 0));
2535 link_insn = XEXP (link_insn, 1);
2536 link_mem = XEXP (link_mem, 1);
2539 link_insn = tmp_deps->pending_write_insns;
2540 link_mem = tmp_deps->pending_write_mems;
2543 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2545 succ_deps->pending_write_insns,
2546 succ_deps->pending_write_mems)))
2547 add_insn_mem_dependence (succ_deps,
2548 &succ_deps->pending_write_insns,
2549 &succ_deps->pending_write_mems,
2550 XEXP (link_insn, 0), XEXP (link_mem, 0));
2552 link_insn = XEXP (link_insn, 1);
2553 link_mem = XEXP (link_mem, 1);
2556 /* last_function_call is inherited by bb_succ. */
2557 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
2558 if (! find_insn_list (XEXP (u, 0), succ_deps->last_function_call))
2559 succ_deps->last_function_call
2560 = alloc_INSN_LIST (XEXP (u, 0), succ_deps->last_function_call)
2562 /* last_pending_memory_flush is inherited by bb_succ. */
2563 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
2564 if (! find_insn_list (XEXP (u, 0),
2565 succ_deps->last_pending_memory_flush))
2566 succ_deps->last_pending_memory_flush
2567 = alloc_INSN_LIST (XEXP (u, 0),
2568 succ_deps->last_pending_memory_flush);
2570 /* sched_before_next_call is inherited by bb_succ. */
2571 x = LOG_LINKS (tmp_deps->sched_before_next_call);
2572 for (; x; x = XEXP (x, 1))
2573 add_dependence (succ_deps->sched_before_next_call,
2574 XEXP (x, 0), REG_DEP_ANTI);
2578 while (e != first_edge);
2581 /* Compute backward dependences inside bb. In a multiple blocks region:
2582 (1) a bb is analyzed after its predecessors, and (2) the lists in
2583 effect at the end of bb (after analyzing for bb) are inherited by
2586 Specifically for reg-reg data dependences, the block insns are
2587 scanned by sched_analyze () top-to-bottom. Two lists are
2588 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2589 and reg_last[].uses for register USEs.
2591 When analysis is completed for bb, we update for its successors:
2592 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2593 ; - USES[succ] = Union (USES [succ], USES [bb])
2595 The mechanism for computing mem-mem data dependence is very
2596 similar, and the result is interblock dependences in the region. */
/* Compute backward (LOG_LINKS) dependences for the insns of block BB of
   the current region.  When the region has more than one block, the
   dependence context in effect at the end of BB is propagated to BB's
   successors (see the comment above).  */
2599 compute_block_backward_dependences (bb)
2603 struct deps tmp_deps;
/* Work on a local copy of the deps context accumulated for BB so far;
   bb_deps[bb] itself keeps the pending lists freed later by
   free_pending_lists.  */
2605 tmp_deps = bb_deps[bb];
2607 /* Do the analysis for this block. */
2608 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2609 sched_analyze (&tmp_deps, head, tail);
2610 add_branch_dependences (head, tail);
/* Only multi-block regions have successors inside the region to
   inherit this block's dependence context.  */
2612 if (current_nr_blocks > 1)
2613 propagate_deps (bb, &tmp_deps);
2615 /* Free up the INSN_LISTs. */
2616 free_deps (&tmp_deps);
2619 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2620 them to the unused_*_list variables, so that they can be reused. */
2623 free_pending_lists ()
/* Release the pending memory read/write lists of every block in the
   current region.  */
2627 for (bb = 0; bb < current_nr_blocks; bb++)
2629 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2630 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2631 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2632 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2636 /* Print dependences for debugging, callable from debugger. */
/* Dump the forward (INSN_DEPEND) dependences of every block in the
   current region to SCHED_DUMP, one table per block.  Intended to be
   called from a debugger.  */
2639 debug_dependencies ()
2643 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2644 for (bb = 0; bb < current_nr_blocks; bb++)
2652 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2653 next_tail = NEXT_INSN (tail);
2654 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2655 BB_TO_BLOCK (bb), bb);
/* Column headers for the per-insn table printed below.  */
2657 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2658 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2659 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2660 "----", "----", "--", "---", "----", "----", "--------", "-----");
2661 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
/* Non-insns (notes etc.) get a short one-line description instead of
   a full table row.  */
2666 if (! INSN_P (insn))
2669 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2670 if (GET_CODE (insn) == NOTE)
2672 n = NOTE_LINE_NUMBER (insn);
2674 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2676 fprintf (sched_dump, "line %d, file %s\n", n,
2677 NOTE_SOURCE_FILE (insn));
2680 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
/* Query the target's function-unit blockage range when one is
   defined; otherwise treat the range as 0.  */
2684 unit = insn_unit (insn);
2686 || function_units[unit].blockage_range_function == 0) ? 0 :
2687 function_units[unit].blockage_range_function (insn);
2688 fprintf (sched_dump,
2689 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2690 (SCHED_GROUP_P (insn) ? "+" : " "),
2694 INSN_DEP_COUNT (insn),
2695 INSN_PRIORITY (insn),
2696 insn_cost (insn, 0, 0),
2697 (int) MIN_BLOCKAGE_COST (range),
2698 (int) MAX_BLOCKAGE_COST (range));
2699 insn_print_units (insn);
/* List the UIDs of all insns that depend on this one.  */
2700 fprintf (sched_dump, "\t: ");
2701 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2702 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2703 fprintf (sched_dump, "\n");
2707 fprintf (sched_dump, "\n");
2710 /* Schedule a region. A region is either an inner loop, a loop-free
2711 subroutine, or a single basic block. Each bb in the region is
2712 scheduled after its flow predecessors. */
/* Schedule all insns of region RGN: build backward then forward
   dependences, set priorities, compute interblock data (probabilities,
   dominators, split edges) when the region has more than one block,
   and finally schedule each block.  Verifies afterwards that every
   insn in the region was scheduled.  */
2715 schedule_region (rgn)
/* Insn counts before and after scheduling, compared in the sanity
   check near the end.  */
2719 int rgn_n_insns = 0;
2720 int sched_rgn_n_insns = 0;
2722 /* Set variables for the current region. */
2723 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2724 current_blocks = RGN_BLOCKS (rgn);
2726 init_deps_global ();
2728 /* Initializations for region data dependence analysis. */
2729 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2730 for (bb = 0; bb < current_nr_blocks; bb++)
2731 init_deps (bb_deps + bb);
2733 /* Compute LOG_LINKS. */
2734 for (bb = 0; bb < current_nr_blocks; bb++)
2735 compute_block_backward_dependences (bb);
2737 /* Compute INSN_DEPEND. */
2738 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2741 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2743 compute_forward_dependences (head, tail);
2746 /* Set priorities. */
2747 for (bb = 0; bb < current_nr_blocks; bb++)
2750 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2752 rgn_n_insns += set_priorities (head, tail);
2755 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2756 if (current_nr_blocks > 1)
2760 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2762 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
2763 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
2764 for (i = 0; i < current_nr_blocks; i++)
2765 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
/* Edge indices start at 1; count only edges whose source block lies
   in this region, and assign each a bit position.  */
2769 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2770 for (i = 1; i < nr_edges; i++)
2771 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2772 EDGE_TO_BIT (i) = rgn_nr_edges++;
2773 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2776 for (i = 1; i < nr_edges; i++)
2777 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2778 rgn_edges[rgn_nr_edges++] = i;
/* Split-edge and ancestor-edge sets, one bitset per block.  */
2781 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
2782 edgeset_bitsize = rgn_nr_edges;
2783 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2785 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2786 for (i = 0; i < current_nr_blocks; i++)
2789 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2791 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2794 /* Compute probabilities, dominators, split_edges. */
2795 for (bb = 0; bb < current_nr_blocks; bb++)
2796 compute_dom_prob_ps (bb);
2799 /* Now we can schedule all blocks. */
2800 for (bb = 0; bb < current_nr_blocks; bb++)
2803 int b = BB_TO_BLOCK (bb);
2805 get_block_head_tail (b, &head, &tail);
/* Nothing schedulable in this block; skip it.  */
2807 if (no_real_insns_p (head, tail))
2810 current_sched_info->prev_head = PREV_INSN (head);
2811 current_sched_info->next_tail = NEXT_INSN (tail);
/* Stash line-number notes out of the way before scheduling; they are
   restored after all blocks of the region are done.  */
2813 if (write_symbols != NO_DEBUG)
2815 save_line_notes (b, head, tail);
2816 rm_line_notes (head, tail);
2819 /* rm_other_notes only removes notes which are _inside_ the
2820 block---that is, it won't remove notes before the first real insn
2821 or after the last real insn of the block. So if the first insn
2822 has a REG_SAVE_NOTE which would otherwise be emitted before the
2823 insn, it is redundant with the note before the start of the
2824 block, and so we have to take it out.
2826 FIXME: Probably the same thing should be done with REG_SAVE_NOTEs
2827 referencing NOTE_INSN_SETJMP at the end of the block. */
2832 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2833 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2835 if (INTVAL (XEXP (note, 0)) != NOTE_INSN_SETJMP)
2837 remove_note (head, note);
2838 note = XEXP (note, 1);
2839 remove_note (head, note);
2842 note = XEXP (note, 1);
2846 /* Remove remaining note insns from the block, save them in
2847 note_list. These notes are restored at the end of
2848 schedule_block (). */
2849 rm_other_notes (head, tail);
/* Without interblock scheduling, insns may not move between blocks,
   so the ready queue must drain inside the block.  */
2853 current_sched_info->queue_must_finish_empty
2854 = current_nr_blocks > 1 && !flag_schedule_interblock;
2856 schedule_block (b, rgn_n_insns);
2857 sched_rgn_n_insns += sched_n_insns;
2859 /* Update target block boundaries. */
2860 if (head == BLOCK_HEAD (b))
2861 BLOCK_HEAD (b) = current_sched_info->head;
2862 if (tail == BLOCK_END (b))
2863 BLOCK_END (b) = current_sched_info->tail;
2866 if (current_nr_blocks > 1)
2868 free (candidate_table);
2870 free (bitlst_table);
2874 /* Sanity check: verify that all region insns were scheduled. */
2875 if (sched_rgn_n_insns != rgn_n_insns)
2878 /* Restore line notes. */
2879 if (write_symbols != NO_DEBUG)
2881 for (bb = 0; bb < current_nr_blocks; bb++)
2884 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2885 restore_line_notes (head, tail);
2889 /* Done with this region. */
2890 free_pending_lists ();
2892 finish_deps_global ();
/* Release the interblock data allocated above.  */
2896 if (current_nr_blocks > 1)
2901 for (i = 0; i < current_nr_blocks; ++i)
2904 free (pot_split[i]);
2905 free (ancestor_edges[i]);
2911 free (ancestor_edges);
2915 /* Indexed by region, holds the number of death notes found in that region.
2916 Used for consistency checks. */
2917 static int *deaths_in_region;
2919 /* Initialize data structures for region scheduling. */
/* NOTE(review): the function header itself falls in a gap of this
   excerpt (presumably init_regions) -- confirm against the full file.
   The body allocates the region tables, chooses between single-block
   and interblock regions, and records per-region death-note counts
   for the consistency check done after scheduling.  */
2928 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2929 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2930 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2931 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2933 blocks = sbitmap_alloc (n_basic_blocks);
2935 /* Compute regions for scheduling. */
/* After reload, with a single block, or with interblock scheduling
   disabled, every region is exactly one basic block.  */
2936 if (reload_completed
2937 || n_basic_blocks == 1
2938 || !flag_schedule_interblock)
2940 find_single_block_region ();
2944 /* Verify that a 'good' control flow graph can be built. */
2945 if (is_cfg_nonregular ())
2947 find_single_block_region ();
2952 struct edge_list *edge_list;
2954 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
2956 /* The scheduler runs after flow; therefore, we can't blindly call
2957 back into find_basic_blocks since doing so could invalidate the
2958 info in global_live_at_start.
2960 Consider a block consisting entirely of dead stores; after life
2961 analysis it would be a block of NOTE_INSN_DELETED notes. If
2962 we call find_basic_blocks again, then the block would be removed
2963 entirely and invalidate the register live information.
2965 We could (should?) recompute register live information. Doing
2966 so may even be beneficial. */
2967 edge_list = create_edge_list ();
2969 /* Compute the dominators and post dominators. */
2970 calculate_dominance_info (NULL, dom, CDI_DOMINATORS);
2972 /* build_control_flow will return nonzero if it detects unreachable
2973 blocks or any other irregularity with the cfg which prevents
2974 cross block scheduling. */
2975 if (build_control_flow (edge_list) != 0)
2976 find_single_block_region ();
2978 find_rgns (edge_list, dom);
2980 if (sched_verbose >= 3)
2983 /* We are done with flow's edge list. */
2984 free_edge_list (edge_list);
2986 /* For now. This will move as more and more of haifa is converted
2987 to using the cfg code in flow.c. */
2992 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2994 /* Remove all death notes from the subroutine. */
2995 for (rgn = 0; rgn < nr_regions; rgn++)
/* Collect the blocks of this region into a bitmap, then count (and
   strip) the death notes they contain.  */
2999 sbitmap_zero (blocks);
3000 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
3001 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
3003 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
3006 sbitmap_free (blocks);
3009 /* The one entry point in this file. DUMP_FILE is the dump file for
3013 schedule_insns (dump_file)
3016 sbitmap large_region_blocks, blocks;
3018 int any_large_regions;
3020 /* Taking care of this degenerate case makes the rest of
3021 this code simpler. */
3022 if (n_basic_blocks == 0)
3028 sched_init (dump_file);
3032 current_sched_info = &region_sched_info;
3034 /* Schedule every region in the subroutine. */
3035 for (rgn = 0; rgn < nr_regions; rgn++)
3036 schedule_region (rgn);
3038 /* Update life analysis for the subroutine. Do single block regions
3039 first so that we can verify that live_at_start didn't change. Then
3040 do all other blocks. */
3041 /* ??? There is an outside possibility that update_life_info, or more
3042 to the point propagate_block, could get called with non-zero flags
3043 more than once for one basic block. This would be kinda bad if it
3044 were to happen, since REG_INFO would be accumulated twice for the
3045 block, and we'd have twice the REG_DEAD notes.
3047 I'm fairly certain that this _shouldn't_ happen, since I don't think
3048 that live_at_start should change at region heads. Not sure what the
3049 best way to test for this kind of thing... */
3051 allocate_reg_life_data ();
3052 compute_bb_for_insn (get_max_uid ());
3054 any_large_regions = 0;
3055 large_region_blocks = sbitmap_alloc (n_basic_blocks);
3056 sbitmap_ones (large_region_blocks);
3058 blocks = sbitmap_alloc (n_basic_blocks);
3060 for (rgn = 0; rgn < nr_regions; rgn++)
3061 if (RGN_NR_BLOCKS (rgn) > 1)
3062 any_large_regions = 1;
3065 sbitmap_zero (blocks);
3066 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3067 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3069 /* Don't update reg info after reload, since that affects
3070 regs_ever_live, which should not change after reload. */
3071 update_life_info (blocks, UPDATE_LIFE_LOCAL,
3072 (reload_completed ? PROP_DEATH_NOTES
3073 : PROP_DEATH_NOTES | PROP_REG_INFO));
3075 #ifndef HAVE_conditional_execution
3076 /* ??? REG_DEAD notes only exist for unconditional deaths. We need
3077 a count of the conditional plus unconditional deaths for this to
3079 /* In the single block case, the count of registers that died should
3080 not have changed during the schedule. */
3081 if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
3086 if (any_large_regions)
3088 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
3089 PROP_DEATH_NOTES | PROP_REG_INFO);
3092 /* Reposition the prologue and epilogue notes in case we moved the
3093 prologue/epilogue insns. */
3094 if (reload_completed)
3095 reposition_prologue_and_epilogue_notes (get_insns ());
3097 /* Delete redundant line notes. */
3098 if (write_symbols != NO_DEBUG)
3099 rm_redundant_line_notes ();
3103 if (reload_completed == 0 && flag_schedule_interblock)
3105 fprintf (sched_dump,
3106 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3114 fprintf (sched_dump, "\n\n");
3119 free (rgn_bb_table);
3121 free (containing_rgn);
3142 sbitmap_free (blocks);
3143 sbitmap_free (large_region_blocks);
3145 free (deaths_in_region);