1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 GNU CC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
58 #include "insn-config.h"
59 #include "insn-attr.h"
63 #include "sched-int.h"
65 #ifdef INSN_SCHEDULING
66 /* Some accessor macros for h_i_d members only used within this file. */
67 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
68 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
69 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
/* Upper bounds on region size (blocks / estimated insns), checked by
   too_large () to keep interblock scheduling's compile time in check.  */
71 #define MAX_RGN_BLOCKS 10
72 #define MAX_RGN_INSNS 100
74 /* nr_inter/spec counts interblock/speculative motion for the function. */
75 static int nr_inter, nr_spec;
77 /* Control flow graph edges are kept in circular lists. */
/* NOTE(review): the typedef of haifa_edge (a struct with from_block,
   to_block, next_in, next_out fields, per the accessors below) is not
   visible in this extraction -- verify against the complete file.  */
86 static haifa_edge *edge_table;
88 #define NEXT_IN(edge) (edge_table[edge].next_in)
89 #define NEXT_OUT(edge) (edge_table[edge].next_out)
90 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
91 #define TO_BLOCK(edge) (edge_table[edge].to_block)
93 /* Number of edges in the control flow graph. (In fact, larger than
94 that by 1, since edge 0 is unused.) */
/* NOTE(review): the declarations of nr_edges and of in_edges appear to be
   missing here (the extraction skips lines); IN_EDGES below indexes an
   in_edges array that must be declared alongside out_edges.  */
97 /* Circular list of incoming/outgoing edges of a block. */
99 static int *out_edges;
101 #define IN_EDGES(block) (in_edges[block])
102 #define OUT_EDGES(block) (out_edges[block])
/* Forward declarations for the CFG-construction helpers defined below.  */
104 static int is_cfg_nonregular PARAMS ((void));
105 static int build_control_flow PARAMS ((struct edge_list *));
106 static void new_edge PARAMS ((int, int));
108 /* A region is the main entity for interblock scheduling: insns
109 are allowed to move between blocks in the same region, along
110 control flow graph edges, in the 'up' direction. */
/* NOTE(review): the enclosing "typedef struct { ... } region;" wrapper is
   not visible in this extraction; the two fields below belong to it.  */
113 int rgn_nr_blocks; /* Number of blocks in region. */
114 int rgn_blocks; /* cblocks in the region (actually index in rgn_bb_table). */
118 /* Number of regions in the procedure. */
119 static int nr_regions;
121 /* Table of region descriptions. */
122 static region *rgn_table;
124 /* Array of lists of regions' blocks. */
125 static int *rgn_bb_table;
127 /* Topological order of blocks in the region (if b2 is reachable from
128 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
129 always referred to by either block or b, while its topological
130 order name (in the region) is referred to by bb. */
131 static int *block_to_bb;
133 /* The number of the region containing a block. */
134 static int *containing_rgn;
/* Accessors for the per-region and per-block tables above.  */
136 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
137 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
138 #define BLOCK_TO_BB(block) (block_to_bb[block])
139 #define CONTAINING_RGN(block) (containing_rgn[block])
141 void debug_regions PARAMS ((void));
142 static void find_single_block_region PARAMS ((void));
143 static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
144 static int too_large PARAMS ((int, int *, int *));
146 extern void debug_live PARAMS ((int, int));
148 /* Blocks of the current region being scheduled. */
149 static int current_nr_blocks;
150 static int current_blocks;
152 /* The mapping from bb to block. */
153 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
155 /* Bit vectors and bitset operations are needed for computations on
156 the control flow graph. */
158 typedef unsigned HOST_WIDE_INT *bitset;
/* NOTE(review): the "typedef struct { ... } bitlst;" wrapper for the two
   fields below is not visible in this extraction.  */
161 int *first_member; /* Pointer to the list start in bitlst_table. */
162 int nr_members; /* The number of members of the bit list. */
/* Shared backing storage for all bitlst lists; reused per extract_bitlst
   call (see extract_bitlst below).  */
166 static int bitlst_table_last;
167 static int bitlst_table_size;
168 static int *bitlst_table;
170 static char bitset_member PARAMS ((bitset, int, int));
171 static void extract_bitlst PARAMS ((bitset, int, int, bitlst *));
173 /* Target info declarations.
175 The block currently being scheduled is referred to as the "target" block,
176 while other blocks in the region from which insns can be moved to the
177 target are called "source" blocks. The candidate structure holds info
178 about such sources: are they valid? Speculative? Etc. */
179 typedef bitlst bblst;
/* NOTE(review): the "typedef struct { ... } candidate;" definition (fields
   is_valid, is_speculative, src_prob, split_bbs, update_bbs per the
   accessors below) is not visible in this extraction.  */
190 static candidate *candidate_table;
192 /* A speculative motion requires checking live information on the path
193 from 'source' to 'target'. The split blocks are those to be checked.
194 After a speculative motion, live information should be modified in
197 Lists of split and update blocks for each candidate of the current
198 target are in array bblst_table. */
199 static int *bblst_table, bblst_size, bblst_last;
201 #define IS_VALID(src) ( candidate_table[src].is_valid )
202 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
203 #define SRC_PROB(src) ( candidate_table[src].src_prob )
205 /* The bb being currently scheduled. */
206 static int target_bb;
209 typedef bitlst edgelst;
211 /* Target info functions. */
212 static void split_edges PARAMS ((int, int, edgelst *));
213 static void compute_trg_info PARAMS ((int));
214 void debug_candidate PARAMS ((int));
215 void debug_candidates PARAMS ((int));
217 /* Bit-set of bbs, where bit 'i' stands for bb 'i'. */
218 typedef bitset bbset;
220 /* Number of words of the bbset. */
221 static int bbset_size;
223 /* Dominators array: dom[i] contains the bbset of dominators of
224 bb i in the region. */
/* NOTE(review): the declarations of 'dom' and of the 'prob' array referred
   to below are not visible in this extraction.  */
227 /* bb 0 is the only region entry. */
228 #define IS_RGN_ENTRY(bb) (!bb)
230 /* Is bb_src dominated by bb_trg. */
231 #define IS_DOMINATED(bb_src, bb_trg) \
232 ( bitset_member (dom[bb_src], bb_trg, bbset_size) )
234 /* Probability: Prob[i] is a float in [0, 1] which is the probability
235 of bb i relative to the region entry. */
238 /* The probability of bb_src, relative to bb_trg. Note, that while the
239 'prob[bb]' is a float in [0, 1], this macro returns an integer
241 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
244 /* Bit-set of edges, where bit i stands for edge i. */
245 typedef bitset edgeset;
247 /* Number of edges in the region. */
248 static int rgn_nr_edges;
250 /* Array of size rgn_nr_edges. */
251 static int *rgn_edges;
253 /* Number of words in an edgeset. */
254 static int edgeset_size;
256 /* Number of bits in an edgeset. */
257 static int edgeset_bitsize;
259 /* Mapping from each edge in the graph to its number in the rgn. */
260 static int *edge_to_bit;
261 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
263 /* The split edges of a source bb is different for each target
264 bb. In order to compute this efficiently, the 'potential-split edges'
265 are computed for each bb prior to scheduling a region. This is actually
266 the split edges of each bb relative to the region entry.
268 pot_split[bb] is the set of potential split edges of bb. */
269 static edgeset *pot_split;
271 /* For every bb, a set of its ancestor edges. */
272 static edgeset *ancestor_edges;
274 static void compute_dom_prob_ps PARAMS ((int));
276 #define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
277 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
278 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
279 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
281 /* Parameters affecting the decision of rank_for_schedule().
282 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
283 #define MIN_DIFF_PRIORITY 2
284 #define MIN_PROBABILITY 40
285 #define MIN_PROB_DIFF 10
287 /* Speculative scheduling functions. */
288 static int check_live_1 PARAMS ((int, rtx));
289 static void update_live_1 PARAMS ((int, rtx));
290 static int check_live PARAMS ((rtx, int));
291 static void update_live PARAMS ((rtx, int));
292 static void set_spec_fed PARAMS ((rtx));
293 static int is_pfree PARAMS ((rtx, int, int));
294 static int find_conditional_protection PARAMS ((rtx, int));
295 static int is_conditionally_protected PARAMS ((rtx, int, int));
296 static int may_trap_exp PARAMS ((rtx, int));
297 static int haifa_classify_insn PARAMS ((rtx));
298 static int is_prisky PARAMS ((rtx, int, int));
299 static int is_exception_free PARAMS ((rtx, int, int));
/* Dependence computation and region-scheduling driver prototypes.  */
301 static void add_branch_dependences PARAMS ((rtx, rtx));
302 static void compute_block_backward_dependences PARAMS ((int));
303 void debug_dependencies PARAMS ((void));
305 static void init_regions PARAMS ((void));
306 static void schedule_region PARAMS ((int));
307 static void propagate_deps PARAMS ((int, struct deps *));
308 static void free_pending_lists PARAMS ((void));
310 /* Functions for construction of the control flow graph. */
312 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
314 We decide not to build the control flow graph if there is possibly more
315 than one entry to the function, if computed branches exist, or if we
316 have nonlocal gotos. */
/* NOTE(review): this extraction is missing lines -- the function header of
   is_cfg_nonregular, its local declarations, braces and the "return 1"
   statements after each test are not visible.  Verify against the
   complete file before editing.  */
325 /* If we have a label that could be the target of a nonlocal goto, then
326 the cfg is not well structured. */
327 if (nonlocal_goto_handler_labels)
330 /* If we have any forced labels, then the cfg is not well structured. */
334 /* If this function has a computed jump, then we consider the cfg
335 not well structured. */
336 if (current_function_has_computed_jump)
339 /* If we have exception handlers, then we consider the cfg not well
340 structured. ?!? We should be able to handle this now that flow.c
341 computes an accurate cfg for EH. */
342 if (exception_handler_labels)
345 /* If we have non-jumping insns which refer to labels, then we consider
346 the cfg not well structured. */
347 /* Check for labels referred to other than by jumps. */
348 for (b = 0; b < n_basic_blocks; b++)
349 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
351 code = GET_CODE (insn);
352 if (GET_RTX_CLASS (code) == 'i' && code != JUMP_INSN)
354 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
357 && ! (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
358 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
363 if (insn == BLOCK_END (b))
367 /* All the tests passed. Consider the cfg well structured. */
371 /* Build the control flow graph and set nr_edges.
373 Instead of trying to build a cfg ourselves, we rely on flow to
374 do it for us. Stamp out useless code (and bug) duplication.
376 Return nonzero if an irregularity in the cfg is found which would
377 prevent cross block scheduling. */
/* NOTE(review): lines are missing from this extraction (return type,
   braces, the 'unreachable' flagging inside the first loop, and the
   final nr_edges computation / return).  Verify against the complete
   file before editing.  */
380 build_control_flow (edge_list)
381 struct edge_list *edge_list;
383 int i, unreachable, num_edges;
385 /* This already accounts for entry/exit edges. */
386 num_edges = NUM_EDGES (edge_list);
388 /* Unreachable loops with more than one basic block are detected
389 during the DFS traversal in find_rgns.
391 Unreachable loops with a single block are detected here. This
392 test is redundant with the one in find_rgns, but it's much
393 cheaper to go ahead and catch the trivial case here. */
395 for (i = 0; i < n_basic_blocks; i++)
397 basic_block b = BASIC_BLOCK (i);
400 || (b->pred->src == b
401 && b->pred->pred_next == NULL))
405 /* ??? We can kill these soon. */
406 in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
407 out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
408 edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
/* Record every edge except those touching the artificial entry/exit
   blocks; new_edge also filters duplicates.  */
411 for (i = 0; i < num_edges; i++)
413 edge e = INDEX_EDGE (edge_list, i);
415 if (e->dest != EXIT_BLOCK_PTR
416 && e->src != ENTRY_BLOCK_PTR)
417 new_edge (e->src->index, e->dest->index);
420 /* Increment by 1, since edge 0 is unused. */
426 /* Record an edge in the control flow graph from SOURCE to TARGET.
428 In theory, this is redundant with the s_succs computed above, but
429 we have not converted all of haifa to use information from the
433 new_edge (source, target)
/* NOTE(review): lines are missing from this extraction (parameter
   declarations, braces, the loop that scans for duplicates, the
   allocation of edge number 'e' and of 'next_edge', and the early
   return on a duplicate edge).  Verify against the complete file.  */
437 int curr_edge, fst_edge;
439 /* Check for duplicates. */
440 fst_edge = curr_edge = OUT_EDGES (source);
443 if (FROM_BLOCK (curr_edge) == source
444 && TO_BLOCK (curr_edge) == target)
449 curr_edge = NEXT_OUT (curr_edge);
451 if (fst_edge == curr_edge)
457 FROM_BLOCK (e) = source;
458 TO_BLOCK (e) = target;
/* Splice the new edge into SOURCE's circular out-edge list...  */
460 if (OUT_EDGES (source))
462 next_edge = NEXT_OUT (OUT_EDGES (source));
463 NEXT_OUT (OUT_EDGES (source)) = e;
464 NEXT_OUT (e) = next_edge;
468 OUT_EDGES (source) = e;
/* ...and likewise into TARGET's circular in-edge list.  */
472 if (IN_EDGES (target))
474 next_edge = NEXT_IN (IN_EDGES (target));
475 NEXT_IN (IN_EDGES (target)) = e;
476 NEXT_IN (e) = next_edge;
480 IN_EDGES (target) = e;
485 /* BITSET macros for operations on the control flow graph. */
/* NOTE(review): the original declared a local loop index (e.g.
   "register int i;") inside each do-block below; those lines are not
   visible in this extraction, so the macros appear to use an undeclared
   'i'.  Likewise, BITSET_ADD/BITSET_REMOVE are missing their do/while
   wrappers and the out-of-range abort branch.  Verify against the
   complete file before editing.  */
487 /* Compute bitwise union of two bitsets. */
488 #define BITSET_UNION(set1, set2, len) \
489 do { register bitset tp = set1, sp = set2; \
491 for (i = 0; i < len; i++) \
492 *(tp++) |= *(sp++); } while (0)
494 /* Compute bitwise intersection of two bitsets. */
495 #define BITSET_INTER(set1, set2, len) \
496 do { register bitset tp = set1, sp = set2; \
498 for (i = 0; i < len; i++) \
499 *(tp++) &= *(sp++); } while (0)
501 /* Compute bitwise difference of two bitsets. */
502 #define BITSET_DIFFER(set1, set2, len) \
503 do { register bitset tp = set1, sp = set2; \
505 for (i = 0; i < len; i++) \
506 *(tp++) &= ~*(sp++); } while (0)
508 /* Inverts every bit of bitset 'set'. */
509 #define BITSET_INVERT(set, len) \
510 do { register bitset tmpset = set; \
512 for (i = 0; i < len; i++, tmpset++) \
513 *tmpset = ~*tmpset; } while (0)
515 /* Turn on the index'th bit in bitset set. */
516 #define BITSET_ADD(set, index, len) \
518 if (index >= HOST_BITS_PER_WIDE_INT * len) \
521 set[index/HOST_BITS_PER_WIDE_INT] |= \
522 ((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT); \
525 /* Turn off the index'th bit in set. */
526 #define BITSET_REMOVE(set, index, len) \
528 if (index >= HOST_BITS_PER_WIDE_INT * len) \
531 set[index/HOST_BITS_PER_WIDE_INT] &= \
532 ~(((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT)); \
535 /* Check if the index'th bit in bitset set is on. */
/* NOTE(review): the "static char" return type line, parameter
   declarations, braces, and the out-of-range abort are not visible in
   this extraction.  Verify against the complete file.  */
538 bitset_member (set, index, len)
542 if (index >= HOST_BITS_PER_WIDE_INT * len)
544 return ((set[index / HOST_BITS_PER_WIDE_INT] &
545 ((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT))
549 /* Translate a bit-set SET to a list BL of the bit-set members. */
/* NOTE(review): missing lines in this extraction include the function's
   return type, parameter declarations, locals (i, j, offset), the
   assignment of 'word' from set[i], the word-shifting inside the inner
   loop, and the final bl->nr_members store.  Verify against the
   complete file.  */
552 extract_bitlst (set, len, bitlen, bl)
559 unsigned HOST_WIDE_INT word;
561 /* bblst table space is reused in each call to extract_bitlst. */
562 bitlst_table_last = 0;
564 bl->first_member = &bitlst_table[bitlst_table_last];
567 /* Iterate over each word in the bitset. */
568 for (i = 0; i < len; i++)
571 offset = i * HOST_BITS_PER_WIDE_INT;
573 /* Iterate over each bit in the word, but do not
574 go beyond the end of the defined bits. */
575 for (j = 0; offset < bitlen && word; j++)
579 bitlst_table[bitlst_table_last++] = offset;
589 /* Functions for the construction of regions. */
591 /* Print the regions, for debugging purposes. Callable from debugger. */
/* NOTE(review): the "void debug_regions ()" header, locals (rgn, bb),
   braces, and the error message printed when the bb/block round-trip
   check fails are not visible in this extraction.  */
598 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
599 for (rgn = 0; rgn < nr_regions; rgn++)
601 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
602 rgn_table[rgn].rgn_nr_blocks);
603 fprintf (sched_dump, ";;\tbb/block: ");
605 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
607 current_blocks = RGN_BLOCKS (rgn);
/* Sanity check: BLOCK_TO_BB and BB_TO_BLOCK must be inverses.  */
609 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
612 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
615 fprintf (sched_dump, "\n\n");
619 /* Build a single block region for each basic block in the function.
620 This allows for using the same code for interblock and basic block
/* NOTE(review): missing from this extraction: the "static void" return
   type, braces, the local 'i', and the assignments to rgn_bb_table /
   RGN_BLOCKS / BLOCK_TO_BB that accompany the lines below.  */
624 find_single_block_region ()
628 for (i = 0; i < n_basic_blocks; i++)
631 RGN_NR_BLOCKS (i) = 1;
633 CONTAINING_RGN (i) = i;
636 nr_regions = n_basic_blocks;
639 /* Update number of blocks and the estimate for number of insns
640 in the region. Return 1 if the region is "too large" for interblock
641 scheduling (compile time considerations), otherwise return 0. */
/* NOTE(review): the return type, the "(*num_bbs)++" update, braces, and
   the "return 1;" / "return 0;" statements are not visible in this
   extraction.  */
644 too_large (block, num_bbs, num_insns)
645 int block, *num_bbs, *num_insns;
/* Approximate the block's insn count by the spread of its luids.  */
648 (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
649 INSN_LUID (BLOCK_HEAD (block)));
650 if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
656 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
657 is still an inner loop. Put in max_hdr[blk] the header of the most inner
658 loop containing blk. */
/* NOTE(review): the "do {" opener, the braces around the second else-if
   body, and the closing "} while (0)" of this macro are not visible in
   this extraction.  Verify against the complete file.  */
659 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
661 if (max_hdr[blk] == -1) \
662 max_hdr[blk] = hdr; \
663 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
664 RESET_BIT (inner, hdr); \
665 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
667 RESET_BIT (inner,max_hdr[blk]); \
668 max_hdr[blk] = hdr; \
672 /* Find regions for interblock scheduling.
674 A region for scheduling can be:
676 * A loop-free procedure, or
678 * A reducible inner loop, or
680 * A basic block not contained in any other region.
682 ?!? In theory we could build other regions based on extended basic
683 blocks or reverse extended basic blocks. Is it worth the trouble?
685 Loop blocks that form a region are put into the region's block list
686 in topological order.
688 This procedure stores its results into the following global (ick) variables
696 We use dominator relationships to avoid making regions out of non-reducible
699 This procedure needs to be converted to work on pred/succ lists instead
700 of edge tables. That would simplify it somewhat. */
/* NOTE(review): this function is reproduced from a partial extraction.
   Many original lines are missing: the return type, several declarations
   (queue, degree, the sbitmaps header/inner/passed/in_queue/in_stack, the
   edge pointer 'e'), most braces, and various statements (max_hdr
   initialization, the degree copy into dfs_nr, the unreachable-block
   bail-out, head/tail initialization, and cleanup/frees at the end).
   Verify everything below against the complete file before editing.  */
703 find_rgns (edge_list, dom)
704 struct edge_list *edge_list;
707 int *max_hdr, *dfs_nr, *stack, *degree;
709 int node, child, loop_head, i, head, tail;
710 int count = 0, sp, idx = 0, current_edge = out_edges[0];
711 int num_bbs, num_insns, unreachable;
712 int too_large_failure;
714 /* Note if an edge has been passed. */
717 /* Note if a block is a natural loop header. */
720 /* Note if a block is a natural inner loop header. */
723 /* Note if a block is in the block queue. */
726 /* Note if a block is in the block queue. (NOTE(review): duplicated
comment in the original; given the allocations below this one likely
describes the DFS in_stack bitmap.) */
729 int num_edges = NUM_EDGES (edge_list);
731 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
732 and a mapping from block to its loop header (if the block is contained
735 Store results in HEADER, INNER, and MAX_HDR respectively, these will
736 be used as inputs to the second traversal.
738 STACK, SP and DFS_NR are only used during the first traversal. */
740 /* Allocate and initialize variables for the first traversal. */
741 max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
742 dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
743 stack = (int *) xmalloc (nr_edges * sizeof (int));
745 inner = sbitmap_alloc (n_basic_blocks);
746 sbitmap_ones (inner);
748 header = sbitmap_alloc (n_basic_blocks);
749 sbitmap_zero (header);
751 passed = sbitmap_alloc (nr_edges);
752 sbitmap_zero (passed);
754 in_queue = sbitmap_alloc (n_basic_blocks);
755 sbitmap_zero (in_queue);
757 in_stack = sbitmap_alloc (n_basic_blocks);
758 sbitmap_zero (in_stack);
760 for (i = 0; i < n_basic_blocks; i++)
763 /* DFS traversal to find inner loops in the cfg. */
768 if (current_edge == 0 || TEST_BIT (passed, current_edge))
770 /* We have reached a leaf node or a node that was already
771 processed. Pop edges off the stack until we find
772 an edge that has not yet been processed. */
774 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
776 /* Pop entry off the stack. */
777 current_edge = stack[sp--];
778 node = FROM_BLOCK (current_edge);
779 child = TO_BLOCK (current_edge);
780 RESET_BIT (in_stack, child);
781 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
782 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
783 current_edge = NEXT_OUT (current_edge);
786 /* See if have finished the DFS tree traversal. */
787 if (sp < 0 && TEST_BIT (passed, current_edge))
790 /* Nope, continue the traversal with the popped node. */
794 /* Process a node. */
795 node = FROM_BLOCK (current_edge);
796 child = TO_BLOCK (current_edge);
797 SET_BIT (in_stack, node);
798 dfs_nr[node] = ++count;
800 /* If the successor is in the stack, then we've found a loop.
801 Mark the loop, if it is not a natural loop, then it will
802 be rejected during the second traversal. */
803 if (TEST_BIT (in_stack, child))
806 SET_BIT (header, child);
807 UPDATE_LOOP_RELATIONS (node, child);
808 SET_BIT (passed, current_edge);
809 current_edge = NEXT_OUT (current_edge);
813 /* If the child was already visited, then there is no need to visit
814 it again. Just update the loop relationships and restart
818 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
819 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
820 SET_BIT (passed, current_edge);
821 current_edge = NEXT_OUT (current_edge);
825 /* Push an entry on the stack and continue DFS traversal. */
826 stack[++sp] = current_edge;
827 SET_BIT (passed, current_edge);
828 current_edge = OUT_EDGES (child);
830 /* This is temporary until haifa is converted to use rth's new
831 cfg routines which have true entry/exit blocks and the
832 appropriate edges from/to those blocks.
834 Generally we update dfs_nr for a node when we process its
835 out edge. However, if the node has no out edge then we will
836 not set dfs_nr for that node. This can confuse the scheduler
837 into thinking that we have unreachable blocks, which in turn
838 disables cross block scheduling.
840 So, if we have a node with no out edges, go ahead and mark it
842 if (current_edge == 0)
843 dfs_nr[child] = ++count;
846 /* Another check for unreachable blocks. The earlier test in
847 is_cfg_nonregular only finds unreachable blocks that do not
850 The DFS traversal will mark every block that is reachable from
851 the entry node by placing a nonzero value in dfs_nr. Thus if
852 dfs_nr is zero for any block, then it must be unreachable. */
854 for (i = 0; i < n_basic_blocks; i++)
861 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
862 to hold degree counts. */
865 for (i = 0; i < n_basic_blocks; i++)
867 for (i = 0; i < num_edges; i++)
869 edge e = INDEX_EDGE (edge_list, i);
871 if (e->dest != EXIT_BLOCK_PTR)
872 degree[e->dest->index]++;
875 /* Do not perform region scheduling if there are any unreachable
884 /* Second traversal: find reducible inner loops and topologically sort
885 block of each region. */
887 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
889 /* Find blocks which are inner loop headers. We still have non-reducible
890 loops to consider at this point. */
891 for (i = 0; i < n_basic_blocks; i++)
893 if (TEST_BIT (header, i) && TEST_BIT (inner, i))
898 /* Now check that the loop is reducible. We do this separate
899 from finding inner loops so that we do not find a reducible
900 loop which contains an inner non-reducible loop.
902 A simple way to find reducible/natural loops is to verify
903 that each block in the loop is dominated by the loop
906 If there exists a block that is not dominated by the loop
907 header, then the block is reachable from outside the loop
908 and thus the loop is not a natural loop. */
909 for (j = 0; j < n_basic_blocks; j++)
911 /* First identify blocks in the loop, except for the loop
913 if (i == max_hdr[j] && i != j)
915 /* Now verify that the block is dominated by the loop
917 if (!TEST_BIT (dom[j], i))
922 /* If we exited the loop early, then I is the header of
923 a non-reducible loop and we should quit processing it
925 if (j != n_basic_blocks)
928 /* I is a header of an inner loop, or block 0 in a subroutine
929 with no loops at all. */
931 too_large_failure = 0;
932 loop_head = max_hdr[i];
934 /* Decrease degree of all I's successors for topological
936 for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
937 if (e->dest != EXIT_BLOCK_PTR)
938 --degree[e->dest->index];
940 /* Estimate # insns, and count # blocks in the region. */
942 num_insns = (INSN_LUID (BLOCK_END (i))
943 - INSN_LUID (BLOCK_HEAD (i)));
945 /* Find all loop latches (blocks with back edges to the loop
946 header), or all the leaf blocks if the cfg has no loops.
948 Place those blocks into the queue. */
951 for (j = 0; j < n_basic_blocks; j++)
952 /* Leaf nodes have only a single successor which must
954 if (BASIC_BLOCK (j)->succ
955 && BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
956 && BASIC_BLOCK (j)->succ->succ_next == NULL)
959 SET_BIT (in_queue, j);
961 if (too_large (j, &num_bbs, &num_insns))
963 too_large_failure = 1;
972 for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
974 if (e->src == ENTRY_BLOCK_PTR)
977 node = e->src->index;
979 if (max_hdr[node] == loop_head && node != i)
981 /* This is a loop latch. */
982 queue[++tail] = node;
983 SET_BIT (in_queue, node);
985 if (too_large (node, &num_bbs, &num_insns))
987 too_large_failure = 1;
994 /* Now add all the blocks in the loop to the queue.
996 We know the loop is a natural loop; however the algorithm
997 above will not always mark certain blocks as being in the
1005 The algorithm in the DFS traversal may not mark B & D as part
1006 of the loop (ie they will not have max_hdr set to A).
1008 We know they can not be loop latches (else they would have
1009 had max_hdr set since they'd have a backedge to a dominator
1010 block). So we don't need them on the initial queue.
1012 We know they are part of the loop because they are dominated
1013 by the loop header and can be reached by a backwards walk of
1014 the edges starting with nodes on the initial queue.
1016 It is safe and desirable to include those nodes in the
1017 loop/scheduling region. To do so we would need to decrease
1018 the degree of a node if it is the target of a backedge
1019 within the loop itself as the node is placed in the queue.
1021 We do not do this because I'm not sure that the actual
1022 scheduling code will properly handle this case. ?!? */
1024 while (head < tail && !too_large_failure)
1027 child = queue[++head];
1029 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
1031 node = e->src->index;
1033 /* See discussion above about nodes not marked as in
1034 this loop during the initial DFS traversal. */
1035 if (e->src == ENTRY_BLOCK_PTR
1036 || max_hdr[node] != loop_head)
1041 else if (!TEST_BIT (in_queue, node) && node != i)
1043 queue[++tail] = node;
1044 SET_BIT (in_queue, node);
1046 if (too_large (node, &num_bbs, &num_insns))
1048 too_large_failure = 1;
1055 if (tail >= 0 && !too_large_failure)
1057 /* Place the loop header into list of region blocks. */
1059 rgn_bb_table[idx] = i;
1060 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1061 RGN_BLOCKS (nr_regions) = idx++;
1062 CONTAINING_RGN (i) = nr_regions;
1063 BLOCK_TO_BB (i) = count = 0;
1065 /* Remove blocks from queue[] when their in degree
1066 becomes zero. Repeat until no blocks are left on the
1067 list. This produces a topological list of blocks in
1073 child = queue[head];
1074 if (degree[child] == 0)
1079 rgn_bb_table[idx++] = child;
1080 BLOCK_TO_BB (child) = ++count;
1081 CONTAINING_RGN (child) = nr_regions;
1082 queue[head] = queue[tail--];
1084 for (e = BASIC_BLOCK (child)->succ;
1087 if (e->dest != EXIT_BLOCK_PTR)
1088 --degree[e->dest->index];
1100 /* Any block that did not end up in a region is placed into a region
1102 for (i = 0; i < n_basic_blocks; i++)
1105 rgn_bb_table[idx] = i;
1106 RGN_NR_BLOCKS (nr_regions) = 1;
1107 RGN_BLOCKS (nr_regions) = idx++;
1108 CONTAINING_RGN (i) = nr_regions++;
1109 BLOCK_TO_BB (i) = 0;
1122 /* Functions for regions scheduling information. */
1124 /* Compute dominators, probability, and potential-split-edges of bb.
1125 Assume that these values were already computed for bb's predecessors. */
/* NOTE(review): this extraction is missing lines -- the function's return
   type and parameter declaration, braces, the early return for the region
   entry (which also sets prob[bb]), the do/while structure over the
   incoming edges, and the increments of nr_out_edges/nr_rgn_out_edges.
   Verify against the complete file before editing.  */
1128 compute_dom_prob_ps (bb)
1131 int nxt_in_edge, fst_in_edge, pred;
1132 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1135 if (IS_RGN_ENTRY (bb))
1137 BITSET_ADD (dom[bb], 0, bbset_size);
1142 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1144 /* Initialize dom[bb] to '111..1'. */
1145 BITSET_INVERT (dom[bb], bbset_size);
/* For each incoming edge: intersect dominators with the predecessor's,
   and accumulate the predecessor's ancestor edges plus this edge.  */
1149 pred = FROM_BLOCK (nxt_in_edge);
1150 BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);
1152 BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
1155 BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);
1158 nr_rgn_out_edges = 0;
1159 fst_out_edge = OUT_EDGES (pred);
1160 nxt_out_edge = NEXT_OUT (fst_out_edge);
1161 BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
1164 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);
1166 /* The successor doesn't belong in the region? */
1167 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1168 CONTAINING_RGN (BB_TO_BLOCK (bb)))
/* Walk the rest of pred's circular out-edge list.  */
1171 while (fst_out_edge != nxt_out_edge)
1174 /* The successor doesn't belong in the region? */
1175 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1176 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1178 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
1179 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1183 /* Now nr_rgn_out_edges is the number of region-exit edges from
1184 pred, and nr_out_edges will be the number of pred out edges
1185 not leaving the region. */
1186 nr_out_edges -= nr_rgn_out_edges;
/* Edges leaving the region are penalized with a 0.9 factor when
   propagating the predecessor's probability.  */
1187 if (nr_rgn_out_edges > 0)
1188 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1190 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1191 nxt_in_edge = NEXT_IN (nxt_in_edge);
1193 while (fst_in_edge != nxt_in_edge);
1195 BITSET_ADD (dom[bb], bb, bbset_size);
1196 BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);
1198 if (sched_verbose >= 2)
1199 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1200 (int) (100.0 * prob[bb]));
1203 /* Functions for target info. */
1205 /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1206 Note that bb_trg dominates bb_src. */
/* NOTE(review): missing lines in this extraction include the function's
   return type and parameter declarations, braces, the loop wrapper
   around the word-copy of pot_split[bb_src] into 'src' (a lone indexed
   copy is visible below), and the free of 'src'.  Verify against the
   complete file before editing.  */
1209 split_edges (bb_src, bb_trg, bl)
1214 int es = edgeset_size;
1215 edgeset src = (edgeset) xcalloc (es, sizeof (HOST_WIDE_INT));
1218 src[es] = (pot_split[bb_src])[es];
/* Split edges of bb_src w.r.t. bb_trg = pot_split[bb_src] minus
   pot_split[bb_trg]; convert the resulting bitset to a list.  */
1219 BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
1220 extract_bitlst (src, edgeset_size, edgeset_bitsize, bl);
1224 /* Find the valid candidate-source-blocks for the target block TRG, compute
1225 their probability, and check if they are speculative or not.
1226 For speculative sources, compute their update-blocks and split-blocks. */
1229 compute_trg_info (trg)
1232 register candidate *sp;
1234 int check_block, update_idx;
1235 int i, j, k, fst_edge, nxt_edge;
1237 /* Define some of the fields for the target bb as well. */
1238 sp = candidate_table + trg;
1240 sp->is_speculative = 0;
/* Only blocks after TRG in the region are candidate sources.  */
1243 for (i = trg + 1; i < current_nr_blocks; i++)
1245 sp = candidate_table + i;
1247 sp->is_valid = IS_DOMINATED (i, trg);
1250 sp->src_prob = GET_SRC_PROB (i, trg);
/* A candidate is only worth moving from if it is probable enough.  */
1251 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1256 split_edges (i, trg, &el);
/* Any split edge makes the motion speculative.  */
1257 sp->is_speculative = (el.nr_members) ? 1 : 0;
1258 if (sp->is_speculative && !flag_schedule_speculative)
1264 char *update_blocks;
1266 /* Compute split blocks and store them in bblst_table.
1267 The TO block of every split edge is a split block. */
1268 sp->split_bbs.first_member = &bblst_table[bblst_last];
1269 sp->split_bbs.nr_members = el.nr_members;
1270 for (j = 0; j < el.nr_members; bblst_last++, j++)
1271 bblst_table[bblst_last] =
1272 TO_BLOCK (rgn_edges[el.first_member[j]]);
1273 sp->update_bbs.first_member = &bblst_table[bblst_last];
1275 /* Compute update blocks and store them in bblst_table.
1276 For every split edge, look at the FROM block, and check
1277 all out edges. For each out edge that is not a split edge,
1278 add the TO block to the update block list. This list can end
1279 up with a lot of duplicates. We need to weed them out to avoid
1280 overrunning the end of the bblst_table. */
/* Seen-marker per basic block; alloca is fine here, lifetime is this
   iteration only.  */
1281 update_blocks = (char *) alloca (n_basic_blocks);
1282 memset (update_blocks, 0, n_basic_blocks);
1285 for (j = 0; j < el.nr_members; j++)
1287 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1288 fst_edge = nxt_edge = OUT_EDGES (check_block);
1291 if (! update_blocks[TO_BLOCK (nxt_edge)])
/* Linear scan: is this out edge itself one of the split edges?  */
1293 for (k = 0; k < el.nr_members; k++)
1294 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
/* k ran off the end => not a split edge => it's an update block.  */
1297 if (k >= el.nr_members)
1299 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1300 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1305 nxt_edge = NEXT_OUT (nxt_edge);
1307 while (fst_edge != nxt_edge);
1309 sp->update_bbs.nr_members = update_idx;
1311 /* Make sure we didn't overrun the end of bblst_table. */
1312 if (bblst_last > bblst_size)
/* Invalid/non-speculative fallback: clear the lists.  */
1317 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1319 sp->is_speculative = 0;
1325 /* Print candidates info, for debugging purposes. Callable from debugger. */
/* NOTE(review): the function header (debug_candidate (i)?) is elided from
   this listing; the body below dumps one candidate_table entry to
   sched_dump.  */
1331 if (!candidate_table[i].is_valid)
1334 if (candidate_table[i].is_speculative)
1337 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1339 fprintf (sched_dump, "split path: ");
1340 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1342 int b = candidate_table[i].split_bbs.first_member[j];
1344 fprintf (sched_dump, " %d ", b);
1346 fprintf (sched_dump, "\n");
1348 fprintf (sched_dump, "update path: ");
1349 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1351 int b = candidate_table[i].update_bbs.first_member[j];
1353 fprintf (sched_dump, " %d ", b);
1355 fprintf (sched_dump, "\n");
/* Non-speculative case: source is "equivalent" to the target.  */
1359 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1363 /* Print candidates info, for debugging purposes. Callable from debugger. */
/* Dump every candidate after target block TRG via debug_candidate.  */
1366 debug_candidates (trg)
1371 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1372 BB_TO_BLOCK (trg), trg);
1373 for (i = trg + 1; i < current_nr_blocks; i++)
1374 debug_candidate (i);
1377 /* Functions for speculative scheduling. */
1379 /* Return 0 if x is a set of a register alive in the beginning of one
1380 of the split-blocks of src, otherwise return 1. */
1383 check_live_1 (src, x)
1389 register rtx reg = SET_DEST (x);
/* Strip wrappers until we reach the underlying destination.  */
1394 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1395 || GET_CODE (reg) == SIGN_EXTRACT
1396 || GET_CODE (reg) == STRICT_LOW_PART)
1397 reg = XEXP (reg, 0);
/* A BLKmode PARALLEL destination: recurse into each element.  */
1399 if (GET_CODE (reg) == PARALLEL
1400 && GET_MODE (reg) == BLKmode)
1403 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1404 if (check_live_1 (src, XVECEXP (reg, 0, i)))
1409 if (GET_CODE (reg) != REG)
1412 regno = REGNO (reg);
1414 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1416 /* Global registers are assumed live. */
1421 if (regno < FIRST_PSEUDO_REGISTER)
1423 /* Check for hard registers. */
/* A hard register may span several consecutive regs; check each.  */
1424 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1427 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1429 int b = candidate_table[src].split_bbs.first_member[i];
1431 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1441 /* Check for pseudo registers. */
1442 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1444 int b = candidate_table[src].split_bbs.first_member[i];
1446 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1457 /* If x is a set of a register R, mark that R is alive in the beginning
1458 of every update-block of src. */
1461 update_live_1 (src, x)
1467 register rtx reg = SET_DEST (x);
/* Strip wrappers, mirroring check_live_1.  */
1472 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1473 || GET_CODE (reg) == SIGN_EXTRACT
1474 || GET_CODE (reg) == STRICT_LOW_PART)
1475 reg = XEXP (reg, 0);
1477 if (GET_CODE (reg) == PARALLEL
1478 && GET_MODE (reg) == BLKmode)
1481 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1482 update_live_1 (src, XVECEXP (reg, 0, i));
1486 if (GET_CODE (reg) != REG)
1489 /* Global registers are always live, so the code below does not apply
   to them. */
1492 regno = REGNO (reg);
1494 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1496 if (regno < FIRST_PSEUDO_REGISTER)
/* Hard register: mark every constituent reg live at each update block.  */
1498 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1501 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1503 int b = candidate_table[src].update_bbs.first_member[i];
1505 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
/* Pseudo register: single liveness bit per update block.  */
1512 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1514 int b = candidate_table[src].update_bbs.first_member[i];
1516 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1522 /* Return 1 if insn can be speculatively moved from block src to trg,
1523 otherwise return 0. Called before first insertion of insn to
1524 ready-list or before the scheduling. */
1527 check_live (insn, src)
1531 /* Find the registers set by instruction. */
1532 if (GET_CODE (PATTERN (insn)) == SET
1533 || GET_CODE (PATTERN (insn)) == CLOBBER)
1534 return check_live_1 (src, PATTERN (insn));
1535 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
/* PARALLEL: every SET/CLOBBER element must pass check_live_1.  */
1538 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1539 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1540 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1541 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1550 /* Update the live registers info after insn was moved speculatively from
1551 block src to trg. */
1554 update_live (insn, src)
1558 /* Find the registers set by instruction. */
1559 if (GET_CODE (PATTERN (insn)) == SET
1560 || GET_CODE (PATTERN (insn)) == CLOBBER)
1561 update_live_1 (src, PATTERN (insn));
1562 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
/* PARALLEL: propagate liveness for every SET/CLOBBER element.  */
1565 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1566 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1567 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1568 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1572 /* Exception Free Loads:
1574 We define five classes of speculative loads: IFREE, IRISKY,
1575 PFREE, PRISKY, and MFREE.
1577 IFREE loads are loads that are proved to be exception-free, just
1578 by examining the load insn. Examples for such loads are loads
1579 from TOC and loads of global data.
1581 IRISKY loads are loads that are proved to be exception-risky,
1582 just by examining the load insn. Examples for such loads are
1583 volatile loads and loads from shared memory.
1585 PFREE loads are loads for which we can prove, by examining other
1586 insns, that they are exception-free. Currently, this class consists
1587 of loads for which we are able to find a "similar load", either in
1588 the target block, or, if only one split-block exists, in that split
1589 block. Load2 is similar to load1 if both have same single base
1590 register. We identify only part of the similar loads, by finding
1591 an insn upon which both load1 and load2 have a DEF-USE dependence.
1593 PRISKY loads are loads for which we can prove, by examining other
1594 insns, that they are exception-risky. Currently we have two proofs for
1595 such loads. The first proof detects loads that are probably guarded by a
1596 test on the memory address. This proof is based on the
1597 backward and forward data dependence information for the region.
1598 Let load-insn be the examined load.
1599 Load-insn is PRISKY iff ALL the following hold:
1601 - insn1 is not in the same block as load-insn
1602 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
1603 - test-insn is either a compare or a branch, not in the same block
1605 - load-insn is reachable from test-insn
1606 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
1608 This proof might fail when the compare and the load are fed
1609 by an insn not in the region. To solve this, we will add to this
1610 group all loads that have no input DEF-USE dependence.
1612 The second proof detects loads that are directly or indirectly
1613 fed by a speculative load. This proof is affected by the
1614 scheduling process. We will use the flag fed_by_spec_load.
1615 Initially, all insns have this flag reset. After a speculative
1616 motion of an insn, if insn is either a load, or marked as
1617 fed_by_spec_load, we will also mark as fed_by_spec_load every
1618 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
1619 load which is fed_by_spec_load is also PRISKY.
1621 MFREE (maybe-free) loads are all the remaining loads. They may be
1622 exception-free, but we cannot prove it.
1624 Now, all loads in IFREE and PFREE classes are considered
1625 exception-free, while all loads in IRISKY and PRISKY classes are
1626 considered exception-risky. As for loads in the MFREE class,
1627 these are considered either exception-free or exception-risky,
1628 depending on whether we are pessimistic or optimistic. We have
1629 to take the pessimistic approach to assure the safety of
1630 speculative scheduling, but we can take the optimistic approach
1631 by invoking the -fsched-spec-load-dangerous option. */
/* Trap classes, ordered from safest to riskiest so that WORST_CLASS can
   pick the maximum.  See the "Exception Free Loads" comment above.  */
1633 enum INSN_TRAP_CLASS
1635 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
1636 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
/* Riskier of two trap classes; relies on the enum's safe-to-risky order.  */
1639 #define WORST_CLASS(class1, class2) \
1640 ((class1 > class2) ? class1 : class2)
1642 /* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
/* NOTE(review): the first operand line of this macro is elided from the
   listing (presumably `(bb_from == bb_to` and the bitset size argument).  */
1643 #define IS_REACHABLE(bb_from, bb_to) \
1645 || IS_RGN_ENTRY (bb_from) \
1646 || (bitset_member (ancestor_edges[bb_to], \
1647 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \
1650 /* Non-zero iff the address is comprised from at most 1 register. */
1651 #define CONST_BASED_ADDRESS_P(x) \
1652 (GET_CODE (x) == REG \
1653 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
1654 || (GET_CODE (x) == LO_SUM)) \
1655 && (GET_CODE (XEXP (x, 0)) == CONST_INT \
1656 || GET_CODE (XEXP (x, 1)) == CONST_INT)))
1658 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1661 set_spec_fed (load_insn)
/* Walk forward DEF-USE dependences (VOIDmode links) of LOAD_INSN.  */
1666 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1667 if (GET_MODE (link) == VOIDmode)
1668 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1669 } /* set_spec_fed */
1671 /* On the path from the insn to load_insn_bb, find a conditional
1672 branch depending on insn, that guards the speculative load. */
1675 find_conditional_protection (insn, load_insn_bb)
1681 /* Iterate through DEF-USE forward dependences. */
1682 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1684 rtx next = XEXP (link, 0);
/* NEXT qualifies if it is in the same region, on a path to
   LOAD_INSN_BB but not inside it, reached via a true (VOIDmode)
   dependence, and is either a branch or itself leads to one.  */
1685 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1686 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1687 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1688 && load_insn_bb != INSN_BB (next)
1689 && GET_MODE (link) == VOIDmode
1690 && (GET_CODE (next) == JUMP_INSN
1691 || find_conditional_protection (next, load_insn_bb)))
1695 } /* find_conditional_protection */
1697 /* Returns 1 if the same insn1 that participates in the computation
1698 of load_insn's address is feeding a conditional branch that is
1699 guarding on load_insn. This is true if we find the two DEF-USE
1701 insn1 -> ... -> conditional-branch
1702 insn1 -> ... -> load_insn,
1703 and if a flow path exists:
1704 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1705 and if insn1 is on the path
1706 region-entry -> ... -> bb_trg -> ... load_insn.
1708 Locate insn1 by climbing on LOG_LINKS from load_insn.
1709 Locate the branch by following INSN_DEPEND from insn1. */
1712 is_conditionally_protected (load_insn, bb_src, bb_trg)
1718 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1720 rtx insn1 = XEXP (link, 0);
1722 /* Must be a DEF-USE dependence upon non-branch. */
1723 if (GET_MODE (link) != VOIDmode
1724 || GET_CODE (insn1) == JUMP_INSN)
1727 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1728 if (INSN_BB (insn1) == bb_src
1729 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1730 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1731 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1732 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1735 /* Now search for the conditional-branch. */
1736 if (find_conditional_protection (insn1, bb_src))
1739 /* Recursive step: search another insn1, "above" current insn1. */
1740 return is_conditionally_protected (insn1, bb_src, bb_trg);
1743 /* The chain does not exist. */
1745 } /* is_conditionally_protected */
1747 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1748 load_insn can move speculatively from bb_src to bb_trg. All the
1749 following must hold:
1751 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1752 (2) load_insn and load1 have a def-use dependence upon
1753 the same insn 'insn1'.
1754 (3) either load2 is in bb_trg, or:
1755 - there's only one split-block, and
1756 - load1 is on the escape path, and
1758 From all these we can conclude that the two loads access memory
1759 addresses that differ at most by a constant, and hence if moving
1760 load_insn would cause an exception, it would have been caused by
1764 is_pfree (load_insn, bb_src, bb_trg)
1769 register candidate *candp = candidate_table + bb_src;
1771 if (candp->split_bbs.nr_members != 1)
1772 /* Must have exactly one escape block. */
/* Climb backward dependences of LOAD_INSN looking for the shared
   feeder INSN1.  */
1775 for (back_link = LOG_LINKS (load_insn);
1776 back_link; back_link = XEXP (back_link, 1))
1778 rtx insn1 = XEXP (back_link, 0);
1780 if (GET_MODE (back_link) == VOIDmode)
1782 /* Found a DEF-USE dependence (insn1, load_insn). */
/* Now scan INSN1's forward dependences for a similar load INSN2.  */
1785 for (fore_link = INSN_DEPEND (insn1);
1786 fore_link; fore_link = XEXP (fore_link, 1))
1788 rtx insn2 = XEXP (fore_link, 0);
1789 if (GET_MODE (fore_link) == VOIDmode)
1791 /* Found a DEF-USE dependence (insn1, insn2). */
1792 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1793 /* insn2 not guaranteed to be a 1 base reg load. */
1796 if (INSN_BB (insn2) == bb_trg)
1797 /* insn2 is the similar load, in the target block. */
1800 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1801 /* insn2 is a similar load, in a split-block. */
1808 /* Couldn't find a similar load. */
1812 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
1813 as found by analyzing insn's expression. */
1816 may_trap_exp (x, is_store)
1824 code = GET_CODE (x);
1834 /* The insn uses memory: a volatile load. */
1835 if (MEM_VOLATILE_P (x))
1837 /* An exception-free load. */
1838 if (!may_trap_p (x))
1840 /* A load with 1 base register, to be further checked. */
1841 if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
1842 return PFREE_CANDIDATE;
1843 /* No info on the load, to be further checked. */
1844 return PRISKY_CANDIDATE;
1849 int i, insn_class = TRAP_FREE;
1851 /* Neither store nor load, check if it may cause a trap. */
1854 /* Recursive step: walk the insn... */
1855 fmt = GET_RTX_FORMAT (code);
1856 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
/* Accumulate the worst class over all sub-expressions.  */
1860 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
1861 insn_class = WORST_CLASS (insn_class, tmp_class);
1863 else if (fmt[i] == 'E')
1866 for (j = 0; j < XVECLEN (x, i); j++)
1868 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
1869 insn_class = WORST_CLASS (insn_class, tmp_class);
/* Can't get any worse than TRAP_RISKY/IRISKY; stop early.  */
1870 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1874 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1881 /* Classifies insn for the purpose of verifying that it can be
1882 moved speculatively, by examining its patterns, returning:
1883 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
1884 TRAP_FREE: non-load insn.
1885 IFREE: load from a globally safe location.
1886 IRISKY: volatile load.
1887 PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
1888 being either PFREE or PRISKY. */
1891 haifa_classify_insn (insn)
1894 rtx pat = PATTERN (insn);
1895 int tmp_class = TRAP_FREE;
1896 int insn_class = TRAP_FREE;
1899 if (GET_CODE (pat) == PARALLEL)
1901 int i, len = XVECLEN (pat, 0);
/* Classify each element of the PARALLEL; keep the worst class.  */
1903 for (i = len - 1; i >= 0; i--)
1905 code = GET_CODE (XVECEXP (pat, 0, i));
1909 /* Test if it is a 'store'. */
1910 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
1913 /* Test if it is a store. */
1914 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
1915 if (tmp_class == TRAP_RISKY)
1917 /* Test if it is a load. */
1919 WORST_CLASS (tmp_class,
1920 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
/* Unrecognized element (default case, presumably) is risky.  */
1924 tmp_class = TRAP_RISKY;
1928 insn_class = WORST_CLASS (insn_class, tmp_class);
1929 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
/* Single-pattern insn: same logic, without the loop.  */
1935 code = GET_CODE (pat);
1939 /* Test if it is a 'store'. */
1940 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
1943 /* Test if it is a store. */
1944 tmp_class = may_trap_exp (SET_DEST (pat), 1);
1945 if (tmp_class == TRAP_RISKY)
1947 /* Test if it is a load. */
1949 WORST_CLASS (tmp_class,
1950 may_trap_exp (SET_SRC (pat), 0));
1954 tmp_class = TRAP_RISKY;
1958 insn_class = tmp_class;
1964 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1965 a load moved speculatively, or if load_insn is protected by
1966 a compare on load_insn's address). */
1969 is_prisky (load_insn, bb_src, bb_trg)
1973 if (FED_BY_SPEC_LOAD (load_insn))
/* No backward dependences at all: the feeder may be outside the
   region, so conservatively treat as prisky.  */
1976 if (LOG_LINKS (load_insn) == NULL)
1977 /* Dependence may 'hide' out of the region. */
1980 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1986 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1987 Return 1 if insn is exception-free (and the motion is valid)
1991 is_exception_free (insn, bb_src, bb_trg)
1995 int insn_class = haifa_classify_insn (insn);
1997 /* Handle non-load insns. */
2008 if (!flag_schedule_speculative_load)
2010 IS_LOAD_INSN (insn) = 1;
/* Switch over candidate classes (switch header elided in listing).  */
2017 case PFREE_CANDIDATE:
2018 if (is_pfree (insn, bb_src, bb_trg))
2020 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2021 case PRISKY_CANDIDATE:
2022 if (!flag_schedule_speculative_load_dangerous
2023 || is_prisky (insn, bb_src, bb_trg))
/* MFREE residue: free only under the dangerous-load option.  */
2029 return flag_schedule_speculative_load_dangerous;
2032 /* The number of insns from the current block scheduled so far. */
2033 static int sched_target_n_insns;
2034 /* The number of insns from the current block to be scheduled in total. */
2035 static int target_n_insns;
2036 /* The number of insns from the entire region scheduled so far. */
2037 static int sched_n_insns;
2038 /* Nonzero if the last scheduled insn was a jump. */
2039 static int last_was_jump;
2041 /* Implementations of the sched_info functions for region scheduling. */
/* Forward declarations for the region_sched_info callback table below.  */
2042 static void init_ready_list PARAMS ((struct ready_list *));
2043 static int can_schedule_ready_p PARAMS ((rtx));
2044 static int new_ready PARAMS ((rtx));
2045 static int schedule_more_p PARAMS ((void));
2046 static const char *rgn_print_insn PARAMS ((rtx, int));
2047 static int rgn_rank PARAMS ((rtx, rtx));
2048 static int contributes_to_priority PARAMS ((rtx, rtx));
2049 static void compute_jump_reg_dependencies PARAMS ((rtx, regset));
2051 /* Return nonzero if there are more insns that should be scheduled. */
/* Stop after a jump, or once every target-block insn is scheduled.
   (Function header elided in this listing.)  */
2056 return ! last_was_jump && sched_target_n_insns < target_n_insns;
2059 /* Add all insns that are initially ready to the ready list READY. Called
2060 once before scheduling a set of insns. */
2063 init_ready_list (ready)
2064 struct ready_list *ready;
2066 rtx prev_head = current_sched_info->prev_head;
2067 rtx next_tail = current_sched_info->next_tail;
2072 sched_target_n_insns = 0;
2076 /* Print debugging information. */
2077 if (sched_verbose >= 5)
2078 debug_dependencies ();
2080 /* Prepare current target block info. */
2081 if (current_nr_blocks > 1)
2083 candidate_table = (candidate *) xmalloc (current_nr_blocks
2084 * sizeof (candidate));
2087 /* bblst_table holds split blocks and update blocks for each block after
2088 the current one in the region. split blocks and update blocks are
2089 the TO blocks of region edges, so there can be at most rgn_nr_edges
2091 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
2092 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
2094 bitlst_table_last = 0;
2095 bitlst_table_size = rgn_nr_edges;
2096 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2098 compute_trg_info (target_bb);
2101 /* Initialize ready list with all 'ready' insns in target block.
2102 Count number of insns in the target block being scheduled. */
2103 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2107 if (! INSN_P (insn))
2109 next = NEXT_INSN (insn);
/* Ready = no unresolved dependences, and not the tail of a
   SCHED_GROUP (only the group head may enter the ready list).  */
2111 if (INSN_DEP_COUNT (insn) == 0
2112 && (SCHED_GROUP_P (next) == 0 || ! INSN_P (next)))
2113 ready_add (ready, insn);
2114 if (!(SCHED_GROUP_P (insn)))
2118 /* Add to ready list all 'ready' insns in valid source blocks.
2119 For speculative insns, check-live, exception-free, and
2121 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2122 if (IS_VALID (bb_src))
2128 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
2129 src_next_tail = NEXT_INSN (tail);
2132 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2134 if (! INSN_P (insn))
/* Speculative insns must also pass liveness, exception-freedom
   and an issue-delay limit before becoming ready.  */
2137 if (!CANT_MOVE (insn)
2138 && (!IS_SPECULATIVE_INSN (insn)
2139 || (insn_issue_delay (insn) <= 3
2140 && check_live (insn, bb_src)
2141 && is_exception_free (insn, bb_src, target_bb))))
2145 /* Note that we haven't squirreled away the notes for
2146 blocks other than the current. So if this is a
2147 speculative insn, NEXT might otherwise be a note. */
2148 next = next_nonnote_insn (insn);
2149 if (INSN_DEP_COUNT (insn) == 0
2151 || SCHED_GROUP_P (next) == 0
2152 || ! INSN_P (next)))
2153 ready_add (ready, insn);
2159 /* Called after taking INSN from the ready list. Returns nonzero if this
2160 insn can be scheduled, zero if we should silently discard it. */
2163 can_schedule_ready_p (insn)
2166 if (GET_CODE (insn) == JUMP_INSN)
2169 /* An interblock motion? */
2170 if (INSN_BB (insn) != target_bb)
2175 if (IS_SPECULATIVE_INSN (insn))
/* Re-check liveness: it may have changed since the insn was added
   to the ready list.  */
2177 if (!check_live (insn, INSN_BB (insn)))
2179 update_live (insn, INSN_BB (insn));
2181 /* For speculative load, mark insns fed by it. */
2182 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2183 set_spec_fed (insn);
2189 /* Find the beginning of the scheduling group. */
2190 /* ??? Ought to update basic block here, but later bits of
2191 schedule_block assumes the original insn block is
2195 while (SCHED_GROUP_P (temp))
2196 temp = PREV_INSN (temp);
2198 /* Update source block boundaries. */
2199 b1 = BLOCK_FOR_INSN (temp);
2200 if (temp == b1->head && insn == b1->end)
2202 /* We moved all the insns in the basic block.
2203 Emit a note after the last insn and update the
2204 begin/end boundaries to point to the note. */
2205 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
2209 else if (insn == b1->end)
2211 /* We took insns from the end of the basic block,
2212 so update the end of block boundary so that it
2213 points to the first insn we did not move. */
2214 b1->end = PREV_INSN (temp);
2216 else if (temp == b1->head)
2218 /* We took insns from the start of the basic block,
2219 so update the start of block boundary so that
2220 it points to the first insn we did not move. */
2221 b1->head = NEXT_INSN (insn);
2226 /* In block motion. */
2227 sched_target_n_insns++;
2234 /* Called after INSN has all its dependencies resolved. Return nonzero
2235 if it should be moved to the ready list or the queue, or zero if we
2236 should silently discard it. */
/* (Function header `new_ready (next)` elided in this listing.)  */
2241 /* For speculative insns, before inserting to ready/queue,
2242 check live, exception-free, and issue-delay. */
2243 if (INSN_BB (next) != target_bb
2244 && (!IS_VALID (INSN_BB (next))
2246 || (IS_SPECULATIVE_INSN (next)
2247 && (insn_issue_delay (next) > 3
2248 || !check_live (next, INSN_BB (next))
2249 || !is_exception_free (next, INSN_BB (next), target_bb)))))
2254 /* Return a string that contains the insn uid and optionally anything else
2255 necessary to identify this insn in an output. It's valid to use a
2256 static buffer for this. The ALIGNED parameter should cause the string
2257 to be formatted so that multiple output lines will line up nicely. */
2260 rgn_print_insn (insn, aligned)
/* Static buffer returned to the caller; not reentrant by design.  */
2264 static char tmp[80];
2267 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
/* Unaligned form: annotate with source bb only for interblock insns.  */
2270 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2271 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2273 sprintf (tmp, "%d", INSN_UID (insn));
2278 /* Compare priority of two insns. Return a positive number if the second
2279 insn is to be preferred for scheduling, and a negative one if the first
2280 is to be preferred. Zero if they are equally good. */
2283 rgn_rank (insn1, insn2)
2286 /* Some comparisons make sense in interblock scheduling only. */
2287 if (INSN_BB (insn1) != INSN_BB (insn2))
2289 int spec_val, prob_val;
2291 /* Prefer an inblock motion on an interblock motion. */
2292 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2294 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2297 /* Prefer a useful motion on a speculative one. */
2298 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2302 /* Prefer a more probable (speculative) insn. */
2303 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2310 /* NEXT is an instruction that depends on INSN (a backward dependence);
2311 return nonzero if we should include this dependence in priority
   calculations. */
/* Only intra-block dependences contribute to priority.  */
2315 contributes_to_priority (next, insn)
2318 return BLOCK_NUM (next) == BLOCK_NUM (insn);
2321 /* INSN is a JUMP_INSN. Store the set of registers that must be considered
2322 to be set by this jump in SET. */
2325 compute_jump_reg_dependencies (insn, set)
2326 rtx insn ATTRIBUTE_UNUSED;
2327 regset set ATTRIBUTE_UNUSED;
2329 /* Nothing to do here, since we postprocess jumps in
2330 add_branch_dependences. */
2333 /* Used in schedule_insns to initialize current_sched_info for scheduling
2334 regions (or single basic blocks). */
/* Callback table (several initializer lines elided in this listing).  */
2336 static struct sched_info region_sched_info =
2339 can_schedule_ready_p,
2344 contributes_to_priority,
2345 compute_jump_reg_dependencies,
2352 /* Add dependences so that branches are scheduled to run last in their
2356 add_branch_dependences (head, tail)
2361 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
2362 to remain in order at the end of the block by adding dependencies and
2363 giving the last a high priority. There may be notes present, and
2364 prev_head may also be a note.
2366 Branches must obviously remain at the end. Calls should remain at the
2367 end since moving them results in worse register allocation. Uses remain
2368 at the end to ensure proper register allocation. cc0 setters remain
2369 at the end because they can't be moved away from their cc0 user. */
/* Walk backward from the block tail over the insns that must stay
   at the end.  */
2372 while (GET_CODE (insn) == CALL_INSN
2373 || GET_CODE (insn) == JUMP_INSN
2374 || (GET_CODE (insn) == INSN
2375 && (GET_CODE (PATTERN (insn)) == USE
2376 || GET_CODE (PATTERN (insn)) == CLOBBER
2378 || sets_cc0_p (PATTERN (insn))
2381 || GET_CODE (insn) == NOTE)
2383 if (GET_CODE (insn) != NOTE)
/* Avoid adding duplicate anti-dependences on LAST.  */
2386 && !find_insn_list (insn, LOG_LINKS (last)))
2388 add_dependence (last, insn, REG_DEP_ANTI);
2389 INSN_REF_COUNT (insn)++;
2392 CANT_MOVE (insn) = 1;
2395 /* Skip over insns that are part of a group.
2396 Make each insn explicitly depend on the previous insn.
2397 This ensures that only the group header will ever enter
2398 the ready queue (and, when scheduled, will automatically
2399 schedule the SCHED_GROUP_P block). */
2400 while (SCHED_GROUP_P (insn))
2402 rtx temp = prev_nonnote_insn (insn);
2403 add_dependence (insn, temp, REG_DEP_ANTI);
2408 /* Don't overrun the bounds of the basic block. */
2412 insn = PREV_INSN (insn);
2415 /* Make sure these insns are scheduled last in their block. */
2418 while (insn != head)
2420 insn = prev_nonnote_insn (insn);
/* Already referenced: the chain back to LAST is in place.  */
2422 if (INSN_REF_COUNT (insn) != 0)
2425 add_dependence (last, insn, REG_DEP_ANTI);
2426 INSN_REF_COUNT (insn) = 1;
2428 /* Skip over insns that are part of a group. */
2429 while (SCHED_GROUP_P (insn))
2430 insn = prev_nonnote_insn (insn);
2434 /* Data structures for the computation of data dependences in a region. We
2435 keep one `deps' structure for every basic block. Before analyzing the
2436 data dependences for a bb, its variables are initialized as a function of
2437 the variables of its predecessors. When the analysis for a bb completes,
2438 we save the contents to the corresponding bb_deps[bb] variable. */
2440 static struct deps *bb_deps;
2442 /* After computing the dependencies for block BB, propagate the dependencies
2443 found in TMP_DEPS to the successors of the block. */
2445 propagate_deps (bb, tmp_deps)
2447 struct deps *tmp_deps;
2449 int b = BB_TO_BLOCK (bb);
2452 rtx link_insn, link_mem;
2455 /* These lists should point to the right place, for correct
   freeing later. */
2457 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
2458 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
2459 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
2460 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
2462 /* bb's structures are inherited by its successors. */
2463 first_edge = e = OUT_EDGES (b);
/* Circular out-edge list: loop until we return to FIRST_EDGE.  */
2470 int b_succ = TO_BLOCK (e);
2471 int bb_succ = BLOCK_TO_BB (b_succ);
2472 struct deps *succ_deps = bb_deps + bb_succ;
2474 /* Only bbs "below" bb, in the same region, are interesting. */
2475 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2482 /* The reg_last lists are inherited by bb_succ. */
2483 EXECUTE_IF_SET_IN_REG_SET (&tmp_deps->reg_last_in_use, 0, reg,
2485 struct deps_reg *tmp_deps_reg = &tmp_deps->reg_last[reg];
2486 struct deps_reg *succ_deps_reg = &succ_deps->reg_last[reg];
/* Merge uses/sets/clobbers, skipping entries already present to
   avoid duplicate list nodes.  */
2488 for (u = tmp_deps_reg->uses; u; u = XEXP (u, 1))
2489 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->uses))
2491 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->uses);
2493 for (u = tmp_deps_reg->sets; u; u = XEXP (u, 1))
2494 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->sets))
2496 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->sets);
2498 for (u = tmp_deps_reg->clobbers; u; u = XEXP (u, 1))
2499 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->clobbers))
2500 succ_deps_reg->clobbers
2501 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->clobbers);
2503 IOR_REG_SET (&succ_deps->reg_last_in_use, &tmp_deps->reg_last_in_use);
2505 /* Mem read/write lists are inherited by bb_succ. */
/* The insn and mem lists run in parallel; advance both together.  */
2506 link_insn = tmp_deps->pending_read_insns;
2507 link_mem = tmp_deps->pending_read_mems;
2510 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2512 succ_deps->pending_read_insns,
2513 succ_deps->pending_read_mems)))
2514 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
2515 &succ_deps->pending_read_mems,
2516 XEXP (link_insn, 0), XEXP (link_mem, 0));
2517 link_insn = XEXP (link_insn, 1);
2518 link_mem = XEXP (link_mem, 1);
2521 link_insn = tmp_deps->pending_write_insns;
2522 link_mem = tmp_deps->pending_write_mems;
2525 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2527 succ_deps->pending_write_insns,
2528 succ_deps->pending_write_mems)))
2529 add_insn_mem_dependence (succ_deps,
2530 &succ_deps->pending_write_insns,
2531 &succ_deps->pending_write_mems,
2532 XEXP (link_insn, 0), XEXP (link_mem, 0));
2534 link_insn = XEXP (link_insn, 1);
2535 link_mem = XEXP (link_mem, 1);
2538 /* last_function_call is inherited by bb_succ. */
2539 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
2540 if (! find_insn_list (XEXP (u, 0), succ_deps->last_function_call))
2541 succ_deps->last_function_call
2542 = alloc_INSN_LIST (XEXP (u, 0), succ_deps->last_function_call);
2544 /* last_pending_memory_flush is inherited by bb_succ. */
2545 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
2546 if (! find_insn_list (XEXP (u, 0),
2547 succ_deps->last_pending_memory_flush))
2548 succ_deps->last_pending_memory_flush
2549 = alloc_INSN_LIST (XEXP (u, 0),
2550 succ_deps->last_pending_memory_flush);
2552 /* sched_before_next_call is inherited by bb_succ. */
2553 x = LOG_LINKS (tmp_deps->sched_before_next_call);
2554 for (; x; x = XEXP (x, 1))
2555 add_dependence (succ_deps->sched_before_next_call,
2556 XEXP (x, 0), REG_DEP_ANTI);
2560 while (e != first_edge);
2563 /* Compute backward dependences inside bb. In a multiple blocks region:
2564 (1) a bb is analyzed after its predecessors, and (2) the lists in
2565 effect at the end of bb (after analyzing for bb) are inherited by
2568 Specifically for reg-reg data dependences, the block insns are
2569 scanned by sched_analyze () top-to-bottom. Two lists are
2570 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2571 and reg_last[].uses for register USEs.
2573 When analysis is completed for bb, we update for its successors:
2574 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2575 ; - USES[succ] = Union (USES [succ], USES [bb])
2577 The mechanism for computing mem-mem data dependence is very
2578 similar, and the result is interblock dependences in the region. */
/* Compute the backward dependences (LOG_LINKS) for basic block BB:
   run sched_analyze over the block's insns, add the dependences that
   pin branches to the end of the block, and, in a multi-block region,
   propagate the resulting lists to BB's successors.
   NOTE(review): this extract is missing some original source lines
   (the function header's declarations and braces); the visible code
   is preserved verbatim.  */
2581 compute_block_backward_dependences (bb)
2585 struct deps tmp_deps;
/* Work on a copy so bb_deps[bb] keeps the state inherited from BB's
   predecessors intact.  */
2587 tmp_deps = bb_deps[bb];
2589 /* Do the analysis for this block. */
2590 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2591 sched_analyze (&tmp_deps, head, tail);
2592 add_branch_dependences (head, tail);
/* Only multi-block regions need interblock propagation; after reload
   every region is a single block (see file header comment).  */
2594 if (current_nr_blocks > 1)
2595 propagate_deps (bb, &tmp_deps);
2597 /* Free up the INSN_LISTs. */
2598 free_deps (&tmp_deps);
2601 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2602 them to the unused_*_list variables, so that they can be reused. */
2605 free_pending_lists ()
/* One deps structure exists per block of the current region.  */
2609 for (bb = 0; bb < current_nr_blocks; bb++)
/* Pending memory-reference lists come in pairs: the insns are
   INSN_LISTs, the corresponding MEMs are EXPR_LISTs.  */
2611 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2612 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2613 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2614 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2618 /* Print dependences for debugging, callable from debugger. */
2621 debug_dependencies ()
/* NOTE(review): several original lines (declarations, braces, some
   else-arms) are missing from this extract; visible code is kept
   verbatim.  Dumps, per region block, one line per insn with its
   scheduling attributes followed by its forward-dependence list.  */
2625 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2626 for (bb = 0; bb < current_nr_blocks; bb++)
2634 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2635 next_tail = NEXT_INSN (tail);
2636 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2637 BB_TO_BLOCK (bb), bb);
/* Column headers for the per-insn table below.  */
2639 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2640 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2641 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2642 "----", "----", "--", "---", "----", "----", "--------", "-----");
2643 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
/* Non-insns (e.g. notes) get a short descriptive line instead of the
   full attribute table.  */
2648 if (! INSN_P (insn))
2651 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2652 if (GET_CODE (insn) == NOTE)
2654 n = NOTE_LINE_NUMBER (insn);
2656 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2658 fprintf (sched_dump, "line %d, file %s\n", n,
2659 NOTE_SOURCE_FILE (insn));
2662 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
/* A real insn: query its function unit and blockage range (zero when
   the unit defines no blockage_range_function).  */
2666 unit = insn_unit (insn);
2668 || function_units[unit].blockage_range_function == 0) ? 0 :
2669 function_units[unit].blockage_range_function (insn);
2670 fprintf (sched_dump,
2671 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2672 (SCHED_GROUP_P (insn) ? "+" : " "),
2676 INSN_DEP_COUNT (insn),
2677 INSN_PRIORITY (insn),
2678 insn_cost (insn, 0, 0),
2679 (int) MIN_BLOCKAGE_COST (range),
2680 (int) MAX_BLOCKAGE_COST (range));
2681 insn_print_units (insn);
/* Finally the UIDs of all insns that depend on this one.  */
2682 fprintf (sched_dump, "\t: ");
2683 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2684 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2685 fprintf (sched_dump, "\n");
2689 fprintf (sched_dump, "\n");
2692 /* Schedule a region. A region is either an inner loop, a loop-free
2693 subroutine, or a single basic block. Each bb in the region is
2694 scheduled after its flow predecessors. */
2697 schedule_region (rgn)
/* NOTE(review): declarations, braces and some statements were dropped
   by the extraction; visible code is preserved verbatim.  */
2701 int rgn_n_insns = 0;
2702 int sched_rgn_n_insns = 0;
2704 /* Set variables for the current region. */
2705 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2706 current_blocks = RGN_BLOCKS (rgn);
2708 init_deps_global ();
2710 /* Initializations for region data dependence analysis. */
2711 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2712 for (bb = 0; bb < current_nr_blocks; bb++)
2713 init_deps (bb_deps + bb);
2715 /* Compute LOG_LINKS. */
2716 for (bb = 0; bb < current_nr_blocks; bb++)
2717 compute_block_backward_dependences (bb);
2719 /* Compute INSN_DEPEND. */
/* Walked back-to-front: forward dependences are derived from the
   backward (LOG_LINKS) information computed above.  */
2720 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2723 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2725 compute_forward_dependences (head, tail);
2728 /* Set priorities. */
2729 for (bb = 0; bb < current_nr_blocks; bb++)
2732 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
/* rgn_n_insns accumulates the region's insn count for the sanity
   check against sched_rgn_n_insns below.  */
2734 rgn_n_insns += set_priorities (head, tail);
2737 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2738 if (current_nr_blocks > 1)
2742 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
/* Bitset sizes are rounded up to whole HOST_WIDE_INT words.  */
2744 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
2745 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
2746 for (i = 0; i < current_nr_blocks; i++)
2747 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
/* First pass over the edges counts those belonging to this region
   and assigns each a bit position; the second pass records them.  */
2751 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2752 for (i = 1; i < nr_edges; i++)
2753 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2754 EDGE_TO_BIT (i) = rgn_nr_edges++;
2755 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2758 for (i = 1; i < nr_edges; i++)
2759 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2760 rgn_edges[rgn_nr_edges++] = i;
2763 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
2764 edgeset_bitsize = rgn_nr_edges;
2765 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2767 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2768 for (i = 0; i < current_nr_blocks; i++)
2771 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2773 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2776 /* Compute probabilities, dominators, split_edges. */
2777 for (bb = 0; bb < current_nr_blocks; bb++)
2778 compute_dom_prob_ps (bb);
2781 /* Now we can schedule all blocks. */
2782 for (bb = 0; bb < current_nr_blocks; bb++)
2785 int b = BB_TO_BLOCK (bb);
2787 get_block_head_tail (b, &head, &tail);
/* Blocks containing no real insns need no scheduling.  */
2789 if (no_real_insns_p (head, tail))
2792 current_sched_info->prev_head = PREV_INSN (head);
2793 current_sched_info->next_tail = NEXT_INSN (tail);
/* When emitting debug info, stash the line notes so scheduling can
   move insns freely; they are restored after the region is done.  */
2795 if (write_symbols != NO_DEBUG)
2797 save_line_notes (b, head, tail);
2798 rm_line_notes (head, tail);
2801 /* rm_other_notes only removes notes which are _inside_ the
2802 block---that is, it won't remove notes before the first real insn
2803 or after the last real insn of the block. So if the first insn
2804 has a REG_SAVE_NOTE which would otherwise be emitted before the
2805 insn, it is redundant with the note before the start of the
2806 block, and so we have to take it out.
2808 FIXME: Probably the same thing should be done with REG_SAVE_NOTEs
2809 referencing NOTE_INSN_SETJMP at the end of the block. */
2814 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2815 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2817 if (INTVAL (XEXP (note, 0)) != NOTE_INSN_SETJMP)
2819 remove_note (head, note);
2820 note = XEXP (note, 1);
2821 remove_note (head, note);
2824 note = XEXP (note, 1);
2828 /* Remove remaining note insns from the block, save them in
2829 note_list. These notes are restored at the end of
2830 schedule_block (). */
2831 rm_other_notes (head, tail);
/* With interblock scheduling disabled, insns from other blocks must
   not be left sitting in the ready queue at the end of a block.  */
2835 current_sched_info->queue_must_finish_empty
2836 = current_nr_blocks > 1 && !flag_schedule_interblock;
2838 schedule_block (b, rgn_n_insns);
2839 sched_rgn_n_insns += sched_n_insns;
2841 /* Update target block boundaries. */
2842 if (head == BLOCK_HEAD (b))
2843 BLOCK_HEAD (b) = current_sched_info->head;
2844 if (tail == BLOCK_END (b))
2845 BLOCK_END (b) = current_sched_info->tail;
2848 if (current_nr_blocks > 1)
2850 free (candidate_table);
2852 free (bitlst_table);
2856 /* Sanity check: verify that all region insns were scheduled. */
2857 if (sched_rgn_n_insns != rgn_n_insns)
2860 /* Restore line notes. */
2861 if (write_symbols != NO_DEBUG)
2863 for (bb = 0; bb < current_nr_blocks; bb++)
2866 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2867 restore_line_notes (head, tail);
2871 /* Done with this region. */
2872 free_pending_lists ();
2874 finish_deps_global ();
/* Release the interblock-only data allocated above.  */
2878 if (current_nr_blocks > 1)
2883 for (i = 0; i < current_nr_blocks; ++i)
2886 free (pot_split[i]);
2887 free (ancestor_edges[i]);
2893 free (ancestor_edges);
2897 /* Indexed by region, holds the number of death notes found in that region.
2898 Used for consistency checks. */
/* Allocated (nr_regions ints) and filled via count_or_remove_death_notes
   during region initialization; compared against a re-count and freed in
   schedule_insns.  */
2899 static int *deaths_in_region;
2901 /* Initialize data structures for region scheduling. */
/* NOTE(review): the function header and local declarations were dropped
   by the extraction (presumably `static void init_regions ()` — TODO
   confirm against the full source); visible code is kept verbatim.  */
2910 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2911 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2912 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2913 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2915 blocks = sbitmap_alloc (n_basic_blocks);
2917 /* Compute regions for scheduling. */
/* After reload, with one block, or with interblock scheduling
   disabled, every block is its own region.  */
2918 if (reload_completed
2919 || n_basic_blocks == 1
2920 || !flag_schedule_interblock)
2922 find_single_block_region ();
2926 /* Verify that a 'good' control flow graph can be built. */
2927 if (is_cfg_nonregular ())
2929 find_single_block_region ();
2934 struct edge_list *edge_list;
2936 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
2938 /* The scheduler runs after flow; therefore, we can't blindly call
2939 back into find_basic_blocks since doing so could invalidate the
2940 info in global_live_at_start.
2942 Consider a block consisting entirely of dead stores; after life
2943 analysis it would be a block of NOTE_INSN_DELETED notes. If
2944 we call find_basic_blocks again, then the block would be removed
2945 entirely and invalidate the register live information.
2947 We could (should?) recompute register live information. Doing
2948 so may even be beneficial. */
2949 edge_list = create_edge_list ();
2951 /* Compute the dominators and post dominators. */
2952 calculate_dominance_info (NULL, dom, CDI_DOMINATORS);
2954 /* build_control_flow will return nonzero if it detects unreachable
2955 blocks or any other irregularity with the cfg which prevents
2956 cross block scheduling. */
2957 if (build_control_flow (edge_list) != 0)
2958 find_single_block_region ();
2960 find_rgns (edge_list, dom);
2962 if (sched_verbose >= 3)
2965 /* We are done with flow's edge list. */
2966 free_edge_list (edge_list);
2968 /* For now. This will move as more and more of haifa is converted
2969 to using the cfg code in flow.c. */
2974 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2976 /* Remove all death notes from the subroutine. */
/* Record the death-note count per region; schedule_insns later
   re-counts and compares as a consistency check.  */
2977 for (rgn = 0; rgn < nr_regions; rgn++)
2981 sbitmap_zero (blocks);
2982 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2983 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2985 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2988 sbitmap_free (blocks);
2991 /* The one entry point in this file. DUMP_FILE is the dump file for
/* Schedules every region of the current function, then updates life
   analysis, repositions prologue/epilogue notes (after reload), and
   frees all region data structures.
   NOTE(review): several original lines (declarations, braces, some
   statements) are missing from this extract; apart from the encoding
   fix below the visible code is preserved verbatim.  */
2995 schedule_insns (dump_file)
2998 sbitmap large_region_blocks, blocks;
3000 int any_large_regions;
3002 /* Taking care of this degenerate case makes the rest of
3003 this code simpler. */
3004 if (n_basic_blocks == 0)
3010 sched_init (dump_file);
/* BUGFIX: the source had been corrupted by an HTML-entity mangling —
   "&reg" was collapsed into the (R) character, leaving the
   non-compiling token "(R)ion_sched_info".  Restore the address-of
   expression.  */
3014 current_sched_info = &region_sched_info;
3016 /* Schedule every region in the subroutine. */
3017 for (rgn = 0; rgn < nr_regions; rgn++)
3018 schedule_region (rgn);
3020 /* Update life analysis for the subroutine. Do single block regions
3021 first so that we can verify that live_at_start didn't change. Then
3022 do all other blocks. */
3023 /* ??? There is an outside possibility that update_life_info, or more
3024 to the point propagate_block, could get called with non-zero flags
3025 more than once for one basic block. This would be kinda bad if it
3026 were to happen, since REG_INFO would be accumulated twice for the
3027 block, and we'd have twice the REG_DEAD notes.
3029 I'm fairly certain that this _shouldn't_ happen, since I don't think
3030 that live_at_start should change at region heads. Not sure what the
3031 best way to test for this kind of thing... */
3033 allocate_reg_life_data ();
3034 compute_bb_for_insn (get_max_uid ());
/* large_region_blocks starts as all blocks; single-block regions are
   removed from it below, leaving only multi-block-region blocks.  */
3036 any_large_regions = 0;
3037 large_region_blocks = sbitmap_alloc (n_basic_blocks);
3038 sbitmap_ones (large_region_blocks);
3040 blocks = sbitmap_alloc (n_basic_blocks);
3042 for (rgn = 0; rgn < nr_regions; rgn++)
3043 if (RGN_NR_BLOCKS (rgn) > 1)
3044 any_large_regions = 1;
3047 sbitmap_zero (blocks);
3048 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3049 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3051 /* Don't update reg info after reload, since that affects
3052 regs_ever_live, which should not change after reload. */
3053 update_life_info (blocks, UPDATE_LIFE_LOCAL,
3054 (reload_completed ? PROP_DEATH_NOTES
3055 : PROP_DEATH_NOTES | PROP_REG_INFO));
3057 #ifndef HAVE_conditional_execution
3058 /* ??? REG_DEAD notes only exist for unconditional deaths. We need
3059 a count of the conditional plus unconditional deaths for this to
3061 /* In the single block case, the count of registers that died should
3062 not have changed during the schedule. */
3063 if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
3068 if (any_large_regions)
3070 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
3071 PROP_DEATH_NOTES | PROP_REG_INFO);
3074 /* Reposition the prologue and epilogue notes in case we moved the
3075 prologue/epilogue insns. */
3076 if (reload_completed)
3077 reposition_prologue_and_epilogue_notes (get_insns ());
3079 /* Delete redundant line notes. */
3080 if (write_symbols != NO_DEBUG)
3081 rm_redundant_line_notes ();
/* Interblock statistics are only meaningful before reload and with
   interblock scheduling enabled.  */
3085 if (reload_completed == 0 && flag_schedule_interblock)
3087 fprintf (sched_dump,
3088 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3096 fprintf (sched_dump, "\n\n");
/* Release all per-function region data.  */
3101 free (rgn_bb_table);
3103 free (containing_rgn);
3124 sbitmap_free (blocks);
3125 sbitmap_free (large_region_blocks);
3127 free (deaths_in_region);