1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.
14 GNU CC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.
19 You should have received a copy of the GNU General Public License
   along with GNU CC; see the file COPYING.  If not, write to the Free
   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA.  */
24 /* Instruction scheduling pass.
26 This pass implements list scheduling within basic blocks. It is
27 run twice: (1) after flow analysis, but before register allocation,
28 and (2) after register allocation.
30 The first run performs interblock scheduling, moving insns between
31 different blocks in the same "region", and the second runs only
32 basic block scheduling.
34 Interblock motions performed are useful motions and speculative
35 motions, including speculative loads. Motions requiring code
36 duplication are not supported. The identification of motion type
37 and the check for validity of speculative motions requires
38 construction and analysis of the function's control flow graph.
39 The scheduler works as follows:
41 We compute insn priorities based on data dependencies. Flow
42 analysis only creates a fraction of the data-dependencies we must
43 observe: namely, only those dependencies which the combiner can be
   expected to use.  For this pass, we must therefore create the
   remaining dependencies we need to observe.  Register dependencies,
   memory dependencies, dependencies to keep function calls in order,
   and the dependence between a conditional branch and the setting of
   condition codes are all dealt with here.
50 The scheduler first traverses the data flow graph, starting with
51 the last instruction, and proceeding to the first, assigning values
52 to insn_priority as it goes. This sorts the instructions
53 topologically by data dependence.
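
   As a small illustration (with hypothetical latencies, not taken
   from any machine description), consider

       insn 1:  r1 = mem[a]       latency 2
       insn 2:  r2 = r1 + r3      latency 1
       insn 3:  mem[b] = r2       latency 1

   Insn 3 has no successors, so its priority is its own cost, 1;
   insn 2 gets 1 + 1 = 2; insn 1 gets 2 + 2 = 4.  A higher priority
   thus means a longer remaining dependence path to the end of the
   block.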
55 Once priorities have been established, we order the insns using
56 list scheduling. This works as follows: starting with a list of
57 all the ready insns, and sorted according to priority number, we
58 schedule the insn from the end of the list by placing its
59 predecessors in the list according to their priority order. We
60 consider this insn scheduled by setting the pointer to the "end" of
61 the list to point to the previous insn. When an insn has no
62 predecessors, we either queue it until sufficient time has elapsed
63 or add it to the ready list. As the instructions are scheduled or
64 when stalls are introduced, the queue advances and dumps insns into
65 the ready list. When all insns down to the lowest priority have
66 been scheduled, the critical path of the basic block has been made
   as short as possible.  The remaining insns are then scheduled in
   remaining slots as they become available.
70 Function unit conflicts are resolved during forward list scheduling
71 by tracking the time when each insn is committed to the schedule
72 and from that, the time the function units it uses must be free.
73 As insns on the ready list are considered for scheduling, those
74 that would result in a blockage of the already committed insns are
75 queued until no blockage will result.
77 The following list shows the order in which we want to break ties
78 among insns in the ready list:
   1.  choose insn with the longest path to end of bb, ties
       broken by
   2.  choose insn with least contribution to register pressure,
       ties broken by
   3.  prefer in-block upon interblock motion, ties broken by
   4.  prefer useful upon speculative motion, ties broken by
   5.  choose insn with largest control flow probability, ties
       broken by
   6.  choose insn with the least dependences upon the previously
       scheduled insn, ties broken by
   7.  choose the insn which has the most insns dependent on it,
       ties broken by
   8.  choose insn with lowest UID.

   (A sketch of a comparator applying these criteria appears further
   below, next to the MIN_* tuning parameters.)
93 Memory references complicate matters. Only if we can be certain
94 that memory references are not part of the data dependency graph
95 (via true, anti, or output dependence), can we move operations past
96 memory references. To first approximation, reads can be done
97 independently, while writes introduce dependencies. Better
98 approximations will yield fewer dependencies.
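
   For example, two loads from a and b may be freely reordered, but a
   store to a followed by a load from a is a true dependence, a load
   from a followed by a store to a is an anti dependence, and two
   stores to a form an output dependence; only when the alias analysis
   can prove a and b distinct may a load be moved past a store.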
100 Before reload, an extended analysis of interblock data dependences
101 is required for interblock scheduling. This is performed in
102 compute_block_backward_dependences ().
104 Dependencies set up by memory references are treated in exactly the
105 same way as other dependencies, by using LOG_LINKS backward
106 dependences. LOG_LINKS are translated into INSN_DEPEND forward
107 dependences for the purpose of forward list scheduling.
109 Having optimized the critical path, we may have also unduly
110 extended the lifetimes of some registers. If an operation requires
111 that constants be loaded into registers, it is certainly desirable
112 to load those constants as early as necessary, but no earlier.
113 I.e., it will not do to load up a bunch of registers at the
114 beginning of a basic block only to use them at the end, if they
   could be loaded later, since this may result in excessive register
   lifetimes.
118 Note that since branches are never in basic blocks, but only end
119 basic blocks, this pass will not move branches. But that is ok,
   since we can use GNU's delayed branch scheduling pass to take care
   of this case.
123 Also note that no further optimizations based on algebraic
124 identities are performed, so this pass would be a good one to
125 perform instruction splitting, such as breaking up a multiply
126 instruction into shifts and adds where that is profitable.
128 Given the memory aliasing analysis that this pass should perform,
129 it should be possible to remove redundant stores to memory, and to
130 load values from registers instead of hitting memory.
132 Before reload, speculative insns are moved only if a 'proof' exists
133 that no exception will be caused by this, and if no live registers
134 exist that inhibit the motion (live registers constraints are not
135 represented by data dependence edges).
137 This pass must update information that subsequent passes expect to
138 be correct. Namely: reg_n_refs, reg_n_sets, reg_n_deaths,
   reg_n_calls_crossed, and reg_live_length.  Also, BLOCK_HEAD,
   BLOCK_END.
142 The information in the line number notes is carefully retained by
143 this pass. Notes that refer to the starting and ending of
144 exception regions are also carefully retained by this pass. All
145 other NOTE insns are grouped in their same relative order at the
146 beginning of basic blocks and regions that have been scheduled.
148 The main entry point for this pass is schedule_insns(), called for
149 each function. The work of the scheduler is organized in three
150 levels: (1) function level: insns are subject to splitting,
151 control-flow-graph is constructed, regions are computed (after
152 reload, each region is of one block), (2) region level: control
153 flow graph attributes required for interblock scheduling are
154 computed (dominators, reachability, etc.), data dependences and
155 priorities are computed, and (3) block level: insns in the block
156 are actually scheduled. */
163 #include "hard-reg-set.h"
164 #include "basic-block.h"
166 #include "function.h"
168 #include "insn-config.h"
169 #include "insn-attr.h"
174 extern char *reg_known_equiv_p;
175 extern rtx *reg_known_value;
177 #ifdef INSN_SCHEDULING
179 /* target_units bitmask has 1 for each unit in the cpu. It should be
180 possible to compute this variable from the machine description.
181 But currently it is computed by examining the insn list. Since
182 this is only needed for visualization, it seems an acceptable
183 solution. (For understanding the mapping of bits to units, see
184 definition of function_units[] in "insn-attrtab.c".) */
186 static int target_units = 0;
188 /* issue_rate is the number of insns that can be scheduled in the same
189 machine cycle. It can be defined in the config/mach/mach.h file,
190 otherwise we set it to 1. */
192 static int issue_rate;
198 /* sched-verbose controls the amount of debugging output the
199 scheduler prints. It is controlled by -fsched-verbose=N:
   N>0 and no -dSR : the output is directed to stderr.
   N>=10 will direct the printouts to stderr (regardless of -dSR).
   N=1: same as -dSR.
   N=2: bb's probabilities, detailed ready list info, unit/insn info.
204 N=3: rtl at abort point, control-flow, regions info.
205 N=5: dependences info. */
207 #define MAX_RGN_BLOCKS 10
208 #define MAX_RGN_INSNS 100
210 static int sched_verbose_param = 0;
211 static int sched_verbose = 0;
213 /* nr_inter/spec counts interblock/speculative motion for the function. */
214 static int nr_inter, nr_spec;
216 /* Debugging file. All printouts are sent to dump, which is always set,
217 either to stderr, or to the dump listing file (-dRS). */
218 static FILE *dump = 0;
220 /* fix_sched_param() is called from toplev.c upon detection
221 of the -fsched-verbose=N option. */
void
fix_sched_param (param, val)
     const char *param, *val;
{
  if (!strcmp (param, "verbose"))
    sched_verbose_param = atoi (val);
  else
    warning ("fix_sched_param: unknown param: %s", param);
}
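/* For example, -fsched-verbose=2 on the command line reaches this
   function as fix_sched_param ("verbose", "2").  */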
233 /* Describe state of dependencies used during sched_analyze phase. */
236 /* The *_insns and *_mems are paired lists. Each pending memory operation
237 will have a pointer to the MEM rtx on one list and a pointer to the
238 containing insn on the other list in the same place in the list. */
240 /* We can't use add_dependence like the old code did, because a single insn
241 may have multiple memory accesses, and hence needs to be on the list
242 once for each memory access. Add_dependence won't let you add an insn
243 to a list more than once. */
245 /* An INSN_LIST containing all insns with pending read operations. */
246 rtx pending_read_insns;
248 /* An EXPR_LIST containing all MEM rtx's which are pending reads. */
249 rtx pending_read_mems;
251 /* An INSN_LIST containing all insns with pending write operations. */
252 rtx pending_write_insns;
254 /* An EXPR_LIST containing all MEM rtx's which are pending writes. */
255 rtx pending_write_mems;
257 /* Indicates the combined length of the two pending lists. We must prevent
258 these lists from ever growing too large since the number of dependencies
259 produced is at least O(N*N), and execution time is at least O(4*N*N), as
260 a function of the length of these pending lists. */
261 int pending_lists_length;
263 /* The last insn upon which all memory references must depend.
264 This is an insn which flushed the pending lists, creating a dependency
265 between it and all previously pending memory references. This creates
266 a barrier (or a checkpoint) which no memory reference is allowed to cross.
   This includes all non-constant CALL_INSNs.  When we do interprocedural
   alias analysis, this restriction can be relaxed.
   This may also be an INSN that writes memory if the pending lists grow
   too large.  */
272 rtx last_pending_memory_flush;
274 /* The last function call we have seen. All hard regs, and, of course,
275 the last function call, must depend on this. */
276 rtx last_function_call;
/* Used to keep post-call pseudo/hard reg movements together with
   the call.  */
280 int in_post_call_group_p;
282 /* The LOG_LINKS field of this is a list of insns which use a pseudo
283 register that does not already cross a call. We create
   dependencies between each of those insns and the next call insn,
285 to ensure that they won't cross a call after scheduling is done. */
286 rtx sched_before_next_call;
288 /* Element N is the next insn that sets (hard or pseudo) register
289 N within the current basic block; or zero, if there is no
290 such insn. Needed for new registers which may be introduced
291 by splitting insns. */
294 rtx *reg_last_clobbers;
297 static regset reg_pending_sets;
298 static regset reg_pending_clobbers;
299 static int reg_pending_sets_all;
301 /* To speed up the test for duplicate dependency links we keep a
302 record of dependencies created by add_dependence when the average
303 number of instructions in a basic block is very large.
   Studies have shown that there are typically around 5 instructions between
306 branches for typical C code. So we can make a guess that the average
307 basic block is approximately 5 instructions long; we will choose 100X
308 the average size as a very large basic block.
310 Each insn has associated bitmaps for its dependencies. Each bitmap
311 has enough entries to represent a dependency on any other insn in
   the insn chain.  The bitmap for the true-dependency cache is
   allocated first; once it exists, the other two caches are
   allocated as well.  */
314 static sbitmap *true_dependency_cache;
315 static sbitmap *anti_dependency_cache;
316 static sbitmap *output_dependency_cache;
/* To speed up checking the consistency of formed forward insn
   dependencies we use the following cache.  Another possible solution
   could be switching off checking duplication of insns in forward
   dependencies.  */
322 #ifdef ENABLE_CHECKING
323 static sbitmap *forward_dependency_cache;
326 /* Indexed by INSN_UID, the collection of all data associated with
327 a single instruction. */
329 struct haifa_insn_data
  /* A list of insns which depend on the instruction.  Unlike LOG_LINKS,
     it represents forward dependencies.  */
335 /* The line number note in effect for each insn. For line number
336 notes, this indicates whether the note may be reused. */
339 /* Logical uid gives the original ordering of the insns. */
342 /* A priority for each insn. */
345 /* The number of incoming edges in the forward dependency graph.
     As scheduling proceeds, counts are decreased.  An insn moves to
347 the ready queue when its counter reaches zero. */
  /* An encoding of the blockage range function.  Both unit and range
     are encoded.  */
352 unsigned int blockage;
354 /* Number of instructions referring to this insn. */
357 /* The minimum clock tick at which the insn becomes ready. This is
358 used to note timing constraints for the insns in the pending list. */
363 /* An encoding of the function units used. */
366 /* This weight is an estimation of the insn's contribution to
367 register pressure. */
370 /* Some insns (e.g. call) are not allowed to move across blocks. */
371 unsigned int cant_move : 1;
  /* Set if there's a DEF-USE dependence between some speculatively
     moved load insn and this one.  */
375 unsigned int fed_by_spec_load : 1;
376 unsigned int is_load_insn : 1;
379 static struct haifa_insn_data *h_i_d;
381 #define INSN_DEPEND(INSN) (h_i_d[INSN_UID (INSN)].depend)
382 #define INSN_LUID(INSN) (h_i_d[INSN_UID (INSN)].luid)
383 #define INSN_PRIORITY(INSN) (h_i_d[INSN_UID (INSN)].priority)
384 #define INSN_DEP_COUNT(INSN) (h_i_d[INSN_UID (INSN)].dep_count)
385 #define INSN_COST(INSN) (h_i_d[INSN_UID (INSN)].cost)
386 #define INSN_UNIT(INSN) (h_i_d[INSN_UID (INSN)].units)
387 #define INSN_REG_WEIGHT(INSN) (h_i_d[INSN_UID (INSN)].reg_weight)
389 #define INSN_BLOCKAGE(INSN) (h_i_d[INSN_UID (INSN)].blockage)
391 #define BLOCKAGE_MASK ((1 << BLOCKAGE_BITS) - 1)
392 #define ENCODE_BLOCKAGE(U, R) \
393 (((U) << BLOCKAGE_BITS \
394 | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS \
395 | MAX_BLOCKAGE_COST (R))
396 #define UNIT_BLOCKED(B) ((B) >> (2 * BLOCKAGE_BITS))
397 #define BLOCKAGE_RANGE(B) \
398 (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \
399 | ((B) & BLOCKAGE_MASK))
401 /* Encodings of the `<name>_unit_blockage_range' function. */
402 #define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2))
403 #define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1))
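/* As a worked example (assuming, for illustration only, that
   BLOCKAGE_BITS is 5, so BLOCKAGE_MASK is 31, and that
   HOST_BITS_PER_INT is 32): for unit 3 and a range R with minimum
   cost 2 and maximum cost 6, i.e. R = (2 << 16) | 6,

       ENCODE_BLOCKAGE (3, R) = (((3 << 5) | 2) << 5) | 6 = 3142,

   after which UNIT_BLOCKED (3142) recovers the unit 3 and
   BLOCKAGE_RANGE (3142) recovers (2 << 16) | 6.  */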
405 #define DONE_PRIORITY -1
406 #define MAX_PRIORITY 0x7fffffff
407 #define TAIL_PRIORITY 0x7ffffffe
408 #define LAUNCH_PRIORITY 0x7f000001
409 #define DONE_PRIORITY_P(INSN) (INSN_PRIORITY (INSN) < 0)
410 #define LOW_PRIORITY_P(INSN) ((INSN_PRIORITY (INSN) & 0x7f000000) == 0)
412 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
413 #define LINE_NOTE(INSN) (h_i_d[INSN_UID (INSN)].line_note)
414 #define INSN_TICK(INSN) (h_i_d[INSN_UID (INSN)].tick)
415 #define CANT_MOVE(insn) (h_i_d[INSN_UID (insn)].cant_move)
416 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
417 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
419 /* Vector indexed by basic block number giving the starting line-number
420 for each basic block. */
421 static rtx *line_note_head;
423 /* List of important notes we must keep around. This is a pointer to the
424 last element in the list. */
425 static rtx note_list;
429 /* An instruction is ready to be scheduled when all insns preceding it
430 have already been scheduled. It is important to ensure that all
431 insns which use its result will not be executed until its result
432 has been computed. An insn is maintained in one of four structures:
434 (P) the "Pending" set of insns which cannot be scheduled until
435 their dependencies have been satisfied.
   (Q) the "Queued" set of insns that can be scheduled when sufficient
       time has elapsed.
438 (R) the "Ready" list of unscheduled, uncommitted insns.
439 (S) the "Scheduled" list of insns.
441 Initially, all insns are either "Pending" or "Ready" depending on
442 whether their dependencies are satisfied.
444 Insns move from the "Ready" list to the "Scheduled" list as they
445 are committed to the schedule. As this occurs, the insns in the
446 "Pending" list have their dependencies satisfied and move to either
447 the "Ready" list or the "Queued" set depending on whether
448 sufficient time has passed to make them ready. As time passes,
449 insns move from the "Queued" set to the "Ready" list. Insns may
450 move from the "Ready" list to the "Queued" set if they are blocked
451 due to a function unit conflict.
453 The "Pending" list (P) are the insns in the INSN_DEPEND of the unscheduled
454 insns, i.e., those that are ready, queued, and pending.
455 The "Queued" set (Q) is implemented by the variable `insn_queue'.
456 The "Ready" list (R) is implemented by the variables `ready' and
458 The "Scheduled" list (S) is the new insn chain built by this pass.
460 The transition (R->S) is implemented in the scheduling loop in
461 `schedule_block' when the best insn to schedule is chosen.
462 The transition (R->Q) is implemented in `queue_insn' when an
   insn is found to have a function unit conflict with the already
   committed insns.
465 The transitions (P->R and P->Q) are implemented in `schedule_insn' as
466 insns move from the ready list to the scheduled list.
   The transition (Q->R) is implemented in `queue_to_ready' as time
468 passes or stalls are introduced. */
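/* A sketch of the transition loop implied by the comment above, using
   hypothetical helper names (the real loop lives in schedule_block,
   with rank_for_schedule, queue_insn, schedule_insn and queue_to_ready
   doing the work):

       clock = 0;
       while (unscheduled insns remain)
	 {
	   move insns whose delay expired from Q to R;      (queue_to_ready)
	   while (R is not empty)
	     {
	       pick best insn X from R;                     (rank_for_schedule)
	       if (X would block a committed function unit)
		 move X from R back to Q;                   (queue_insn)
	       else
		 commit X to S and resolve INSN_DEPEND,     (schedule_insn)
		 moving satisfied insns from P to R or Q;
	     }
	   advance clock, a stall if nothing was issued;
	 }  */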
470 /* Implement a circular buffer to delay instructions until sufficient
471 time has passed. INSN_QUEUE_SIZE is a power of two larger than
472 MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c. This is the
   longest time an insn may be queued.  */
474 static rtx insn_queue[INSN_QUEUE_SIZE];
475 static int q_ptr = 0;
476 static int q_size = 0;
477 #define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1))
478 #define NEXT_Q_AFTER(X, C) (((X)+C) & (INSN_QUEUE_SIZE-1))
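/* For example, if INSN_QUEUE_SIZE is 16 the mask is 15, so NEXT_Q (15)
   wraps around to 0 and NEXT_Q_AFTER (14, 5) is (14 + 5) & 15 = 3.
   An insn that must wait C cycles is placed in slot
   NEXT_Q_AFTER (q_ptr, C), which is why the queue must be larger than
   the longest possible delay.  */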
480 /* Forward declarations. */
481 static void add_dependence PARAMS ((rtx, rtx, enum reg_note));
482 static void remove_dependence PARAMS ((rtx, rtx));
483 static rtx find_insn_list PARAMS ((rtx, rtx));
484 static void set_sched_group_p PARAMS ((rtx));
485 static int insn_unit PARAMS ((rtx));
486 static unsigned int blockage_range PARAMS ((int, rtx));
487 static void clear_units PARAMS ((void));
488 static int actual_hazard_this_instance PARAMS ((int, int, rtx, int, int));
489 static void schedule_unit PARAMS ((int, rtx, int));
490 static int actual_hazard PARAMS ((int, rtx, int, int));
491 static int potential_hazard PARAMS ((int, rtx, int));
492 static int insn_cost PARAMS ((rtx, rtx, rtx));
493 static int priority PARAMS ((rtx));
494 static void free_pending_lists PARAMS ((void));
495 static void add_insn_mem_dependence PARAMS ((struct deps *, rtx *, rtx *, rtx,
497 static void flush_pending_lists PARAMS ((struct deps *, rtx, int));
498 static void sched_analyze_1 PARAMS ((struct deps *, rtx, rtx));
499 static void sched_analyze_2 PARAMS ((struct deps *, rtx, rtx));
500 static void sched_analyze_insn PARAMS ((struct deps *, rtx, rtx, rtx));
501 static void sched_analyze PARAMS ((struct deps *, rtx, rtx));
502 static int rank_for_schedule PARAMS ((const PTR, const PTR));
503 static void swap_sort PARAMS ((rtx *, int));
504 static void queue_insn PARAMS ((rtx, int));
505 static int schedule_insn PARAMS ((rtx, rtx *, int, int));
506 static void find_insn_reg_weight PARAMS ((int));
507 static int schedule_block PARAMS ((int, int));
508 static char *safe_concat PARAMS ((char *, char *, const char *));
509 static int insn_issue_delay PARAMS ((rtx));
510 static void adjust_priority PARAMS ((rtx));
512 /* Control flow graph edges are kept in circular lists. */
521 static haifa_edge *edge_table;
523 #define NEXT_IN(edge) (edge_table[edge].next_in)
524 #define NEXT_OUT(edge) (edge_table[edge].next_out)
525 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
526 #define TO_BLOCK(edge) (edge_table[edge].to_block)
/* Number of edges in the control flow graph.  (In fact, larger than
   that by 1, since edge 0 is unused.)  */
static int nr_edges;
532 /* Circular list of incoming/outgoing edges of a block. */
533 static int *in_edges;
534 static int *out_edges;
536 #define IN_EDGES(block) (in_edges[block])
537 #define OUT_EDGES(block) (out_edges[block])
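#if 0
/* A minimal sketch, not used by the pass, of walking the circular
   out-edge list of BLOCK with the accessors above; the same do/while
   pattern appears in new_edge () and compute_dom_prob_ps ().  */
static void
sketch_walk_out_edges (block)
     int block;
{
  int fst_edge, curr_edge;

  fst_edge = curr_edge = OUT_EDGES (block);
  if (curr_edge == 0)
    /* Edge 0 is unused, so a zero entry means no successors.  */
    return;
  do
    {
      /* TO_BLOCK (curr_edge) is a successor of BLOCK at this point.  */
      curr_edge = NEXT_OUT (curr_edge);
    }
  while (curr_edge != fst_edge);
}
#endif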
539 static int is_cfg_nonregular PARAMS ((void));
540 static int build_control_flow PARAMS ((struct edge_list *));
541 static void new_edge PARAMS ((int, int));
543 /* A region is the main entity for interblock scheduling: insns
544 are allowed to move between blocks in the same region, along
545 control flow graph edges, in the 'up' direction. */
  int rgn_nr_blocks;		/* Number of blocks in region.  */
  int rgn_blocks;		/* Blocks in the region (actually an
				   index into rgn_bb_table).  */
553 /* Number of regions in the procedure. */
554 static int nr_regions;
556 /* Table of region descriptions. */
557 static region *rgn_table;
559 /* Array of lists of regions' blocks. */
560 static int *rgn_bb_table;
562 /* Topological order of blocks in the region (if b2 is reachable from
563 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
564 always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
566 static int *block_to_bb;
568 /* The number of the region containing a block. */
569 static int *containing_rgn;
571 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
572 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
573 #define BLOCK_TO_BB(block) (block_to_bb[block])
574 #define CONTAINING_RGN(block) (containing_rgn[block])
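#if 0
/* A sketch, not used by the pass, of enumerating the blocks of region
   RGN in topological order via the accessors above; debug_regions ()
   below uses the same idiom.  */
static void
sketch_walk_region (rgn)
     int rgn;
{
  int bb;

  for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
    {
      int block = rgn_bb_table[RGN_BLOCKS (rgn) + bb];

      /* By construction, BLOCK_TO_BB (block) == bb and
	 CONTAINING_RGN (block) == rgn hold here.  */
      if (BLOCK_TO_BB (block) != bb)
	abort ();
    }
}
#endif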
576 void debug_regions PARAMS ((void));
577 static void find_single_block_region PARAMS ((void));
578 static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
579 static int too_large PARAMS ((int, int *, int *));
581 extern void debug_live PARAMS ((int, int));
583 /* Blocks of the current region being scheduled. */
584 static int current_nr_blocks;
585 static int current_blocks;
587 /* The mapping from bb to block. */
588 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
590 /* Bit vectors and bitset operations are needed for computations on
591 the control flow graph. */
593 typedef unsigned HOST_WIDE_INT *bitset;
typedef struct
{
  int *first_member;		/* Pointer to the list start in bitlst_table.  */
  int nr_members;		/* The number of members of the bit list.  */
}
bitlst;
601 static int bitlst_table_last;
602 static int bitlst_table_size;
603 static int *bitlst_table;
605 static char bitset_member PARAMS ((bitset, int, int));
606 static void extract_bitlst PARAMS ((bitset, int, int, bitlst *));
608 /* Target info declarations.
610 The block currently being scheduled is referred to as the "target" block,
611 while other blocks in the region from which insns can be moved to the
612 target are called "source" blocks. The candidate structure holds info
613 about such sources: are they valid? Speculative? Etc. */
614 typedef bitlst bblst;
625 static candidate *candidate_table;
627 /* A speculative motion requires checking live information on the path
628 from 'source' to 'target'. The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

632 Lists of split and update blocks for each candidate of the current
633 target are in array bblst_table. */
634 static int *bblst_table, bblst_size, bblst_last;
636 #define IS_VALID(src) ( candidate_table[src].is_valid )
637 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
638 #define SRC_PROB(src) ( candidate_table[src].src_prob )
640 /* The bb being currently scheduled. */
641 static int target_bb;
644 typedef bitlst edgelst;
646 /* Target info functions. */
647 static void split_edges PARAMS ((int, int, edgelst *));
648 static void compute_trg_info PARAMS ((int));
649 void debug_candidate PARAMS ((int));
650 void debug_candidates PARAMS ((int));
652 /* Bit-set of bbs, where bit 'i' stands for bb 'i'. */
653 typedef bitset bbset;
655 /* Number of words of the bbset. */
656 static int bbset_size;
/* Dominators array: dom[i] contains the bbset of dominators of
   bb i in the region.  */
static bbset *dom;
662 /* bb 0 is the only region entry. */
663 #define IS_RGN_ENTRY(bb) (!bb)
/* Is bb_src dominated by bb_trg?  */
666 #define IS_DOMINATED(bb_src, bb_trg) \
667 ( bitset_member (dom[bb_src], bb_trg, bbset_size) )
/* Probability: Prob[i] is a float in [0, 1] which is the probability
   of bb i relative to the region entry.  */
static float *prob;
/* The probability of bb_src, relative to bb_trg.  Note that while the
   'prob[bb]' is a float in [0, 1], this macro returns an integer
   in [0, 100].  */
#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
						       prob[bb_trg])))
679 /* Bit-set of edges, where bit i stands for edge i. */
680 typedef bitset edgeset;
682 /* Number of edges in the region. */
683 static int rgn_nr_edges;
685 /* Array of size rgn_nr_edges. */
686 static int *rgn_edges;
688 /* Number of words in an edgeset. */
689 static int edgeset_size;
691 /* Number of bits in an edgeset. */
692 static int edgeset_bitsize;
694 /* Mapping from each edge in the graph to its number in the rgn. */
695 static int *edge_to_bit;
696 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
700 are computed for each bb prior to scheduling a region. This is actually
701 the split edges of each bb relative to the region entry.
703 pot_split[bb] is the set of potential split edges of bb. */
704 static edgeset *pot_split;
706 /* For every bb, a set of its ancestor edges. */
707 static edgeset *ancestor_edges;
709 static void compute_dom_prob_ps PARAMS ((int));
711 #define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
712 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
713 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
714 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
716 /* Parameters affecting the decision of rank_for_schedule(). */
717 #define MIN_DIFF_PRIORITY 2
718 #define MIN_PROBABILITY 40
719 #define MIN_PROB_DIFF 10
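#if 0
/* A condensed sketch of how the tie-breaking chain from the file
   header comment maps onto a qsort-style comparator in the manner of
   rank_for_schedule ().  Only criteria 1, 2, 5 and 8 are shown; the
   real function also applies the interblock criteria and consults
   MIN_DIFF_PRIORITY, MIN_PROBABILITY and MIN_PROB_DIFF above.  */
static int
sketch_rank (x, y)
     rtx x, y;
{
  int diff;

  /* 1: the longer path to the end of the block wins.  */
  if ((diff = INSN_PRIORITY (y) - INSN_PRIORITY (x)) != 0)
    return diff;

  /* 2: the smaller contribution to register pressure wins.  */
  if ((diff = INSN_REG_WEIGHT (x) - INSN_REG_WEIGHT (y)) != 0)
    return diff;

  /* 5: the larger control flow probability wins (interblock only).  */
  if ((diff = INSN_PROBABILITY (y) - INSN_PROBABILITY (x)) != 0)
    return diff;

  /* 8: the lowest UID, i.e. the original order, breaks the final tie.  */
  return INSN_LUID (x) - INSN_LUID (y);
}
#endif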
721 /* Speculative scheduling functions. */
722 static int check_live_1 PARAMS ((int, rtx));
723 static void update_live_1 PARAMS ((int, rtx));
724 static int check_live PARAMS ((rtx, int));
725 static void update_live PARAMS ((rtx, int));
726 static void set_spec_fed PARAMS ((rtx));
727 static int is_pfree PARAMS ((rtx, int, int));
728 static int find_conditional_protection PARAMS ((rtx, int));
729 static int is_conditionally_protected PARAMS ((rtx, int, int));
730 static int may_trap_exp PARAMS ((rtx, int));
731 static int haifa_classify_insn PARAMS ((rtx));
732 static int is_prisky PARAMS ((rtx, int, int));
733 static int is_exception_free PARAMS ((rtx, int, int));
735 static char find_insn_mem_list PARAMS ((rtx, rtx, rtx, rtx));
736 static void compute_block_forward_dependences PARAMS ((int));
737 static void add_branch_dependences PARAMS ((rtx, rtx));
738 static void compute_block_backward_dependences PARAMS ((int));
739 void debug_dependencies PARAMS ((void));
741 /* Notes handling mechanism:
742 =========================
743 Generally, NOTES are saved before scheduling and restored after scheduling.
744 The scheduler distinguishes between three types of notes:
746 (1) LINE_NUMBER notes, generated and used for debugging. Here,
747 before scheduling a region, a pointer to the LINE_NUMBER note is
748 added to the insn following it (in save_line_notes()), and the note
749 is removed (in rm_line_notes() and unlink_line_notes()). After
750 scheduling the region, this pointer is used for regeneration of
751 the LINE_NUMBER note (in restore_line_notes()).
753 (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes:
754 Before scheduling a region, a pointer to the note is added to the insn
755 that follows or precedes it. (This happens as part of the data dependence
756 computation). After scheduling an insn, the pointer contained in it is
757 used for regenerating the corresponding note (in reemit_notes).
759 (3) All other notes (e.g. INSN_DELETED): Before scheduling a block,
760 these notes are put in a list (in rm_other_notes() and
761 unlink_other_notes ()). After scheduling the block, these notes are
762 inserted at the beginning of the block (in schedule_block()). */
764 static rtx unlink_other_notes PARAMS ((rtx, rtx));
765 static rtx unlink_line_notes PARAMS ((rtx, rtx));
766 static void rm_line_notes PARAMS ((int));
767 static void save_line_notes PARAMS ((int));
768 static void restore_line_notes PARAMS ((int));
769 static void rm_redundant_line_notes PARAMS ((void));
770 static void rm_other_notes PARAMS ((rtx, rtx));
771 static rtx reemit_notes PARAMS ((rtx, rtx));
773 static void get_block_head_tail PARAMS ((int, rtx *, rtx *));
774 static void get_bb_head_tail PARAMS ((int, rtx *, rtx *));
776 static int queue_to_ready PARAMS ((rtx[], int));
778 static void debug_ready_list PARAMS ((rtx[], int));
779 static void init_target_units PARAMS ((void));
780 static void insn_print_units PARAMS ((rtx));
781 static int get_visual_tbl_length PARAMS ((void));
782 static void init_block_visualization PARAMS ((void));
783 static void print_block_visualization PARAMS ((int, const char *));
784 static void visualize_scheduled_insns PARAMS ((int, int));
785 static void visualize_no_unit PARAMS ((rtx));
786 static void visualize_stall_cycles PARAMS ((int, int));
787 static void print_exp PARAMS ((char *, rtx, int));
788 static void print_value PARAMS ((char *, rtx, int));
789 static void print_pattern PARAMS ((char *, rtx, int));
790 static void print_insn PARAMS ((char *, rtx, int));
791 void debug_reg_vector PARAMS ((regset));
793 static rtx move_insn1 PARAMS ((rtx, rtx));
794 static rtx move_insn PARAMS ((rtx, rtx));
795 static rtx group_leader PARAMS ((rtx));
796 static int set_priorities PARAMS ((int));
797 static void init_deps PARAMS ((struct deps *));
798 static void schedule_region PARAMS ((int));
799 static void propagate_deps PARAMS ((int, struct deps *, int));
801 #endif /* INSN_SCHEDULING */
803 #define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X)))
805 /* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to the
806 LOG_LINKS of INSN, if not already there. DEP_TYPE indicates the type
807 of dependence that this link represents. */
810 add_dependence (insn, elem, dep_type)
     rtx insn;
     rtx elem;
     enum reg_note dep_type;
{
  rtx link, next;
  int present_p = 1;
  enum reg_note present_dep_type;
  /* Don't depend an insn on itself.  */
  if (insn == elem)
    return;
823 /* We can get a dependency on deleted insns due to optimizations in
824 the register allocation and reloading or due to splitting. Any
825 such dependency is useless and can be ignored. */
  if (GET_CODE (elem) == NOTE)
    return;
829 /* If elem is part of a sequence that must be scheduled together, then
830 make the dependence point to the last insn of the sequence.
831 When HAVE_cc0, it is possible for NOTEs to exist between users and
832 setters of the condition codes, so we must skip past notes here.
833 Otherwise, NOTEs are impossible here. */
834 next = next_nonnote_insn (elem);
835 if (next && SCHED_GROUP_P (next)
836 && GET_CODE (next) != CODE_LABEL)
      /* Notes will never intervene here though, so don't bother checking
	 for them.  */
      /* We must reject CODE_LABELs, so that we don't get confused by one
	 that has LABEL_PRESERVE_P set, which is represented by the same
	 bit in the rtl as SCHED_GROUP_P.  A CODE_LABEL can never be
	 SCHED_GROUP_P.  */
      rtx nnext;
847 while ((nnext = next_nonnote_insn (next)) != NULL
848 && SCHED_GROUP_P (nnext)
849 && GET_CODE (nnext) != CODE_LABEL)
      /* Again, don't depend an insn on itself.  */
      if (insn == next)
	return;
856 /* Make the dependence to NEXT, the last insn of the group, instead
	 of the original ELEM.  */
      elem = next;
862 #ifdef INSN_SCHEDULING
863 /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.)
864 No need for interblock dependences with calls, since
865 calls are not moved between blocks. Note: the edge where
866 elem is a CALL is still required. */
867 if (GET_CODE (insn) == CALL_INSN
      && (INSN_BB (elem) != INSN_BB (insn)))
    return;
871 /* If we already have a dependency for ELEM, then we do not need to
872 do anything. Avoiding the list walk below can cut compile times
873 dramatically for some code. */
874 if (true_dependency_cache != NULL)
      if (anti_dependency_cache == NULL || output_dependency_cache == NULL)
	abort ();
      if (TEST_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem)))
	present_dep_type = 0;
      else if (TEST_BIT (anti_dependency_cache[INSN_LUID (insn)],
			 INSN_LUID (elem)))
	present_dep_type = REG_DEP_ANTI;
      else if (TEST_BIT (output_dependency_cache[INSN_LUID (insn)],
			 INSN_LUID (elem)))
	present_dep_type = REG_DEP_OUTPUT;
      else
	present_p = 0;
      if (present_p && (int) dep_type >= (int) present_dep_type)
	return;
893 /* Check that we don't already have this dependence. */
  if (present_p)
    for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
      if (XEXP (link, 0) == elem)
	{
898 #ifdef INSN_SCHEDULING
	  /* Clear corresponding cache entry because type of the link
	     may be changed.  */
901 if (true_dependency_cache != NULL)
903 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
	      RESET_BIT (anti_dependency_cache[INSN_LUID (insn)],
			 INSN_LUID (elem));
906 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
907 && output_dependency_cache)
	      RESET_BIT (output_dependency_cache[INSN_LUID (insn)],
			 INSN_LUID (elem));
915 /* If this is a more restrictive type of dependence than the existing
916 one, then change the existing dependence to this type. */
917 if ((int) dep_type < (int) REG_NOTE_KIND (link))
918 PUT_REG_NOTE_KIND (link, dep_type);
920 #ifdef INSN_SCHEDULING
921 /* If we are adding a dependency to INSN's LOG_LINKs, then
922 note that in the bitmap caches of dependency information. */
923 if (true_dependency_cache != NULL)
925 if ((int)REG_NOTE_KIND (link) == 0)
	      SET_BIT (true_dependency_cache[INSN_LUID (insn)],
		       INSN_LUID (elem));
928 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
	      SET_BIT (anti_dependency_cache[INSN_LUID (insn)],
		       INSN_LUID (elem));
931 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
	      SET_BIT (output_dependency_cache[INSN_LUID (insn)],
		       INSN_LUID (elem));
938 /* Might want to check one level of transitivity to save conses. */
940 link = alloc_INSN_LIST (elem, LOG_LINKS (insn));
941 LOG_LINKS (insn) = link;
943 /* Insn dependency, not data dependency. */
944 PUT_REG_NOTE_KIND (link, dep_type);
946 #ifdef INSN_SCHEDULING
947 /* If we are adding a dependency to INSN's LOG_LINKs, then note that
948 in the bitmap caches of dependency information. */
949 if (true_dependency_cache != NULL)
951 if ((int)dep_type == 0)
952 SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
953 else if (dep_type == REG_DEP_ANTI)
954 SET_BIT (anti_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
955 else if (dep_type == REG_DEP_OUTPUT)
956 SET_BIT (output_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
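  /* Typical use, sketched with hypothetical variables (the real call
     sites are in sched_analyze_1 and flush_pending_lists): an insn
     that writes memory is made to depend on every recorded pending
     read, an anti dependence, roughly as

	 for (u = deps->pending_read_insns; u; u = XEXP (u, 1))
	   add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);  */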
961 /* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS
962 of INSN. Abort if not found. */
965 remove_dependence (insn, elem)
969 rtx prev, link, next;
972 for (prev = 0, link = LOG_LINKS (insn); link; link = next)
974 next = XEXP (link, 1);
      if (XEXP (link, 0) == elem)
	{
	  if (prev)
	    XEXP (prev, 1) = next;
	  else
	    LOG_LINKS (insn) = next;
982 #ifdef INSN_SCHEDULING
983 /* If we are removing a dependency from the LOG_LINKS list,
984 make sure to remove it from the cache too. */
985 if (true_dependency_cache != NULL)
987 if (REG_NOTE_KIND (link) == 0)
988 RESET_BIT (true_dependency_cache[INSN_LUID (insn)],
990 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
991 RESET_BIT (anti_dependency_cache[INSN_LUID (insn)],
993 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
994 RESET_BIT (output_dependency_cache[INSN_LUID (insn)],
999 free_INSN_LIST_node (link);
1012 /* Return the INSN_LIST containing INSN in LIST, or NULL
1013 if LIST does not contain INSN. */
1016 find_insn_list (insn, list)
     rtx insn;
     rtx list;
{
  while (list)
    {
      if (XEXP (list, 0) == insn)
	return list;

      list = XEXP (list, 1);
    }
  return 0;
}
1029 /* Set SCHED_GROUP_P and care for the rest of the bookkeeping that
1030 goes along with that. */
set_sched_group_p (insn)
     rtx insn;
{
  rtx link, prev;

  SCHED_GROUP_P (insn) = 1;
1040 /* There may be a note before this insn now, but all notes will
1041 be removed before we actually try to schedule the insns, so
1042 it won't cause a problem later. We must avoid it here though. */
1043 prev = prev_nonnote_insn (insn);
1045 /* Make a copy of all dependencies on the immediately previous insn,
1046 and add to this insn. This is so that all the dependencies will
1047 apply to the group. Remove an explicit dependence on this insn
1048 as SCHED_GROUP_P now represents it. */
1050 if (find_insn_list (prev, LOG_LINKS (insn)))
1051 remove_dependence (insn, prev);
1053 for (link = LOG_LINKS (prev); link; link = XEXP (link, 1))
1054 add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
1057 #ifndef INSN_SCHEDULING
1059 schedule_insns (dump_file)
1060 FILE *dump_file ATTRIBUTE_UNUSED;
1068 #ifndef HAIFA_INLINE
1069 #define HAIFA_INLINE __inline
1072 /* Computation of memory dependencies. */
/* Data structures for the computation of data dependences in a region.
   We keep one `deps' structure for every basic block.  Before analyzing
   the data dependences for a bb, its variables are initialized as a
   function of the variables of its predecessors.  When the analysis for
   a bb completes, we save the contents to the corresponding bb_deps[bb]
   variable.  */
1080 static struct deps *bb_deps;
1082 /* Pointer to the last instruction scheduled. Used by rank_for_schedule,
1083 so that insns independent of the last scheduled insn will be preferred
1084 over dependent instructions. */
1086 static rtx last_scheduled_insn;
1088 /* Functions for construction of the control flow graph. */
1090 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
1092 We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, or if we
1094 have nonlocal gotos. */
1097 is_cfg_nonregular ()
1103 /* If we have a label that could be the target of a nonlocal goto, then
1104 the cfg is not well structured. */
1105 if (nonlocal_goto_handler_labels)
1108 /* If we have any forced labels, then the cfg is not well structured. */
1112 /* If this function has a computed jump, then we consider the cfg
1113 not well structured. */
1114 if (current_function_has_computed_jump)
1117 /* If we have exception handlers, then we consider the cfg not well
1118 structured. ?!? We should be able to handle this now that flow.c
1119 computes an accurate cfg for EH. */
1120 if (exception_handler_labels)
1123 /* If we have non-jumping insns which refer to labels, then we consider
1124 the cfg not well structured. */
  /* Check for labels referred to other than by jumps.  */
1126 for (b = 0; b < n_basic_blocks; b++)
1127 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
1129 code = GET_CODE (insn);
1130 if (GET_RTX_CLASS (code) == 'i')
1134 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
1135 if (REG_NOTE_KIND (note) == REG_LABEL)
1139 if (insn == BLOCK_END (b))
1143 /* All the tests passed. Consider the cfg well structured. */
1147 /* Build the control flow graph and set nr_edges.
1149 Instead of trying to build a cfg ourselves, we rely on flow to
1150 do it for us. Stamp out useless code (and bug) duplication.
1152 Return nonzero if an irregularity in the cfg is found which would
1153 prevent cross block scheduling. */
1156 build_control_flow (edge_list)
1157 struct edge_list *edge_list;
1159 int i, unreachable, num_edges;
1161 /* This already accounts for entry/exit edges. */
1162 num_edges = NUM_EDGES (edge_list);
1164 /* Unreachable loops with more than one basic block are detected
1165 during the DFS traversal in find_rgns.
1167 Unreachable loops with a single block are detected here. This
1168 test is redundant with the one in find_rgns, but it's much
1169 cheaper to go ahead and catch the trivial case here. */
1171 for (i = 0; i < n_basic_blocks; i++)
1173 basic_block b = BASIC_BLOCK (i);
      if (b->pred == NULL
	  || (b->pred->src == b
	      && b->pred->pred_next == NULL))
	unreachable = 1;
1181 /* ??? We can kill these soon. */
1182 in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
1183 out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
1184 edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
1187 for (i = 0; i < num_edges; i++)
1189 edge e = INDEX_EDGE (edge_list, i);
1191 if (e->dest != EXIT_BLOCK_PTR
1192 && e->src != ENTRY_BLOCK_PTR)
1193 new_edge (e->src->index, e->dest->index);
1196 /* Increment by 1, since edge 0 is unused. */
1202 /* Record an edge in the control flow graph from SOURCE to TARGET.
1204 In theory, this is redundant with the s_succs computed above, but
1205 we have not converted all of haifa to use information from the
1209 new_edge (source, target)
  int e, next_edge;
  int curr_edge, fst_edge;
1215 /* Check for duplicates. */
1216 fst_edge = curr_edge = OUT_EDGES (source);
  while (curr_edge)
    {
      if (FROM_BLOCK (curr_edge) == source
	  && TO_BLOCK (curr_edge) == target)
	{
	  return;
	}

      curr_edge = NEXT_OUT (curr_edge);

      if (fst_edge == curr_edge)
	break;
    }

  e = ++nr_edges;
1233 FROM_BLOCK (e) = source;
1234 TO_BLOCK (e) = target;
1236 if (OUT_EDGES (source))
1238 next_edge = NEXT_OUT (OUT_EDGES (source));
1239 NEXT_OUT (OUT_EDGES (source)) = e;
1240 NEXT_OUT (e) = next_edge;
1244 OUT_EDGES (source) = e;
1248 if (IN_EDGES (target))
1250 next_edge = NEXT_IN (IN_EDGES (target));
1251 NEXT_IN (IN_EDGES (target)) = e;
1252 NEXT_IN (e) = next_edge;
1256 IN_EDGES (target) = e;
1261 /* BITSET macros for operations on the control flow graph. */
1263 /* Compute bitwise union of two bitsets. */
#define BITSET_UNION(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) |= *(sp++); } while (0)

/* Compute bitwise intersection of two bitsets.  */
#define BITSET_INTER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= *(sp++); } while (0)

/* Compute bitwise difference of two bitsets.  */
#define BITSET_DIFFER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= ~*(sp++); } while (0)

/* Invert every bit of bitset 'set'.  */
#define BITSET_INVERT(set, len)					\
do { register bitset tmpset = set;				\
     register int i;						\
     for (i = 0; i < len; i++, tmpset++)			\
       *tmpset = ~*tmpset; } while (0)
1291 /* Turn on the index'th bit in bitset set. */
#define BITSET_ADD(set, index, len)				\
{								\
  if (index >= HOST_BITS_PER_WIDE_INT * len)			\
    abort ();							\
  else								\
    set[index / HOST_BITS_PER_WIDE_INT] |=			\
      1 << (index % HOST_BITS_PER_WIDE_INT);			\
}

/* Turn off the index'th bit in set.  */
#define BITSET_REMOVE(set, index, len)				\
{								\
  if (index >= HOST_BITS_PER_WIDE_INT * len)			\
    abort ();							\
  else								\
    set[index / HOST_BITS_PER_WIDE_INT] &=			\
      ~(1 << (index % HOST_BITS_PER_WIDE_INT));			\
}
1311 /* Check if the index'th bit in bitset set is on. */
1314 bitset_member (set, index, len)
     bitset set;
     int index, len;
{
  if (index >= HOST_BITS_PER_WIDE_INT * len)
    abort ();
  return (set[index / HOST_BITS_PER_WIDE_INT] &
	  1 << (index % HOST_BITS_PER_WIDE_INT)) ? 1 : 0;
}
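/* For example, with HOST_BITS_PER_WIDE_INT equal to 32, bit 37 of a
   set lives in word 37 / 32 = 1 at position 37 % 32 = 5, so
   bitset_member (set, 37, len) tests set[1] against 1 << 5.  */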
1324 /* Translate a bit-set SET to a list BL of the bit-set members. */
1327 extract_bitlst (set, len, bitlen, bl)
1334 unsigned HOST_WIDE_INT word;
1336 /* bblst table space is reused in each call to extract_bitlst. */
1337 bitlst_table_last = 0;
1339 bl->first_member = &bitlst_table[bitlst_table_last];
1342 /* Iterate over each word in the bitset. */
1343 for (i = 0; i < len; i++)
1346 offset = i * HOST_BITS_PER_WIDE_INT;
1348 /* Iterate over each bit in the word, but do not
1349 go beyond the end of the defined bits. */
      for (j = 0; offset < bitlen && word; j++)
	{
	  if (word & 1)
	    {
	      bitlst_table[bitlst_table_last++] = offset;
	      (bl->nr_members)++;
	    }

	  word >>= 1;
	  ++offset;
	}
1364 /* Functions for the construction of regions. */
1366 /* Print the regions, for debugging purposes. Callable from debugger. */
1373 fprintf (dump, "\n;; ------------ REGIONS ----------\n\n");
1374 for (rgn = 0; rgn < nr_regions; rgn++)
1376 fprintf (dump, ";;\trgn %d nr_blocks %d:\n", rgn,
1377 rgn_table[rgn].rgn_nr_blocks);
1378 fprintf (dump, ";;\tbb/block: ");
1380 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
1382 current_blocks = RGN_BLOCKS (rgn);
1384 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
1387 fprintf (dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
1390 fprintf (dump, "\n\n");
1394 /* Build a single block region for each basic block in the function.
1395 This allows for using the same code for interblock and basic block
1399 find_single_block_region ()
1403 for (i = 0; i < n_basic_blocks; i++)
1405 rgn_bb_table[i] = i;
      RGN_NR_BLOCKS (i) = 1;
      RGN_BLOCKS (i) = i;
      CONTAINING_RGN (i) = i;
1409 BLOCK_TO_BB (i) = 0;
1411 nr_regions = n_basic_blocks;
1414 /* Update number of blocks and the estimate for number of insns
1415 in the region. Return 1 if the region is "too large" for interblock
1416 scheduling (compile time considerations), otherwise return 0. */
1419 too_large (block, num_bbs, num_insns)
     int block, *num_bbs, *num_insns;
{
  (*num_bbs)++;
  (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
		   INSN_LUID (BLOCK_HEAD (block)));
  if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
    return 1;
  else
    return 0;
}
1431 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
1432 is still an inner loop. Put in max_hdr[blk] the header of the most inner
1433 loop containing blk. */
1434 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
1436 if (max_hdr[blk] == -1) \
1437 max_hdr[blk] = hdr; \
1438 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
1439 RESET_BIT (inner, hdr); \
1440 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
1442 RESET_BIT (inner,max_hdr[blk]); \
1443 max_hdr[blk] = hdr; \
1447 /* Find regions for interblock scheduling.
1449 A region for scheduling can be:
1451 * A loop-free procedure, or
1453 * A reducible inner loop, or
1455 * A basic block not contained in any other region.
1457 ?!? In theory we could build other regions based on extended basic
1458 blocks or reverse extended basic blocks. Is it worth the trouble?
1460 Loop blocks that form a region are put into the region's block list
1461 in topological order.
1463 This procedure stores its results into the following global (ick) variables
1471 We use dominator relationships to avoid making regions out of non-reducible
1474 This procedure needs to be converted to work on pred/succ lists instead
1475 of edge tables. That would simplify it somewhat. */
1478 find_rgns (edge_list, dom)
     struct edge_list *edge_list;
     sbitmap *dom;
{
  int *max_hdr, *dfs_nr, *stack, *degree;
1484 int node, child, loop_head, i, head, tail;
1485 int count = 0, sp, idx = 0, current_edge = out_edges[0];
1486 int num_bbs, num_insns, unreachable;
1487 int too_large_failure;
  /* Note if an edge has been passed.  */
  sbitmap passed;

  /* Note if a block is a natural loop header.  */
  sbitmap header;

  /* Note if a block is a natural inner loop header.  */
  sbitmap inner;

  /* Note if a block is in the block queue.  */
  sbitmap in_queue;

  /* Note if a block is on the DFS stack.  */
  sbitmap in_stack;
1504 int num_edges = NUM_EDGES (edge_list);
1506 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
1507 and a mapping from block to its loop header (if the block is contained
1508 in a loop, else -1).
1510 Store results in HEADER, INNER, and MAX_HDR respectively, these will
1511 be used as inputs to the second traversal.
1513 STACK, SP and DFS_NR are only used during the first traversal. */
1515 /* Allocate and initialize variables for the first traversal. */
1516 max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
1517 dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
1518 stack = (int *) xmalloc (nr_edges * sizeof (int));
1520 inner = sbitmap_alloc (n_basic_blocks);
1521 sbitmap_ones (inner);
1523 header = sbitmap_alloc (n_basic_blocks);
1524 sbitmap_zero (header);
1526 passed = sbitmap_alloc (nr_edges);
1527 sbitmap_zero (passed);
1529 in_queue = sbitmap_alloc (n_basic_blocks);
1530 sbitmap_zero (in_queue);
1532 in_stack = sbitmap_alloc (n_basic_blocks);
1533 sbitmap_zero (in_stack);
  for (i = 0; i < n_basic_blocks; i++)
    max_hdr[i] = -1;
1538 /* DFS traversal to find inner loops in the cfg. */
1543 if (current_edge == 0 || TEST_BIT (passed, current_edge))
1545 /* We have reached a leaf node or a node that was already
1546 processed. Pop edges off the stack until we find
1547 an edge that has not yet been processed. */
	  while (sp >= 0
		 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
1551 /* Pop entry off the stack. */
1552 current_edge = stack[sp--];
1553 node = FROM_BLOCK (current_edge);
1554 child = TO_BLOCK (current_edge);
1555 RESET_BIT (in_stack, child);
1556 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
1557 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
1558 current_edge = NEXT_OUT (current_edge);
	  /* See if we have finished the DFS tree traversal.  */
1562 if (sp < 0 && TEST_BIT (passed, current_edge))
1565 /* Nope, continue the traversal with the popped node. */
1569 /* Process a node. */
1570 node = FROM_BLOCK (current_edge);
1571 child = TO_BLOCK (current_edge);
1572 SET_BIT (in_stack, node);
1573 dfs_nr[node] = ++count;
1575 /* If the successor is in the stack, then we've found a loop.
1576 Mark the loop, if it is not a natural loop, then it will
1577 be rejected during the second traversal. */
1578 if (TEST_BIT (in_stack, child))
1581 SET_BIT (header, child);
1582 UPDATE_LOOP_RELATIONS (node, child);
1583 SET_BIT (passed, current_edge);
1584 current_edge = NEXT_OUT (current_edge);
1588 /* If the child was already visited, then there is no need to visit
	 it again.  Just update the loop relationships and restart
	 with this node.  */
1593 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
1594 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
1595 SET_BIT (passed, current_edge);
1596 current_edge = NEXT_OUT (current_edge);
1600 /* Push an entry on the stack and continue DFS traversal. */
1601 stack[++sp] = current_edge;
1602 SET_BIT (passed, current_edge);
1603 current_edge = OUT_EDGES (child);
1605 /* This is temporary until haifa is converted to use rth's new
1606 cfg routines which have true entry/exit blocks and the
1607 appropriate edges from/to those blocks.
1609 Generally we update dfs_nr for a node when we process its
1610 out edge. However, if the node has no out edge then we will
1611 not set dfs_nr for that node. This can confuse the scheduler
1612 into thinking that we have unreachable blocks, which in turn
1613 disables cross block scheduling.
1615 So, if we have a node with no out edges, go ahead and mark it
1616 as reachable now. */
1617 if (current_edge == 0)
1618 dfs_nr[child] = ++count;
1621 /* Another check for unreachable blocks. The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.
1625 The DFS traversal will mark every block that is reachable from
1626 the entry node by placing a nonzero value in dfs_nr. Thus if
1627 dfs_nr is zero for any block, then it must be unreachable. */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    if (dfs_nr[i] == 0)
      {
	unreachable = 1;
	break;
      }
1636 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
1637 to hold degree counts. */
  degree = dfs_nr;
  for (i = 0; i < n_basic_blocks; i++)
    degree[i] = 0;
1642 for (i = 0; i < num_edges; i++)
1644 edge e = INDEX_EDGE (edge_list, i);
1646 if (e->dest != EXIT_BLOCK_PTR)
1647 degree[e->dest->index]++;
  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
1657 SET_BIT (header, 0);
  /* Second traversal: find reducible inner loops and topologically sort
     blocks of each region.  */
1662 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
1664 /* Find blocks which are inner loop headers. We still have non-reducible
1665 loops to consider at this point. */
1666 for (i = 0; i < n_basic_blocks; i++)
1668 if (TEST_BIT (header, i) && TEST_BIT (inner, i))
1673 /* Now check that the loop is reducible. We do this separate
1674 from finding inner loops so that we do not find a reducible
1675 loop which contains an inner non-reducible loop.
1677 A simple way to find reducible/natural loops is to verify
1678 that each block in the loop is dominated by the loop
1681 If there exists a block that is not dominated by the loop
1682 header, then the block is reachable from outside the loop
1683 and thus the loop is not a natural loop. */
1684 for (j = 0; j < n_basic_blocks; j++)
	      /* First identify blocks in the loop, except for the loop
		 entry block.  */
1688 if (i == max_hdr[j] && i != j)
		  /* Now verify that the block is dominated by the loop
		     header.  */
1692 if (!TEST_BIT (dom[j], i))
	      /* If we exited the loop early, then I is the header of
		 a non-reducible loop and we should quit processing it
		 now.  */
1700 if (j != n_basic_blocks)
1703 /* I is a header of an inner loop, or block 0 in a subroutine
1704 with no loops at all. */
1706 too_large_failure = 0;
1707 loop_head = max_hdr[i];
	      /* Decrease degree of all I's successors for topological
		 ordering.  */
1711 for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
1712 if (e->dest != EXIT_BLOCK_PTR)
1713 --degree[e->dest->index];
1715 /* Estimate # insns, and count # blocks in the region. */
1717 num_insns = (INSN_LUID (BLOCK_END (i))
1718 - INSN_LUID (BLOCK_HEAD (i)));
	      /* Find all loop latches (blocks with back edges to the loop
		 header), or all the leaf blocks if the cfg has no loops.

		 Place those blocks into the queue.  */
1726 for (j = 0; j < n_basic_blocks; j++)
		  /* Leaf nodes have only a single successor which must
		     be EXIT_BLOCK.  */
1729 if (BASIC_BLOCK (j)->succ
1730 && BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
1731 && BASIC_BLOCK (j)->succ->succ_next == NULL)
		    {
		      queue[++tail] = j;
		      SET_BIT (in_queue, j);

		      if (too_large (j, &num_bbs, &num_insns))
			{
			  too_large_failure = 1;
			  break;
			}
		    }
1747 for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
1749 if (e->src == ENTRY_BLOCK_PTR)
1752 node = e->src->index;
1754 if (max_hdr[node] == loop_head && node != i)
1756 /* This is a loop latch. */
1757 queue[++tail] = node;
1758 SET_BIT (in_queue, node);
			  if (too_large (node, &num_bbs, &num_insns))
			    {
			      too_large_failure = 1;
			      break;
			    }
1769 /* Now add all the blocks in the loop to the queue.
1771 We know the loop is a natural loop; however the algorithm
		 above will not always mark certain blocks as being in the
		 loop.  Consider:

		    node   children
		     a	      b,c
		     b	      c
		     c	      a,d
		     d	      b

		 The algorithm in the DFS traversal may not mark B & D as part
		 of the loop (i.e., they will not have max_hdr set to A).
1783 We know they can not be loop latches (else they would have
1784 had max_hdr set since they'd have a backedge to a dominator
1785 block). So we don't need them on the initial queue.
1787 We know they are part of the loop because they are dominated
1788 by the loop header and can be reached by a backwards walk of
1789 the edges starting with nodes on the initial queue.
1791 It is safe and desirable to include those nodes in the
1792 loop/scheduling region. To do so we would need to decrease
1793 the degree of a node if it is the target of a backedge
1794 within the loop itself as the node is placed in the queue.
1796 We do not do this because I'm not sure that the actual
1797 scheduling code will properly handle this case. ?!? */
1799 while (head < tail && !too_large_failure)
1802 child = queue[++head];
1804 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
1806 node = e->src->index;
1808 /* See discussion above about nodes not marked as in
1809 this loop during the initial DFS traversal. */
		  if (e->src == ENTRY_BLOCK_PTR
		      || max_hdr[node] != loop_head)
		    {
		      tail = -1;
		      break;
		    }
		  else if (!TEST_BIT (in_queue, node) && node != i)
1818 queue[++tail] = node;
1819 SET_BIT (in_queue, node);
1821 if (too_large (node, &num_bbs, &num_insns))
1823 too_large_failure = 1;
1830 if (tail >= 0 && !too_large_failure)
1832 /* Place the loop header into list of region blocks. */
1834 rgn_bb_table[idx] = i;
1835 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1836 RGN_BLOCKS (nr_regions) = idx++;
1837 CONTAINING_RGN (i) = nr_regions;
1838 BLOCK_TO_BB (i) = count = 0;
1840 /* Remove blocks from queue[] when their in degree
1841 becomes zero. Repeat until no blocks are left on the
	     list.  This produces a topological list of blocks in
	     the region.  */
1848 child = queue[head];
1849 if (degree[child] == 0)
1854 rgn_bb_table[idx++] = child;
1855 BLOCK_TO_BB (child) = ++count;
1856 CONTAINING_RGN (child) = nr_regions;
1857 queue[head] = queue[tail--];
1859 for (e = BASIC_BLOCK (child)->succ;
1862 if (e->dest != EXIT_BLOCK_PTR)
1863 --degree[e->dest->index];
  /* Any block that did not end up in a region is placed into a region
     by itself.  */
1877 for (i = 0; i < n_basic_blocks; i++)
1880 rgn_bb_table[idx] = i;
1881 RGN_NR_BLOCKS (nr_regions) = 1;
1882 RGN_BLOCKS (nr_regions) = idx++;
1883 CONTAINING_RGN (i) = nr_regions++;
1884 BLOCK_TO_BB (i) = 0;
1897 /* Functions for regions scheduling information. */
1899 /* Compute dominators, probability, and potential-split-edges of bb.
1900 Assume that these values were already computed for bb's predecessors. */
static void
compute_dom_prob_ps (bb)
     int bb;
{
1906 int nxt_in_edge, fst_in_edge, pred;
1907 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1910 if (IS_RGN_ENTRY (bb))
1912 BITSET_ADD (dom[bb], 0, bbset_size);
1917 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
  /* Initialize dom[bb] to '111..1'.  */
1920 BITSET_INVERT (dom[bb], bbset_size);
1924 pred = FROM_BLOCK (nxt_in_edge);
1925 BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);
1927 BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
1930 BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);
1933 nr_rgn_out_edges = 0;
1934 fst_out_edge = OUT_EDGES (pred);
1935 nxt_out_edge = NEXT_OUT (fst_out_edge);
1936 BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
1939 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);
1941 /* The successor doesn't belong in the region? */
1942 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1943 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1946 while (fst_out_edge != nxt_out_edge)
1949 /* The successor doesn't belong in the region? */
1950 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1951 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1953 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
1954 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1958 /* Now nr_rgn_out_edges is the number of region-exit edges from
1959 pred, and nr_out_edges will be the number of pred out edges
1960 not leaving the region. */
1961 nr_out_edges -= nr_rgn_out_edges;
1962 if (nr_rgn_out_edges > 0)
1963 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
      else
	prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
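      /* Worked example: suppose pred has prob 0.5 and two out edges,
	 one of which leaves the region.  Then nr_out_edges is 1 after
	 the subtraction above and bb gains 0.9 * 0.5 / 1 = 0.45; the
	 0.9 factor is a heuristic penalty for the region exit.  */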
1966 nxt_in_edge = NEXT_IN (nxt_in_edge);
1968 while (fst_in_edge != nxt_in_edge);
1970 BITSET_ADD (dom[bb], bb, bbset_size);
1971 BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);
1973 if (sched_verbose >= 2)
1974 fprintf (dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1975 (int) (100.0 * prob[bb]));
1978 /* Functions for target info. */
/* Compute in BL the list of split-edges of bb_src relative to bb_trg.
   Note that bb_trg dominates bb_src.  */
static void
split_edges (bb_src, bb_trg, bl)
     int bb_src;
     int bb_trg;
     edgelst *bl;
{
1989 int es = edgeset_size;
1990 edgeset src = (edgeset) xcalloc (es, sizeof (HOST_WIDE_INT));
  while (es--)
    src[es] = (pot_split[bb_src])[es];
  BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
  extract_bitlst (src, edgeset_size, edgeset_bitsize, bl);
  free (src);
1999 /* Find the valid candidate-source-blocks for the target block TRG, compute
2000 their probability, and check if they are speculative or not.
2001 For speculative sources, compute their update-blocks and split-blocks. */
2004 compute_trg_info (trg)
2007 register candidate *sp;
2009 int check_block, update_idx;
2010 int i, j, k, fst_edge, nxt_edge;
2012 /* Define some of the fields for the target bb as well. */
2013 sp = candidate_table + trg;
2015 sp->is_speculative = 0;
2018 for (i = trg + 1; i < current_nr_blocks; i++)
2020 sp = candidate_table + i;
2022 sp->is_valid = IS_DOMINATED (i, trg);
2025 sp->src_prob = GET_SRC_PROB (i, trg);
2026 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
2031 split_edges (i, trg, &el);
2032 sp->is_speculative = (el.nr_members) ? 1 : 0;
2033 if (sp->is_speculative && !flag_schedule_speculative)
2039 sp->split_bbs.first_member = &bblst_table[bblst_last];
2040 sp->split_bbs.nr_members = el.nr_members;
2041 for (j = 0; j < el.nr_members; bblst_last++, j++)
2042 bblst_table[bblst_last] =
2043 TO_BLOCK (rgn_edges[el.first_member[j]]);
2044 sp->update_bbs.first_member = &bblst_table[bblst_last];
2046 for (j = 0; j < el.nr_members; j++)
2048 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
2049 fst_edge = nxt_edge = OUT_EDGES (check_block);
2052 for (k = 0; k < el.nr_members; k++)
2053 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
2056 if (k >= el.nr_members)
2058 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
2062 nxt_edge = NEXT_OUT (nxt_edge);
2064 while (fst_edge != nxt_edge);
2066 sp->update_bbs.nr_members = update_idx;
2071 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
2073 sp->is_speculative = 0;
2079 /* Print candidates info, for debugging purposes. Callable from debugger. */
2085 if (!candidate_table[i].is_valid)
2088 if (candidate_table[i].is_speculative)
2091 fprintf (dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
2093 fprintf (dump, "split path: ");
2094 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
2096 int b = candidate_table[i].split_bbs.first_member[j];
2098 fprintf (dump, " %d ", b);
2100 fprintf (dump, "\n");
2102 fprintf (dump, "update path: ");
2103 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
2105 int b = candidate_table[i].update_bbs.first_member[j];
2107 fprintf (dump, " %d ", b);
2109 fprintf (dump, "\n");
2113 fprintf (dump, " src %d equivalent\n", BB_TO_BLOCK (i));
2117 /* Print candidates info, for debugging purposes. Callable from debugger. */
2120 debug_candidates (trg)
2125 fprintf (dump, "----------- candidate table: target: b=%d bb=%d ---\n",
2126 BB_TO_BLOCK (trg), trg);
2127 for (i = trg + 1; i < current_nr_blocks; i++)
2128 debug_candidate (i);
/* Functions for speculative scheduling.  */
2133 /* Return 0 if x is a set of a register alive in the beginning of one
2134 of the split-blocks of src, otherwise return 1. */
2137 check_live_1 (src, x)
2143 register rtx reg = SET_DEST (x);
2148 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2149 || GET_CODE (reg) == SIGN_EXTRACT
2150 || GET_CODE (reg) == STRICT_LOW_PART)
2151 reg = XEXP (reg, 0);
2153 if (GET_CODE (reg) == PARALLEL
2154 && GET_MODE (reg) == BLKmode)
2157 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2158 if (check_live_1 (src, XVECEXP (reg, 0, i)))
2163 if (GET_CODE (reg) != REG)
2166 regno = REGNO (reg);
2168 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
2170 /* Global registers are assumed live. */
2175 if (regno < FIRST_PSEUDO_REGISTER)
2177 /* Check for hard registers. */
2178 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2181 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2183 int b = candidate_table[src].split_bbs.first_member[i];
	      if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
				   regno + j))
		return 0;
      /* Check for pseudo registers.  */
2196 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2198 int b = candidate_table[src].split_bbs.first_member[i];
	  if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
	    return 0;
2211 /* If x is a set of a register R, mark that R is alive in the beginning
2212 of every update-block of src. */
2215 update_live_1 (src, x)
2221 register rtx reg = SET_DEST (x);
2226 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2227 || GET_CODE (reg) == SIGN_EXTRACT
2228 || GET_CODE (reg) == STRICT_LOW_PART)
2229 reg = XEXP (reg, 0);
2231 if (GET_CODE (reg) == PARALLEL
2232 && GET_MODE (reg) == BLKmode)
2235 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2236 update_live_1 (src, XVECEXP (reg, 0, i));
2240 if (GET_CODE (reg) != REG)
  /* Global registers are always live, so the code below does not apply
     to them.  */
2246 regno = REGNO (reg);
2248 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
2250 if (regno < FIRST_PSEUDO_REGISTER)
2252 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2255 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2257 int b = candidate_table[src].update_bbs.first_member[i];
	      SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
				 regno + j);
2266 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2268 int b = candidate_table[src].update_bbs.first_member[i];
2270 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
2276 /* Return 1 if insn can be speculatively moved from block src to trg,
2277 otherwise return 0. Called before first insertion of insn to
2278 ready-list or before the scheduling. */
2281 check_live (insn, src)
2285 /* Find the registers set by instruction. */
2286 if (GET_CODE (PATTERN (insn)) == SET
2287 || GET_CODE (PATTERN (insn)) == CLOBBER)
2288 return check_live_1 (src, PATTERN (insn));
2289 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2292 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2293 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2294 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2295 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
2304 /* Update the live registers info after insn was moved speculatively from
2305 block src to trg. */
2308 update_live (insn, src)
2312 /* Find the registers set by instruction. */
2313 if (GET_CODE (PATTERN (insn)) == SET
2314 || GET_CODE (PATTERN (insn)) == CLOBBER)
2315 update_live_1 (src, PATTERN (insn));
2316 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2319 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2320 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2321 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2322 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
2326 /* Exception Free Loads:
2328 We define five classes of speculative loads: IFREE, IRISKY,
2329 PFREE, PRISKY, and MFREE.
2331 IFREE loads are loads that are proved to be exception-free, just
2332 by examining the load insn. Examples for such loads are loads
2333 from TOC and loads of global data.
2335 IRISKY loads are loads that are proved to be exception-risky,
2336 just by examining the load insn. Examples for such loads are
2337 volatile loads and loads from shared memory.
2339 PFREE loads are loads for which we can prove, by examining other
2340 insns, that they are exception-free. Currently, this class consists
2341 of loads for which we are able to find a "similar load", either in
2342 the target block, or, if only one split-block exists, in that split
   block.  Load2 is similar to load1 if both have the same single base
2344 register. We identify only part of the similar loads, by finding
2345 an insn upon which both load1 and load2 have a DEF-USE dependence.
2347 PRISKY loads are loads for which we can prove, by examining other
2348 insns, that they are exception-risky. Currently we have two proofs for
2349 such loads. The first proof detects loads that are probably guarded by a
2350 test on the memory address. This proof is based on the
2351 backward and forward data dependence information for the region.
2352 Let load-insn be the examined load.
2353 Load-insn is PRISKY iff ALL the following hold:
2355 - insn1 is not in the same block as load-insn
2356 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
   - test-insn is either a compare or a branch, not in the same block
     as load-insn
2359 - load-insn is reachable from test-insn
2360 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
2362 This proof might fail when the compare and the load are fed
2363 by an insn not in the region. To solve this, we will add to this
2364 group all loads that have no input DEF-USE dependence.
2366 The second proof detects loads that are directly or indirectly
2367 fed by a speculative load. This proof is affected by the
2368 scheduling process. We will use the flag fed_by_spec_load.
2369 Initially, all insns have this flag reset. After a speculative
2370 motion of an insn, if insn is either a load, or marked as
2371 fed_by_spec_load, we will also mark as fed_by_spec_load every
2372 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
2373 load which is fed_by_spec_load is also PRISKY.
2375 MFREE (maybe-free) loads are all the remaining loads. They may be
2376 exception-free, but we cannot prove it.
2378 Now, all loads in IFREE and PFREE classes are considered
2379 exception-free, while all loads in IRISKY and PRISKY classes are
2380 considered exception-risky. As for loads in the MFREE class,
2381 these are considered either exception-free or exception-risky,
2382 depending on whether we are pessimistic or optimistic. We have
2383 to take the pessimistic approach to assure the safety of
2384 speculative scheduling, but we can take the optimistic approach
   by invoking the -fsched-spec-load-dangerous option.  */
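/* For example (illustrative rtl): (set (reg 65) (mem/v (reg 66))) is
   IRISKY by inspection alone, since the mem is volatile, while
   (set (reg 65) (mem (plus (reg 66) (const_int 4)))) starts out as a
   PFREE_CANDIDATE because its address has a single base register, and
   is promoted to PFREE only if a similar load off the same base is
   found in the target block or in the single split-block.  */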
2387 enum INSN_TRAP_CLASS
2389 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
2390 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
2393 #define WORST_CLASS(class1, class2) \
2394 ((class1 > class2) ? class1 : class2)
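/* E.g. WORST_CLASS (PFREE_CANDIDATE, IRISKY) == IRISKY, since the
   enumeration above orders the classes from least to most risky.  */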
2396 /* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
2397 #define IS_REACHABLE(bb_from, bb_to) \
2399 || IS_RGN_ENTRY (bb_from) \
2400 || (bitset_member (ancestor_edges[bb_to], \
2401 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \
/* Non-zero iff the address is computed from at most one register,
   possibly combined with a constant.  */
2405 #define CONST_BASED_ADDRESS_P(x) \
2406 (GET_CODE (x) == REG \
2407 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
2408 || (GET_CODE (x) == LO_SUM)) \
2409 && (GET_CODE (XEXP (x, 0)) == CONST_INT \
2410 || GET_CODE (XEXP (x, 1)) == CONST_INT)))
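/* E.g. (reg 65) and (plus (reg 65) (const_int 4)) satisfy this
   predicate; (plus (reg 65) (reg 66)) does not, since neither operand
   of the PLUS is a CONST_INT.  */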
2412 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
2415 set_spec_fed (load_insn)
2420 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
2421 if (GET_MODE (link) == VOIDmode)
2422 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
2423 } /* set_spec_fed */
2425 /* On the path from the insn to load_insn_bb, find a conditional
2426 branch depending on insn, that guards the speculative load. */
2429 find_conditional_protection (insn, load_insn_bb)
2435 /* Iterate through DEF-USE forward dependences. */
2436 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2438 rtx next = XEXP (link, 0);
2439 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
2440 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
2441 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
2442 && load_insn_bb != INSN_BB (next)
2443 && GET_MODE (link) == VOIDmode
2444 && (GET_CODE (next) == JUMP_INSN
2445 || find_conditional_protection (next, load_insn_bb)))
2449 } /* find_conditional_protection */
2451 /* Returns 1 if the same insn1 that participates in the computation
2452 of load_insn's address is feeding a conditional branch that is
   guarding on load_insn.  This is true if we find the two DEF-USE
   dependences:
2455 insn1 -> ... -> conditional-branch
2456 insn1 -> ... -> load_insn,
2457 and if a flow path exist:
2458 insn1 -> ... -> conditional-branch -> ... -> load_insn,
2459 and if insn1 is on the path
2460 region-entry -> ... -> bb_trg -> ... load_insn.
2462 Locate insn1 by climbing on LOG_LINKS from load_insn.
2463 Locate the branch by following INSN_DEPEND from insn1. */
2466 is_conditionally_protected (load_insn, bb_src, bb_trg)
2472 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
2474 rtx insn1 = XEXP (link, 0);
2476 /* Must be a DEF-USE dependence upon non-branch. */
2477 if (GET_MODE (link) != VOIDmode
2478 || GET_CODE (insn1) == JUMP_INSN)
2481 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
2482 if (INSN_BB (insn1) == bb_src
2483 || (CONTAINING_RGN (BLOCK_NUM (insn1))
2484 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
2485 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
2486 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
2489 /* Now search for the conditional-branch. */
2490 if (find_conditional_protection (insn1, bb_src))
2493 /* Recursive step: search another insn1, "above" current insn1. */
2494 return is_conditionally_protected (insn1, bb_src, bb_trg);
2497 /* The chain does not exist. */
2499 } /* is_conditionally_protected */
2501 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
2502 load_insn can move speculatively from bb_src to bb_trg. All the
2503 following must hold:
2505 (1) both loads have 1 base register (PFREE_CANDIDATEs).
2506 (2) load_insn and load1 have a def-use dependence upon
2507 the same insn 'insn1'.
2508 (3) either load2 is in bb_trg, or:
2509 - there's only one split-block, and
2510 - load1 is on the escape path, and
2512 From all these we can conclude that the two loads access memory
2513 addresses that differ at most by a constant, and hence if moving
   load_insn would cause an exception, it would have been caused by
   load2 anyhow.  */
2518 is_pfree (load_insn, bb_src, bb_trg)
2523 register candidate *candp = candidate_table + bb_src;
2525 if (candp->split_bbs.nr_members != 1)
2526 /* Must have exactly one escape block. */
2529 for (back_link = LOG_LINKS (load_insn);
2530 back_link; back_link = XEXP (back_link, 1))
2532 rtx insn1 = XEXP (back_link, 0);
2534 if (GET_MODE (back_link) == VOIDmode)
2536 /* Found a DEF-USE dependence (insn1, load_insn). */
2539 for (fore_link = INSN_DEPEND (insn1);
2540 fore_link; fore_link = XEXP (fore_link, 1))
2542 rtx insn2 = XEXP (fore_link, 0);
2543 if (GET_MODE (fore_link) == VOIDmode)
2545 /* Found a DEF-USE dependence (insn1, insn2). */
2546 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
2547 /* insn2 not guaranteed to be a 1 base reg load. */
2550 if (INSN_BB (insn2) == bb_trg)
2551 /* insn2 is the similar load, in the target block. */
2554 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
2555 /* insn2 is a similar load, in a split-block. */
2562 /* Couldn't find a similar load. */
2566 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
2567 as found by analyzing insn's expression. */
2570 may_trap_exp (x, is_store)
2578 code = GET_CODE (x);
      /* The insn uses memory: a volatile load.  */
      if (MEM_VOLATILE_P (x))
	return IRISKY;
      /* An exception-free load.  */
      if (!may_trap_p (x))
	return IFREE;
      /* A load with 1 base register, to be further checked.  */
      if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
	return PFREE_CANDIDATE;
      /* No info on the load, to be further checked.  */
      return PRISKY_CANDIDATE;
2603 int i, insn_class = TRAP_FREE;
      /* Neither store nor load, check if it may cause a trap.  */
      if (may_trap_p (x))
	return TRAP_RISKY;
2608 /* Recursive step: walk the insn... */
2609 fmt = GET_RTX_FORMAT (code);
2610 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2614 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
2615 insn_class = WORST_CLASS (insn_class, tmp_class);
2617 else if (fmt[i] == 'E')
2620 for (j = 0; j < XVECLEN (x, i); j++)
2622 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
2623 insn_class = WORST_CLASS (insn_class, tmp_class);
2624 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2628 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
/* Classifies insn for the purpose of verifying that it can be
   moved speculatively, by examining its pattern, returning:
2637 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
2638 TRAP_FREE: non-load insn.
   IFREE: load from a globally safe location.
2640 IRISKY: volatile load.
   PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
   being either PFREE or PRISKY.  */
2645 haifa_classify_insn (insn)
2648 rtx pat = PATTERN (insn);
2649 int tmp_class = TRAP_FREE;
2650 int insn_class = TRAP_FREE;
2653 if (GET_CODE (pat) == PARALLEL)
2655 int i, len = XVECLEN (pat, 0);
2657 for (i = len - 1; i >= 0; i--)
2659 code = GET_CODE (XVECEXP (pat, 0, i));
2663 /* Test if it is a 'store'. */
2664 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
2667 /* Test if it is a store. */
2668 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
2669 if (tmp_class == TRAP_RISKY)
2671 /* Test if it is a load. */
2673 WORST_CLASS (tmp_class,
2674 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
2678 tmp_class = TRAP_RISKY;
2682 insn_class = WORST_CLASS (insn_class, tmp_class);
2683 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2689 code = GET_CODE (pat);
2693 /* Test if it is a 'store'. */
2694 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
2697 /* Test if it is a store. */
2698 tmp_class = may_trap_exp (SET_DEST (pat), 1);
2699 if (tmp_class == TRAP_RISKY)
2701 /* Test if it is a load. */
2703 WORST_CLASS (tmp_class,
2704 may_trap_exp (SET_SRC (pat), 0));
2708 tmp_class = TRAP_RISKY;
2712 insn_class = tmp_class;
2718 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
2719 a load moved speculatively, or if load_insn is protected by
2720 a compare on load_insn's address). */
2723 is_prisky (load_insn, bb_src, bb_trg)
2727 if (FED_BY_SPEC_LOAD (load_insn))
2730 if (LOG_LINKS (load_insn) == NULL)
2731 /* Dependence may 'hide' out of the region. */
2734 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2740 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
   Return 1 if insn is exception-free (and the motion is valid)
   and 0 otherwise.  */
2745 is_exception_free (insn, bb_src, bb_trg)
2749 int insn_class = haifa_classify_insn (insn);
2751 /* Handle non-load insns. */
2762 if (!flag_schedule_speculative_load)
2764 IS_LOAD_INSN (insn) = 1;
2771 case PFREE_CANDIDATE:
2772 if (is_pfree (insn, bb_src, bb_trg))
2774 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2775 case PRISKY_CANDIDATE:
2776 if (!flag_schedule_speculative_load_dangerous
2777 || is_prisky (insn, bb_src, bb_trg))
2783 return flag_schedule_speculative_load_dangerous;
2786 /* Process an insn's memory dependencies. There are four kinds of
2789 (0) read dependence: read follows read
2790 (1) true dependence: read follows write
2791 (2) anti dependence: write follows read
2792 (3) output dependence: write follows write
2794 We are careful to build only dependencies which actually exist, and
2795 use transitivity to avoid building too many links. */
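/* E.g. in the sequence "r1 = [a]; [a] = r2; [a] = r3" the first store
   carries an anti dependence on the load and the second store an
   output dependence on the first; by transitivity the second store
   needs no direct link back to the load.  */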
/* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0
   otherwise.  */
2800 HAIFA_INLINE static char
2801 find_insn_mem_list (insn, x, list, list1)
2807 if (XEXP (list, 0) == insn
2808 && XEXP (list1, 0) == x)
2810 list = XEXP (list, 1);
2811 list1 = XEXP (list1, 1);
2816 /* Compute the function units used by INSN. This caches the value
2817 returned by function_units_used. A function unit is encoded as the
   unit number if the value is non-negative and the complement of a
2819 mask if the value is negative. A function unit index is the
2820 non-negative encoding. */
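/* E.g. an insn that can issue only on unit 3 is encoded as 3, while
   one that can issue on unit 1 or unit 2 is encoded as ~6, the
   complement of the mask (1 << 1) | (1 << 2).  (A sketch of the
   encoding described above, with illustrative unit numbers.)  */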
2822 HAIFA_INLINE static int
2826 register int unit = INSN_UNIT (insn);
2830 recog_memoized (insn);
2832 /* A USE insn, or something else we don't need to understand.
2833 We can't pass these directly to function_units_used because it will
2834 trigger a fatal error for unrecognizable insns. */
2835 if (INSN_CODE (insn) < 0)
2839 unit = function_units_used (insn);
2840 /* Increment non-negative values so we can cache zero. */
2844 /* We only cache 16 bits of the result, so if the value is out of
2845 range, don't cache it. */
2846 if (FUNCTION_UNITS_SIZE < HOST_BITS_PER_SHORT
2848 || (unit & ~((1 << (HOST_BITS_PER_SHORT - 1)) - 1)) == 0)
2849 INSN_UNIT (insn) = unit;
2851 return (unit > 0 ? unit - 1 : unit);
2854 /* Compute the blockage range for executing INSN on UNIT. This caches
2855 the value returned by the blockage_range_function for the unit.
2856 These values are encoded in an int where the upper half gives the
2857 minimum value and the lower half gives the maximum value. */
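/* E.g. a range with minimum 2 and maximum 7 would be encoded as
   (2 << BLOCKAGE_BITS) | 7; MIN_BLOCKAGE_COST and MAX_BLOCKAGE_COST
   (used below) recover the upper and lower halves.  (A sketch of the
   encoding described above, with illustrative values.)  */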
2859 HAIFA_INLINE static unsigned int
2860 blockage_range (unit, insn)
2864 unsigned int blockage = INSN_BLOCKAGE (insn);
2867 if ((int) UNIT_BLOCKED (blockage) != unit + 1)
2869 range = function_units[unit].blockage_range_function (insn);
      /* We only cache the blockage range for one unit and then only if
	 the values fit.  */
2872 if (HOST_BITS_PER_INT >= UNIT_BITS + 2 * BLOCKAGE_BITS)
2873 INSN_BLOCKAGE (insn) = ENCODE_BLOCKAGE (unit + 1, range);
2876 range = BLOCKAGE_RANGE (blockage);
2881 /* A vector indexed by function unit instance giving the last insn to use
2882 the unit. The value of the function unit instance index for unit U
2883 instance I is (U + I * FUNCTION_UNITS_SIZE). */
2884 static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
2886 /* A vector indexed by function unit instance giving the minimum time when
2887 the unit will unblock based on the maximum blockage cost. */
2888 static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
2890 /* A vector indexed by function unit number giving the number of insns
2891 that remain to use the unit. */
2892 static int unit_n_insns[FUNCTION_UNITS_SIZE];
2894 /* Reset the function unit state to the null state. */
2899 bzero ((char *) unit_last_insn, sizeof (unit_last_insn));
2900 bzero ((char *) unit_tick, sizeof (unit_tick));
2901 bzero ((char *) unit_n_insns, sizeof (unit_n_insns));
2904 /* Return the issue-delay of an insn. */
2906 HAIFA_INLINE static int
2907 insn_issue_delay (insn)
2911 int unit = insn_unit (insn);
2913 /* Efficiency note: in fact, we are working 'hard' to compute a
     value that was available in the md file, and is not available in
2915 function_units[] structure. It would be nice to have this
2916 value there, too. */
2919 if (function_units[unit].blockage_range_function &&
2920 function_units[unit].blockage_function)
2921 delay = function_units[unit].blockage_function (insn, insn);
2924 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2925 if ((unit & 1) != 0 && function_units[i].blockage_range_function
2926 && function_units[i].blockage_function)
2927 delay = MAX (delay, function_units[i].blockage_function (insn, insn));
2932 /* Return the actual hazard cost of executing INSN on the unit UNIT,
2933 instance INSTANCE at time CLOCK if the previous actual hazard cost
2936 HAIFA_INLINE static int
2937 actual_hazard_this_instance (unit, instance, insn, clock, cost)
2938 int unit, instance, clock, cost;
2941 int tick = unit_tick[instance]; /* Issue time of the last issued insn. */
2943 if (tick - clock > cost)
2945 /* The scheduler is operating forward, so unit's last insn is the
2946 executing insn and INSN is the candidate insn. We want a
2947 more exact measure of the blockage if we execute INSN at CLOCK
2948 given when we committed the execution of the unit's last insn.
2950 The blockage value is given by either the unit's max blockage
2951 constant, blockage range function, or blockage function. Use
2952 the most exact form for the given unit. */
2954 if (function_units[unit].blockage_range_function)
2956 if (function_units[unit].blockage_function)
2957 tick += (function_units[unit].blockage_function
2958 (unit_last_insn[instance], insn)
2959 - function_units[unit].max_blockage);
2961 tick += ((int) MAX_BLOCKAGE_COST (blockage_range (unit, insn))
2962 - function_units[unit].max_blockage);
2964 if (tick - clock > cost)
2965 cost = tick - clock;
2970 /* Record INSN as having begun execution on the units encoded by UNIT at
2973 HAIFA_INLINE static void
2974 schedule_unit (unit, insn, clock)
2982 int instance = unit;
2983 #if MAX_MULTIPLICITY > 1
2984 /* Find the first free instance of the function unit and use that
2985 one. We assume that one is free. */
2986 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
2988 if (!actual_hazard_this_instance (unit, instance, insn, clock, 0))
2990 instance += FUNCTION_UNITS_SIZE;
2993 unit_last_insn[instance] = insn;
2994 unit_tick[instance] = (clock + function_units[unit].max_blockage);
2997 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2998 if ((unit & 1) != 0)
2999 schedule_unit (i, insn, clock);
3002 /* Return the actual hazard cost of executing INSN on the units encoded by
3003 UNIT at time CLOCK if the previous actual hazard cost was COST. */
3005 HAIFA_INLINE static int
3006 actual_hazard (unit, insn, clock, cost)
3007 int unit, clock, cost;
3014 /* Find the instance of the function unit with the minimum hazard. */
3015 int instance = unit;
3016 int best_cost = actual_hazard_this_instance (unit, instance, insn,
3018 #if MAX_MULTIPLICITY > 1
3021 if (best_cost > cost)
3023 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
3025 instance += FUNCTION_UNITS_SIZE;
3026 this_cost = actual_hazard_this_instance (unit, instance, insn,
3028 if (this_cost < best_cost)
3030 best_cost = this_cost;
3031 if (this_cost <= cost)
3037 cost = MAX (cost, best_cost);
3040 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
3041 if ((unit & 1) != 0)
3042 cost = actual_hazard (i, insn, clock, cost);
3047 /* Return the potential hazard cost of executing an instruction on the
3048 units encoded by UNIT if the previous potential hazard cost was COST.
3049 An insn with a large blockage time is chosen in preference to one
3050 with a smaller time; an insn that uses a unit that is more likely
3051 to be used is chosen in preference to one with a unit that is less
3052 used. We are trying to minimize a subsequent actual hazard. */
3054 HAIFA_INLINE static int
3055 potential_hazard (unit, insn, cost)
3060 unsigned int minb, maxb;
3064 minb = maxb = function_units[unit].max_blockage;
3067 if (function_units[unit].blockage_range_function)
3069 maxb = minb = blockage_range (unit, insn);
3070 maxb = MAX_BLOCKAGE_COST (maxb);
3071 minb = MIN_BLOCKAGE_COST (minb);
3076 /* Make the number of instructions left dominate. Make the
3077 minimum delay dominate the maximum delay. If all these
3078 are the same, use the unit number to add an arbitrary
3079 ordering. Other terms can be added. */
3080 ncost = minb * 0x40 + maxb;
3081 ncost *= (unit_n_insns[unit] - 1) * 0x1000 + unit;
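	  /* Worked example: with minb == 2, maxb == 9 and three insns
	     left on unit 0, ncost = (2 * 0x40 + 9) * (2 * 0x1000 + 0);
	     the insn-count term dominates the delay terms, which in
	     turn dominate the unit-number tie-breaker, as described
	     above.  */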
3088 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
3089 if ((unit & 1) != 0)
3090 cost = potential_hazard (i, insn, cost);
3095 /* Compute cost of executing INSN given the dependence LINK on the insn USED.
3096 This is the number of cycles between instruction issue and
3097 instruction results. */
3099 HAIFA_INLINE static int
3100 insn_cost (insn, link, used)
3101 rtx insn, link, used;
3103 register int cost = INSN_COST (insn);
3107 recog_memoized (insn);
3109 /* A USE insn, or something else we don't need to understand.
3110 We can't pass these directly to result_ready_cost because it will
3111 trigger a fatal error for unrecognizable insns. */
3112 if (INSN_CODE (insn) < 0)
3114 INSN_COST (insn) = 1;
3119 cost = result_ready_cost (insn);
3124 INSN_COST (insn) = cost;
3128 /* In this case estimate cost without caring how insn is used. */
3129 if (link == 0 && used == 0)
3132 /* A USE insn should never require the value used to be computed. This
3133 allows the computation of a function's result and parameter values to
3134 overlap the return and call. */
3135 recog_memoized (used);
3136 if (INSN_CODE (used) < 0)
3137 LINK_COST_FREE (link) = 1;
3139 /* If some dependencies vary the cost, compute the adjustment. Most
3140 commonly, the adjustment is complete: either the cost is ignored
3141 (in the case of an output- or anti-dependence), or the cost is
3142 unchanged. These values are cached in the link as LINK_COST_FREE
3143 and LINK_COST_ZERO. */
3145 if (LINK_COST_FREE (link))
3148 else if (!LINK_COST_ZERO (link))
3152 ADJUST_COST (used, link, insn, ncost);
3155 LINK_COST_FREE (link) = 1;
3159 LINK_COST_ZERO (link) = 1;
3166 /* Compute the priority number for INSN. */
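/* The priority is the maximum insn_cost-weighted path length from INSN
   to any insn that depends on it within the same block, i.e. INSN's
   critical-path distance; see the recursion over INSN_DEPEND below.  */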
static int
priority (insn)
     rtx insn;
{
  int this_priority;
  rtx link;

  if (! INSN_P (insn))
    return 0;
3178 if ((this_priority = INSN_PRIORITY (insn)) == 0)
3180 if (INSN_DEPEND (insn) == 0)
3181 this_priority = insn_cost (insn, 0, 0);
3183 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
3188 if (RTX_INTEGRATED_P (link))
3191 next = XEXP (link, 0);
3193 /* Critical path is meaningful in block boundaries only. */
3194 if (BLOCK_NUM (next) != BLOCK_NUM (insn))
3197 next_priority = insn_cost (insn, link, next) + priority (next);
3198 if (next_priority > this_priority)
3199 this_priority = next_priority;
3201 INSN_PRIORITY (insn) = this_priority;
3203 return this_priority;
3206 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
3207 them to the unused_*_list variables, so that they can be reused. */
3210 free_pending_lists ()
3214 for (bb = 0; bb < current_nr_blocks; bb++)
3216 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
3217 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
3218 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
3219 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
3223 /* Add an INSN and MEM reference pair to a pending INSN_LIST and MEM_LIST.
3224 The MEM is a memory reference contained within INSN, which we are saving
3225 so that we can do memory aliasing on it. */
3228 add_insn_mem_dependence (deps, insn_list, mem_list, insn, mem)
3230 rtx *insn_list, *mem_list, insn, mem;
  link = alloc_INSN_LIST (insn, *insn_list);
  *insn_list = link;

  link = alloc_EXPR_LIST (VOIDmode, mem, *mem_list);
  *mem_list = link;

  deps->pending_lists_length++;
3243 /* Make a dependency between every memory reference on the pending lists
   and INSN, thus flushing the pending lists.  If ONLY_WRITE, don't
   flush the read list.  */
3248 flush_pending_lists (deps, insn, only_write)
3256 while (deps->pending_read_insns && ! only_write)
3258 add_dependence (insn, XEXP (deps->pending_read_insns, 0),
3261 link = deps->pending_read_insns;
3262 deps->pending_read_insns = XEXP (deps->pending_read_insns, 1);
3263 free_INSN_LIST_node (link);
3265 link = deps->pending_read_mems;
3266 deps->pending_read_mems = XEXP (deps->pending_read_mems, 1);
3267 free_EXPR_LIST_node (link);
3269 while (deps->pending_write_insns)
3271 add_dependence (insn, XEXP (deps->pending_write_insns, 0),
3274 link = deps->pending_write_insns;
3275 deps->pending_write_insns = XEXP (deps->pending_write_insns, 1);
3276 free_INSN_LIST_node (link);
3278 link = deps->pending_write_mems;
3279 deps->pending_write_mems = XEXP (deps->pending_write_mems, 1);
3280 free_EXPR_LIST_node (link);
3282 deps->pending_lists_length = 0;
3284 /* last_pending_memory_flush is now a list of insns. */
3285 for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
3286 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3288 free_INSN_LIST_list (&deps->last_pending_memory_flush);
3289 deps->last_pending_memory_flush = alloc_INSN_LIST (insn, NULL_RTX);
3292 /* Analyze a single SET, CLOBBER, PRE_DEC, POST_DEC, PRE_INC or POST_INC
3293 rtx, X, creating all dependencies generated by the write to the
3294 destination of X, and reads of everything mentioned. */
3297 sched_analyze_1 (deps, x, insn)
3303 register rtx dest = XEXP (x, 0);
3304 enum rtx_code code = GET_CODE (x);
3309 if (GET_CODE (dest) == PARALLEL
3310 && GET_MODE (dest) == BLKmode)
3313 for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
3314 sched_analyze_1 (deps, XVECEXP (dest, 0, i), insn);
3315 if (GET_CODE (x) == SET)
3316 sched_analyze_2 (deps, SET_SRC (x), insn);
3320 while (GET_CODE (dest) == STRICT_LOW_PART || GET_CODE (dest) == SUBREG
3321 || GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3323 if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3325 /* The second and third arguments are values read by this insn. */
3326 sched_analyze_2 (deps, XEXP (dest, 1), insn);
3327 sched_analyze_2 (deps, XEXP (dest, 2), insn);
3329 dest = XEXP (dest, 0);
3332 if (GET_CODE (dest) == REG)
3336 regno = REGNO (dest);
3338 /* A hard reg in a wide mode may really be multiple registers.
3339 If so, mark all of them just like the first. */
3340 if (regno < FIRST_PSEUDO_REGISTER)
3342 i = HARD_REGNO_NREGS (regno, GET_MODE (dest));
3348 for (u = deps->reg_last_uses[r]; u; u = XEXP (u, 1))
3349 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3351 for (u = deps->reg_last_sets[r]; u; u = XEXP (u, 1))
3352 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3354 /* Clobbers need not be ordered with respect to one
		     another, but sets must be ordered with respect to a
		     pending clobber.  */
		  if (code == SET)
		    {
		      free_INSN_LIST_list (&deps->reg_last_uses[r]);
		      for (u = deps->reg_last_clobbers[r]; u; u = XEXP (u, 1))
			add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
		      SET_REGNO_REG_SET (reg_pending_sets, r);
		    }
		  else
		    SET_REGNO_REG_SET (reg_pending_clobbers, r);
3367 /* Function calls clobber all call_used regs. */
3368 if (global_regs[r] || (code == SET && call_used_regs[r]))
3369 for (u = deps->last_function_call; u; u = XEXP (u, 1))
3370 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3377 for (u = deps->reg_last_uses[regno]; u; u = XEXP (u, 1))
3378 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3380 for (u = deps->reg_last_sets[regno]; u; u = XEXP (u, 1))
3381 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
	  if (code == SET)
	    {
	      free_INSN_LIST_list (&deps->reg_last_uses[regno]);
	      for (u = deps->reg_last_clobbers[regno]; u; u = XEXP (u, 1))
		add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
	      SET_REGNO_REG_SET (reg_pending_sets, regno);
	    }
	  else
	    SET_REGNO_REG_SET (reg_pending_clobbers, regno);
3393 /* Pseudos that are REG_EQUIV to something may be replaced
3394 by that during reloading. We need only add dependencies for
3395 the address in the REG_EQUIV note. */
3396 if (!reload_completed
3397 && reg_known_equiv_p[regno]
3398 && GET_CODE (reg_known_value[regno]) == MEM)
3399 sched_analyze_2 (deps, XEXP (reg_known_value[regno], 0), insn);
3401 /* Don't let it cross a call after scheduling if it doesn't
3402 already cross one. */
3404 if (REG_N_CALLS_CROSSED (regno) == 0)
3405 for (u = deps->last_function_call; u; u = XEXP (u, 1))
3406 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3409 else if (GET_CODE (dest) == MEM)
3411 /* Writing memory. */
3413 if (deps->pending_lists_length > 32)
3415 /* Flush all pending reads and writes to prevent the pending lists
3416 from getting any larger. Insn scheduling runs too slowly when
	     these lists get long.  The number 32 was chosen because it
	     seems like a reasonable number.  When compiling GCC with itself,
	     this flush occurs 8 times for sparc and 10 times for m88k.  */
3421 flush_pending_lists (deps, insn, 0);
3426 rtx pending, pending_mem;
3428 pending = deps->pending_read_insns;
3429 pending_mem = deps->pending_read_mems;
3432 if (anti_dependence (XEXP (pending_mem, 0), dest))
3433 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3435 pending = XEXP (pending, 1);
3436 pending_mem = XEXP (pending_mem, 1);
3439 pending = deps->pending_write_insns;
3440 pending_mem = deps->pending_write_mems;
3443 if (output_dependence (XEXP (pending_mem, 0), dest))
3444 add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
3446 pending = XEXP (pending, 1);
3447 pending_mem = XEXP (pending_mem, 1);
3450 for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
3451 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3453 add_insn_mem_dependence (deps, &deps->pending_write_insns,
3454 &deps->pending_write_mems, insn, dest);
3456 sched_analyze_2 (deps, XEXP (dest, 0), insn);
3459 /* Analyze reads. */
3460 if (GET_CODE (x) == SET)
3461 sched_analyze_2 (deps, SET_SRC (x), insn);
3464 /* Analyze the uses of memory and registers in rtx X in INSN. */
3467 sched_analyze_2 (deps, x, insn)
3474 register enum rtx_code code;
3475 register const char *fmt;
3480 code = GET_CODE (x);
3489 /* Ignore constants. Note that we must handle CONST_DOUBLE here
3490 because it may have a cc0_rtx in its CONST_DOUBLE_CHAIN field, but
3491 this does not mean that this insn is using cc0. */
3496 /* User of CC0 depends on immediately preceding insn. */
3497 set_sched_group_p (insn);
3504 int regno = REGNO (x);
3505 if (regno < FIRST_PSEUDO_REGISTER)
3509 i = HARD_REGNO_NREGS (regno, GET_MODE (x));
3513 deps->reg_last_uses[r]
3514 = alloc_INSN_LIST (insn, deps->reg_last_uses[r]);
3516 for (u = deps->reg_last_sets[r]; u; u = XEXP (u, 1))
3517 add_dependence (insn, XEXP (u, 0), 0);
3519 /* ??? This should never happen. */
3520 for (u = deps->reg_last_clobbers[r]; u; u = XEXP (u, 1))
3521 add_dependence (insn, XEXP (u, 0), 0);
3523 if (call_used_regs[r] || global_regs[r])
3524 /* Function calls clobber all call_used regs. */
3525 for (u = deps->last_function_call; u; u = XEXP (u, 1))
3526 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3531 deps->reg_last_uses[regno]
3532 = alloc_INSN_LIST (insn, deps->reg_last_uses[regno]);
3534 for (u = deps->reg_last_sets[regno]; u; u = XEXP (u, 1))
3535 add_dependence (insn, XEXP (u, 0), 0);
3537 /* ??? This should never happen. */
3538 for (u = deps->reg_last_clobbers[regno]; u; u = XEXP (u, 1))
3539 add_dependence (insn, XEXP (u, 0), 0);
3541 /* Pseudos that are REG_EQUIV to something may be replaced
3542 by that during reloading. We need only add dependencies for
3543 the address in the REG_EQUIV note. */
3544 if (!reload_completed
3545 && reg_known_equiv_p[regno]
3546 && GET_CODE (reg_known_value[regno]) == MEM)
3547 sched_analyze_2 (deps, XEXP (reg_known_value[regno], 0), insn);
3549 /* If the register does not already cross any calls, then add this
3550 insn to the sched_before_next_call list so that it will still
3551 not cross calls after scheduling. */
3552 if (REG_N_CALLS_CROSSED (regno) == 0)
	    add_dependence (deps->sched_before_next_call, insn,
			    REG_DEP_ANTI);
3561 /* Reading memory. */
3563 rtx pending, pending_mem;
3565 pending = deps->pending_read_insns;
3566 pending_mem = deps->pending_read_mems;
3569 if (read_dependence (XEXP (pending_mem, 0), x))
3570 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3572 pending = XEXP (pending, 1);
3573 pending_mem = XEXP (pending_mem, 1);
3576 pending = deps->pending_write_insns;
3577 pending_mem = deps->pending_write_mems;
	  if (true_dependence (XEXP (pending_mem, 0), VOIDmode,
			       x, rtx_varies_p))
	    add_dependence (insn, XEXP (pending, 0), 0);
3584 pending = XEXP (pending, 1);
3585 pending_mem = XEXP (pending_mem, 1);
3588 for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
3589 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3591 /* Always add these dependencies to pending_reads, since
3592 this insn may be followed by a write. */
3593 add_insn_mem_dependence (deps, &deps->pending_read_insns,
3594 &deps->pending_read_mems, insn, x);
3596 /* Take advantage of tail recursion here. */
3597 sched_analyze_2 (deps, XEXP (x, 0), insn);
3601 /* Force pending stores to memory in case a trap handler needs them. */
3603 flush_pending_lists (deps, insn, 1);
3608 case UNSPEC_VOLATILE:
3612 /* Traditional and volatile asm instructions must be considered to use
3613 and clobber all hard registers, all pseudo-registers and all of
3614 memory. So must TRAP_IF and UNSPEC_VOLATILE operations.
3616 Consider for instance a volatile asm that changes the fpu rounding
3617 mode. An insn should not be moved across this even if it only uses
3618 pseudo-regs because it might give an incorrectly rounded result. */
3619 if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
3621 int max_reg = max_reg_num ();
3622 for (i = 0; i < max_reg; i++)
3624 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3625 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3626 free_INSN_LIST_list (&deps->reg_last_uses[i]);
3628 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3629 add_dependence (insn, XEXP (u, 0), 0);
3631 for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
3632 add_dependence (insn, XEXP (u, 0), 0);
3634 reg_pending_sets_all = 1;
3636 flush_pending_lists (deps, insn, 0);
3639 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
3640 We can not just fall through here since then we would be confused
3641 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
3642 traditional asms unlike their normal usage. */
3644 if (code == ASM_OPERANDS)
3646 for (j = 0; j < ASM_OPERANDS_INPUT_LENGTH (x); j++)
3647 sched_analyze_2 (deps, ASM_OPERANDS_INPUT (x, j), insn);
3657 /* These both read and modify the result. We must handle them as writes
3658 to get proper dependencies for following instructions. We must handle
3659 them as reads to get proper dependencies from this to previous
3660 instructions. Thus we need to pass them to both sched_analyze_1
3661 and sched_analyze_2. We must call sched_analyze_2 first in order
3662 to get the proper antecedent for the read. */
3663 sched_analyze_2 (deps, XEXP (x, 0), insn);
3664 sched_analyze_1 (deps, x, insn);
3669 /* op0 = op0 + op1 */
3670 sched_analyze_2 (deps, XEXP (x, 0), insn);
3671 sched_analyze_2 (deps, XEXP (x, 1), insn);
3672 sched_analyze_1 (deps, x, insn);
3679 /* Other cases: walk the insn. */
3680 fmt = GET_RTX_FORMAT (code);
3681 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3684 sched_analyze_2 (deps, XEXP (x, i), insn);
3685 else if (fmt[i] == 'E')
3686 for (j = 0; j < XVECLEN (x, i); j++)
3687 sched_analyze_2 (deps, XVECEXP (x, i, j), insn);
3691 /* Analyze an INSN with pattern X to find all dependencies. */
3694 sched_analyze_insn (deps, x, insn, loop_notes)
3699 register RTX_CODE code = GET_CODE (x);
3701 int maxreg = max_reg_num ();
3704 if (code == COND_EXEC)
3706 sched_analyze_2 (deps, COND_EXEC_TEST (x), insn);
3708 /* ??? Should be recording conditions so we reduce the number of
	 false dependencies.  */
3710 x = COND_EXEC_CODE (x);
3711 code = GET_CODE (x);
3713 if (code == SET || code == CLOBBER)
3714 sched_analyze_1 (deps, x, insn);
3715 else if (code == PARALLEL)
3718 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
3720 rtx sub = XVECEXP (x, 0, i);
3721 code = GET_CODE (sub);
3723 if (code == COND_EXEC)
3725 sched_analyze_2 (deps, COND_EXEC_TEST (sub), insn);
3726 sub = COND_EXEC_CODE (sub);
3727 code = GET_CODE (sub);
3729 if (code == SET || code == CLOBBER)
3730 sched_analyze_1 (deps, sub, insn);
3732 sched_analyze_2 (deps, sub, insn);
3736 sched_analyze_2 (deps, x, insn);
3738 /* Mark registers CLOBBERED or used by called function. */
3739 if (GET_CODE (insn) == CALL_INSN)
3740 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
3742 if (GET_CODE (XEXP (link, 0)) == CLOBBER)
3743 sched_analyze_1 (deps, XEXP (link, 0), insn);
3745 sched_analyze_2 (deps, XEXP (link, 0), insn);
3748 /* If there is a {LOOP,EHREGION}_{BEG,END} note in the middle of a basic
3749 block, then we must be sure that no instructions are scheduled across it.
3750 Otherwise, the reg_n_refs info (which depends on loop_depth) would
3751 become incorrect. */
3755 int max_reg = max_reg_num ();
3756 int schedule_barrier_found = 0;
3759 /* Update loop_notes with any notes from this insn. Also determine
3760 if any of the notes on the list correspond to instruction scheduling
	 barriers (loop, eh & setjmp notes, but not range notes).  */
3763 while (XEXP (link, 1))
3765 if (INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_BEG
3766 || INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_END
3767 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_BEG
3768 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_END
3769 || INTVAL (XEXP (link, 0)) == NOTE_INSN_SETJMP)
3770 schedule_barrier_found = 1;
3772 link = XEXP (link, 1);
3774 XEXP (link, 1) = REG_NOTES (insn);
3775 REG_NOTES (insn) = loop_notes;
3777 /* Add dependencies if a scheduling barrier was found. */
3778 if (schedule_barrier_found)
3780 for (i = 0; i < max_reg; i++)
3783 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3784 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3785 free_INSN_LIST_list (&deps->reg_last_uses[i]);
3787 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3788 add_dependence (insn, XEXP (u, 0), 0);
3790 for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
3791 add_dependence (insn, XEXP (u, 0), 0);
3793 reg_pending_sets_all = 1;
3795 flush_pending_lists (deps, insn, 0);
3800 /* Accumulate clobbers until the next set so that it will be output dependent
3801 on all of them. At the next set we can clear the clobber list, since
3802 subsequent sets will be output dependent on it. */
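  /* E.g. for "clobber r1; clobber r1; r1 = x; r1 = y": both clobbers
     accumulate on reg_last_clobbers, the set "r1 = x" becomes output
     dependent on each of them and then replaces the list, and "r1 = y"
     depends only on "r1 = x".  */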
  EXECUTE_IF_SET_IN_REG_SET
    (reg_pending_sets, 0, i,
     {
       free_INSN_LIST_list (&deps->reg_last_sets[i]);
       free_INSN_LIST_list (&deps->reg_last_clobbers[i]);
       deps->reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
     });
  EXECUTE_IF_SET_IN_REG_SET
    (reg_pending_clobbers, 0, i,
     {
       deps->reg_last_clobbers[i]
	 = alloc_INSN_LIST (insn, deps->reg_last_clobbers[i]);
     });

  CLEAR_REG_SET (reg_pending_sets);
  CLEAR_REG_SET (reg_pending_clobbers);
  if (reg_pending_sets_all)
    {
      for (i = 0; i < maxreg; i++)
	{
	  free_INSN_LIST_list (&deps->reg_last_sets[i]);
	  free_INSN_LIST_list (&deps->reg_last_clobbers[i]);
	  deps->reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
	}

      reg_pending_sets_all = 0;
    }
3831 /* If a post-call group is still open, see if it should remain so.
     This insn must be a simple move of a hard reg to a pseudo or
     vice-versa.
3835 We must avoid moving these insns for correctness on
3836 SMALL_REGISTER_CLASS machines, and for special registers like
3837 PIC_OFFSET_TABLE_REGNUM. For simplicity, extend this to all
3838 hard regs for all targets. */
3840 if (deps->in_post_call_group_p)
3842 rtx tmp, set = single_set (insn);
3843 int src_regno, dest_regno;
3846 goto end_call_group;
3848 tmp = SET_DEST (set);
3849 if (GET_CODE (tmp) == SUBREG)
3850 tmp = SUBREG_REG (tmp);
3851 if (GET_CODE (tmp) == REG)
3852 dest_regno = REGNO (tmp);
3854 goto end_call_group;
3856 tmp = SET_SRC (set);
3857 if (GET_CODE (tmp) == SUBREG)
3858 tmp = SUBREG_REG (tmp);
3859 if (GET_CODE (tmp) == REG)
3860 src_regno = REGNO (tmp);
3862 goto end_call_group;
3864 if (src_regno < FIRST_PSEUDO_REGISTER
3865 || dest_regno < FIRST_PSEUDO_REGISTER)
3867 set_sched_group_p (insn);
3868 CANT_MOVE (insn) = 1;
3873 deps->in_post_call_group_p = 0;
3878 /* Analyze every insn between HEAD and TAIL inclusive, creating LOG_LINKS
3879 for every dependency. */
3882 sched_analyze (deps, head, tail)
3890 for (insn = head;; insn = NEXT_INSN (insn))
3892 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
3894 /* Clear out the stale LOG_LINKS from flow. */
3895 free_INSN_LIST_list (&LOG_LINKS (insn));
3897 /* Clear out stale SCHED_GROUP_P. */
3898 SCHED_GROUP_P (insn) = 0;
	  /* Make each JUMP_INSN a scheduling barrier for memory
	     references.  */
3902 if (GET_CODE (insn) == JUMP_INSN)
3903 deps->last_pending_memory_flush
3904 = alloc_INSN_LIST (insn, deps->last_pending_memory_flush);
3905 sched_analyze_insn (deps, PATTERN (insn), insn, loop_notes);
3908 else if (GET_CODE (insn) == CALL_INSN)
3913 /* Clear out stale SCHED_GROUP_P. */
3914 SCHED_GROUP_P (insn) = 0;
3916 CANT_MOVE (insn) = 1;
3918 /* Clear out the stale LOG_LINKS from flow. */
3919 free_INSN_LIST_list (&LOG_LINKS (insn));
3921 /* Any instruction using a hard register which may get clobbered
3922 by a call needs to be marked as dependent on this call.
3923 This prevents a use of a hard return reg from being moved
	     past a void call (i.e. it does not explicitly set the hard
	     return reg).  */
3927 /* If this call is followed by a NOTE_INSN_SETJMP, then assume that
	     all registers, not just hard registers, may be clobbered by this
	     call.  */
3931 /* Insn, being a CALL_INSN, magically depends on
3932 `last_function_call' already. */
3934 if (NEXT_INSN (insn) && GET_CODE (NEXT_INSN (insn)) == NOTE
3935 && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP)
3937 int max_reg = max_reg_num ();
3938 for (i = 0; i < max_reg; i++)
3940 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3941 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3942 free_INSN_LIST_list (&deps->reg_last_uses[i]);
3944 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3945 add_dependence (insn, XEXP (u, 0), 0);
3947 for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
3948 add_dependence (insn, XEXP (u, 0), 0);
3950 reg_pending_sets_all = 1;
3952 /* Add a pair of REG_SAVE_NOTEs which we will later
3953 convert back into a NOTE_INSN_SETJMP note. See
3954 reemit_notes for why we use a pair of NOTEs. */
	      REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
						  GEN_INT (0),
						  REG_NOTES (insn));
	      REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
						  GEN_INT (NOTE_INSN_SETJMP),
						  REG_NOTES (insn));
3964 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3965 if (call_used_regs[i] || global_regs[i])
3967 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3968 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3970 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3971 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3973 SET_REGNO_REG_SET (reg_pending_clobbers, i);
3977 /* For each insn which shouldn't cross a call, add a dependence
3978 between that insn and this call insn. */
3979 x = LOG_LINKS (deps->sched_before_next_call);
3982 add_dependence (insn, XEXP (x, 0), REG_DEP_ANTI);
3985 free_INSN_LIST_list (&LOG_LINKS (deps->sched_before_next_call));
3987 sched_analyze_insn (deps, PATTERN (insn), insn, loop_notes);
3990 /* In the absence of interprocedural alias analysis, we must flush
3991 all pending reads and writes, and start new dependencies starting
3992 from here. But only flush writes for constant calls (which may
3993 be passed a pointer to something we haven't written yet). */
3994 flush_pending_lists (deps, insn, CONST_CALL_P (insn));
3996 /* Depend this function call (actually, the user of this
3997 function call) on all hard register clobberage. */
3999 /* last_function_call is now a list of insns. */
4000 free_INSN_LIST_list (&deps->last_function_call);
4001 deps->last_function_call = alloc_INSN_LIST (insn, NULL_RTX);
4003 /* Before reload, begin a post-call group, so as to keep the
4004 lifetimes of hard registers correct. */
4005 if (! reload_completed)
4006 deps->in_post_call_group_p = 1;
4009 /* See comments on reemit_notes as to why we do this.
4010 ??? Actually, the reemit_notes just say what is done, not why. */
4012 else if (GET_CODE (insn) == NOTE
4013 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_BEG
4014 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_END))
	  loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE, NOTE_RANGE_INFO (insn),
					loop_notes);
	  loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
					GEN_INT (NOTE_LINE_NUMBER (insn)),
					loop_notes);
4022 else if (GET_CODE (insn) == NOTE
4023 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG
4024 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END
4025 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
4026 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END
4027 || (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP
4028 && GET_CODE (PREV_INSN (insn)) != CALL_INSN)))
4032 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
4033 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END)
4034 rtx_region = GEN_INT (NOTE_EH_HANDLER (insn));
4036 rtx_region = GEN_INT (0);
	  loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
					rtx_region,
					loop_notes);
	  loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
					GEN_INT (NOTE_LINE_NUMBER (insn)),
					loop_notes);
	  CONST_CALL_P (loop_notes) = CONST_CALL_P (insn);
4053 /* Macros and functions for keeping the priority queue sorted, and
4054 dealing with queueing and dequeueing of instructions. */
#define SCHED_SORT(READY, N_READY)					\
do { if ((N_READY) == 2)						\
       swap_sort (READY, N_READY);					\
     else if ((N_READY) > 2)						\
	 qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); }	\
while (0)
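/* With exactly two ready insns a single compare-and-swap suffices;
   longer ready lists fall back to qsort with rank_for_schedule as the
   comparator.  */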
4063 /* Returns a positive value if x is preferred; returns a negative value if
   y is preferred.  Should never return 0, since that will make the sort
   unstable.  */
4068 rank_for_schedule (x, y)
4072 rtx tmp = *(const rtx *) y;
4073 rtx tmp2 = *(const rtx *) x;
4075 int tmp_class, tmp2_class, depend_count1, depend_count2;
4076 int val, priority_val, spec_val, prob_val, weight_val;
4078 /* Prefer insn with higher priority. */
4079 priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp);
4081 return priority_val;
  /* Prefer an insn with smaller contribution to register pressure.  */
4084 if (!reload_completed &&
4085 (weight_val = INSN_REG_WEIGHT (tmp) - INSN_REG_WEIGHT (tmp2)))
4086 return (weight_val);
  /* Some comparisons make sense only in interblock scheduling.  */
4089 if (INSN_BB (tmp) != INSN_BB (tmp2))
      /* Prefer an inblock motion over an interblock motion.  */
4092 if ((INSN_BB (tmp2) == target_bb) && (INSN_BB (tmp) != target_bb))
4094 if ((INSN_BB (tmp) == target_bb) && (INSN_BB (tmp2) != target_bb))
      /* Prefer a useful motion over a speculative one.  */
4098 if ((spec_val = IS_SPECULATIVE_INSN (tmp) - IS_SPECULATIVE_INSN (tmp2)))
4101 /* Prefer a more probable (speculative) insn. */
4102 prob_val = INSN_PROBABILITY (tmp2) - INSN_PROBABILITY (tmp);
4107 /* Compare insns based on their relation to the last-scheduled-insn. */
4108 if (last_scheduled_insn)
4110 /* Classify the instructions into three classes:
4111 1) Data dependent on the last scheduled insn.
4112 2) Anti/Output dependent on last scheduled insn.
4113 3) Independent of last scheduled insn, or has latency of one.
4114 Choose the insn from the highest numbered class if different. */
4115 link = find_insn_list (tmp, INSN_DEPEND (last_scheduled_insn));
4116 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp) == 1)
4118 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4123 link = find_insn_list (tmp2, INSN_DEPEND (last_scheduled_insn));
4124 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp2) == 1)
4126 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4131 if ((val = tmp2_class - tmp_class))
4135 /* Prefer the insn which has more later insns that depend on it.
4136 This gives the scheduler more freedom when scheduling later
4137 instructions at the expense of added register pressure. */
4139 for (link = INSN_DEPEND (tmp); link; link = XEXP (link, 1))
4143 for (link = INSN_DEPEND (tmp2); link; link = XEXP (link, 1))
4146 val = depend_count2 - depend_count1;
4150 /* If insns are equally good, sort by INSN_LUID (original insn order),
4151 so that we make the sort stable. This minimizes instruction movement,
4152 thus minimizing sched's effect on debugging and cross-jumping. */
4153 return INSN_LUID (tmp) - INSN_LUID (tmp2);
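/* Editor's sketch, not part of the original source: rank_for_schedule
   above is a chain of keys, each returning the first nonzero difference,
   with original insn order as the final tie-break so the qsort becomes
   stable.  A self-contained rendering of the same pattern; the toy_*
   names and fields are hypothetical stand-ins for INSN_PRIORITY,
   INSN_REG_WEIGHT and INSN_LUID.  */

#include <stdlib.h>

struct toy_insn
{
  int priority;			/* Higher issues first.  */
  int reg_weight;		/* Smaller pressure contribution preferred.  */
  int luid;			/* Original order; the stable tie-break.  */
};

static int
toy_rank (const void *xp, const void *yp)
{
  const struct toy_insn *x = *(const struct toy_insn *const *) xp;
  const struct toy_insn *y = *(const struct toy_insn *const *) yp;
  int val;

  /* Positive means X is preferred; the ready list is consumed from its
     end, so preferred insns sort toward the end of the array.  */
  if ((val = x->priority - y->priority))
    return val;
  if ((val = y->reg_weight - x->reg_weight))
    return val;
  /* Never return 0: fall back to original order to keep the sort stable.  */
  return y->luid - x->luid;
}

/* Used exactly as SCHED_SORT uses rank_for_schedule:
   qsort (ready, n_ready, sizeof (ready[0]), toy_rank);  */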
4156 /* Resort the array A in which only the element at index N may be out of order. */
4158 HAIFA_INLINE static void
4163 rtx insn = a[n - 1];
4166 while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0)
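/* Editor's sketch of the rest of swap_sort, whose tail is elided here:
   re-insert the single out-of-place element A[N-1] into the sorted
   prefix -- an O(N) insertion instead of a full qsort.  Written against
   the hypothetical toy_insn/toy_rank names from the sketch above.  */

static void
toy_swap_sort (struct toy_insn **a, int n)
{
  struct toy_insn *insn = a[n - 1];
  int i = n - 2;

  /* Shift every element that ranks after INSN one slot toward the end,
     then drop INSN into the hole.  */
  while (i >= 0 && toy_rank (&a[i], &insn) >= 0)
    {
      a[i + 1] = a[i];
      i -= 1;
    }
  a[i + 1] = insn;
}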
4174 static int max_priority;
4176 /* Add INSN to the insn queue so that it can be executed at least
4177 N_CYCLES after the currently executing insn. Preserve the insn
4178 chain for debugging purposes. */
4180 HAIFA_INLINE static void
4181 queue_insn (insn, n_cycles)
4185 int next_q = NEXT_Q_AFTER (q_ptr, n_cycles);
4186 rtx link = alloc_INSN_LIST (insn, insn_queue[next_q]);
4187 insn_queue[next_q] = link;
4190 if (sched_verbose >= 2)
4192 fprintf (dump, ";;\t\tReady-->Q: insn %d: ", INSN_UID (insn));
4194 if (INSN_BB (insn) != target_bb)
4195 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4197 fprintf (dump, "queued for %d cycles.\n", n_cycles);
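/* Editor's note, a sketch of the queue indexing assumed above: the insn
   queue is a circular buffer of INSN_LIST chains indexed by cycle.  With
   a power-of-two size, NEXT_Q and NEXT_Q_AFTER reduce to a masked add
   (hypothetical reimplementation; the names and size are invented).  */

#define TOY_QUEUE_SIZE 32	/* Must be a power of two.  */
#define TOY_NEXT_Q(q)          (((q) + 1) & (TOY_QUEUE_SIZE - 1))
#define TOY_NEXT_Q_AFTER(q, n) (((q) + (n)) & (TOY_QUEUE_SIZE - 1))

/* An insn delayed N cycles is prepended to the chain in slot
   TOY_NEXT_Q_AFTER (q_ptr, n); each clock tick, queue_to_ready ()
   advances q_ptr by one slot and drains that slot into the ready list,
   so a slot's distance from q_ptr is its remaining stall time.  */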
4202 /* PREV is an insn that is ready to execute. Adjust its priority if that
4203 will help shorten or lengthen register lifetimes as appropriate. Also
4204 provide a hook for the target to tweak itself. */
4206 HAIFA_INLINE static void
4207 adjust_priority (prev)
4208 rtx prev ATTRIBUTE_UNUSED;
4210 /* ??? There used to be code here to try and estimate how an insn
4211 affected register lifetimes, but it did it by looking at REG_DEAD
4212 notes, which we removed in schedule_region. Nor did it try to
4213 take into account register pressure or anything useful like that.
4215 Revisit when we have a machine model to work with and not before. */
4217 #ifdef ADJUST_PRIORITY
4218 ADJUST_PRIORITY (prev);
4222 /* Clock at which the previous instruction was issued. */
4223 static int last_clock_var;
4225 /* INSN is the "currently executing insn". Launch each insn which was
4226 waiting on INSN. READY is a vector of insns which are ready to fire.
4227 N_READY is the number of elements in READY. CLOCK is the current cycle. */
4231 schedule_insn (insn, ready, n_ready, clock)
4240 unit = insn_unit (insn);
4242 if (sched_verbose >= 2)
4244 fprintf (dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ",
4246 insn_print_units (insn);
4247 fprintf (dump, "\n");
4250 if (sched_verbose && unit == -1)
4251 visualize_no_unit (insn);
4253 if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose)
4254 schedule_unit (unit, insn, clock);
4256 if (INSN_DEPEND (insn) == 0)
4259 /* This is used by the function adjust_priority above. */
4261 max_priority = MAX (INSN_PRIORITY (ready[0]), INSN_PRIORITY (insn));
4263 max_priority = INSN_PRIORITY (insn);
4265 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
4267 rtx next = XEXP (link, 0);
4268 int cost = insn_cost (insn, link, next);
4270 INSN_TICK (next) = MAX (INSN_TICK (next), clock + cost);
4272 if ((INSN_DEP_COUNT (next) -= 1) == 0)
4274 int effective_cost = INSN_TICK (next) - clock;
4276 /* For speculative insns, before inserting to ready/queue,
4277 check live, exception-free, and issue-delay. */
4278 if (INSN_BB (next) != target_bb
4279 && (!IS_VALID (INSN_BB (next))
4281 || (IS_SPECULATIVE_INSN (next)
4282 && (insn_issue_delay (next) > 3
4283 || !check_live (next, INSN_BB (next))
4284 || !is_exception_free (next, INSN_BB (next), target_bb)))))
4287 if (sched_verbose >= 2)
4289 fprintf (dump, ";;\t\tdependences resolved: insn %d ",
4292 if (current_nr_blocks > 1 && INSN_BB (next) != target_bb)
4293 fprintf (dump, "/b%d ", BLOCK_NUM (next));
4295 if (effective_cost < 1)
4296 fprintf (dump, "into ready\n");
4298 fprintf (dump, "into queue with cost=%d\n", effective_cost);
4301 /* Adjust the priority of NEXT and either put it on the ready
4302 list or queue it. */
4303 adjust_priority (next);
4304 if (effective_cost < 1)
4305 ready[n_ready++] = next;
4307 queue_insn (next, effective_cost);
4311 /* Annotate the instruction with issue information -- TImode
4312 indicates that the instruction is expected not to be able
4313 to issue on the same cycle as the previous insn. A machine
4314 may use this information to decide how the instruction should be aligned. */
4316 if (reload_completed && issue_rate > 1)
4318 PUT_MODE (insn, clock > last_clock_var ? TImode : VOIDmode);
4319 last_clock_var = clock;
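/* Editor's sketch of the resolution loop above, self-contained and with
   hypothetical types: each dependent's earliest issue tick is raised to
   at least CLOCK + cost, and a dependent whose incoming-dependence count
   reaches zero goes straight to the ready list when that tick has
   already arrived; otherwise it would be queued for the difference.  */

struct toy_dep
{
  struct toy_node *consumer;	/* The dependent insn.  */
  int cost;			/* Latency of this dependence.  */
  struct toy_dep *next;
};

struct toy_node
{
  int tick;			/* Earliest cycle this insn may issue.  */
  int dep_count;		/* Unresolved incoming dependences.  */
  struct toy_dep *depend;	/* Outgoing dependences (like INSN_DEPEND).  */
};

static int
toy_resolve_deps (struct toy_node *insn, int clock,
		  struct toy_node **ready, int n_ready)
{
  struct toy_dep *link;

  for (link = insn->depend; link; link = link->next)
    {
      struct toy_node *next = link->consumer;

      /* NEXT can issue no earlier than CLOCK plus the dependence cost.  */
      if (next->tick < clock + link->cost)
	next->tick = clock + link->cost;

      /* Last incoming dependence resolved: NEXT becomes schedulable.  */
      if (--next->dep_count == 0)
	{
	  int effective_cost = next->tick - clock;

	  if (effective_cost < 1)
	    ready[n_ready++] = next;
	  /* else: queue NEXT for EFFECTIVE_COST cycles (queue elided).  */
	}
    }
  return n_ready;
}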
4325 /* Functions for handling of notes. */
4327 /* Delete notes beginning with INSN and put them in the chain
4328 of notes ended by NOTE_LIST.
4329 Returns the insn following the notes. */
4332 unlink_other_notes (insn, tail)
4335 rtx prev = PREV_INSN (insn);
4337 while (insn != tail && GET_CODE (insn) == NOTE)
4339 rtx next = NEXT_INSN (insn);
4340 /* Delete the note from its current position. */
4342 NEXT_INSN (prev) = next;
4344 PREV_INSN (next) = prev;
4346 /* See sched_analyze to see how these are handled. */
4347 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_SETJMP
4348 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG
4349 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END
4350 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_BEG
4351 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_END
4352 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG
4353 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END)
4355 /* Insert the note at the end of the notes list. */
4356 PREV_INSN (insn) = note_list;
4358 NEXT_INSN (note_list) = insn;
4367 /* Delete line notes beginning with INSN. Record line-number notes so
4368 they can be reused. Returns the insn following the notes. */
4371 unlink_line_notes (insn, tail)
4374 rtx prev = PREV_INSN (insn);
4376 while (insn != tail && GET_CODE (insn) == NOTE)
4378 rtx next = NEXT_INSN (insn);
4380 if (write_symbols != NO_DEBUG && NOTE_LINE_NUMBER (insn) > 0)
4382 /* Delete the note from its current position. */
4384 NEXT_INSN (prev) = next;
4386 PREV_INSN (next) = prev;
4388 /* Record line-number notes so they can be reused. */
4389 LINE_NOTE (insn) = insn;
4399 /* Return the head and tail pointers of BB. */
4401 HAIFA_INLINE static void
4402 get_block_head_tail (b, headp, tailp)
4411 /* HEAD and TAIL delimit the basic block being scheduled. */
4412 head = BLOCK_HEAD (b);
4413 tail = BLOCK_END (b);
4415 /* Don't include any notes or labels at the beginning of the
4416 basic block, or notes at the ends of basic blocks. */
4417 while (head != tail)
4419 if (GET_CODE (head) == NOTE)
4420 head = NEXT_INSN (head);
4421 else if (GET_CODE (tail) == NOTE)
4422 tail = PREV_INSN (tail);
4423 else if (GET_CODE (head) == CODE_LABEL)
4424 head = NEXT_INSN (head);
4433 HAIFA_INLINE static void
4434 get_bb_head_tail (bb, headp, tailp)
4439 get_block_head_tail (BB_TO_BLOCK (bb), headp, tailp);
4442 /* Delete line notes from bb. Save them so they can be later restored
4443 (in restore_line_notes ()). */
4454 get_bb_head_tail (bb, &head, &tail);
4456 if (head == tail && (! INSN_P (head)))
4459 next_tail = NEXT_INSN (tail);
4460 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4464 /* Farm out notes, and maybe save them in NOTE_LIST.
4465 This is needed to keep the debugger from
4466 getting completely deranged. */
4467 if (GET_CODE (insn) == NOTE)
4470 insn = unlink_line_notes (insn, next_tail);
4476 if (insn == next_tail)
4482 /* Save line number notes for each insn in bb. */
4485 save_line_notes (bb)
4491 /* We must use the true line number for the first insn in the block
4492 that was computed and saved at the start of this pass. We can't
4493 use the current line number, because scheduling of the previous
4494 block may have changed the current line number. */
4496 rtx line = line_note_head[BB_TO_BLOCK (bb)];
4499 get_bb_head_tail (bb, &head, &tail);
4500 next_tail = NEXT_INSN (tail);
4502 for (insn = BLOCK_HEAD (BB_TO_BLOCK (bb));
4504 insn = NEXT_INSN (insn))
4505 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4508 LINE_NOTE (insn) = line;
4511 /* After bb was scheduled, insert line notes into the insns list. */
4514 restore_line_notes (bb)
4517 rtx line, note, prev, new;
4518 int added_notes = 0;
4520 rtx head, next_tail, insn;
4522 b = BB_TO_BLOCK (bb);
4524 head = BLOCK_HEAD (b);
4525 next_tail = NEXT_INSN (BLOCK_END (b));
4527 /* Determine the current line-number. We want to know the current
4528 line number of the first insn of the block here, in case it is
4529 different from the true line number that was saved earlier. If
4530 different, then we need a line number note before the first insn
4531 of this block. If it happens to be the same, then we don't want to
4532 emit another line number note here. */
4533 for (line = head; line; line = PREV_INSN (line))
4534 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
4537 /* Walk the insns keeping track of the current line-number and inserting
4538 the line-number notes as needed. */
4539 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4540 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4542 /* This used to emit line number notes before every non-deleted note.
4543 However, this confuses a debugger, because line notes not separated
4544 by real instructions all end up at the same address. I can find no
4545 use for line number notes before other notes, so none are emitted. */
4546 else if (GET_CODE (insn) != NOTE
4547 && (note = LINE_NOTE (insn)) != 0
4550 || NOTE_LINE_NUMBER (note) != NOTE_LINE_NUMBER (line)
4551 || NOTE_SOURCE_FILE (note) != NOTE_SOURCE_FILE (line)))
4554 prev = PREV_INSN (insn);
4555 if (LINE_NOTE (note))
4557 /* Re-use the original line-number note. */
4558 LINE_NOTE (note) = 0;
4559 PREV_INSN (note) = prev;
4560 NEXT_INSN (prev) = note;
4561 PREV_INSN (insn) = note;
4562 NEXT_INSN (note) = insn;
4567 new = emit_note_after (NOTE_LINE_NUMBER (note), prev);
4568 NOTE_SOURCE_FILE (new) = NOTE_SOURCE_FILE (note);
4569 RTX_INTEGRATED_P (new) = RTX_INTEGRATED_P (note);
4572 if (sched_verbose && added_notes)
4573 fprintf (dump, ";; added %d line-number notes\n", added_notes);
4576 /* After scheduling the function, delete redundant line notes from the
4580 rm_redundant_line_notes ()
4583 rtx insn = get_insns ();
4584 int active_insn = 0;
4587 /* Walk the insns deleting redundant line-number notes. Many of these
4588 are already present. The remainder tend to occur at basic
4589 block boundaries. */
4590 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4591 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4593 /* If there are no active insns following, INSN is redundant. */
4594 if (active_insn == 0)
4597 NOTE_SOURCE_FILE (insn) = 0;
4598 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4600 /* If the line number is unchanged, LINE is redundant. */
4602 && NOTE_LINE_NUMBER (line) == NOTE_LINE_NUMBER (insn)
4603 && NOTE_SOURCE_FILE (line) == NOTE_SOURCE_FILE (insn))
4606 NOTE_SOURCE_FILE (line) = 0;
4607 NOTE_LINE_NUMBER (line) = NOTE_INSN_DELETED;
4614 else if (!((GET_CODE (insn) == NOTE
4615 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED)
4616 || (GET_CODE (insn) == INSN
4617 && (GET_CODE (PATTERN (insn)) == USE
4618 || GET_CODE (PATTERN (insn)) == CLOBBER))))
4621 if (sched_verbose && notes)
4622 fprintf (dump, ";; deleted %d line-number notes\n", notes);
4625 /* Delete notes between head and tail and put them in the chain
4626 of notes ended by NOTE_LIST. */
4629 rm_other_notes (head, tail)
4636 if (head == tail && (! INSN_P (head)))
4639 next_tail = NEXT_INSN (tail);
4640 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4644 /* Farm out notes, and maybe save them in NOTE_LIST.
4645 This is needed to keep the debugger from
4646 getting completely deranged. */
4647 if (GET_CODE (insn) == NOTE)
4651 insn = unlink_other_notes (insn, next_tail);
4657 if (insn == next_tail)
4663 /* Functions for computation of registers live/usage info. */
4665 /* Calculate INSN_REG_WEIGHT for all insns of a block. */
4668 find_insn_reg_weight (b)
4671 rtx insn, next_tail, head, tail;
4673 get_block_head_tail (b, &head, &tail);
4674 next_tail = NEXT_INSN (tail);
4676 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4681 /* Handle register life information. */
4682 if (! INSN_P (insn))
4685 /* Increment weight for each register born here. */
4687 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4688 && register_operand (SET_DEST (x), VOIDmode))
4690 else if (GET_CODE (x) == PARALLEL)
4693 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
4695 x = XVECEXP (PATTERN (insn), 0, j);
4696 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4697 && register_operand (SET_DEST (x), VOIDmode))
4702 /* Decrement weight for each register that dies here. */
4703 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
4705 if (REG_NOTE_KIND (x) == REG_DEAD
4706 || REG_NOTE_KIND (x) == REG_UNUSED)
4710 INSN_REG_WEIGHT (insn) = reg_weight;
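/* Editor's worked example (hypothetical insn): for
     (set (reg 100) (plus (reg 101) (reg 102)))
   carrying REG_DEAD notes for regs 101 and 102, one register is born
   (+1) and two die (-2), so INSN_REG_WEIGHT is -1.  Before reload,
   rank_for_schedule prefers such insns, since issuing them early tends
   to reduce register pressure.  */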
4714 /* Scheduling clock, modified in schedule_block() and queue_to_ready (). */
4715 static int clock_var;
4717 /* Move insns that became ready to fire from queue to ready list. */
4720 queue_to_ready (ready, n_ready)
4727 q_ptr = NEXT_Q (q_ptr);
4729 /* Add all pending insns that can be scheduled without stalls to the ready list. */
4731 for (link = insn_queue[q_ptr]; link; link = XEXP (link, 1))
4734 insn = XEXP (link, 0);
4737 if (sched_verbose >= 2)
4738 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4740 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4741 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4743 ready[n_ready++] = insn;
4744 if (sched_verbose >= 2)
4745 fprintf (dump, "moving to ready without stalls\n");
4747 insn_queue[q_ptr] = 0;
4749 /* If there are no ready insns, stall until one is ready and add all
4750 of the pending insns at that point to the ready list. */
4753 register int stalls;
4755 for (stalls = 1; stalls < INSN_QUEUE_SIZE; stalls++)
4757 if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]))
4759 for (; link; link = XEXP (link, 1))
4761 insn = XEXP (link, 0);
4764 if (sched_verbose >= 2)
4765 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ",
4768 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4769 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4771 ready[n_ready++] = insn;
4772 if (sched_verbose >= 2)
4773 fprintf (dump, "moving to ready with %d stalls\n", stalls);
4775 insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = 0;
4782 if (sched_verbose && stalls)
4783 visualize_stall_cycles (BB_TO_BLOCK (target_bb), stalls);
4784 q_ptr = NEXT_Q_AFTER (q_ptr, stalls);
4785 clock_var += stalls;
4790 /* Print the ready list for debugging purposes. Callable from debugger. */
4793 debug_ready_list (ready, n_ready)
4799 for (i = 0; i < n_ready; i++)
4801 fprintf (dump, " %d", INSN_UID (ready[i]));
4802 if (current_nr_blocks > 1 && INSN_BB (ready[i]) != target_bb)
4803 fprintf (dump, "/b%d", BLOCK_NUM (ready[i]));
4805 fprintf (dump, "\n");
4808 /* Print names of units on which insn can/should execute, for debugging. */
4811 insn_print_units (insn)
4815 int unit = insn_unit (insn);
4818 fprintf (dump, "none");
4820 fprintf (dump, "%s", function_units[unit].name);
4823 fprintf (dump, "[");
4824 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
4827 fprintf (dump, "%s", function_units[i].name);
4829 fprintf (dump, " ");
4831 fprintf (dump, "]");
4835 /* MAX_VISUAL_LINES is the maximum number of lines in visualization table
4836 of a basic block. If more lines are needed, the table is split in two.
4837 n_visual_lines is the number of lines printed so far for a block.
4838 visual_tbl contains the block visualization info.
4839 vis_no_unit holds insns in a cycle that are not mapped to any unit. */
4840 #define MAX_VISUAL_LINES 100
4845 rtx vis_no_unit[10];
4847 /* Finds units that are in use in this function. Required only
4848 for visualization. */
4851 init_target_units ()
4856 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4858 if (! INSN_P (insn))
4861 unit = insn_unit (insn);
4864 target_units |= ~unit;
4866 target_units |= (1 << unit);
4870 /* Return the length of the visualization table. */
4873 get_visual_tbl_length ()
4879 /* Compute length of one field in line. */
4880 s = (char *) alloca (INSN_LEN + 6);
4881 sprintf (s, " %33s", "uname");
4884 /* Compute length of one line. */
4887 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
4888 if (function_units[unit].bitmask & target_units)
4889 for (i = 0; i < function_units[unit].multiplicity; i++)
4892 n += strlen ("\n") + 2;
4894 /* Compute length of visualization string. */
4895 return (MAX_VISUAL_LINES * n);
4898 /* Init block visualization debugging info. */
4901 init_block_visualization ()
4903 strcpy (visual_tbl, "");
4908 #define BUF_LEN 2048
4911 safe_concat (buf, cur, str)
4916 char *end = buf + BUF_LEN - 2; /* Leave room for null. */
4925 while (cur < end && (c = *str++) != '\0')
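/* Editor's sketch of safe_concat's contract, since its tail is elided
   here: append STR to BUF at cursor CUR without ever writing past
   BUF + BUF_LEN - 1, and return the new cursor.  */

static char *
toy_safe_concat (char *buf, char *cur, const char *str)
{
  char *end = buf + BUF_LEN - 2;	/* Leave room for the null.  */
  char c;

  if (cur > end)
    {
      /* Buffer already full; just make sure it is terminated.  */
      *end = '\0';
      return end;
    }

  while (cur < end && (c = *str++) != '\0')
    *cur++ = c;

  *cur = '\0';
  return cur;
}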
4932 /* This recognizes rtxs I have classified as expressions. These always
4933 represent some action on values or on the results of other expressions,
4934 which may be stored in objects representing values. */
4937 print_exp (buf, x, verbose)
4945 const char *fun = (char *) 0;
4950 for (i = 0; i < 4; i++)
4956 switch (GET_CODE (x))
4959 op[0] = XEXP (x, 0);
4960 if (GET_CODE (XEXP (x, 1)) == CONST_INT
4961 && INTVAL (XEXP (x, 1)) < 0)
4964 op[1] = GEN_INT (-INTVAL (XEXP (x, 1)));
4969 op[1] = XEXP (x, 1);
4973 op[0] = XEXP (x, 0);
4975 op[1] = XEXP (x, 1);
4979 op[0] = XEXP (x, 0);
4981 op[1] = XEXP (x, 1);
4985 op[0] = XEXP (x, 0);
4986 op[1] = XEXP (x, 1);
4990 op[0] = XEXP (x, 0);
4993 op[0] = XEXP (x, 0);
4995 op[1] = XEXP (x, 1);
4998 op[0] = XEXP (x, 0);
5000 op[1] = XEXP (x, 1);
5004 op[0] = XEXP (x, 0);
5005 op[1] = XEXP (x, 1);
5008 op[0] = XEXP (x, 0);
5010 op[1] = XEXP (x, 1);
5014 op[0] = XEXP (x, 0);
5015 op[1] = XEXP (x, 1);
5019 op[0] = XEXP (x, 0);
5020 op[1] = XEXP (x, 1);
5024 op[0] = XEXP (x, 0);
5025 op[1] = XEXP (x, 1);
5029 op[0] = XEXP (x, 0);
5030 op[1] = XEXP (x, 1);
5034 op[0] = XEXP (x, 0);
5035 op[1] = XEXP (x, 1);
5039 op[0] = XEXP (x, 0);
5042 op[0] = XEXP (x, 0);
5044 op[1] = XEXP (x, 1);
5047 op[0] = XEXP (x, 0);
5049 op[1] = XEXP (x, 1);
5052 op[0] = XEXP (x, 0);
5054 op[1] = XEXP (x, 1);
5057 op[0] = XEXP (x, 0);
5059 op[1] = XEXP (x, 1);
5062 op[0] = XEXP (x, 0);
5064 op[1] = XEXP (x, 1);
5067 op[0] = XEXP (x, 0);
5069 op[1] = XEXP (x, 1);
5072 op[0] = XEXP (x, 0);
5074 op[1] = XEXP (x, 1);
5077 op[0] = XEXP (x, 0);
5079 op[1] = XEXP (x, 1);
5083 op[0] = XEXP (x, 0);
5087 op[0] = XEXP (x, 0);
5091 op[0] = XEXP (x, 0);
5094 op[0] = XEXP (x, 0);
5096 op[1] = XEXP (x, 1);
5099 op[0] = XEXP (x, 0);
5101 op[1] = XEXP (x, 1);
5104 op[0] = XEXP (x, 0);
5106 op[1] = XEXP (x, 1);
5110 op[0] = XEXP (x, 0);
5111 op[1] = XEXP (x, 1);
5114 op[0] = XEXP (x, 0);
5116 op[1] = XEXP (x, 1);
5120 op[0] = XEXP (x, 0);
5121 op[1] = XEXP (x, 1);
5124 op[0] = XEXP (x, 0);
5126 op[1] = XEXP (x, 1);
5130 op[0] = XEXP (x, 0);
5131 op[1] = XEXP (x, 1);
5134 op[0] = XEXP (x, 0);
5136 op[1] = XEXP (x, 1);
5140 op[0] = XEXP (x, 0);
5141 op[1] = XEXP (x, 1);
5144 fun = (verbose) ? "sign_extract" : "sxt";
5145 op[0] = XEXP (x, 0);
5146 op[1] = XEXP (x, 1);
5147 op[2] = XEXP (x, 2);
5150 fun = (verbose) ? "zero_extract" : "zxt";
5151 op[0] = XEXP (x, 0);
5152 op[1] = XEXP (x, 1);
5153 op[2] = XEXP (x, 2);
5156 fun = (verbose) ? "sign_extend" : "sxn";
5157 op[0] = XEXP (x, 0);
5160 fun = (verbose) ? "zero_extend" : "zxn";
5161 op[0] = XEXP (x, 0);
5164 fun = (verbose) ? "float_extend" : "fxn";
5165 op[0] = XEXP (x, 0);
5168 fun = (verbose) ? "trunc" : "trn";
5169 op[0] = XEXP (x, 0);
5171 case FLOAT_TRUNCATE:
5172 fun = (verbose) ? "float_trunc" : "ftr";
5173 op[0] = XEXP (x, 0);
5176 fun = (verbose) ? "float" : "flt";
5177 op[0] = XEXP (x, 0);
5179 case UNSIGNED_FLOAT:
5180 fun = (verbose) ? "uns_float" : "ufl";
5181 op[0] = XEXP (x, 0);
5185 op[0] = XEXP (x, 0);
5188 fun = (verbose) ? "uns_fix" : "ufx";
5189 op[0] = XEXP (x, 0);
5193 op[0] = XEXP (x, 0);
5197 op[0] = XEXP (x, 0);
5200 op[0] = XEXP (x, 0);
5204 op[0] = XEXP (x, 0);
5209 op[0] = XEXP (x, 0);
5213 op[1] = XEXP (x, 1);
5218 op[0] = XEXP (x, 0);
5220 op[1] = XEXP (x, 1);
5222 op[2] = XEXP (x, 2);
5227 op[0] = TRAP_CONDITION (x);
5230 case UNSPEC_VOLATILE:
5232 cur = safe_concat (buf, cur, "unspec");
5233 if (GET_CODE (x) == UNSPEC_VOLATILE)
5234 cur = safe_concat (buf, cur, "/v");
5235 cur = safe_concat (buf, cur, "[");
5237 for (i = 0; i < XVECLEN (x, 0); i++)
5239 print_pattern (tmp, XVECEXP (x, 0, i), verbose);
5240 cur = safe_concat (buf, cur, sep);
5241 cur = safe_concat (buf, cur, tmp);
5244 cur = safe_concat (buf, cur, "] ");
5245 sprintf (tmp, "%d", XINT (x, 1));
5246 cur = safe_concat (buf, cur, tmp);
5250 /* If (verbose) debug_rtx (x); */
5251 st[0] = GET_RTX_NAME (GET_CODE (x));
5255 /* Print this as a function? */
5258 cur = safe_concat (buf, cur, fun);
5259 cur = safe_concat (buf, cur, "(");
5262 for (i = 0; i < 4; i++)
5265 cur = safe_concat (buf, cur, st[i]);
5270 cur = safe_concat (buf, cur, ",");
5272 print_value (tmp, op[i], verbose);
5273 cur = safe_concat (buf, cur, tmp);
5278 cur = safe_concat (buf, cur, ")");
5281 /* Prints rtxs I have classified as values: constants, registers,
5282 labels, symbols and memory accesses. */
5285 print_value (buf, x, verbose)
5293 switch (GET_CODE (x))
5296 sprintf (t, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
5297 cur = safe_concat (buf, cur, t);
5300 sprintf (t, "<0x%lx,0x%lx>", (long) XWINT (x, 2), (long) XWINT (x, 3));
5301 cur = safe_concat (buf, cur, t);
5304 cur = safe_concat (buf, cur, "\"");
5305 cur = safe_concat (buf, cur, XSTR (x, 0));
5306 cur = safe_concat (buf, cur, "\"");
5309 cur = safe_concat (buf, cur, "`");
5310 cur = safe_concat (buf, cur, XSTR (x, 0));
5311 cur = safe_concat (buf, cur, "'");
5314 sprintf (t, "L%d", INSN_UID (XEXP (x, 0)));
5315 cur = safe_concat (buf, cur, t);
5318 print_value (t, XEXP (x, 0), verbose);
5319 cur = safe_concat (buf, cur, "const(");
5320 cur = safe_concat (buf, cur, t);
5321 cur = safe_concat (buf, cur, ")");
5324 print_value (t, XEXP (x, 0), verbose);
5325 cur = safe_concat (buf, cur, "high(");
5326 cur = safe_concat (buf, cur, t);
5327 cur = safe_concat (buf, cur, ")");
5330 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
5332 int c = reg_names[REGNO (x)][0];
5333 if (c >= '0' && c <= '9')
5334 cur = safe_concat (buf, cur, "%");
5336 cur = safe_concat (buf, cur, reg_names[REGNO (x)]);
5340 sprintf (t, "r%d", REGNO (x));
5341 cur = safe_concat (buf, cur, t);
5345 print_value (t, SUBREG_REG (x), verbose);
5346 cur = safe_concat (buf, cur, t);
5347 sprintf (t, "#%d", SUBREG_WORD (x));
5348 cur = safe_concat (buf, cur, t);
5351 cur = safe_concat (buf, cur, "scratch");
5354 cur = safe_concat (buf, cur, "cc0");
5357 cur = safe_concat (buf, cur, "pc");
5360 print_value (t, XEXP (x, 0), verbose);
5361 cur = safe_concat (buf, cur, "[");
5362 cur = safe_concat (buf, cur, t);
5363 cur = safe_concat (buf, cur, "]");
5366 print_exp (t, x, verbose);
5367 cur = safe_concat (buf, cur, t);
5372 /* The next step in insn detail: recognizing its pattern. */
5375 print_pattern (buf, x, verbose)
5380 char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
5382 switch (GET_CODE (x))
5385 print_value (t1, SET_DEST (x), verbose);
5386 print_value (t2, SET_SRC (x), verbose);
5387 sprintf (buf, "%s=%s", t1, t2);
5390 sprintf (buf, "return");
5393 print_exp (buf, x, verbose);
5396 print_value (t1, XEXP (x, 0), verbose);
5397 sprintf (buf, "clobber %s", t1);
5400 print_value (t1, XEXP (x, 0), verbose);
5401 sprintf (buf, "use %s", t1);
5404 print_value (t1, COND_EXEC_CODE (x), verbose);
5405 print_value (t2, COND_EXEC_TEST (x), verbose);
5406 sprintf (buf, "cond_exec %s %s", t1, t2);
5413 for (i = 0; i < XVECLEN (x, 0); i++)
5415 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5416 sprintf (t3, "%s%s;", t1, t2);
5419 sprintf (buf, "%s}", t1);
5426 sprintf (t1, "%%{");
5427 for (i = 0; i < XVECLEN (x, 0); i++)
5429 print_insn (t2, XVECEXP (x, 0, i), verbose);
5430 sprintf (t3, "%s%s;", t1, t2);
5433 sprintf (buf, "%s%%}", t1);
5437 sprintf (buf, "asm {%s}", XSTR (x, 0));
5442 print_value (buf, XEXP (x, 0), verbose);
5445 print_value (t1, TRAP_CONDITION (x), verbose);
5446 sprintf (buf, "trap_if %s", t1);
5452 sprintf (t1, "unspec{");
5453 for (i = 0; i < XVECLEN (x, 0); i++)
5455 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5456 sprintf (t3, "%s%s;", t1, t2);
5459 sprintf (buf, "%s}", t1);
5462 case UNSPEC_VOLATILE:
5466 sprintf (t1, "unspec/v{");
5467 for (i = 0; i < XVECLEN (x, 0); i++)
5469 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5470 sprintf (t3, "%s%s;", t1, t2);
5473 sprintf (buf, "%s}", t1);
5477 print_value (buf, x, verbose);
5479 } /* print_pattern */
5481 /* This is the main function in the rtl visualization mechanism. It
5482 accepts an rtx and tries to recognize it as an insn, then prints it
5483 properly in human readable form, resembling assembler mnemonics.
5484 For every insn it prints its UID and the BB the insn belongs to.
5485 (Probably the last "option" should be extended somehow, since it
5486 depends now on sched.c inner variables ...) */
5489 print_insn (buf, x, verbose)
5497 switch (GET_CODE (x))
5500 print_pattern (t, PATTERN (x), verbose);
5502 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (x),
5505 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5508 print_pattern (t, PATTERN (x), verbose);
5510 sprintf (buf, "b%d: i% 4d: jump %s", INSN_BB (x),
5513 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5517 if (GET_CODE (x) == PARALLEL)
5519 x = XVECEXP (x, 0, 0);
5520 print_pattern (t, x, verbose);
5523 strcpy (t, "call <...>");
5525 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (insn),
5526 INSN_UID (insn), t);
5528 sprintf (buf, "%-4d %s", INSN_UID (insn), t);
5531 sprintf (buf, "L%d:", INSN_UID (x));
5534 sprintf (buf, "i% 4d: barrier", INSN_UID (x));
5537 if (NOTE_LINE_NUMBER (x) > 0)
5538 sprintf (buf, "%4d note \"%s\" %d", INSN_UID (x),
5539 NOTE_SOURCE_FILE (x), NOTE_LINE_NUMBER (x));
5541 sprintf (buf, "%4d %s", INSN_UID (x),
5542 GET_NOTE_INSN_NAME (NOTE_LINE_NUMBER (x)));
5547 sprintf (buf, "Not an INSN at all\n");
5551 sprintf (buf, "i%-4d <What?>", INSN_UID (x));
5555 /* Print visualization debugging info. */
5558 print_block_visualization (b, s)
5565 fprintf (dump, "\n;; ==================== scheduling visualization for block %d %s \n", b, s);
5567 /* Print names of units. */
5568 fprintf (dump, ";; %-8s", "clock");
5569 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5570 if (function_units[unit].bitmask & target_units)
5571 for (i = 0; i < function_units[unit].multiplicity; i++)
5572 fprintf (dump, " %-33s", function_units[unit].name);
5573 fprintf (dump, " %-8s\n", "no-unit");
5575 fprintf (dump, ";; %-8s", "=====");
5576 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5577 if (function_units[unit].bitmask & target_units)
5578 for (i = 0; i < function_units[unit].multiplicity; i++)
5579 fprintf (dump, " %-33s", "==============================");
5580 fprintf (dump, " %-8s\n", "=======");
5582 /* Print insns in each cycle. */
5583 fprintf (dump, "%s\n", visual_tbl);
5586 /* Print insns in the 'no_unit' column of visualization. */
5589 visualize_no_unit (insn)
5592 vis_no_unit[n_vis_no_unit] = insn;
5596 /* Print insns scheduled in clock, for visualization. */
5599 visualize_scheduled_insns (b, clock)
5604 /* If no more room, split table into two. */
5605 if (n_visual_lines >= MAX_VISUAL_LINES)
5607 print_block_visualization (b, "(incomplete)");
5608 init_block_visualization ();
5613 sprintf (visual_tbl + strlen (visual_tbl), ";; %-8d", clock);
5614 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5615 if (function_units[unit].bitmask & target_units)
5616 for (i = 0; i < function_units[unit].multiplicity; i++)
5618 int instance = unit + i * FUNCTION_UNITS_SIZE;
5619 rtx insn = unit_last_insn[instance];
5621 /* Print insns that still keep the unit busy. */
5623 actual_hazard_this_instance (unit, instance, insn, clock, 0))
5626 print_insn (str, insn, 0);
5627 str[INSN_LEN] = '\0';
5628 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", str);
5631 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", "------------------------------");
5634 /* Print insns that are not assigned to any unit. */
5635 for (i = 0; i < n_vis_no_unit; i++)
5636 sprintf (visual_tbl + strlen (visual_tbl), " %-8d",
5637 INSN_UID (vis_no_unit[i]));
5640 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5643 /* Print stalled cycles. */
5646 visualize_stall_cycles (b, stalls)
5651 /* If no more room, split table into two. */
5652 if (n_visual_lines >= MAX_VISUAL_LINES)
5654 print_block_visualization (b, "(incomplete)");
5655 init_block_visualization ();
5660 sprintf (visual_tbl + strlen (visual_tbl), ";; ");
5661 for (i = 0; i < stalls; i++)
5662 sprintf (visual_tbl + strlen (visual_tbl), ".");
5663 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5666 /* move_insn1: Remove INSN from insn chain, and link it after LAST insn. */
5669 move_insn1 (insn, last)
5672 NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
5673 PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
5675 NEXT_INSN (insn) = NEXT_INSN (last);
5676 PREV_INSN (NEXT_INSN (last)) = insn;
5678 NEXT_INSN (last) = insn;
5679 PREV_INSN (insn) = last;
5684 /* Search INSN for REG_SAVE_NOTE note pairs for NOTE_INSN_SETJMP,
5685 NOTE_INSN_{LOOP,EHREGION}_{BEG,END}; and convert them back into
5686 NOTEs. The REG_SAVE_NOTE following the first one contains the
5687 saved value for NOTE_BLOCK_NUMBER, which is useful for
5688 NOTE_INSN_EH_REGION_{BEG,END} NOTEs. LAST is the last instruction
5689 output by the instruction scheduler. Return the new value of LAST. */
5692 reemit_notes (insn, last)
5699 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
5701 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5703 enum insn_note note_type = INTVAL (XEXP (note, 0));
5705 if (note_type == NOTE_INSN_SETJMP)
5707 retval = emit_note_after (NOTE_INSN_SETJMP, insn);
5708 CONST_CALL_P (retval) = CONST_CALL_P (note);
5709 remove_note (insn, note);
5710 note = XEXP (note, 1);
5712 else if (note_type == NOTE_INSN_RANGE_BEG
5713 || note_type == NOTE_INSN_RANGE_END)
5715 last = emit_note_before (note_type, last);
5716 remove_note (insn, note);
5717 note = XEXP (note, 1);
5718 NOTE_RANGE_INFO (last) = XEXP (note, 0);
5722 last = emit_note_before (note_type, last);
5723 remove_note (insn, note);
5724 note = XEXP (note, 1);
5725 if (note_type == NOTE_INSN_EH_REGION_BEG
5726 || note_type == NOTE_INSN_EH_REGION_END)
5727 NOTE_EH_HANDLER (last) = INTVAL (XEXP (note, 0));
5729 remove_note (insn, note);
5735 /* Move INSN, and all insns which should be issued before it,
5736 due to SCHED_GROUP_P flag. Reemit notes if needed.
5738 Return the last insn emitted by the scheduler, which is the
5739 return value from the first call to reemit_notes. */
5742 move_insn (insn, last)
5747 /* If INSN has SCHED_GROUP_P set, then issue it and any other
5748 insns with SCHED_GROUP_P set first. */
5749 while (SCHED_GROUP_P (insn))
5751 rtx prev = PREV_INSN (insn);
5753 /* Move a SCHED_GROUP_P insn. */
5754 move_insn1 (insn, last);
5755 /* If this is the first call to reemit_notes, then record
5756 its return value. */
5757 if (retval == NULL_RTX)
5758 retval = reemit_notes (insn, insn);
5760 reemit_notes (insn, insn);
5764 /* Now move the first non SCHED_GROUP_P insn. */
5765 move_insn1 (insn, last);
5767 /* If this is the first call to reemit_notes, then record
5768 its return value. */
5769 if (retval == NULL_RTX)
5770 retval = reemit_notes (insn, insn);
5772 reemit_notes (insn, insn);
5777 /* Return an insn which represents a SCHED_GROUP, which is
5778 the last insn in the group. */
5789 insn = next_nonnote_insn (insn);
5791 while (insn && SCHED_GROUP_P (insn) && (GET_CODE (insn) != CODE_LABEL));
5796 /* Use forward list scheduling to rearrange insns of block BB in region RGN,
5797 possibly bringing insns from subsequent blocks in the same region.
5798 Return number of insns scheduled. */
5801 schedule_block (bb, rgn_n_insns)
5805 /* Local variables. */
5811 /* Flow block of this bb. */
5812 int b = BB_TO_BLOCK (bb);
5814 /* target_n_insns == number of insns in b before scheduling starts.
5815 sched_target_n_insns == how many of b's insns were scheduled.
5816 sched_n_insns == how many insns were scheduled in b. */
5817 int target_n_insns = 0;
5818 int sched_target_n_insns = 0;
5819 int sched_n_insns = 0;
5821 #define NEED_NOTHING 0
5826 /* Head/tail info for this block. */
5833 /* We used to have code to avoid getting parameters moved from hard
5834 argument registers into pseudos.
5836 However, it was removed when it proved to be of marginal benefit
5837 and caused problems because schedule_block and compute_forward_dependences
5838 had different notions of what the "head" insn was. */
5839 get_bb_head_tail (bb, &head, &tail);
5841 /* rm_other_notes only removes notes which are _inside_ the
5842 block---that is, it won't remove notes before the first real insn
5843 or after the last real insn of the block. So if the first insn
5844 has a REG_SAVE_NOTE which would otherwise be emitted before the
5845 insn, it is redundant with the note before the start of the
5846 block, and so we have to take it out.
5848 FIXME: Probably the same thing should be done with REG_SAVE_NOTEs
5849 referencing NOTE_INSN_SETJMP at the end of the block. */
5854 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
5855 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5857 if (INTVAL (XEXP (note, 0)) != NOTE_INSN_SETJMP)
5859 remove_note (head, note);
5860 note = XEXP (note, 1);
5861 remove_note (head, note);
5864 note = XEXP (note, 1);
5868 next_tail = NEXT_INSN (tail);
5869 prev_head = PREV_INSN (head);
5871 /* If the only insn left is a NOTE or a CODE_LABEL, then there is no need
5872 to schedule this block. */
5873 if (head == tail && (! INSN_P (head)))
5874 return (sched_n_insns);
5879 fprintf (dump, ";; ======================================================\n");
5881 ";; -- basic block %d from %d to %d -- %s reload\n",
5882 b, INSN_UID (BLOCK_HEAD (b)), INSN_UID (BLOCK_END (b)),
5883 (reload_completed ? "after" : "before"));
5884 fprintf (dump, ";; ======================================================\n");
5885 fprintf (dump, "\n");
5887 visual_tbl = (char *) alloca (get_visual_tbl_length ());
5888 init_block_visualization ();
5891 /* Remove remaining note insns from the block, save them in
5892 note_list. These notes are restored at the end of
5893 schedule_block (). */
5895 rm_other_notes (head, tail);
5899 /* Prepare current target block info. */
5900 if (current_nr_blocks > 1)
5902 candidate_table = (candidate *) xmalloc (current_nr_blocks
5903 * sizeof (candidate));
5906 /* ??? It is not clear why bblst_size is computed this way. The original
5907 number was clearly too small as it resulted in compiler failures.
5908 Multiplying the original number by 2 (to account for update_bbs
5909 members) seems to be a reasonable solution. */
5910 /* ??? Or perhaps there is a bug somewhere else in this file? */
5911 bblst_size = (current_nr_blocks - bb) * rgn_nr_edges * 2;
5912 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
5914 bitlst_table_last = 0;
5915 bitlst_table_size = rgn_nr_edges;
5916 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
5918 compute_trg_info (bb);
5923 /* Allocate the ready list. */
5924 ready = (rtx *) xmalloc ((rgn_n_insns + 1) * sizeof (rtx));
5926 /* Print debugging information. */
5927 if (sched_verbose >= 5)
5928 debug_dependencies ();
5930 /* Initialize ready list with all 'ready' insns in target block.
5931 Count number of insns in the target block being scheduled. */
5933 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5937 if (! INSN_P (insn))
5939 next = NEXT_INSN (insn);
5941 if (INSN_DEP_COUNT (insn) == 0
5942 && (SCHED_GROUP_P (next) == 0 || ! INSN_P (next)))
5943 ready[n_ready++] = insn;
5944 if (!(SCHED_GROUP_P (insn)))
5948 /* Add to ready list all 'ready' insns in valid source blocks.
5949 For speculative insns, check-live, exception-free, and issue-delay are checked. */
5951 for (bb_src = bb + 1; bb_src < current_nr_blocks; bb_src++)
5952 if (IS_VALID (bb_src))
5958 get_bb_head_tail (bb_src, &head, &tail);
5959 src_next_tail = NEXT_INSN (tail);
5962 if (head == tail && (! INSN_P (head)))
5965 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
5967 if (! INSN_P (insn))
5970 if (!CANT_MOVE (insn)
5971 && (!IS_SPECULATIVE_INSN (insn)
5972 || (insn_issue_delay (insn) <= 3
5973 && check_live (insn, bb_src)
5974 && is_exception_free (insn, bb_src, target_bb))))
5978 /* Note that we haven't squirreled away the notes for
5979 blocks other than the current. So if this is a
5980 speculative insn, NEXT might otherwise be a note. */
5981 next = next_nonnote_insn (insn);
5982 if (INSN_DEP_COUNT (insn) == 0
5984 || SCHED_GROUP_P (next) == 0
5985 || ! INSN_P (next)))
5986 ready[n_ready++] = insn;
5991 #ifdef MD_SCHED_INIT
5992 MD_SCHED_INIT (dump, sched_verbose);
5995 /* No insns scheduled in this block yet. */
5996 last_scheduled_insn = 0;
5998 /* Q_SIZE is the total number of insns in the queue. */
6002 bzero ((char *) insn_queue, sizeof (insn_queue));
6004 /* Start just before the beginning of time. */
6007 /* We start inserting insns after PREV_HEAD. */
6010 /* Initialize INSN_QUEUE, LIST and NEW_NEEDS. */
6011 new_needs = (NEXT_INSN (prev_head) == BLOCK_HEAD (b)
6012 ? NEED_HEAD : NEED_NOTHING);
6013 if (PREV_INSN (next_tail) == BLOCK_END (b))
6014 new_needs |= NEED_TAIL;
6016 /* Loop until all the insns in BB are scheduled. */
6017 while (sched_target_n_insns < target_n_insns)
6021 /* Add to the ready list all pending insns that can be issued now.
6022 If there are no ready insns, increment clock until one
6023 is ready and add all pending insns at that point to the ready list. */
6025 n_ready = queue_to_ready (ready, n_ready);
6030 if (sched_verbose >= 2)
6032 fprintf (dump, ";;\t\tReady list after queue_to_ready: ");
6033 debug_ready_list (ready, n_ready);
6036 /* Sort the ready list based on priority. */
6037 SCHED_SORT (ready, n_ready);
6039 /* Allow the target to reorder the list, typically for
6040 better instruction bundling. */
6041 #ifdef MD_SCHED_REORDER
6042 MD_SCHED_REORDER (dump, sched_verbose, ready, n_ready, clock_var,
6045 can_issue_more = issue_rate;
6050 fprintf (dump, "\n;;\tReady list (t =%3d): ", clock_var);
6051 debug_ready_list (ready, n_ready);
6054 /* Issue insns from ready list. */
6055 while (n_ready != 0 && can_issue_more)
6057 /* Select and remove the insn from the ready list. */
6058 rtx insn = ready[--n_ready];
6059 int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);
6063 queue_insn (insn, cost);
6067 /* An interblock motion? */
6068 if (INSN_BB (insn) != target_bb)
6073 if (IS_SPECULATIVE_INSN (insn))
6075 if (!check_live (insn, INSN_BB (insn)))
6077 update_live (insn, INSN_BB (insn));
6079 /* For speculative load, mark insns fed by it. */
6080 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
6081 set_spec_fed (insn);
6087 /* Find the beginning of the scheduling group. */
6088 /* ??? Ought to update basic block here, but later bits of
6089 schedule_block assume the original insn block is never updated. */
6093 while (SCHED_GROUP_P (temp))
6094 temp = PREV_INSN (temp);
6096 /* Update source block boundaries. */
6097 b1 = BLOCK_FOR_INSN (temp);
6098 if (temp == b1->head && insn == b1->end)
6100 /* We moved all the insns in the basic block.
6101 Emit a note after the last insn and update the
6102 begin/end boundaries to point to the note. */
6103 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
6107 else if (insn == b1->end)
6109 /* We took insns from the end of the basic block,
6110 so update the end of block boundary so that it
6111 points to the first insn we did not move. */
6112 b1->end = PREV_INSN (temp);
6114 else if (temp == b1->head)
6116 /* We took insns from the start of the basic block,
6117 so update the start of block boundary so that
6118 it points to the first insn we did not move. */
6119 b1->head = NEXT_INSN (insn);
6124 /* In block motion. */
6125 sched_target_n_insns++;
6128 last_scheduled_insn = insn;
6129 last = move_insn (insn, last);
6132 #ifdef MD_SCHED_VARIABLE_ISSUE
6133 MD_SCHED_VARIABLE_ISSUE (dump, sched_verbose, insn,
6139 n_ready = schedule_insn (insn, ready, n_ready, clock_var);
6141 /* Close this block after scheduling its jump. */
6142 if (GET_CODE (last_scheduled_insn) == JUMP_INSN)
6148 visualize_scheduled_insns (b, clock_var);
6154 fprintf (dump, ";;\tReady list (final): ");
6155 debug_ready_list (ready, n_ready);
6156 print_block_visualization (b, "");
6159 /* Sanity check -- queue must be empty now. Meaningless if region has multiple bbs. */
6161 if (current_nr_blocks > 1)
6162 if (!flag_schedule_interblock && q_size != 0)
6165 /* Update head/tail boundaries. */
6166 head = NEXT_INSN (prev_head);
6169 /* Restore-other-notes: NOTE_LIST is the end of a chain of notes
6170 previously found among the insns. Insert them at the beginning of the insns. */
6174 rtx note_head = note_list;
6176 while (PREV_INSN (note_head))
6178 note_head = PREV_INSN (note_head);
6181 PREV_INSN (note_head) = PREV_INSN (head);
6182 NEXT_INSN (PREV_INSN (head)) = note_head;
6183 PREV_INSN (head) = note_list;
6184 NEXT_INSN (note_list) = head;
6188 /* Update target block boundaries. */
6189 if (new_needs & NEED_HEAD)
6190 BLOCK_HEAD (b) = head;
6192 if (new_needs & NEED_TAIL)
6193 BLOCK_END (b) = tail;
6198 fprintf (dump, ";; total time = %d\n;; new basic block head = %d\n",
6199 clock_var, INSN_UID (BLOCK_HEAD (b)));
6200 fprintf (dump, ";; new basic block end = %d\n\n",
6201 INSN_UID (BLOCK_END (b)));
6205 if (current_nr_blocks > 1)
6207 free (candidate_table);
6209 free (bitlst_table);
6213 return (sched_n_insns);
6216 /* Print the bit-set of registers, S, callable from debugger. */
6219 debug_reg_vector (s)
6224 EXECUTE_IF_SET_IN_REG_SET (s, 0, regno,
6226 fprintf (dump, " %d", regno);
6229 fprintf (dump, "\n");
6232 /* Use the backward dependences from LOG_LINKS to build
6233 forward dependences in INSN_DEPEND. */
6236 compute_block_forward_dependences (bb)
6242 enum reg_note dep_type;
6244 get_bb_head_tail (bb, &head, &tail);
6245 next_tail = NEXT_INSN (tail);
6246 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6248 if (! INSN_P (insn))
6251 insn = group_leader (insn);
6253 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6255 rtx x = group_leader (XEXP (link, 0));
6258 if (x != XEXP (link, 0))
6261 #ifdef ENABLE_CHECKING
6262 /* If add_dependence is working properly there should never
6263 be notes, deleted insns or duplicates in the backward
6264 links. Thus we need not check for them here.
6266 However, if we have enabled checking we might as well go
6267 ahead and verify that add_dependence worked properly. */
6268 if (GET_CODE (x) == NOTE
6269 || INSN_DELETED_P (x)
6270 || (forward_dependency_cache != NULL
6271 && TEST_BIT (forward_dependency_cache[INSN_LUID (x)],
6273 || (forward_dependency_cache == NULL
6274 && find_insn_list (insn, INSN_DEPEND (x))))
6276 if (forward_dependency_cache != NULL)
6277 SET_BIT (forward_dependency_cache[INSN_LUID (x)],
6281 new_link = alloc_INSN_LIST (insn, INSN_DEPEND (x));
6283 dep_type = REG_NOTE_KIND (link);
6284 PUT_REG_NOTE_KIND (new_link, dep_type);
6286 INSN_DEPEND (x) = new_link;
6287 INSN_DEP_COUNT (insn) += 1;
6292 /* Initialize variables for region data dependence analysis.
6293 n_bbs is the number of region blocks. */
6299 int maxreg = max_reg_num ();
6300 deps->reg_last_uses = (rtx *) xcalloc (maxreg, sizeof (rtx));
6301 deps->reg_last_sets = (rtx *) xcalloc (maxreg, sizeof (rtx));
6302 deps->reg_last_clobbers = (rtx *) xcalloc (maxreg, sizeof (rtx));
6304 deps->pending_read_insns = 0;
6305 deps->pending_read_mems = 0;
6306 deps->pending_write_insns = 0;
6307 deps->pending_write_mems = 0;
6308 deps->pending_lists_length = 0;
6309 deps->last_pending_memory_flush = 0;
6310 deps->last_function_call = 0;
6311 deps->in_post_call_group_p = 0;
6313 deps->sched_before_next_call
6314 = gen_rtx_INSN (VOIDmode, 0, NULL_RTX, NULL_RTX,
6315 NULL_RTX, 0, NULL_RTX, NULL_RTX);
6316 LOG_LINKS (deps->sched_before_next_call) = 0;
6319 /* Add dependences so that branches are scheduled to run last in their block. */
6323 add_branch_dependences (head, tail)
6328 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
6329 to remain in order at the end of the block by adding dependencies and
6330 giving the last a high priority. There may be notes present, and
6331 prev_head may also be a note.
6333 Branches must obviously remain at the end. Calls should remain at the
6334 end since moving them results in worse register allocation. Uses remain
6335 at the end to ensure proper register allocation. cc0 setters remain
6336 at the end because they can't be moved away from their cc0 user. */
6339 while (GET_CODE (insn) == CALL_INSN
6340 || GET_CODE (insn) == JUMP_INSN
6341 || (GET_CODE (insn) == INSN
6342 && (GET_CODE (PATTERN (insn)) == USE
6343 || GET_CODE (PATTERN (insn)) == CLOBBER
6345 || sets_cc0_p (PATTERN (insn))
6348 || GET_CODE (insn) == NOTE)
6350 if (GET_CODE (insn) != NOTE)
6353 && !find_insn_list (insn, LOG_LINKS (last)))
6355 add_dependence (last, insn, REG_DEP_ANTI);
6356 INSN_REF_COUNT (insn)++;
6359 CANT_MOVE (insn) = 1;
6362 /* Skip over insns that are part of a group.
6363 Make each insn explicitly depend on the previous insn.
6364 This ensures that only the group header will ever enter
6365 the ready queue (and, when scheduled, will automatically
6366 schedule the SCHED_GROUP_P block). */
6367 while (SCHED_GROUP_P (insn))
6369 rtx temp = prev_nonnote_insn (insn);
6370 add_dependence (insn, temp, REG_DEP_ANTI);
6375 /* Don't overrun the bounds of the basic block. */
6379 insn = PREV_INSN (insn);
6382 /* Make sure these insns are scheduled last in their block. */
6385 while (insn != head)
6387 insn = prev_nonnote_insn (insn);
6389 if (INSN_REF_COUNT (insn) != 0)
6392 add_dependence (last, insn, REG_DEP_ANTI);
6393 INSN_REF_COUNT (insn) = 1;
6395 /* Skip over insns that are part of a group. */
6396 while (SCHED_GROUP_P (insn))
6397 insn = prev_nonnote_insn (insn);
6401 /* After computing the dependencies for block BB, propagate the dependencies
6402 found in TMP_DEPS to the successors of the block. MAX_REG is the number of registers. */
6405 propagate_deps (bb, tmp_deps, max_reg)
6407 struct deps *tmp_deps;
6410 int b = BB_TO_BLOCK (bb);
6413 rtx link_insn, link_mem;
6416 /* These lists should point to the right place, for correct freeing later. */
6418 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
6419 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
6420 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
6421 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
6423 /* bb's structures are inherited by its successors. */
6424 first_edge = e = OUT_EDGES (b);
6431 int b_succ = TO_BLOCK (e);
6432 int bb_succ = BLOCK_TO_BB (b_succ);
6433 struct deps *succ_deps = bb_deps + bb_succ;
6435 /* Only bbs "below" bb, in the same region, are interesting. */
6436 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
6443 for (reg = 0; reg < max_reg; reg++)
6445 /* reg-last-uses lists are inherited by bb_succ. */
6446 for (u = tmp_deps->reg_last_uses[reg]; u; u = XEXP (u, 1))
6448 if (find_insn_list (XEXP (u, 0),
6449 succ_deps->reg_last_uses[reg]))
6452 succ_deps->reg_last_uses[reg]
6453 = alloc_INSN_LIST (XEXP (u, 0),
6454 succ_deps->reg_last_uses[reg]);
6457 /* reg-last-defs lists are inherited by bb_succ. */
6458 for (u = tmp_deps->reg_last_sets[reg]; u; u = XEXP (u, 1))
6460 if (find_insn_list (XEXP (u, 0),
6461 succ_deps->reg_last_sets[reg]))
6464 succ_deps->reg_last_sets[reg]
6465 = alloc_INSN_LIST (XEXP (u, 0),
6466 succ_deps->reg_last_sets[reg]);
6469 for (u = tmp_deps->reg_last_clobbers[reg]; u; u = XEXP (u, 1))
6471 if (find_insn_list (XEXP (u, 0),
6472 succ_deps->reg_last_clobbers[reg]))
6475 succ_deps->reg_last_clobbers[reg]
6476 = alloc_INSN_LIST (XEXP (u, 0),
6477 succ_deps->reg_last_clobbers[reg]);
6481 /* Mem read/write lists are inherited by bb_succ. */
6482 link_insn = tmp_deps->pending_read_insns;
6483 link_mem = tmp_deps->pending_read_mems;
6486 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6488 succ_deps->pending_read_insns,
6489 succ_deps->pending_read_mems)))
6490 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
6491 &succ_deps->pending_read_mems,
6492 XEXP (link_insn, 0), XEXP (link_mem, 0));
6493 link_insn = XEXP (link_insn, 1);
6494 link_mem = XEXP (link_mem, 1);
6497 link_insn = tmp_deps->pending_write_insns;
6498 link_mem = tmp_deps->pending_write_mems;
6501 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6503 succ_deps->pending_write_insns,
6504 succ_deps->pending_write_mems)))
6505 add_insn_mem_dependence (succ_deps,
6506 &succ_deps->pending_write_insns,
6507 &succ_deps->pending_write_mems,
6508 XEXP (link_insn, 0), XEXP (link_mem, 0));
6510 link_insn = XEXP (link_insn, 1);
6511 link_mem = XEXP (link_mem, 1);
6514 /* last_function_call is inherited by bb_succ. */
6515 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
6517 if (find_insn_list (XEXP (u, 0),
6518 succ_deps->last_function_call))
6521 succ_deps->last_function_call
6522 = alloc_INSN_LIST (XEXP (u, 0),
6523 succ_deps->last_function_call);
6526 /* last_pending_memory_flush is inherited by bb_succ. */
6527 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
6529 if (find_insn_list (XEXP (u, 0),
6530 succ_deps->last_pending_memory_flush))
6533 succ_deps->last_pending_memory_flush
6534 = alloc_INSN_LIST (XEXP (u, 0),
6535 succ_deps->last_pending_memory_flush);
6538 /* sched_before_next_call is inherited by bb_succ. */
6539 x = LOG_LINKS (tmp_deps->sched_before_next_call);
6540 for (; x; x = XEXP (x, 1))
6541 add_dependence (succ_deps->sched_before_next_call,
6542 XEXP (x, 0), REG_DEP_ANTI);
6546 while (e != first_edge);
6549 /* Compute backward dependences inside bb. In a multiple blocks region:
6550 (1) a bb is analyzed after its predecessors, and (2) the lists in
6551 effect at the end of bb (after analyzing for bb) are inherited by bb's successors.
6554 Specifically for reg-reg data dependences, the block insns are
6555 scanned by sched_analyze () top-to-bottom. Two lists are
6556 maintained by sched_analyze (): reg_last_sets[] for register DEFs,
6557 and reg_last_uses[] for register USEs.
6559 When analysis is completed for bb, we update for its successors:
6560 - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
6561 - USES[succ] = Union (USES [succ], USES [bb])
6563 The mechanism for computing mem-mem data dependence is very
6564 similar, and the result is interblock dependences in the region. */
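/* Editor's sketch of the Union steps above, assuming plain linked lists
   with a membership test; this is the shape propagate_deps () gives to
   the reg_last_* and pending memory lists, inheriting only entries the
   successor does not already hold.  All toy_* names are invented.  */

#include <stdlib.h>

struct toy_list
{
  void *elem;
  struct toy_list *next;
};

static int
toy_member (void *elem, struct toy_list *l)
{
  for (; l; l = l->next)
    if (l->elem == elem)
      return 1;
  return 0;
}

/* DEFS[succ] = Union (DEFS[succ], DEFS[bb]), destructively on *SUCC.  */
static void
toy_union (struct toy_list **succ, struct toy_list *bb)
{
  for (; bb; bb = bb->next)
    if (! toy_member (bb->elem, *succ))
      {
	struct toy_list *n = (struct toy_list *) malloc (sizeof *n);

	n->elem = bb->elem;
	n->next = *succ;
	*succ = n;
      }
}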
6567 compute_block_backward_dependences (bb)
6572 int max_reg = max_reg_num ();
6573 struct deps tmp_deps;
6575 tmp_deps = bb_deps[bb];
6577 /* Do the analysis for this block. */
6578 get_bb_head_tail (bb, &head, &tail);
6579 sched_analyze (&tmp_deps, head, tail);
6580 add_branch_dependences (head, tail);
6582 if (current_nr_blocks > 1)
6583 propagate_deps (bb, &tmp_deps, max_reg);
6585 /* Free up the INSN_LISTs.
6587 Note this loop is executed max_reg * nr_regions times. Its first
6588 implementation accounted for over 90% of the calls to free_INSN_LIST_list.
6589 The list was empty for the vast majority of those calls. On the PA, not
6590 calling free_INSN_LIST_list in those cases improves -O2 compile times by
6592 for (i = 0; i < max_reg; ++i)
6594 if (tmp_deps.reg_last_clobbers[i])
6595 free_INSN_LIST_list (&tmp_deps.reg_last_clobbers[i]);
6596 if (tmp_deps.reg_last_sets[i])
6597 free_INSN_LIST_list (&tmp_deps.reg_last_sets[i]);
6598 if (tmp_deps.reg_last_uses[i])
6599 free_INSN_LIST_list (&tmp_deps.reg_last_uses[i]);
6602 /* Assert that we won't need bb_reg_last_* for this block anymore. */
6603 free (bb_deps[bb].reg_last_uses);
6604 free (bb_deps[bb].reg_last_sets);
6605 free (bb_deps[bb].reg_last_clobbers);
6606 bb_deps[bb].reg_last_uses = 0;
6607 bb_deps[bb].reg_last_sets = 0;
6608 bb_deps[bb].reg_last_clobbers = 0;
6611 /* Print dependences for debugging, callable from debugger. */
6614 debug_dependencies ()
6618 fprintf (dump, ";; --------------- forward dependences: ------------ \n");
6619 for (bb = 0; bb < current_nr_blocks; bb++)
6627 get_bb_head_tail (bb, &head, &tail);
6628 next_tail = NEXT_INSN (tail);
6629 fprintf (dump, "\n;; --- Region Dependences --- b %d bb %d \n",
6630 BB_TO_BLOCK (bb), bb);
6632 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6633 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
6634 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6635 "----", "----", "--", "---", "----", "----", "--------", "-----");
6636 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6641 if (! INSN_P (insn))
6644 fprintf (dump, ";; %6d ", INSN_UID (insn));
6645 if (GET_CODE (insn) == NOTE)
6647 n = NOTE_LINE_NUMBER (insn);
6649 fprintf (dump, "%s\n", GET_NOTE_INSN_NAME (n));
6651 fprintf (dump, "line %d, file %s\n", n,
6652 NOTE_SOURCE_FILE (insn));
6655 fprintf (dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
6659 unit = insn_unit (insn);
6661 || function_units[unit].blockage_range_function == 0) ? 0 :
6662 function_units[unit].blockage_range_function (insn);
6664 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
6665 (SCHED_GROUP_P (insn) ? "+" : " "),
6669 INSN_DEP_COUNT (insn),
6670 INSN_PRIORITY (insn),
6671 insn_cost (insn, 0, 0),
6672 (int) MIN_BLOCKAGE_COST (range),
6673 (int) MAX_BLOCKAGE_COST (range));
6674 insn_print_units (insn);
6675 fprintf (dump, "\t: ");
6676 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
6677 fprintf (dump, "%d ", INSN_UID (XEXP (link, 0)));
6678 fprintf (dump, "\n");
6682 fprintf (dump, "\n");
6685 /* Set_priorities: compute priority of each insn in the block. */
6698 get_bb_head_tail (bb, &head, &tail);
6699 prev_head = PREV_INSN (head);
6701 if (head == tail && (! INSN_P (head)))
6705 for (insn = tail; insn != prev_head; insn = PREV_INSN (insn))
6708 if (GET_CODE (insn) == NOTE)
6711 if (!(SCHED_GROUP_P (insn)))
6713 (void) priority (insn);
6719 /* Schedule a region. A region is either an inner loop, a loop-free
6720 subroutine, or a single basic block. Each bb in the region is
6721 scheduled after its flow predecessors. */
6724 schedule_region (rgn)
6728 int rgn_n_insns = 0;
6729 int sched_rgn_n_insns = 0;
6730 regset_head reg_pending_sets_head;
6731 regset_head reg_pending_clobbers_head;
6733 /* Set variables for the current region. */
6734 current_nr_blocks = RGN_NR_BLOCKS (rgn);
6735 current_blocks = RGN_BLOCKS (rgn);
6737 reg_pending_sets = INITIALIZE_REG_SET (reg_pending_sets_head);
6738 reg_pending_clobbers = INITIALIZE_REG_SET (reg_pending_clobbers_head);
6739 reg_pending_sets_all = 0;
6741 /* Initializations for region data dependence analysis. */
6742 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
6743 for (bb = 0; bb < current_nr_blocks; bb++)
6744 init_deps (bb_deps + bb);
6746 /* Compute LOG_LINKS. */
6747 for (bb = 0; bb < current_nr_blocks; bb++)
6748 compute_block_backward_dependences (bb);
6750 /* Compute INSN_DEPEND. */
6751 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
6752 compute_block_forward_dependences (bb);
6754 /* Delete line notes and set priorities. */
6755 for (bb = 0; bb < current_nr_blocks; bb++)
6757 if (write_symbols != NO_DEBUG)
6759 save_line_notes (bb);
6763 rgn_n_insns += set_priorities (bb);
6766 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
6767 if (current_nr_blocks > 1)
6771 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
6773 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
6774 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
6775 for (i = 0; i < current_nr_blocks; i++)
6776 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
6780 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
6781 for (i = 1; i < nr_edges; i++)
6782 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
6783 EDGE_TO_BIT (i) = rgn_nr_edges++;
6784 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
6787 for (i = 1; i < nr_edges; i++)
6788 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
6789 rgn_edges[rgn_nr_edges++] = i;
6792 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
6793 edgeset_bitsize = rgn_nr_edges;
6794 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6796 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6797 for (i = 0; i < current_nr_blocks; i++)
6800 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6802 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6805 /* Compute probabilities, dominators, split_edges. */
6806 for (bb = 0; bb < current_nr_blocks; bb++)
6807 compute_dom_prob_ps (bb);
6810 /* Now we can schedule all blocks. */
6811 for (bb = 0; bb < current_nr_blocks; bb++)
6812 sched_rgn_n_insns += schedule_block (bb, rgn_n_insns);
6814 /* Sanity check: verify that all region insns were scheduled. */
6815 if (sched_rgn_n_insns != rgn_n_insns)
6818 /* Restore line notes. */
6819 if (write_symbols != NO_DEBUG)
6821 for (bb = 0; bb < current_nr_blocks; bb++)
6822 restore_line_notes (bb);
6825 /* Done with this region. */
6826 free_pending_lists ();
6828 FREE_REG_SET (reg_pending_sets);
6829 FREE_REG_SET (reg_pending_clobbers);
6833 if (current_nr_blocks > 1)
6838 for (i = 0; i < current_nr_blocks; ++i)
6841 free (pot_split[i]);
6842 free (ancestor_edges[i]);
6848 free (ancestor_edges);
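
/* Illustrative sketch, not part of the pass: the bbset and edgeset
   vectors allocated in schedule_region above are flat bitmaps of
   HOST_WIDE_INTs, with the region's edges renumbered densely through
   EDGE_TO_BIT.  Testing whether an edge is in such a set therefore
   comes down to ordinary word/bit arithmetic, as in this
   hypothetical helper (the pass uses its own bitset primitives
   elsewhere in this file for the same purpose):  */

static int
sketch_edgeset_member (edge, set)
     int edge;
     edgeset set;
{
  int bit = EDGE_TO_BIT (edge);

  return (set[bit / HOST_BITS_PER_WIDE_INT]
	  & ((HOST_WIDE_INT) 1 << (bit % HOST_BITS_PER_WIDE_INT))) != 0;
}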
/* The one entry point in this file.  DUMP_FILE is the dump file for
   this pass.  */

void
schedule_insns (dump_file)
     FILE *dump_file;
{
  int *deaths_in_region;
  sbitmap blocks, large_region_blocks;
  int max_uid;
  int b;
  rtx insn;
  int rgn;
  int luid;
  int any_large_regions;

  /* If the machine uses cc0, disable speculative loads; they are not
     safe in its presence.  */
#ifdef HAVE_cc0
  flag_schedule_speculative_load = 0;
#endif

  /* Taking care of this degenerate case makes the rest of
     this code simpler.  */
  if (n_basic_blocks == 0)
    return;

  /* Set dump and sched_verbose for the desired debugging output.  If no
     dump-file was specified, but -fsched-verbose=N (any N), print to stderr.
     For -fsched-verbose=N, N>=10, print everything to stderr.  */
  sched_verbose = sched_verbose_param;
  if (sched_verbose_param == 0 && dump_file)
    sched_verbose = 1;
  dump = ((sched_verbose_param >= 10 || !dump_file) ? stderr : dump_file);

  nr_inter = 0;
  nr_spec = 0;

  /* Initialize issue_rate.  */
  issue_rate = ISSUE_RATE;

  split_all_insns (1);

  /* We use LUID 0 for the fake insn (UID 0) which holds dependencies for
     pseudos which do not cross calls.  */
  max_uid = get_max_uid () + 1;

  h_i_d = (struct haifa_insn_data *) xcalloc (max_uid, sizeof (*h_i_d));

  h_i_d[0].luid = 0;
  luid = 1;
  for (b = 0; b < n_basic_blocks; b++)
    for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
      {
	INSN_LUID (insn) = luid;

	/* Increment the next luid, unless this is a note.  We don't
	   really need separate IDs for notes and we don't want to
	   schedule differently depending on whether or not there are
	   line-number notes, i.e., depending on whether or not we're
	   generating debugging information.  */
	if (GET_CODE (insn) != NOTE)
	  ++luid;

	if (insn == BLOCK_END (b))
	  break;
      }

  /* ?!? We could save some memory by computing a per-region luid mapping
     which could reduce both the number of vectors in the cache and the size
     of each vector.  Instead we just avoid the cache entirely unless the
     average number of instructions in a basic block is very high.  See
     the comment before the declaration of true_dependency_cache for
     what we consider "very high".  */
  if (luid / n_basic_blocks > 100 * 5)
    {
      true_dependency_cache = sbitmap_vector_alloc (luid, luid);
      sbitmap_vector_zero (true_dependency_cache, luid);
      anti_dependency_cache = sbitmap_vector_alloc (luid, luid);
      sbitmap_vector_zero (anti_dependency_cache, luid);
      output_dependency_cache = sbitmap_vector_alloc (luid, luid);
      sbitmap_vector_zero (output_dependency_cache, luid);
#ifdef ENABLE_CHECKING
      forward_dependency_cache = sbitmap_vector_alloc (luid, luid);
      sbitmap_vector_zero (forward_dependency_cache, luid);
#endif
    }
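
/* Illustrative only (not from the original source): these caches are
   quadratic in the number of luids.  With luid == 4000, each of the
   three (or, with ENABLE_CHECKING, four) sbitmap vectors above holds
   4000 x 4000 bits, i.e. roughly 2 megabytes apiece, which is why the
   cache is only enabled above an average of 500 insns (100 * 5) per
   basic block.  */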
  nr_regions = 0;
  rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
  rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
  block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
  containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));

  blocks = sbitmap_alloc (n_basic_blocks);
  large_region_blocks = sbitmap_alloc (n_basic_blocks);

  compute_bb_for_insn (max_uid);

  /* Compute regions for scheduling.  */
  if (reload_completed
      || n_basic_blocks == 1
      || !flag_schedule_interblock)
    {
      find_single_block_region ();
    }
  else
    {
      /* Verify that a 'good' control flow graph can be built.  */
      if (is_cfg_nonregular ())
	{
	  find_single_block_region ();
	}
      else
	{
	  sbitmap *dom;
	  struct edge_list *edge_list;

	  dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);

	  /* The scheduler runs after flow; therefore, we can't blindly call
	     back into find_basic_blocks since doing so could invalidate the
	     info in global_live_at_start.

	     Consider a block consisting entirely of dead stores; after life
	     analysis it would be a block of NOTE_INSN_DELETED notes.  If
	     we call find_basic_blocks again, then the block would be removed
	     entirely and invalidate our register live information.

	     We could (should?) recompute register live information.  Doing
	     so may even be beneficial.  */
	  edge_list = create_edge_list ();

	  /* Compute the dominators and post dominators.  We don't
	     currently use post dominators, but we should for
	     speculative motion analysis.  */
	  compute_flow_dominators (dom, NULL);

	  /* build_control_flow will return nonzero if it detects unreachable
	     blocks or any other irregularity with the cfg which prevents
	     cross block scheduling.  */
	  if (build_control_flow (edge_list) != 0)
	    find_single_block_region ();
	  else
	    find_rgns (edge_list, dom);

	  if (sched_verbose >= 3)
	    debug_regions ();

	  /* We are done with flow's edge list.  */
	  free_edge_list (edge_list);

	  /* For now.  This will move as more and more of haifa is converted
	     to using the cfg code in flow.c.  */
	  free (dom);
	}
    }

  deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
  init_alias_analysis ();

  if (write_symbols != NO_DEBUG)
    {
      rtx line;

      line_note_head = (rtx *) xcalloc (n_basic_blocks, sizeof (rtx));

      /* Save-line-note-head:
	 Determine the line-number at the start of each basic block.
	 This must be computed and saved now, because after a basic block's
	 predecessor has been scheduled, it is impossible to accurately
	 determine the correct line number for the first insn of the block.  */
      for (b = 0; b < n_basic_blocks; b++)
	for (line = BLOCK_HEAD (b); line; line = PREV_INSN (line))
	  if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
	    {
	      line_note_head[b] = line;
	      break;
	    }
    }

  /* Find units used in this function, for visualization.  */
  if (sched_verbose)
    init_target_units ();

  /* ??? Add a NOTE after the last insn of the last basic block.  It is not
     known why this is done.  */
  insn = BLOCK_END (n_basic_blocks - 1);
  if (NEXT_INSN (insn) == 0
      || (GET_CODE (insn) != NOTE
	  && GET_CODE (insn) != CODE_LABEL
	  /* Don't emit a NOTE if it would end up between an unconditional
	     jump and a BARRIER.  */
	  && !(GET_CODE (insn) == JUMP_INSN
	       && GET_CODE (NEXT_INSN (insn)) == BARRIER)))
    emit_note_after (NOTE_INSN_DELETED, BLOCK_END (n_basic_blocks - 1));

  /* Compute INSN_REG_WEIGHT for all blocks.  We must do this before
     removing death notes.  */
  for (b = n_basic_blocks - 1; b >= 0; b--)
    find_insn_reg_weight (b);
  /* Remove all death notes from the subroutine.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      sbitmap_zero (blocks);
      for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
	SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);

      deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
    }

  /* Schedule every region in the subroutine.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    schedule_region (rgn);

  /* Update life analysis for the subroutine.  Do single block regions
     first so that we can verify that live_at_start didn't change.  Then
     do all other blocks.  */
  /* ??? There is an outside possibility that update_life_info, or more
     to the point propagate_block, could get called with non-zero flags
     more than once for one basic block.  This would be kinda bad if it
     were to happen, since REG_INFO would be accumulated twice for the
     block, and we'd have twice the REG_DEAD notes.

     I'm fairly certain that this _shouldn't_ happen, since I don't think
     that live_at_start should change at region heads.  Not sure what the
     best way to test for this kind of thing would be.  */
  allocate_reg_life_data ();
  compute_bb_for_insn (max_uid);

  any_large_regions = 0;
  sbitmap_ones (large_region_blocks);

  for (rgn = 0; rgn < nr_regions; rgn++)
    if (RGN_NR_BLOCKS (rgn) > 1)
      any_large_regions = 1;
    else
      {
	sbitmap_zero (blocks);
	SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
	RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);

	/* Don't update reg info after reload, since that affects
	   regs_ever_live, which should not change after reload.  */
	update_life_info (blocks, UPDATE_LIFE_LOCAL,
			  (reload_completed ? PROP_DEATH_NOTES
			   : PROP_DEATH_NOTES | PROP_REG_INFO));

#ifndef HAVE_conditional_execution
	/* ??? REG_DEAD notes only exist for unconditional deaths.  We need
	   a count of the conditional plus unconditional deaths for this to
	   work out.  */
	/* In the single block case, the count of registers that died should
	   not have changed during the schedule.  */
	if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
	  abort ();
#endif
      }

  if (any_large_regions)
    {
      update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
			PROP_DEATH_NOTES | PROP_REG_INFO);
    }

  /* Reposition the prologue and epilogue notes in case we moved the
     prologue/epilogue insns.  */
  if (reload_completed)
    reposition_prologue_and_epilogue_notes (get_insns ());

  /* Delete redundant line notes.  */
  if (write_symbols != NO_DEBUG)
    rm_redundant_line_notes ();

  if (sched_verbose)
    {
      if (reload_completed == 0 && flag_schedule_interblock)
	{
	  fprintf (dump,
		   "\n;; Procedure interblock/speculative motions == %d/%d \n",
		   nr_inter, nr_spec);
	}
      else
	{
	  if (nr_inter > 0)
	    abort ();
	}
      fprintf (dump, "\n\n");
    }
  end_alias_analysis ();

  if (true_dependency_cache)
    {
      free (true_dependency_cache);
      true_dependency_cache = NULL;
      free (anti_dependency_cache);
      anti_dependency_cache = NULL;
      free (output_dependency_cache);
      output_dependency_cache = NULL;
#ifdef ENABLE_CHECKING
      free (forward_dependency_cache);
      forward_dependency_cache = NULL;
#endif
    }
  free (rgn_table);
  free (rgn_bb_table);
  free (block_to_bb);
  free (containing_rgn);

  free (h_i_d);

  if (write_symbols != NO_DEBUG)
    free (line_note_head);

  sbitmap_free (blocks);
  sbitmap_free (large_region_blocks);

  free (deaths_in_region);
}
#endif /* INSN_SCHEDULING */