2 /* Perform branch target register load optimizations.
3 Copyright (C) 2001-2014 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
32 #include "insn-attr.h"
41 #include "diagnostic-core.h"
42 #include "tree-pass.h"
48 /* Target register optimizations - these are performed after reload. */
50 typedef struct btr_def_group_s
52 struct btr_def_group_s *next;
54 struct btr_def_s *members;
57 typedef struct btr_user_s
59 struct btr_user_s *next;
63 /* If INSN has a single use of a single branch register, then
64 USE points to it within INSN. If there is more than
65 one branch register use, or the use is in some way ambiguous,
69 int first_reaching_def;
70 char other_use_this_block;
73 /* btr_def structs appear on three lists:
74 1. A list of all btr_def structures (head is
75 ALL_BTR_DEFS, linked by the NEXT field).
76 2. A list of branch reg definitions per basic block (head is
77 BB_BTR_DEFS[i], linked by the NEXT_THIS_BB field).
78 3. A list of all branch reg definitions belonging to the same
79 group (head is in a BTR_DEF_GROUP struct, linked by
80 NEXT_THIS_GROUP field). */
82 typedef struct btr_def_s
84 struct btr_def_s *next_this_bb;
85 struct btr_def_s *next_this_group;
91 /* For a branch register setting insn that has a constant
92 source (i.e. a label), group links together all the
93 insns with the same source. For other branch register
94 setting insns, group is NULL. */
97 /* If this def has a reaching use which is not a simple use
98 in a branch instruction, then has_ambiguous_use will be true,
99 and we will not attempt to migrate this definition. */
100 char has_ambiguous_use;
101 /* live_range is an approximation to the true live range for this
102 def/use web, because it records the set of blocks that contain
103 the live range. There could be other live ranges for the same
104 branch register in that set of blocks, either in the block
105 containing the def (before the def), or in a block containing
106 a use (after the use). If there are such other live ranges, then
107 other_btr_uses_before_def or other_btr_uses_after_use must be set true
109 char other_btr_uses_before_def;
110 char other_btr_uses_after_use;
111 /* We set own_end when we have moved a definition into a dominator.
112 Thus, when a later combination removes this definition again, we know
113 to clear out btrs_live_at_end again. */
118 static int issue_rate;
120 static int basic_block_freq (const_basic_block);
121 static int insn_sets_btr_p (const rtx_insn *, int, int *);
122 static void find_btr_def_group (btr_def_group *, btr_def);
123 static btr_def add_btr_def (fibheap_t, basic_block, int, rtx_insn *,
124 unsigned int, int, btr_def_group *);
125 static btr_user new_btr_user (basic_block, int, rtx_insn *);
126 static void dump_hard_reg_set (HARD_REG_SET);
127 static void dump_btrs_live (int);
128 static void note_other_use_this_block (unsigned int, btr_user);
129 static void compute_defs_uses_and_gen (fibheap_t, btr_def *,btr_user *,
130 sbitmap *, sbitmap *, HARD_REG_SET *);
131 static void compute_kill (sbitmap *, sbitmap *, HARD_REG_SET *);
132 static void compute_out (sbitmap *bb_out, sbitmap *, sbitmap *, int);
133 static void link_btr_uses (btr_def *, btr_user *, sbitmap *, sbitmap *, int);
134 static void build_btr_def_use_webs (fibheap_t);
135 static int block_at_edge_of_live_range_p (int, btr_def);
136 static void clear_btr_from_live_range (btr_def def);
137 static void add_btr_to_live_range (btr_def, int);
138 static void augment_live_range (bitmap, HARD_REG_SET *, basic_block,
140 static int choose_btr (HARD_REG_SET);
141 static void combine_btr_defs (btr_def, HARD_REG_SET *);
142 static void btr_def_live_range (btr_def, HARD_REG_SET *);
143 static void move_btr_def (basic_block, int, btr_def, bitmap, HARD_REG_SET *);
144 static int migrate_btr_def (btr_def, int);
145 static void migrate_btr_defs (enum reg_class, int);
146 static int can_move_up (const_basic_block, const rtx_insn *, int);
147 static void note_btr_set (rtx, const_rtx, void *);
149 /* The following code performs code motion of target load instructions
150 (instructions that set branch target registers), to move them
151 forward away from the branch instructions and out of loops (or,
152 more generally, from a more frequently executed place to a less
153 frequently executed place).
154 Moving target load instructions further in front of the branch
155 instruction that uses the target register value means that the hardware
156 has a better chance of preloading the instructions at the branch
157 target by the time the branch is reached. This avoids bubbles
158 when a taken branch needs to flush out the pipeline.
159 Moving target load instructions out of loops means they are executed
162 /* An obstack to hold the def-use web data structures built up for
163 migrating branch target load instructions. */
164 static struct obstack migrate_btrl_obstack;
166 /* Array indexed by basic block number, giving the set of registers
167 live in that block. */
168 static HARD_REG_SET *btrs_live;
170 /* Array indexed by basic block number, giving the set of registers live at
171 the end of that block, including any uses by a final jump insn, if any. */
172 static HARD_REG_SET *btrs_live_at_end;
174 /* Set of all target registers that we are willing to allocate. */
175 static HARD_REG_SET all_btrs;
177 /* Provide lower and upper bounds for target register numbers, so that
178 we don't need to search through all the hard registers all the time. */
179 static int first_btr, last_btr;
183 /* Return an estimate of the frequency of execution of block bb. */
185 basic_block_freq (const_basic_block bb)
187 return bb->frequency;
190 /* If X references (sets or reads) any branch target register, return one
191 such register. If EXCLUDEP is set, disregard any references within
194 find_btr_use (rtx x, rtx *excludep = 0)
196 subrtx_ptr_iterator::array_type array;
197 FOR_EACH_SUBRTX_PTR (iter, array, &x, NONCONST)
201 iter.skip_subrtxes ();
206 && overlaps_hard_reg_set_p (all_btrs, GET_MODE (x), REGNO (x)))
213 /* Return true if INSN is an instruction that sets a target register.
214 If CHECK_CONST is true, only return true if the source is constant.
215 If such a set is found and REGNO is non-null, assign the register number
216 of the destination register to *REGNO. */
218 insn_sets_btr_p (const rtx_insn *insn, int check_const, int *regno)
222 if (NONJUMP_INSN_P (insn)
223 && (set = single_set (insn)))
225 rtx dest = SET_DEST (set);
226 rtx src = SET_SRC (set);
228 if (GET_CODE (dest) == SUBREG)
229 dest = XEXP (dest, 0);
232 && TEST_HARD_REG_BIT (all_btrs, REGNO (dest)))
234 gcc_assert (!find_btr_use (src));
236 if (!check_const || CONSTANT_P (src))
239 *regno = REGNO (dest);
247 /* Find the group that the target register definition DEF belongs
248 to in the list starting with *ALL_BTR_DEF_GROUPS. If no such
249 group exists, create one. Add DEF to the group. */
251 find_btr_def_group (btr_def_group *all_btr_def_groups, btr_def def)
253 if (insn_sets_btr_p (def->insn, 1, NULL))
255 btr_def_group this_group;
256 rtx def_src = SET_SRC (single_set (def->insn));
258 /* ?? This linear search is an efficiency concern, particularly
259 as the search will almost always fail to find a match. */
260 for (this_group = *all_btr_def_groups;
262 this_group = this_group->next)
263 if (rtx_equal_p (def_src, this_group->src))
268 this_group = XOBNEW (&migrate_btrl_obstack, struct btr_def_group_s);
269 this_group->src = def_src;
270 this_group->members = NULL;
271 this_group->next = *all_btr_def_groups;
272 *all_btr_def_groups = this_group;
274 def->group = this_group;
275 def->next_this_group = this_group->members;
276 this_group->members = def;
282 /* Create a new target register definition structure, for a definition in
283 block BB, instruction INSN, and insert it into ALL_BTR_DEFS. Return
284 the new definition. */
286 add_btr_def (fibheap_t all_btr_defs, basic_block bb, int insn_luid,
288 unsigned int dest_reg, int other_btr_uses_before_def,
289 btr_def_group *all_btr_def_groups)
291 btr_def this_def = XOBNEW (&migrate_btrl_obstack, struct btr_def_s);
293 this_def->luid = insn_luid;
294 this_def->insn = insn;
295 this_def->btr = dest_reg;
296 this_def->cost = basic_block_freq (bb);
297 this_def->has_ambiguous_use = 0;
298 this_def->other_btr_uses_before_def = other_btr_uses_before_def;
299 this_def->other_btr_uses_after_use = 0;
300 this_def->next_this_bb = NULL;
301 this_def->next_this_group = NULL;
302 this_def->uses = NULL;
303 this_def->live_range = NULL;
304 find_btr_def_group (all_btr_def_groups, this_def);
306 fibheap_insert (all_btr_defs, -this_def->cost, this_def);
310 "Found target reg definition: sets %u { bb %d, insn %d }%s priority %d\n",
311 dest_reg, bb->index, INSN_UID (insn),
312 (this_def->group ? "" : ":not const"), this_def->cost);
317 /* Create a new target register user structure, for a use in block BB,
318 instruction INSN. Return the new user. */
320 new_btr_user (basic_block bb, int insn_luid, rtx_insn *insn)
322 /* This instruction reads target registers. We need
323 to decide whether we can replace all target register
326 rtx *usep = find_btr_use (PATTERN (insn));
328 btr_user user = NULL;
332 int unambiguous_single_use;
334 /* We want to ensure that USE is the only use of a target
335 register in INSN, so that we know that to rewrite INSN to use
336 a different target register, all we have to do is replace USE. */
337 unambiguous_single_use = !find_btr_use (PATTERN (insn), usep);
338 if (!unambiguous_single_use)
341 use = usep ? *usep : NULL_RTX;
342 user = XOBNEW (&migrate_btrl_obstack, struct btr_user_s);
344 user->luid = insn_luid;
347 user->other_use_this_block = 0;
349 user->n_reaching_defs = 0;
350 user->first_reaching_def = -1;
354 fprintf (dump_file, "Uses target reg: { bb %d, insn %d }",
355 bb->index, INSN_UID (insn));
358 fprintf (dump_file, ": unambiguous use of reg %d\n",
365 /* Write the contents of S to the dump file. */
367 dump_hard_reg_set (HARD_REG_SET s)
370 for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
371 if (TEST_HARD_REG_BIT (s, reg))
372 fprintf (dump_file, " %d", reg);
375 /* Write the set of target regs live in block BB to the dump file. */
377 dump_btrs_live (int bb)
379 fprintf (dump_file, "BB%d live:", bb);
380 dump_hard_reg_set (btrs_live[bb]);
381 fprintf (dump_file, "\n");
384 /* REGNO is the number of a branch target register that is being used or
385 set. USERS_THIS_BB is a list of preceding branch target register users.
386 If any of them use the same register, set their other_use_this_block
389 note_other_use_this_block (unsigned int regno, btr_user users_this_bb)
393 for (user = users_this_bb; user != NULL; user = user->next)
394 if (user->use && REGNO (user->use) == regno)
395 user->other_use_this_block = 1;
399 btr_user users_this_bb;
400 HARD_REG_SET btrs_written_in_block;
401 HARD_REG_SET btrs_live_in_block;
406 /* Called via note_stores or directly to register stores into /
407 clobbers of a branch target register DEST that are not recognized as
408 straightforward definitions. DATA points to information about the
409 current basic block that needs updating. */
411 note_btr_set (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
413 defs_uses_info *info = (defs_uses_info *) data;
414 int regno, end_regno;
418 regno = REGNO (dest);
419 end_regno = END_HARD_REGNO (dest);
420 for (; regno < end_regno; regno++)
421 if (TEST_HARD_REG_BIT (all_btrs, regno))
423 note_other_use_this_block (regno, info->users_this_bb);
424 SET_HARD_REG_BIT (info->btrs_written_in_block, regno);
425 SET_HARD_REG_BIT (info->btrs_live_in_block, regno);
426 bitmap_and_compl (info->bb_gen, info->bb_gen,
427 info->btr_defset[regno - first_btr]);
432 compute_defs_uses_and_gen (fibheap_t all_btr_defs, btr_def *def_array,
433 btr_user *use_array, sbitmap *btr_defset,
434 sbitmap *bb_gen, HARD_REG_SET *btrs_written)
436 /* Scan the code building up the set of all defs and all uses.
437 For each target register, build the set of defs of that register.
438 For each block, calculate the set of target registers
439 written in that block.
440 Also calculate the set of btrs ever live in that block.
444 btr_def_group all_btr_def_groups = NULL;
447 bitmap_vector_clear (bb_gen, last_basic_block_for_fn (cfun));
448 for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
450 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
452 btr_def defs_this_bb = NULL;
457 info.users_this_bb = NULL;
458 info.bb_gen = bb_gen[i];
459 info.btr_defset = btr_defset;
461 CLEAR_HARD_REG_SET (info.btrs_live_in_block);
462 CLEAR_HARD_REG_SET (info.btrs_written_in_block);
463 for (reg = first_btr; reg <= last_btr; reg++)
464 if (TEST_HARD_REG_BIT (all_btrs, reg)
465 && REGNO_REG_SET_P (df_get_live_in (bb), reg))
466 SET_HARD_REG_BIT (info.btrs_live_in_block, reg);
468 for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb));
470 insn = NEXT_INSN (insn), insn_luid++)
475 int insn_uid = INSN_UID (insn);
477 if (insn_sets_btr_p (insn, 0, &regno))
479 btr_def def = add_btr_def (
480 all_btr_defs, bb, insn_luid, insn, regno,
481 TEST_HARD_REG_BIT (info.btrs_live_in_block, regno),
482 &all_btr_def_groups);
484 def_array[insn_uid] = def;
485 SET_HARD_REG_BIT (info.btrs_written_in_block, regno);
486 SET_HARD_REG_BIT (info.btrs_live_in_block, regno);
487 bitmap_and_compl (bb_gen[i], bb_gen[i],
488 btr_defset[regno - first_btr]);
489 bitmap_set_bit (bb_gen[i], insn_uid);
490 def->next_this_bb = defs_this_bb;
492 bitmap_set_bit (btr_defset[regno - first_btr], insn_uid);
493 note_other_use_this_block (regno, info.users_this_bb);
495 /* Check for the blockage emitted by expand_nl_goto_receiver. */
496 else if (cfun->has_nonlocal_label
497 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE)
501 /* Do the equivalent of calling note_other_use_this_block
502 for every target register. */
503 for (user = info.users_this_bb; user != NULL;
506 user->other_use_this_block = 1;
507 IOR_HARD_REG_SET (info.btrs_written_in_block, all_btrs);
508 IOR_HARD_REG_SET (info.btrs_live_in_block, all_btrs);
509 bitmap_clear (info.bb_gen);
513 if (find_btr_use (PATTERN (insn)))
515 btr_user user = new_btr_user (bb, insn_luid, insn);
517 use_array[insn_uid] = user;
519 SET_HARD_REG_BIT (info.btrs_live_in_block,
524 for (reg = first_btr; reg <= last_btr; reg++)
525 if (TEST_HARD_REG_BIT (all_btrs, reg)
526 && refers_to_regno_p (reg, reg + 1, user->insn,
529 note_other_use_this_block (reg,
531 SET_HARD_REG_BIT (info.btrs_live_in_block, reg);
533 note_stores (PATTERN (insn), note_btr_set, &info);
535 user->next = info.users_this_bb;
536 info.users_this_bb = user;
540 HARD_REG_SET *clobbered = &call_used_reg_set;
541 HARD_REG_SET call_saved;
542 rtx pat = PATTERN (insn);
545 /* Check for sibcall. */
546 if (GET_CODE (pat) == PARALLEL)
547 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
548 if (ANY_RETURN_P (XVECEXP (pat, 0, i)))
550 COMPL_HARD_REG_SET (call_saved,
552 clobbered = &call_saved;
555 for (regno = first_btr; regno <= last_btr; regno++)
556 if (TEST_HARD_REG_BIT (*clobbered, regno))
557 note_btr_set (regno_reg_rtx[regno], NULL_RTX, &info);
563 COPY_HARD_REG_SET (btrs_live[i], info.btrs_live_in_block);
564 COPY_HARD_REG_SET (btrs_written[i], info.btrs_written_in_block);
566 REG_SET_TO_HARD_REG_SET (btrs_live_at_end[i], df_get_live_out (bb));
567 /* If this block ends in a jump insn, add any uses or even clobbers
568 of branch target registers that it might have. */
569 for (insn = BB_END (bb); insn != BB_HEAD (bb) && ! INSN_P (insn); )
570 insn = PREV_INSN (insn);
571 /* ??? for the fall-through edge, it would make sense to insert the
572 btr set on the edge, but that would require to split the block
573 early on so that we can distinguish between dominance from the fall
574 through edge - which can use the call-clobbered registers - from
575 dominance by the throw edge. */
576 if (can_throw_internal (insn))
580 COPY_HARD_REG_SET (tmp, call_used_reg_set);
581 AND_HARD_REG_SET (tmp, all_btrs);
582 IOR_HARD_REG_SET (btrs_live_at_end[i], tmp);
585 if (can_throw || JUMP_P (insn))
589 for (regno = first_btr; regno <= last_btr; regno++)
590 if (refers_to_regno_p (regno, regno+1, insn, NULL))
591 SET_HARD_REG_BIT (btrs_live_at_end[i], regno);
600 compute_kill (sbitmap *bb_kill, sbitmap *btr_defset,
601 HARD_REG_SET *btrs_written)
606 /* For each basic block, form the set BB_KILL - the set
607 of definitions that the block kills. */
608 bitmap_vector_clear (bb_kill, last_basic_block_for_fn (cfun));
609 for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
611 for (regno = first_btr; regno <= last_btr; regno++)
612 if (TEST_HARD_REG_BIT (all_btrs, regno)
613 && TEST_HARD_REG_BIT (btrs_written[i], regno))
614 bitmap_ior (bb_kill[i], bb_kill[i],
615 btr_defset[regno - first_btr]);
620 compute_out (sbitmap *bb_out, sbitmap *bb_gen, sbitmap *bb_kill, int max_uid)
622 /* Perform iterative dataflow:
623 Initially, for all blocks, BB_OUT = BB_GEN.
625 BB_IN = union over predecessors of BB_OUT(pred)
626 BB_OUT = (BB_IN - BB_KILL) + BB_GEN
627 Iterate until the bb_out sets stop growing. */
630 sbitmap bb_in = sbitmap_alloc (max_uid);
632 for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
633 bitmap_copy (bb_out[i], bb_gen[i]);
639 for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
641 bitmap_union_of_preds (bb_in, bb_out, BASIC_BLOCK_FOR_FN (cfun, i));
642 changed |= bitmap_ior_and_compl (bb_out[i], bb_gen[i],
646 sbitmap_free (bb_in);
650 link_btr_uses (btr_def *def_array, btr_user *use_array, sbitmap *bb_out,
651 sbitmap *btr_defset, int max_uid)
654 sbitmap reaching_defs = sbitmap_alloc (max_uid);
656 /* Link uses to the uses lists of all of their reaching defs.
657 Count up the number of reaching defs of each use. */
658 for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
660 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
664 bitmap_union_of_preds (reaching_defs, bb_out, BASIC_BLOCK_FOR_FN (cfun, i));
665 for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb));
667 insn = NEXT_INSN (insn))
671 int insn_uid = INSN_UID (insn);
673 btr_def def = def_array[insn_uid];
674 btr_user user = use_array[insn_uid];
677 /* Remove all reaching defs of regno except
679 bitmap_and_compl (reaching_defs, reaching_defs,
680 btr_defset[def->btr - first_btr]);
681 bitmap_set_bit (reaching_defs, insn_uid);
686 /* Find all the reaching defs for this use. */
687 sbitmap reaching_defs_of_reg = sbitmap_alloc (max_uid);
688 unsigned int uid = 0;
689 sbitmap_iterator sbi;
693 reaching_defs_of_reg,
695 btr_defset[REGNO (user->use) - first_btr]);
700 bitmap_clear (reaching_defs_of_reg);
701 for (reg = first_btr; reg <= last_btr; reg++)
702 if (TEST_HARD_REG_BIT (all_btrs, reg)
703 && refers_to_regno_p (reg, reg + 1, user->insn,
705 bitmap_or_and (reaching_defs_of_reg,
706 reaching_defs_of_reg,
708 btr_defset[reg - first_btr]);
710 EXECUTE_IF_SET_IN_BITMAP (reaching_defs_of_reg, 0, uid, sbi)
712 btr_def def = def_array[uid];
714 /* We now know that def reaches user. */
718 "Def in insn %d reaches use in insn %d\n",
721 user->n_reaching_defs++;
723 def->has_ambiguous_use = 1;
724 if (user->first_reaching_def != -1)
725 { /* There is more than one reaching def. This is
726 a rare case, so just give up on this def/use
727 web when it occurs. */
728 def->has_ambiguous_use = 1;
729 def_array[user->first_reaching_def]
730 ->has_ambiguous_use = 1;
733 "(use %d has multiple reaching defs)\n",
737 user->first_reaching_def = uid;
738 if (user->other_use_this_block)
739 def->other_btr_uses_after_use = 1;
740 user->next = def->uses;
743 sbitmap_free (reaching_defs_of_reg);
750 for (regno = first_btr; regno <= last_btr; regno++)
751 if (TEST_HARD_REG_BIT (all_btrs, regno)
752 && TEST_HARD_REG_BIT (call_used_reg_set, regno))
753 bitmap_and_compl (reaching_defs, reaching_defs,
754 btr_defset[regno - first_btr]);
759 sbitmap_free (reaching_defs);
763 build_btr_def_use_webs (fibheap_t all_btr_defs)
765 const int max_uid = get_max_uid ();
766 btr_def *def_array = XCNEWVEC (btr_def, max_uid);
767 btr_user *use_array = XCNEWVEC (btr_user, max_uid);
768 sbitmap *btr_defset = sbitmap_vector_alloc (
769 (last_btr - first_btr) + 1, max_uid);
770 sbitmap *bb_gen = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
772 HARD_REG_SET *btrs_written = XCNEWVEC (HARD_REG_SET,
773 last_basic_block_for_fn (cfun));
777 bitmap_vector_clear (btr_defset, (last_btr - first_btr) + 1);
779 compute_defs_uses_and_gen (all_btr_defs, def_array, use_array, btr_defset,
780 bb_gen, btrs_written);
782 bb_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid);
783 compute_kill (bb_kill, btr_defset, btrs_written);
786 bb_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid);
787 compute_out (bb_out, bb_gen, bb_kill, max_uid);
789 sbitmap_vector_free (bb_gen);
790 sbitmap_vector_free (bb_kill);
792 link_btr_uses (def_array, use_array, bb_out, btr_defset, max_uid);
794 sbitmap_vector_free (bb_out);
795 sbitmap_vector_free (btr_defset);
800 /* Return true if basic block BB contains the start or end of the
801 live range of the definition DEF, AND there are other live
802 ranges of the same target register that include BB. */
804 block_at_edge_of_live_range_p (int bb, btr_def def)
806 if (def->other_btr_uses_before_def
807 && BASIC_BLOCK_FOR_FN (cfun, bb) == def->bb)
809 else if (def->other_btr_uses_after_use)
812 for (user = def->uses; user != NULL; user = user->next)
813 if (BASIC_BLOCK_FOR_FN (cfun, bb) == user->bb)
819 /* We are removing the def/use web DEF. The target register
820 used in this web is therefore no longer live in the live range
821 of this web, so remove it from the live set of all basic blocks
822 in the live range of the web.
823 Blocks at the boundary of the live range may contain other live
824 ranges for the same target register, so we have to be careful
825 to remove the target register from the live set of these blocks
826 only if they do not contain other live ranges for the same register. */
828 clear_btr_from_live_range (btr_def def)
833 EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi)
835 if ((!def->other_btr_uses_before_def
836 && !def->other_btr_uses_after_use)
837 || !block_at_edge_of_live_range_p (bb, def))
839 CLEAR_HARD_REG_BIT (btrs_live[bb], def->btr);
840 CLEAR_HARD_REG_BIT (btrs_live_at_end[bb], def->btr);
846 CLEAR_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr);
850 /* We are adding the def/use web DEF. Add the target register used
851 in this web to the live set of all of the basic blocks that contain
852 the live range of the web.
853 If OWN_END is set, also show that the register is live from our
854 definitions at the end of the basic block where it is defined. */
856 add_btr_to_live_range (btr_def def, int own_end)
861 EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi)
863 SET_HARD_REG_BIT (btrs_live[bb], def->btr);
864 SET_HARD_REG_BIT (btrs_live_at_end[bb], def->btr);
870 SET_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr);
875 /* Update a live range to contain the basic block NEW_BLOCK, and all
876 blocks on paths between the existing live range and NEW_BLOCK.
877 HEAD is a block contained in the existing live range that dominates
878 all other blocks in the existing live range.
879 Also add to the set BTRS_LIVE_IN_RANGE all target registers that
880 are live in the blocks that we add to the live range.
881 If FULL_RANGE is set, include the full live range of NEW_BB;
882 otherwise, if NEW_BB dominates HEAD_BB, only add registers that
883 are live at the end of NEW_BB for NEW_BB itself.
884 It is a precondition that either NEW_BLOCK dominates HEAD, or
885 HEAD dominates NEW_BLOCK. This is used to speed up the
886 implementation of this function. */
888 augment_live_range (bitmap live_range, HARD_REG_SET *btrs_live_in_range,
889 basic_block head_bb, basic_block new_bb, int full_range)
891 basic_block *worklist, *tos;
893 tos = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) + 1);
895 if (dominated_by_p (CDI_DOMINATORS, new_bb, head_bb))
897 if (new_bb == head_bb)
900 IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_bb->index]);
910 int new_block = new_bb->index;
912 gcc_assert (dominated_by_p (CDI_DOMINATORS, head_bb, new_bb));
914 IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[head_bb->index]);
915 bitmap_set_bit (live_range, new_block);
916 /* A previous btr migration could have caused a register to be
917 live just at the end of new_block which we need in full, so
918 use btrs_live_at_end even if full_range is set. */
919 IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live_at_end[new_block]);
921 IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_block]);
925 "Adding end of block %d and rest of %d to live range\n",
926 new_block, head_bb->index);
927 fprintf (dump_file,"Now live btrs are ");
928 dump_hard_reg_set (*btrs_live_in_range);
929 fprintf (dump_file, "\n");
931 FOR_EACH_EDGE (e, ei, head_bb->preds)
935 while (tos != worklist)
937 basic_block bb = *--tos;
938 if (!bitmap_bit_p (live_range, bb->index))
943 bitmap_set_bit (live_range, bb->index);
944 IOR_HARD_REG_SET (*btrs_live_in_range,
945 btrs_live[bb->index]);
946 /* A previous btr migration could have caused a register to be
947 live just at the end of a block which we need in full. */
948 IOR_HARD_REG_SET (*btrs_live_in_range,
949 btrs_live_at_end[bb->index]);
953 "Adding block %d to live range\n", bb->index);
954 fprintf (dump_file,"Now live btrs are ");
955 dump_hard_reg_set (*btrs_live_in_range);
956 fprintf (dump_file, "\n");
959 FOR_EACH_EDGE (e, ei, bb->preds)
961 basic_block pred = e->src;
962 if (!bitmap_bit_p (live_range, pred->index))
971 /* Return the most desirable target register that is not in
972 the set USED_BTRS. */
974 choose_btr (HARD_REG_SET used_btrs)
978 if (!hard_reg_set_subset_p (all_btrs, used_btrs))
979 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
981 #ifdef REG_ALLOC_ORDER
982 int regno = reg_alloc_order[i];
986 if (TEST_HARD_REG_BIT (all_btrs, regno)
987 && !TEST_HARD_REG_BIT (used_btrs, regno))
993 /* Calculate the set of basic blocks that contain the live range of
995 Also calculate the set of target registers that are live at some time
996 in this live range, but ignore the live range represented by DEF
997 when calculating this set. */
999 btr_def_live_range (btr_def def, HARD_REG_SET *btrs_live_in_range)
1001 if (!def->live_range)
1005 def->live_range = BITMAP_ALLOC (NULL);
1007 bitmap_set_bit (def->live_range, def->bb->index);
1008 COPY_HARD_REG_SET (*btrs_live_in_range,
1009 (flag_btr_bb_exclusive
1010 ? btrs_live : btrs_live_at_end)[def->bb->index]);
1012 for (user = def->uses; user != NULL; user = user->next)
1013 augment_live_range (def->live_range, btrs_live_in_range,
1015 (flag_btr_bb_exclusive
1016 || user->insn != BB_END (def->bb)
1017 || !JUMP_P (user->insn)));
1021 /* def->live_range is accurate, but we need to recompute
1022 the set of target registers live over it, because migration
1023 of other PT instructions may have affected it.
1026 unsigned def_bb = flag_btr_bb_exclusive ? -1 : def->bb->index;
1029 CLEAR_HARD_REG_SET (*btrs_live_in_range);
1030 EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi)
1032 IOR_HARD_REG_SET (*btrs_live_in_range,
1034 ? btrs_live_at_end : btrs_live) [bb]);
1037 if (!def->other_btr_uses_before_def &&
1038 !def->other_btr_uses_after_use)
1039 CLEAR_HARD_REG_BIT (*btrs_live_in_range, def->btr);
1042 /* Merge into the def/use web DEF any other def/use webs in the same
1043 group that are dominated by DEF, provided that there is a target
1044 register available to allocate to the merged web. */
1046 combine_btr_defs (btr_def def, HARD_REG_SET *btrs_live_in_range)
1050 for (other_def = def->group->members;
1052 other_def = other_def->next_this_group)
1054 if (other_def != def
1055 && other_def->uses != NULL
1056 && ! other_def->has_ambiguous_use
1057 && dominated_by_p (CDI_DOMINATORS, other_def->bb, def->bb))
1059 /* def->bb dominates the other def, so def and other_def could
1061 /* Merge their live ranges, and get the set of
1062 target registers live over the merged range. */
1064 HARD_REG_SET combined_btrs_live;
1065 bitmap combined_live_range = BITMAP_ALLOC (NULL);
1068 if (other_def->live_range == NULL)
1070 HARD_REG_SET dummy_btrs_live_in_range;
1071 btr_def_live_range (other_def, &dummy_btrs_live_in_range);
1073 COPY_HARD_REG_SET (combined_btrs_live, *btrs_live_in_range);
1074 bitmap_copy (combined_live_range, def->live_range);
1076 for (user = other_def->uses; user != NULL; user = user->next)
1077 augment_live_range (combined_live_range, &combined_btrs_live,
1079 (flag_btr_bb_exclusive
1080 || user->insn != BB_END (def->bb)
1081 || !JUMP_P (user->insn)));
1083 btr = choose_btr (combined_btrs_live);
1086 /* We can combine them. */
1089 "Combining def in insn %d with def in insn %d\n",
1090 INSN_UID (other_def->insn), INSN_UID (def->insn));
1093 user = other_def->uses;
1094 while (user != NULL)
1096 btr_user next = user->next;
1098 user->next = def->uses;
1102 /* Combining def/use webs can make target registers live
1103 after uses where they previously were not. This means
1104 some REG_DEAD notes may no longer be correct. We could
1105 be more precise about this if we looked at the combined
1106 live range, but here I just delete any REG_DEAD notes
1107 in case they are no longer correct. */
1108 for (user = def->uses; user != NULL; user = user->next)
1109 remove_note (user->insn,
1110 find_regno_note (user->insn, REG_DEAD,
1111 REGNO (user->use)));
1112 clear_btr_from_live_range (other_def);
1113 other_def->uses = NULL;
1114 bitmap_copy (def->live_range, combined_live_range);
1115 if (other_def->btr == btr && other_def->other_btr_uses_after_use)
1116 def->other_btr_uses_after_use = 1;
1117 COPY_HARD_REG_SET (*btrs_live_in_range, combined_btrs_live);
1119 /* Delete the old target register initialization. */
1120 delete_insn (other_def->insn);
1123 BITMAP_FREE (combined_live_range);
1128 /* Move the definition DEF from its current position to basic
1129 block NEW_DEF_BB, and modify it to use branch target register BTR.
1130 Delete the old defining insn, and insert a new one in NEW_DEF_BB.
1131 Update all reaching uses of DEF in the RTL to use BTR.
1132 If this new position means that other defs in the
1133 same group can be combined with DEF then combine them.

   LIVE_RANGE is copied into DEF->live_range before combining, and
   BTRS_LIVE_IN_RANGE is handed on to combine_btr_defs.

   NOTE(review): the embedded numbering in this listing skips lines, and
   several names used below (set, src, btr_rtx, new_insn, user) have no
   visible declaration, so some original lines are missing here.  */
1135 move_btr_def (basic_block new_def_bb, int btr, btr_def def, bitmap live_range,
1136 HARD_REG_SET *btrs_live_in_range)
1138 /* We can move the instruction.
1139 Set a target register in block NEW_DEF_BB to the value
1140 needed for this target register definition.
1141 Replace all uses of the old target register definition by
1142 uses of the new definition. Delete the old definition. */
1143 basic_block b = new_def_bb;
1144 rtx_insn *insp = BB_HEAD (b);
1145 rtx_insn *old_insn = def->insn;
1149 enum machine_mode btr_mode;
1154 fprintf(dump_file, "migrating to basic block %d, using reg %d\n",
1155 new_def_bb->index, btr);
/* Re-home DEF: it is first taken out of the live-range bookkeeping
   (presumably for its old position; confirm against
   clear_btr_from_live_range), then its block, cost (the frequency of
   the new block) and live range are rewritten before any combining
   with other defs is attempted.  */
1157 clear_btr_from_live_range (def);
1159 def->bb = new_def_bb;
1161 def->cost = basic_block_freq (new_def_bb);
1162 bitmap_copy (def->live_range, live_range);
1163 combine_btr_defs (def, btrs_live_in_range);
/* Record whether BTR is already in the live set kept for block B.  */
1165 def->other_btr_uses_before_def
1166 = TEST_HARD_REG_BIT (btrs_live[b->index], btr) ? 1 : 0;
1167 add_btr_to_live_range (def, 1);
1169 insp = NEXT_INSN (insp);
1170 /* N.B.: insp is expected to be NOTE_INSN_BASIC_BLOCK now. Some
1171 optimizations can result in insp being both first and last insn of
1173 /* ?? some assertions to check that insp is sensible? */
/* When BTR is already live in B the insertion point is looked for at
   the end of the block instead: scan back from BB_END to the last real
   insn (asserting that BB_HEAD is never passed) and step in front of it
   when it is a jump or may throw internally.  */
1175 if (def->other_btr_uses_before_def)
1178 for (insp = BB_END (b); ! INSN_P (insp); insp = PREV_INSN (insp))
1179 gcc_assert (insp != BB_HEAD (b));
1181 if (JUMP_P (insp) || can_throw_internal (insp))
1182 insp = PREV_INSN (insp);
/* Build the replacement move: same SET_SRC as the old insn, with hard
   register BTR as destination in the mode of the old SET_DEST.  */
1185 set = single_set (old_insn);
1186 src = SET_SRC (set);
1187 btr_mode = GET_MODE (SET_DEST (set));
1188 btr_rtx = gen_rtx_REG (btr_mode, btr);
1190 new_insn = as_a <rtx_insn *> (gen_move_insn (btr_rtx, src));
1192 /* Insert target register initialization at head of basic block. */
1193 def->insn = emit_insn_after (new_insn, insp);
/* Flag BTR as ever-live for the dataflow framework.  */
1195 df_set_regs_ever_live (btr, true);
1198 fprintf (dump_file, "New pt is insn %d, inserted after insn %d\n",
1199 INSN_UID (def->insn), INSN_UID (insp));
1201 /* Delete the old target register initialization. */
1202 delete_insn (old_insn);
1204 /* Replace each use of the old target register by a use of the new target
1206 for (user = def->uses; user != NULL; user = user->next)
1208 /* Some extra work here to ensure consistent modes, because
1209 it seems that a target register REG rtx can be given a different
1210 mode depending on the context (surely that should not be
1212 rtx replacement_rtx;
1213 if (GET_MODE (user->use) == GET_MODE (btr_rtx)
1214 || GET_MODE (user->use) == VOIDmode)
1215 replacement_rtx = btr_rtx;
1217 replacement_rtx = gen_rtx_REG (GET_MODE (user->use), btr);
1218 validate_replace_rtx (user->use, replacement_rtx, user->insn);
1219 user->use = replacement_rtx;
1223 /* We anticipate intra-block scheduling to be done. See if INSN could move
1224 up within BB by N_INSNS.

   Steps backwards from INSN with PREV_INSN until BB_HEAD (BB) is reached
   or N_INSNS is no longer positive, and returns nonzero iff N_INSNS ended
   up at or below zero.

   NOTE(review): the statement that decrements N_INSNS is not visible in
   this listing (the embedded numbering skips lines).  */
1226 can_move_up (const_basic_block bb, const rtx_insn *insn, int n_insns)
1228 while (insn != BB_HEAD (bb) && n_insns > 0)
1230 insn = PREV_INSN (insn);
1231 /* ??? What if we have an anti-dependency that actually prevents the
1232 scheduler from doing the move? We'd like to re-allocate the register,
1233 but not necessarily put the load into another basic block. */
1237 return n_insns <= 0;
1240 /* Attempt to migrate the target register definition DEF to an
1241 earlier point in the flowgraph.
1243 It is a precondition of this function that DEF is migratable:
1244 i.e. it has a constant source, and all uses are unambiguous.
1246 Only migrations that reduce the cost of DEF will be made.
1247 MIN_COST is the lower bound on the cost of the DEF after migration.
1248 If we migrate DEF so that its cost falls below MIN_COST,
1249 then we do not attempt to migrate further. The idea is that
1250 we migrate definitions in a priority order based on their cost,
1251 when the cost of this definition falls below MIN_COST, then
1252 there is another definition with cost == MIN_COST which now
1253 has a higher priority than this definition.
1255 Return nonzero if there may be benefit from attempting to
1256 migrate this DEF further (i.e. we have reduced the cost below
1257 MIN_COST, but we may be able to reduce it further).
1258 Return zero if no further migration is possible.

   Outline, as far as this listing shows: a def with no group or with an
   ambiguous use is reported as not migratable.  Otherwise a working copy
   of DEF->live_range is made, the latency of DEF is taken as issue_rate
   (scaled by insn_default_latency when INSN_SCHEDULING is defined), and
   btr_used_near_def is set if a use later in the same block as DEF
   falls inside that latency and can_move_up says DEF cannot be hoisted
   far enough.  The immediate dominators of DEF->bb are then tried in
   turn until the entry block is reached, give_up is set, or DEF->cost
   drops below MIN_COST.  The working bitmap is freed at the end.

   NOTE(review): several declarations, braces and return statements are
   not visible in this listing (the embedded numbering skips lines).  */
1260 migrate_btr_def (btr_def def, int min_cost)
1263 HARD_REG_SET btrs_live_in_range;
1264 int btr_used_near_def = 0;
1265 int def_basic_block_freq;
1266 basic_block attempt;
1274 "Attempting to migrate pt from insn %d (cost = %d, min_cost = %d) ... ",
1275 INSN_UID (def->insn), def->cost, min_cost);
1277 if (!def->group || def->has_ambiguous_use)
1278 /* These defs are not migratable. */
1281 fprintf (dump_file, "it's not migratable\n");
1286 /* We have combined this def with another in the same group, so
1287 no need to consider it further.
1291 fprintf (dump_file, "it's already combined with another pt\n");
1295 btr_def_live_range (def, &btrs_live_in_range);
1296 live_range = BITMAP_ALLOC (NULL);
1297 bitmap_copy (live_range, def->live_range);
1299 #ifdef INSN_SCHEDULING
1300 def_latency = insn_default_latency (def->insn) * issue_rate;
1302 def_latency = issue_rate;
1305 for (user = def->uses; user != NULL; user = user->next)
1307 if (user->bb == def->bb
1308 && user->luid > def->luid
1309 && (def->luid + def_latency) > user->luid
1310 && ! can_move_up (def->bb, def->insn,
1311 (def->luid + def_latency) - user->luid))
1313 btr_used_near_def = 1;
1318 def_basic_block_freq = basic_block_freq (def->bb);
1320 for (attempt = get_immediate_dominator (CDI_DOMINATORS, def->bb);
1321 !give_up && attempt && attempt != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1322 && def->cost >= min_cost;
1323 attempt = get_immediate_dominator (CDI_DOMINATORS, attempt))
1325 /* Try to move the instruction that sets the target register into
1326 basic block ATTEMPT. */
1327 int try_freq = basic_block_freq (attempt);
1331 /* If ATTEMPT has abnormal edges, skip it. */
1332 FOR_EACH_EDGE (e, ei, attempt->succs)
1333 if (e->flags & EDGE_COMPLEX)
1339 fprintf (dump_file, "trying block %d ...", attempt->index);
/* Only worthwhile if ATTEMPT runs less often than the block DEF is in
   now, or equally often while a use sits within the latency of DEF.  */
1341 if (try_freq < def_basic_block_freq
1342 || (try_freq == def_basic_block_freq && btr_used_near_def))
/* Presumably grows the working live range from DEF->bb up to ATTEMPT
   and updates the set of branch registers live across it; confirm
   against augment_live_range.  */
1345 augment_live_range (live_range, &btrs_live_in_range, def->bb, attempt,
1346 flag_btr_bb_exclusive);
1349 fprintf (dump_file, "Now btrs live in range are: ");
1350 dump_hard_reg_set (btrs_live_in_range);
1351 fprintf (dump_file, "\n");
1353 btr = choose_btr (btrs_live_in_range);
/* The test on the result of choose_btr is not visible in this listing.
   After a move, the working range is resynchronised with the range now
   stored in DEF, the near-use flag is cleared, and the reference
   frequency is recomputed for the new block of DEF.  */
1356 move_btr_def (attempt, btr, def, live_range, &btrs_live_in_range);
1357 bitmap_copy (live_range, def->live_range);
1358 btr_used_near_def = 0;
1360 def_basic_block_freq = basic_block_freq (def->bb);
1364 /* There are no free target registers available to move
1365 this far forward, so give up */
1369 "giving up because there are no free target registers\n");
1378 fprintf (dump_file, "failed to move\n");
1380 BITMAP_FREE (live_range);
1384 /* Attempt to move instructions that set target registers earlier
1385 in the flowgraph, away from their corresponding uses.

   BTR_CLASS is the register class whose members are candidate branch
   target registers.  When ALLOW_CALLEE_SAVE is zero, a register of that
   class is only admitted if it is call-used or already ever-live.

   NOTE(review): local declarations, dump_file guards and braces are not
   visible in this listing (the embedded numbering skips lines).  */
1387 migrate_btr_defs (enum reg_class btr_class, int allow_callee_save)
1389 fibheap_t all_btr_defs = fibheap_new ();
1392 gcc_obstack_init (&migrate_btrl_obstack);
/* Dump count, loop depth and immediate dominator for every non-fixed
   basic block; the fprintf call line itself is not visible here.  */
1397 for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++)
1399 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
1401 "Basic block %d: count = %" PRId64
1402 " loop-depth = %d idom = %d\n",
1403 i, (int64_t) bb->count, bb_loop_depth (bb),
1404 get_immediate_dominator (CDI_DOMINATORS, bb)->index);
/* Collect in ALL_BTRS every hard register in BTR_CLASS that passes the
   callee-save filter.  The update of first_btr is not visible here.  */
1408 CLEAR_HARD_REG_SET (all_btrs);
1409 for (first_btr = -1, reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
1410 if (TEST_HARD_REG_BIT (reg_class_contents[(int) btr_class], reg)
1411 && (allow_callee_save || call_used_regs[reg]
1412 || df_regs_ever_live_p (reg)))
1414 SET_HARD_REG_BIT (all_btrs, reg);
/* One HARD_REG_SET per basic block index for each of the two arrays.  */
1420 btrs_live = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun));
1421 btrs_live_at_end = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun));
1423 build_btr_def_use_webs (all_btr_defs);
/* Work through the queue.  Re-insertion below uses the negated cost as
   key, so extract_min hands back the costliest def first, and MIN_COST
   is the un-negated key of whatever is now at the front of the queue.
   A def is put back when migrate_btr_def returns nonzero.
   NOTE(review): the BITMAP_FREE of the live range presumably belongs to
   the other branch; the else line is not visible.  */
1425 while (!fibheap_empty (all_btr_defs))
1427 btr_def def = (btr_def) fibheap_extract_min (all_btr_defs);
1428 int min_cost = -fibheap_min_key (all_btr_defs);
1429 if (migrate_btr_def (def, min_cost))
1431 fibheap_insert (all_btr_defs, -def->cost, (void *) def);
1435 "Putting insn %d back on queue with priority %d\n",
1436 INSN_UID (def->insn), def->cost);
1440 BITMAP_FREE (def->live_range);
/* Release the end-of-block live sets, the obstack and the queue.  */
1444 free (btrs_live_at_end);
1445 obstack_free (&migrate_btrl_obstack, NULL);
1446 fibheap_delete (all_btr_defs);
/* Driver shared by both pass instances.  Asks the target for its branch
   target register class and, judging by the guard on NO_REGS, does the
   work only when there is one.  AFTER_PROLOGUE_EPILOGUE_GEN is passed
   to the target hook branch_target_register_callee_saved, whose result
   becomes the allow_callee_save argument of migrate_btr_defs.

   NOTE(review): braces and some statements are not visible in this
   listing (the embedded numbering skips lines).  */
1450 branch_target_load_optimize (bool after_prologue_epilogue_gen)
1452 enum reg_class klass
1453 = (enum reg_class) targetm.branch_target_register_class ();
1454 if (klass != NO_REGS)
1456 /* Initialize issue_rate. */
1457 if (targetm.sched.issue_rate)
1458 issue_rate = targetm.sched.issue_rate ();
/* Before prologue and epilogue generation the CFG is cleaned up first,
   expensively only when optimizing.  */
1462 if (!after_prologue_epilogue_gen)
1464 /* Build the CFG for migrate_btr_defs. */
1466 /* This may or may not be needed, depending on where we
1468 cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
1474 /* Dominator info is also needed for migrate_btr_def. */
1475 calculate_dominance_info (CDI_DOMINATORS);
1476 migrate_btr_defs (klass,
1477 (targetm.branch_target_register_callee_saved
1478 (after_prologue_epilogue_gen)));
/* The dominator information computed above is released again here.  */
1480 free_dominance_info (CDI_DOMINATORS);
/* Static descriptor handed to the rtl_opt_pass constructor of the first
   pass instance: an RTL pass with no optinfo group, no timevar, and no
   required, provided or destroyed properties or TODO flags.
   NOTE(review): the pass name field is not visible in this listing.  */
1486 const pass_data pass_data_branch_target_load_optimize1 =
1488 RTL_PASS, /* type */
1490 OPTGROUP_NONE, /* optinfo_flags */
1491 TV_NONE, /* tv_id */
1492 0, /* properties_required */
1493 0, /* properties_provided */
1494 0, /* properties_destroyed */
1495 0, /* todo_flags_start */
1496 0, /* todo_flags_finish */
/* First instance of the pass.  It is gated on
   flag_branch_target_load_optimize, and its execute method calls
   branch_target_load_optimize with epilogue_completed.  */
1499 class pass_branch_target_load_optimize1 : public rtl_opt_pass
1502 pass_branch_target_load_optimize1 (gcc::context *ctxt)
1503 : rtl_opt_pass (pass_data_branch_target_load_optimize1, ctxt)
1506 /* opt_pass methods: */
1507 virtual bool gate (function *) { return flag_branch_target_load_optimize; }
1508 virtual unsigned int execute (function *)
1510 branch_target_load_optimize (epilogue_completed);
1514 }; // class pass_branch_target_load_optimize1
/* Factory for the first pass instance: returns a newly allocated
   pass_branch_target_load_optimize1 constructed with CTXT.  */
1519 make_pass_branch_target_load_optimize1 (gcc::context *ctxt)
1521 return new pass_branch_target_load_optimize1 (ctxt);
/* Static descriptor handed to the rtl_opt_pass constructor of the
   second pass instance.  The visible fields match those of the first
   descriptor: RTL pass, no optinfo group, no timevar, all property and
   TODO fields zero.
   NOTE(review): the pass name field is not visible in this listing.  */
1527 const pass_data pass_data_branch_target_load_optimize2 =
1529 RTL_PASS, /* type */
1531 OPTGROUP_NONE, /* optinfo_flags */
1532 TV_NONE, /* tv_id */
1533 0, /* properties_required */
1534 0, /* properties_provided */
1535 0, /* properties_destroyed */
1536 0, /* todo_flags_start */
1537 0, /* todo_flags_finish */
/* Second instance of the pass.  Its gate additionally requires
   optimize > 0 alongside flag_branch_target_load_optimize2.  The
   execute method is only declared here and is defined out of line.  */
1540 class pass_branch_target_load_optimize2 : public rtl_opt_pass
1543 pass_branch_target_load_optimize2 (gcc::context *ctxt)
1544 : rtl_opt_pass (pass_data_branch_target_load_optimize2, ctxt)
1547 /* opt_pass methods: */
1548 virtual bool gate (function *)
1550 return (optimize > 0 && flag_branch_target_load_optimize2);
1553 virtual unsigned int execute (function *);
1555 }; // class pass_branch_target_load_optimize2
/* Body of the second pass instance.  A warning is issued when both
   flag_branch_target_load_optimize and flag_branch_target_load_optimize2
   are set, after which branch_target_load_optimize is called with
   epilogue_completed.
   NOTE(review): the static flag WARNED suggests the warning is meant to
   fire once only, but the rest of the condition and any update of the
   flag are not visible in this listing.  */
1558 pass_branch_target_load_optimize2::execute (function *)
1560 static int warned = 0;
1562 /* Leave this a warning for now so that it is possible to experiment
1563 with running this pass twice. In 3.6, we should either make this
1564 an error, or use separate dump files. */
1565 if (flag_branch_target_load_optimize
1566 && flag_branch_target_load_optimize2
1569 warning (0, "branch target register load optimization is not intended "
1575 branch_target_load_optimize (epilogue_completed);
/* Factory for the second pass instance: returns a newly allocated
   pass_branch_target_load_optimize2 constructed with CTXT.  */
1582 make_pass_branch_target_load_optimize2 (gcc::context *ctxt)
1584 return new pass_branch_target_load_optimize2 (ctxt);