1 /* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "diagnostic-core.h"
30 #include "sparseset.h"
32 #include "insn-config.h"
36 #include "cfgcleanup.h"
39 #include "tree-pass.h"
45 /* This pass does simple forward propagation and simplification when an
46 operand of an insn can only come from a single def. This pass uses
47 df.c, so it is global. However, we only do limited analysis of
48 available expressions.
50 1) The pass tries to propagate the source of the def into the use,
51 and checks if the result is independent of the substituted value.
52 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
53 zero, independent of the source register.
55 In particular, we propagate constants into the use site. Sometimes
56 RTL expansion did not put the constant in the same insn on purpose,
57 to satisfy a predicate, and the result will fail to be recognized;
58 but this happens rarely and in this case we can still create a
59 REG_EQUAL note. For multi-word operations, this
61 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
62 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
63 (set (subreg:SI (reg:DI 122) 0)
64 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
65 (set (subreg:SI (reg:DI 122) 4)
66 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
68 can be simplified to the much simpler
70 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
71 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
73 This particular propagation is also effective at putting together
74 complex addressing modes. We are more aggressive inside MEMs, in
75 that all definitions are propagated if the use is in a MEM; if the
76 result is a valid memory address we check address_cost to decide
77 whether the substitution is worthwhile.
79 2) The pass propagates register copies. This is not as effective as
80 the copy propagation done by CSE's canon_reg, which works by walking
81 the instruction chain, but it can help the other transformations.
83 We should consider removing this optimization, and instead reorder the
84 RTL passes, because GCSE does this transformation too. With some luck,
85 the CSE pass at the end of rest_of_handle_gcse could also go away.
87 3) The pass looks for paradoxical subregs that are actually unnecessary. Sequences like
90 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
91 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
92 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
93 (subreg:SI (reg:QI 121) 0)))
95 are very common on machines that can only do word-sized operations.
96 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
97 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
98 we can replace the paradoxical subreg with simply (reg:WIDE M). The
99 above will simplify this to
101 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
102 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
103 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
105 where the first two insns are now dead.
107 We used to use reaching definitions to find which uses have a
108 single reaching definition (sounds obvious...), but this is too
109 complex a problem in nasty testcases like PR33928. Now we use the
110 multiple definitions problem in df-problems.c. The similarity
111 between that problem and SSA form creation is taken further, in
112 that fwprop does a dominator walk to create its chains; however,
113 instead of creating a PHI function where multiple definitions meet
114 I just punt and record only singleton use-def chains, which is
115 all that is needed by fwprop. */
/* Change counter for the pass. NOTE(review): nothing in this part of the
file updates it; presumably it counts successful propagations -- confirm
against its users. */
118 static int num_changes;
/* use_def_ref is indexed by the DF_REF_ID of a use and holds the single def
recorded for that use, or NULL. reg_defs is indexed by register number and
holds the def currently recorded for that register. reg_defs_stack saves
reg_defs entries so that they can be restored when the dominator walk
leaves a block; a NULL entry marks the start of a block. */
120 static vec<df_ref> use_def_ref;
121 static vec<df_ref> reg_defs;
122 static vec<df_ref> reg_defs_stack;
124 /* The MD bitmaps are trimmed to include only live registers to cut
125 memory usage on testcases like insn-recog.c. Track live registers
126 in the basic block and do not perform forward propagation if the
127 destination is a dead pseudo occurring in a note.
LOCAL_MD and LOCAL_LR are working copies that before_dom_children
initializes from the MD and LR in sets of the block being visited. */
128 static bitmap local_md;
129 static bitmap local_lr;
131 /* Return the only def in USE's use-def chain, or NULL if there is
132 more than one def in the chain. The answer is a plain read of
use_def_ref indexed by DF_REF_ID (USE); since that vector is created
cleared, NULL is also what an entry that was never filled in yields. */
135 get_def_for_use (df_ref use)
137 return use_def_ref[DF_REF_ID (use)];
141 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
142 TOP_FLAG says which artificial defs should be used, when DEF_REC
143 is an artificial def vector. LOCAL_MD is modified as after a
144 df_md_simulate_* function; we do more or less the same processing
145 done there, so we do not use those functions.
DEF_REC is the DEF parameter, a list walked through DF_REF_NEXT_LOC.
A def is only considered when its DF_REF_AT_TOP bit equals TOP_FLAG
(NOTE(review): the statement that skips the other defs is not visible
in this excerpt). DF_MD_GEN_FLAGS below is the set of flags that mark
a def as partial, conditional or may-clobber. */
147 #define DF_MD_GEN_FLAGS \
148 (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
151 process_defs (df_ref def, int top_flag)
153 for (; def; def = DF_REF_NEXT_LOC (def))
155 df_ref curr_def = reg_defs[DF_REF_REGNO (def)];
158 if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
161 dregno = DF_REF_REGNO (def);
163 reg_defs_stack.safe_push (curr_def);
166 /* Do not store anything if "transitioning" from NULL to NULL. But
167 otherwise, push a special entry on the stack to tell the
168 leave_block callback that the entry in reg_defs was NULL. */
169 if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
172 reg_defs_stack.safe_push (def);
/* A def carrying one of the DF_MD_GEN_FLAGS leaves the register without a
single known def: its LOCAL_MD bit is set and the recorded def is
forgotten. The last two statements instead clear the LOCAL_MD bit and
record DEF as the current def of the register. */
175 if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
177 bitmap_set_bit (local_md, dregno);
178 reg_defs[dregno] = NULL;
182 bitmap_clear_bit (local_md, dregno);
183 reg_defs[dregno] = def;
189 /* Fill the use_def_ref vector with values for the uses in USE_REC,
190 taking reaching definitions info from LOCAL_MD and REG_DEFS.
191 TOP_FLAG says which artificial uses should be used, when USE_REC
192 is an artificial use vector.
USE_REC is the USE parameter, a list walked through DF_REF_NEXT_LOC;
only uses whose DF_REF_AT_TOP bit equals TOP_FLAG are looked at. */
195 process_uses (df_ref use, int top_flag)
197 for (; use; use = DF_REF_NEXT_LOC (use))
198 if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
/* The def recorded in REG_DEFS is stored for the use only when the
register is not flagged in LOCAL_MD and is set in LOCAL_LR. */
200 unsigned int uregno = DF_REF_REGNO (use);
202 && !bitmap_bit_p (local_md, uregno)
203 && bitmap_bit_p (local_lr, uregno))
204 use_def_ref[DF_REF_ID (use)] = reg_defs[uregno];
/* Dominator walker used to build the single-def use-def links.
before_dom_children processes the uses and defs of a block when the
walk enters it; after_dom_children restores reg_defs from
reg_defs_stack when the walk leaves it. */
208 class single_def_use_dom_walker : public dom_walker
/* DIRECTION is passed straight on to the dom_walker base class. */
211 single_def_use_dom_walker (cdi_direction direction)
212 : dom_walker (direction) {}
213 virtual void before_dom_children (basic_block);
214 virtual void after_dom_children (basic_block);
/* Called when the dominator walk enters BB. Load the MD and LR in sets
of BB into LOCAL_MD and LOCAL_LR, then process uses and defs in order:
the artificial refs flagged DF_REF_AT_TOP, every insn of the block, and
finally the artificial refs that are not flagged DF_REF_AT_TOP. */
218 single_def_use_dom_walker::before_dom_children (basic_block bb)
220 int bb_index = bb->index;
221 struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
222 struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
225 bitmap_copy (local_md, &md_bb_info->in);
226 bitmap_copy (local_lr, &lr_bb_info->in);
228 /* Push a marker for the leave_block callback (after_dom_children pops
entries until it finds this NULL). */
229 reg_defs_stack.safe_push (NULL);
231 process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
232 process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
234 /* We don't call df_simulate_initialize_forwards, as it may overestimate
235 the live registers if there are unused artificial defs. We prefer
236 liveness to be underestimated. */
238 FOR_BB_INSNS (bb, insn)
/* Within an insn, uses (including note uses) are processed before the
defs of the same insn; LOCAL_LR is then advanced past the insn. */
241 unsigned int uid = INSN_UID (insn);
242 process_uses (DF_INSN_UID_USES (uid), 0);
243 process_uses (DF_INSN_UID_EQ_USES (uid), 0);
244 process_defs (DF_INSN_UID_DEFS (uid), 0);
245 df_simulate_one_insn_forwards (bb, insn, local_lr);
248 process_uses (df_get_artificial_uses (bb_index), 0);
249 process_defs (df_get_artificial_defs (bb_index), 0);
252 /* Pop the definitions created in this basic block when leaving its dominated parts. */
/* Undo the reg_defs changes made for this block: pop saved entries from
reg_defs_stack until the NULL marker pushed by before_dom_children. */
256 single_def_use_dom_walker::after_dom_children (basic_block bb ATTRIBUTE_UNUSED)
259 while ((saved_def = reg_defs_stack.pop ()) != NULL)
261 unsigned int dregno = DF_REF_REGNO (saved_def);
263 /* See also process_defs. A popped entry that is the current def of the
register is the special entry meaning that the previous value was NULL;
any other popped entry is the previous value itself. */
264 if (saved_def == reg_defs[dregno])
265 reg_defs[dregno] = NULL;
267 reg_defs[dregno] = saved_def;
272 /* Build a vector holding the reaching definitions of uses reached by a
273 single dominating definition. The vector is use_def_ref, sized from
DF_USES_TABLE_SIZE and cleared; reg_defs is sized from max_reg_num and
cleared. Both are filled by a single_def_use_dom_walker walk that
starts at the entry block of the current function. */
276 build_single_def_use_links (void)
278 /* We use the multiple definitions problem to compute our restricted
280 df_set_flags (DF_EQ_NOTES);
281 df_md_add_problem ();
282 df_note_add_problem ();
284 df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
286 use_def_ref.create (DF_USES_TABLE_SIZE ());
287 use_def_ref.safe_grow_cleared (DF_USES_TABLE_SIZE ());
289 reg_defs.create (max_reg_num ());
290 reg_defs.safe_grow_cleared (max_reg_num ());
292 reg_defs_stack.create (n_basic_blocks_for_fn (cfun) * 10);
293 local_md = BITMAP_ALLOC (NULL);
294 local_lr = BITMAP_ALLOC (NULL);
296 /* Walk the dominator tree looking for single reaching definitions
297 dominating the uses. This is similar to how SSA form is built. */
298 single_def_use_dom_walker (CDI_DOMINATORS)
299 .walk (cfun->cfg->x_entry_block_ptr);
/* The working bitmaps and the stack are only needed during the walk.
use_def_ref is not released in the lines shown here; it is what
get_def_for_use reads afterwards. */
301 BITMAP_FREE (local_lr);
302 BITMAP_FREE (local_md);
304 reg_defs_stack.release ();
308 /* Do not try to replace constant addresses or addresses of local and
309 argument slots. These MEM expressions are made only once and inserted
310 in many instructions, as well as being used to control symbol table
311 output. It is not safe to clobber them.
313 There are some uncommon cases where the address is already in a register
314 for some reason, but we cannot take advantage of that because we have
315 no easy way to unshare the MEM. In addition, looking up all stack
316 addresses is costly.
ADDR is the address to classify. Constant addresses are tested first
with CONSTANT_ADDRESS_P. For a PLUS the first operand is taken as the
base; the final test compares the register number of the base against
the frame pointer, hard frame pointer and argument pointer.
NOTE(review): how the base is chosen for a non-PLUS address is not
visible in this excerpt. */
319 can_simplify_addr (rtx addr)
323 if (CONSTANT_ADDRESS_P (addr))
326 if (GET_CODE (addr) == PLUS)
327 reg = XEXP (addr, 0);
332 || (REGNO (reg) != FRAME_POINTER_REGNUM
333 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
334 && REGNO (reg) != ARG_POINTER_REGNUM));
337 /* Canonicalize the address X in place, so that every multiplication
338 is represented as MULT instead of a multiplication
339 by a power of 2 being represented as ASHIFT.
341 Every ASHIFT we find has been made by simplify_gen_binary and was not
342 there before, so it is not shared. So we can do this in place.
The first operand is canonicalized recursively when it is a PLUS, an
ASHIFT or a CONST. */
345 canonicalize_address (rtx x)
348 switch (GET_CODE (x))
/* Only shifts by a constant count that is non-negative and smaller than
the bit size of the mode of X are rewritten; the count operand is
replaced by the constant 1 << count. */
351 if (CONST_INT_P (XEXP (x, 1))
352 && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
353 && INTVAL (XEXP (x, 1)) >= 0)
355 HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
357 XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
365 if (GET_CODE (XEXP (x, 0)) == PLUS
366 || GET_CODE (XEXP (x, 0)) == ASHIFT
367 || GET_CODE (XEXP (x, 0)) == CONST)
368 canonicalize_address (XEXP (x, 0));
382 /* OLD is a memory address. Return whether it is good to use NEW instead,
383 for a memory access in the given MODE. OLD and NEW are the OLD_RTX and
NEW_RTX parameters, AS is the address space used for the validity and
cost queries, and SPEED is passed on to address_cost and set_src_cost. */
386 should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode,
387 addr_space_t as, bool speed)
/* An identical address, or a NEW that is not a valid address for MODE in
address space AS, is tested for first. */
391 if (rtx_equal_p (old_rtx, new_rtx)
392 || !memory_address_addr_space_p (mode, new_rtx, as))
395 /* Copy propagation is always ok. */
396 if (REG_P (old_rtx) && REG_P (new_rtx))
399 /* Prefer the new address if it is less expensive. */
400 gain = (address_cost (old_rtx, mode, as, speed)
401 - address_cost (new_rtx, mode, as, speed));
403 /* If the addresses have equivalent cost, prefer the new address
404 if it has the highest `set_src_cost'. That has the potential of
405 eliminating the most insns without additional costs, and it
406 is the same that cse.c used to do. */
408 gain = (set_src_cost (new_rtx, VOIDmode, speed)
409 - set_src_cost (old_rtx, VOIDmode, speed));
415 /* Flags for the last parameter of propagate_rtx_1. */
418 /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
419 if it is false, propagate_rtx_1 returns false if, for at least
420 one occurrence OLD, it failed to collapse the result to a constant.
421 For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
422 collapse to zero if replacing (reg:M B) with (reg:M A).
424 PR_CAN_APPEAR is disregarded inside MEMs: in that case,
425 propagate_rtx_1 just tries to make cheaper and valid memory addresses. */
429 /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
430 outside memory addresses. This is needed because propagate_rtx_1 does
431 not do any analysis on memory; thus it is very conservative and in general
432 it will fail if non-read-only MEMs are found in the source expression.
434 PR_HANDLE_MEM is set when the source of the propagation was not
435 another MEM. Then, it is safe not to treat non-read-only MEMs as
436 ``opaque'' objects. */
439 /* Set when costs should be optimized for speed. */
440 PR_OPTIMIZE_FOR_SPEED = 4
444 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
445 resulting expression. Replace *PX with a new RTL expression if an
446 occurrence of OLD was found.
448 This is only a wrapper around simplify-rtx.c: do not add any pattern
449 matching code here. (The sole exception is the handling of LO_SUM, but
450 that is because there is no simplify_gen_* function for LO_SUM).
OLD and NEW are the OLD_RTX and NEW_RTX parameters. FLAGS is a mask of
the PR_CAN_APPEAR, PR_HANDLE_MEM and PR_OPTIMIZE_FOR_SPEED bits. The
function recurses into the operands according to the rtx class of *PX
and rebuilds the expression with the matching simplify_gen_* routine;
VALID_OPS accumulates the results of the recursive calls and feeds the
value returned at the end. */
453 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
455 rtx x = *px, tem = NULL_RTX, op0, op1, op2;
456 enum rtx_code code = GET_CODE (x);
457 machine_mode mode = GET_MODE (x);
458 machine_mode op_mode;
459 bool can_appear = (flags & PR_CAN_APPEAR) != 0;
460 bool valid_ops = true;
462 if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
464 /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
465 they have side effects or not). */
466 *px = (side_effects_p (x)
467 ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
468 : gen_rtx_SCRATCH (GET_MODE (x)));
472 /* If X is OLD_RTX, return NEW_RTX. But not if replacing only within an
473 address, and we are *not* inside one. */
480 /* If this is an expression, try recursive substitution. */
481 switch (GET_RTX_CLASS (code))
485 op_mode = GET_MODE (op0);
486 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
487 if (op0 == XEXP (x, 0))
489 tem = simplify_gen_unary (code, mode, op0, op_mode);
496 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
497 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
498 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
500 tem = simplify_gen_binary (code, mode, op0, op1);
504 case RTX_COMM_COMPARE:
507 op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
508 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
509 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
510 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
512 tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
516 case RTX_BITFIELD_OPS:
520 op_mode = GET_MODE (op0);
521 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
522 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
523 valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
524 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
526 if (op_mode == VOIDmode)
527 op_mode = GET_MODE (op0);
528 tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
532 /* The only case we try to handle is a SUBREG. */
536 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
537 if (op0 == XEXP (x, 0))
539 tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
545 if (code == MEM && x != new_rtx)
550 /* There are some addresses that we cannot work on. */
551 if (!can_simplify_addr (op0))
/* Work on the delegitimized address; PR_CAN_APPEAR is forced on for the
recursion into it. */
554 op0 = new_op0 = targetm.delegitimize_address (op0);
555 valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
556 flags | PR_CAN_APPEAR);
558 /* Dismiss transformation that we do not want to carry on. */
561 || !(GET_MODE (new_op0) == GET_MODE (op0)
562 || GET_MODE (new_op0) == VOIDmode))
565 canonicalize_address (new_op0);
567 /* Copy propagations are always ok. Otherwise check the costs. */
568 if (!(REG_P (old_rtx) && REG_P (new_rtx))
569 && !should_replace_address (op0, new_op0, GET_MODE (x),
571 flags & PR_OPTIMIZE_FOR_SPEED))
574 tem = replace_equiv_address_nv (x, new_op0);
577 else if (code == LO_SUM)
582 /* The only simplification we do attempts to remove references to op0
583 or make it constant -- in both cases, op0's invalidity will not
584 make the result invalid. */
585 propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
586 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
587 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
590 /* (lo_sum (high x) x) -> x */
591 if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
594 tem = gen_rtx_LO_SUM (mode, op0, op1);
596 /* OP1 is likely not a legitimate address, otherwise there would have
597 been no LO_SUM. We want it to disappear if it is invalid, return
598 false in that case. */
599 return memory_address_p (mode, tem);
602 else if (code == REG)
604 if (rtx_equal_p (x, old_rtx))
616 /* No change, no trouble. */
622 /* The replacement we made so far is valid, if all of the recursive
623 replacements were valid, or we could simplify everything to
625 return valid_ops || can_appear || CONSTANT_P (tem);
629 /* Return true if X contains a non-constant mem, that is a
subexpression that satisfies MEM_P but not MEM_READONLY_P. All
subexpressions of X are visited with FOR_EACH_SUBRTX. */
632 varying_mem_p (const_rtx x)
634 subrtx_iterator::array_type array;
635 FOR_EACH_SUBRTX (iter, array, x, NONCONST)
636 if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
642 /* Replace all occurrences of OLD in X with NEW and try to simplify the
643 resulting expression (in mode MODE). Return a new expression if it is
644 a constant, otherwise X.
646 Simplifications where occurrences of NEW collapse to a constant are always
647 accepted. All simplifications are accepted if NEW is a pseudo too.
648 Otherwise, we accept simplifications that have a lower or equal cost.
OLD and NEW are the OLD_RTX and NEW_RTX parameters. This function
computes the PR_* flags and hands the work to propagate_rtx_1, which
is given a copy of NEW_RTX (see the copy_rtx call). A NEW_RTX that is
a hard register is tested for first. NOTE(review): what is returned
when nothing was replaced is not visible in this excerpt -- confirm
before relying on the sentence about returning X. */
651 propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx,
658 if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
663 || CONSTANT_P (new_rtx)
664 || (GET_CODE (new_rtx) == SUBREG
665 && REG_P (SUBREG_REG (new_rtx))
666 && (GET_MODE_SIZE (mode)
667 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
668 flags |= PR_CAN_APPEAR;
/* MEMs found while substituting are only handled when NEW_RTX itself
contains no varying memory. */
669 if (!varying_mem_p (new_rtx))
670 flags |= PR_HANDLE_MEM;
673 flags |= PR_OPTIMIZE_FOR_SPEED;
676 collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
677 if (tem == x || !collapsed)
680 /* gen_lowpart_common will not be able to process VOIDmode entities other
682 if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
685 if (GET_MODE (tem) == VOIDmode)
686 tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
688 gcc_assert (GET_MODE (tem) == mode);
696 /* Return true if the register from reference REF is killed
697 between FROM and (but not including) TO. The scan starts at FROM
itself and follows NEXT_INSN; a def in one of the scanned insns with
the same register number as REF is what counts as a kill. */
700 local_ref_killed_between_p (df_ref ref, rtx_insn *from, rtx_insn *to)
704 for (insn = from; insn != to; insn = NEXT_INSN (insn))
710 FOR_EACH_INSN_DEF (def, insn)
711 if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
718 /* Check if the given DEF is available in INSN. This would require full
719 computation of available expressions; we check only restricted conditions:
720 - if DEF is the sole definition of its register, go ahead;
721 - in the same basic block, we check for no definitions killing the
722 definition of DEF_INSN;
723 - if USE's basic block has DEF's basic block as the sole predecessor,
724 we check if the definition is killed after DEF_INSN or before
725 TARGET_INSN insn, in their respective basic blocks.
In terms of the parameters: USE is a use whose register must keep its
value, DEF above is the definition of that register, and INSN above is
TARGET_INSN. The question answered is whether the register of USE is
killed between DEF_INSN and TARGET_INSN. */
727 use_killed_between (df_ref use, rtx_insn *def_insn, rtx_insn *target_insn)
729 basic_block def_bb = BLOCK_FOR_INSN (def_insn);
730 basic_block target_bb = BLOCK_FOR_INSN (target_insn);
734 /* We used to have a def reaching a use that is _before_ the def,
735 with the def not dominating the use even though the use and def
736 are in the same basic block, when a register may be used
737 uninitialized in a loop. This should not happen anymore since
738 we do not use reaching definitions, but still we test for such
739 cases and assume that DEF is not available. */
740 if (def_bb == target_bb
741 ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
742 : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
745 /* Check if the reg in USE has only one definition. We already
746 know that this definition reaches use, or we wouldn't be here.
747 However, this is invalid for hard registers because if they are
748 live at the beginning of the function it does not mean that we
749 have an uninitialized access. */
750 regno = DF_REF_REGNO (use);
751 def = DF_REG_DEF_CHAIN (regno);
753 && DF_REF_NEXT_REG (def) == NULL
754 && regno >= FIRST_PSEUDO_REGISTER)
757 /* Check locally if we are in the same basic block. */
758 if (def_bb == target_bb)
759 return local_ref_killed_between_p (use, def_insn, target_insn);
761 /* Finally, if DEF_BB is the sole predecessor of TARGET_BB. */
762 if (single_pred_p (target_bb)
763 && single_pred (target_bb) == def_bb)
767 /* See if USE is killed between DEF_INSN and the last insn in the
768 basic block containing DEF_INSN. The test compares the LUID of the
last def of the register in DEF_BB with the LUID of DEF_INSN. */
769 x = df_bb_regno_last_def_find (def_bb, regno);
770 if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
773 /* See if USE is killed between TARGET_INSN and the first insn in the
774 basic block containing TARGET_INSN. The test compares the LUID of
the first def of the register in TARGET_BB with that of TARGET_INSN. */
775 x = df_bb_regno_first_def_find (target_bb, regno);
776 if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
782 /* Otherwise assume the worst case. */
787 /* Check if all uses in DEF_INSN can be used in TARGET_INSN. This
788 would require full computation of available expressions;
789 we check only restricted conditions, see use_killed_between.
DEF_INSN must have a single set (asserted below). Both the ordinary
uses and the note (EQ) uses of DEF_INSN are examined, and in both
paths a use of the register that DEF_INSN itself sets is singled out
by comparing it with the destination of the set. */
791 all_uses_available_at (rtx_insn *def_insn, rtx_insn *target_insn)
794 struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
795 rtx def_set = single_set (def_insn);
798 gcc_assert (def_set);
800 /* If target_insn comes right after def_insn, which is very common
801 for addresses, we can use a quicker test. Ignore debug insns
802 other than target insns for this. */
803 next = NEXT_INSN (def_insn);
804 while (next && next != target_insn && DEBUG_INSN_P (next))
805 next = NEXT_INSN (next);
806 if (next == target_insn && REG_P (SET_DEST (def_set)))
808 rtx def_reg = SET_DEST (def_set);
810 /* If the insn uses the reg that it defines, the substitution is
812 FOR_EACH_INSN_INFO_USE (use, insn_info)
813 if (rtx_equal_p (DF_REF_REG (use), def_reg))
815 FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
816 if (rtx_equal_p (DF_REF_REG (use), def_reg))
821 rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
823 /* Look at all the uses of DEF_INSN, and see if they are not
824 killed between DEF_INSN and TARGET_INSN. */
825 FOR_EACH_INSN_INFO_USE (use, insn_info)
827 if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
829 if (use_killed_between (use, def_insn, target_insn))
832 FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
834 if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
836 if (use_killed_between (use, def_insn, target_insn))
/* active_defs maps a register number to the def that
register_active_defs recorded for it. active_defs_check is the set of
register numbers recorded since update_df_init last cleared it. */
845 static df_ref *active_defs;
846 static sparseset active_defs_check;
848 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
849 mentioned in USE_REC. Register the valid entries in ACTIVE_DEFS_CHECK
850 too, for checking purposes. USE_REC is the USE parameter, a list walked
through DF_REF_NEXT_LOC; the def of each use comes from
get_def_for_use. */
853 register_active_defs (df_ref use)
855 for (; use; use = DF_REF_NEXT_LOC (use))
857 df_ref def = get_def_for_use (use);
858 int regno = DF_REF_REGNO (use);
861 sparseset_set_bit (active_defs_check, regno);
862 active_defs[regno] = def;
867 /* Build the use->def links that we use to update the dataflow info
868 for new uses. Note that building the links is very cheap and if
869 it were done earlier, they could be used to rule out invalid
870 propagations (in addition to what is done in all_uses_available_at).
871 I'm not doing this yet, though.
ACTIVE_DEFS_CHECK is emptied first; the links are then registered for
the uses of DEF_INSN and for the uses and note (EQ) uses of INSN. */
874 update_df_init (rtx_insn *def_insn, rtx_insn *insn)
877 sparseset_clear (active_defs_check);
878 register_active_defs (DF_INSN_USES (def_insn));
879 register_active_defs (DF_INSN_USES (insn));
880 register_active_defs (DF_INSN_EQ_USES (insn));
884 /* Update the USE_DEF_REF array for the given use and the uses chained
after it through DF_REF_NEXT_LOC, using the active definitions
885 in the ACTIVE_DEFS array to match pseudos to their def. */
888 update_uses (df_ref use)
890 for (; use; use = DF_REF_NEXT_LOC (use))
892 int regno = DF_REF_REGNO (use);
894 /* Set up the use-def chain, growing USE_DEF_REF (cleared) first when
the id of the use is beyond its current length. */
895 if (DF_REF_ID (use) >= (int) use_def_ref.length ())
896 use_def_ref.safe_grow_cleared (DF_REF_ID (use) + 1);
/* The register must have been registered by register_active_defs since
the last update_df_init. */
898 gcc_checking_assert (sparseset_bit_p (active_defs_check, regno));
899 use_def_ref[DF_REF_ID (use)] = active_defs[regno];
904 /* Update the USE_DEF_REF array for the uses in INSN. NOTE is the note
905 whose contents get new uses created with DF_REF_IN_NOTE, followed by a
rescan of the notes of INSN; the other visible path creates uses for
the whole pattern of INSN and rescans the insn. The callers in this
file pass either a REG_EQUAL note or NULL. NOTE(review): the test
that selects between the two paths is not visible in this excerpt;
presumably it is whether NOTE is non-null. There is no NOTES_ONLY
parameter. */
908 update_df (rtx_insn *insn, rtx note)
910 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
914 df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
915 df_notes_rescan (insn);
919 df_uses_create (&PATTERN (insn), insn, 0);
920 df_insn_rescan (insn);
921 update_uses (DF_INSN_INFO_USES (insn_info));
924 update_uses (DF_INSN_INFO_EQ_USES (insn_info));
928 /* Try substituting NEW into LOC, which originated from forward propagation
929 of USE's value from DEF_INSN. SET_REG_EQUAL says whether we are
930 substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
931 new insn is not recognized. Return whether the substitution was performed. */
935 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
938 rtx_insn *insn = DF_REF_INSN (use);
939 rtx set = single_set (insn);
941 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
/* Register the defs feeding DEF_INSN and INSN now, so that update_df can
link any uses created by the substitution. */
945 update_df_init (def_insn, insn);
947 /* forward_propagate_subreg may be operating on an instruction with
948 multiple sets. If so, assume the cost of the new instruction is
949 not greater than the old one. */
951 old_cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed);
954 fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
955 print_inline_rtx (dump_file, *loc, 2);
956 fprintf (dump_file, "\n with ");
957 print_inline_rtx (dump_file, new_rtx, 2);
958 fprintf (dump_file, "\n");
/* Queue the replacement of *LOC and check it with verify_changes. The
branches below distinguish a change that is not recognized, a change to
a register use that is not profitable by set_src_cost, and an accepted
change. */
961 validate_unshare_change (insn, loc, new_rtx, true);
962 if (!verify_changes (0))
965 fprintf (dump_file, "Changes to insn %d not recognized\n",
970 else if (DF_REF_TYPE (use) == DF_REF_REG_USE
972 && (set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed)
976 fprintf (dump_file, "Changes to insn %d not profitable\n",
984 fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
990 confirm_change_group ();
997 /* Can also record a simplified value in a REG_EQUAL note,
998 making a new one if one does not already exist. */
1002 fprintf (dump_file, " Setting REG_EQUAL note\n");
1004 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
/* Refresh the use-def links when the insn changed or a note was set,
unless what was substituted is a constant. */
1008 if ((ok || note) && !CONSTANT_P (new_rtx))
1009 update_df (insn, note);
1014 /* For the given single_set INSN, containing SRC known to be a
1015 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1016 is redundant due to the register being set by a LOAD_EXTEND_OP
1017 load from memory. The extended register is the first operand of
SRC. Its single def is looked up through get_def_for_use, and the
final test is whether that def sits in a non-jump insn whose pattern
is a plain SET of the register from a MEM. */
1020 free_load_extend (rtx src, rtx_insn *insn)
1025 reg = XEXP (src, 0);
1026 #ifdef LOAD_EXTEND_OP
1027 if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
/* Find the non-artificial register use in INSN that refers to REG; the
comparison is by pointer, not by rtx_equal_p. */
1031 FOR_EACH_INSN_USE (use, insn)
1032 if (!DF_REF_IS_ARTIFICIAL (use)
1033 && DF_REF_TYPE (use) == DF_REF_REG_USE
1034 && DF_REF_REG (use) == reg)
1039 def = get_def_for_use (use);
1043 if (DF_REF_IS_ARTIFICIAL (def))
1046 if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1048 rtx patt = PATTERN (DF_REF_INSN (def));
1050 if (GET_CODE (patt) == SET
1051 && GET_CODE (SET_SRC (patt)) == MEM
1052 && rtx_equal_p (SET_DEST (patt), reg))
1058 /* If USE is a subreg, see if it can be replaced by a pseudo.
DEF_INSN is the insn holding the def of USE and DEF_SET is its set,
whose destination must be a register. Two shapes are handled: a
paradoxical subreg whose def is a lowpart subreg of a pseudo that has
the mode of USE, and a lowpart subreg of a register defined by a zero
or sign extension of a pseudo that has the mode of USE. In both
cases the replacement goes through try_fwprop_subst. */
1061 forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set)
1063 rtx use_reg = DF_REF_REG (use);
1067 /* Only consider subregs... */
1068 machine_mode use_mode = GET_MODE (use_reg);
1069 if (GET_CODE (use_reg) != SUBREG
1070 || !REG_P (SET_DEST (def_set)))
1073 /* If this is a paradoxical SUBREG... */
1074 if (GET_MODE_SIZE (use_mode)
1075 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1077 /* If this is a paradoxical SUBREG, we have no idea what value the
1078 extra bits would have. However, if the operand is equivalent to
1079 a SUBREG whose operand is the same as our mode, and all the modes
1080 are within a word, we can just use the inner operand because
1081 these SUBREGs just say how to treat the register. */
1082 use_insn = DF_REF_INSN (use);
1083 src = SET_SRC (def_set);
1084 if (GET_CODE (src) == SUBREG
1085 && REG_P (SUBREG_REG (src))
1086 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1087 && GET_MODE (SUBREG_REG (src)) == use_mode
1088 && subreg_lowpart_p (src)
1089 && all_uses_available_at (def_insn, use_insn))
1090 return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1094 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1095 is the low part of the reg being extended then just use the inner
1096 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1097 be removed due to it matching a LOAD_EXTEND_OP load from memory,
1098 or due to the operation being a no-op when applied to registers.
1099 For example, if we have:
1101 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1102 B: (... (subreg:SI (reg:DI X)) ...)
1104 and mode_rep_extended says that Y is already sign-extended,
1105 the backend will typically allow A to be combined with the
1106 definition of Y or, failing that, allow A to be deleted after
1107 reload through register tying. Introducing more uses of Y
1108 prevents both optimisations. */
1109 else if (subreg_lowpart_p (use_reg))
1111 use_insn = DF_REF_INSN (use);
1112 src = SET_SRC (def_set);
1113 if ((GET_CODE (src) == ZERO_EXTEND
1114 || GET_CODE (src) == SIGN_EXTEND)
1115 && REG_P (XEXP (src, 0))
1116 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1117 && GET_MODE (XEXP (src, 0)) == use_mode
1118 && !free_load_extend (src, def_insn)
1119 && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1120 != (int) GET_CODE (src))
1121 && all_uses_available_at (def_insn, use_insn))
1122 return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1129 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.
SRC is the source of DEF_SET and REG is the register being replaced.
USE must not come from a note (asserted below). The places rewritten
are the MEM destinations of the SETs in the pattern and the input
operands of the ASM_OPERANDS; all replacements are queued and then
applied as one change group. */
1132 forward_propagate_asm (df_ref use, rtx_insn *def_insn, rtx def_set, rtx reg)
1134 rtx_insn *use_insn = DF_REF_INSN (use);
1135 rtx src, use_pat, asm_operands, new_rtx, *loc;
1139 gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1141 src = SET_SRC (def_set);
1142 use_pat = PATTERN (use_insn);
1144 /* In __asm don't replace if src might need more registers than
1145 reg, as that could increase register pressure on the __asm. The
test used is whether DEF_INSN has more than one use. */
1146 uses = DF_INSN_USES (def_insn);
1147 if (uses && DF_REF_NEXT_LOC (uses))
1150 update_df_init (def_insn, use_insn);
1151 speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1152 asm_operands = NULL_RTX;
1153 switch (GET_CODE (use_pat))
1156 asm_operands = use_pat;
1159 if (MEM_P (SET_DEST (use_pat)))
1161 loc = &SET_DEST (use_pat);
1162 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1164 validate_unshare_change (use_insn, loc, new_rtx, true);
1166 asm_operands = SET_SRC (use_pat);
1169 for (i = 0; i < XVECLEN (use_pat, 0); i++)
1170 if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1172 if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1174 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1175 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1178 validate_unshare_change (use_insn, loc, new_rtx, true);
1180 asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1182 else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1183 asm_operands = XVECEXP (use_pat, 0, i);
/* Whatever the shape of the pattern, an ASM_OPERANDS must have been found
by now; each of its inputs is then rewritten in turn. */
1189 gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1190 for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1192 loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1193 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1195 validate_unshare_change (use_insn, loc, new_rtx, true);
1198 if (num_changes_pending () == 0 || !apply_change_group ())
1201 update_df (use_insn, NULL);
1206 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the result. */
1210 forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set)
1212 rtx_insn *use_insn = DF_REF_INSN (use);
1213 rtx use_set = single_set (use_insn);
1214 rtx src, reg, new_rtx, *loc;
/* Only an insn without a recognized code is probed with
   asm_noperands.  */
1219 if (INSN_CODE (use_insn) < 0)
1220 asm_use = asm_noperands (PATTERN (use_insn));
/* USE_INSN has no single set, ASM_USE is negative, and it is not a
   debug insn.  */
1222 if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1225 /* Do not propagate into PC, CC0, etc. */
1226 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1229 /* If def and use are subreg, check if they match. */
1230 reg = DF_REF_REG (use);
1231 if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1233 if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1236 /* Check if the def had a subreg, but the use has the whole reg. */
1237 else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1239 /* Check if the use has a subreg, but the def had the whole reg. Unlike the
1240 previous case, the optimization is possible and often useful indeed. */
1241 else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1242 reg = SUBREG_REG (reg);
1244 /* Make sure that we can treat REG as having the same mode as the
1245 source of DEF_SET. */
1246 if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1249 /* Check if the substitution is valid (last, because it's the most
1250 expensive check!). */
1251 src = SET_SRC (def_set);
/* A constant source is exempt from the availability check.  */
1252 if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1255 /* Check if the def is loading something from the constant pool; in this
1256 case we would undo optimization such as compress_float_constant.
1257 Still, we can set a REG_EQUAL note. */
1258 if (MEM_P (src) && MEM_READONLY_P (src))
1260 rtx x = avoid_constant_pool_reference (src);
1261 if (x != src && use_set)
/* Start from the existing note if there is one, otherwise from the
   source of USE_SET, and only install a note when replacing SRC by X
   actually changed the expression.  */
1263 rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1264 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1265 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1266 if (old_rtx != new_rtx)
1267 set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
/* Hand the use over to the __asm-specific routine.  */
1273 return forward_propagate_asm (use, def_insn, def_set, reg);
1275 /* Else try simplifying. */
/* For a DF_REF_REG_MEM_STORE use the substitution site is the
   destination of USE_SET, and no REG_EQUAL note is requested.  */
1277 if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1279 loc = &SET_DEST (use_set);
1280 set_reg_equal = false;
/* Here the substitution site is the variable location of USE_INSN;
   again no note is requested.  */
1284 loc = &INSN_VAR_LOCATION_LOC (use_insn);
1285 set_reg_equal = false;
/* Substitute inside the REG_EQUAL note when the use lives there,
   otherwise in the source of USE_SET.  */
1289 rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1290 if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1291 loc = &XEXP (note, 0);
1293 loc = &SET_SRC (use_set);
1295 /* Do not replace an existing REG_EQUAL note if the insn is not
1296 recognized. Either we're already replacing in the note, or we'll
1297 separately try plugging the definition in the note and simplifying.
1298 And only install a REG_EQUAL note when the destination is a REG
1299 that isn't mentioned in USE_SET, as the note would be invalid
1300 otherwise. We also don't want to install a note if we are merely
1301 propagating a pseudo since verifying that this pseudo isn't dead
1302 is a pain; moreover such a note won't help anything. */
1303 set_reg_equal = (note == NULL_RTX
1304 && REG_P (SET_DEST (use_set))
1306 && !(GET_CODE (src) == SUBREG
1307 && REG_P (SUBREG_REG (src)))
1308 && !reg_mentioned_p (SET_DEST (use_set),
1309 SET_SRC (use_set)));
/* When the location itself carries no mode, fall back to the mode of
   the destination of USE_SET.  */
1312 if (GET_MODE (*loc) == VOIDmode)
1313 mode = GET_MODE (SET_DEST (use_set));
1315 mode = GET_MODE (*loc);
1317 new_rtx = propagate_rtx (*loc, mode, reg, src,
1318 optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
/* The outcome of the function is whatever try_fwprop_subst reports for
   the rewritten location.  */
1323 return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1327 /* Given a use USE of an insn, if it has a single reaching
1328 definition, try to forward propagate it into that insn.
1329 Return true if cfg cleanup will be needed. */
1332 forward_propagate_into (df_ref use)
1335 rtx_insn *def_insn, *use_insn;
/* Tests on the use itself: read-write flag, then artificial ref.  */
1339 if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1341 if (DF_REF_IS_ARTIFICIAL (use))
1344 /* Only consider uses that have a single definition. */
1345 def = get_def_for_use (use);
/* The same two tests, applied to the definition.  */
1348 if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1350 if (DF_REF_IS_ARTIFICIAL (def))
1353 /* Do not propagate loop invariant definitions inside the loop. */
1354 if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1357 /* Check if the use is still present in the insn! */
/* PARENT is where the register is looked for: the REG_EQUAL note for a
   use recorded in a note, the insn pattern otherwise.  */
1358 use_insn = DF_REF_INSN (use);
1359 if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1360 parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1362 parent = PATTERN (use_insn);
1364 if (!reg_mentioned_p (DF_REF_REG (use), parent))
/* A defining insn with multiple sets is tested for before its single
   set is fetched.  */
1367 def_insn = DF_REF_INSN (def);
1368 if (multiple_sets (def_insn))
1370 def_set = single_set (def_insn);
1374 /* Only try one kind of propagation. If two are possible, we'll
1375 do it on the following iterations. */
1376 if (forward_propagate_and_simplify (use, def_insn, def_set)
1377 || forward_propagate_subreg (use, def_insn, def_set))
/* After a successful propagation: when non-call exceptions are enabled
   and USE_INSN carries a REG_EH_REGION note, run purge_dead_edges on
   the block of the use.  */
1379 if (cfun->can_throw_non_call_exceptions
1380 && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1381 && purge_dead_edges (DF_REF_BB (use)))
/* Set-up sequence: compute dominators, initialize the loop optimizer
   with AVOID_CFG_MODIFICATIONS, build the single def-use links, switch
   the dataflow framework to deferred insn rescanning, and allocate
   ACTIVE_DEFS and ACTIVE_DEFS_CHECK, both sized by max_reg_num ().  */
1392 calculate_dominance_info (CDI_DOMINATORS);
1394 /* We do not always want to propagate into loops, so we have to find
1395 loops and be careful about them. Avoid CFG modifications so that
1396 we don't have to update dominance information afterwards for
1397 build_single_def_use_links. */
1398 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
1400 build_single_def_use_links ();
1401 df_set_flags (DF_DEFER_INSN_RESCAN);
/* One df_ref slot per register number.  */
1403 active_defs = XNEWVEC (df_ref, max_reg_num ());
1405 active_defs_check = sparseset_alloc (max_reg_num ());
/* Tear-down sequence: finalize the loop optimizer, release USE_DEF_REF,
   free the ACTIVE_DEFS_CHECK sparse set and the dominance information,
   then delete trivially dead insns.  The format string at the end
   belongs to a report of the number of successful forward
   propagations.  */
1411 loop_optimizer_finalize ();
1413 use_def_ref.release ();
1416 sparseset_free (active_defs_check);
1418 free_dominance_info (CDI_DOMINATORS);
1420 delete_trivially_dead_insns (get_insns (), max_reg_num ());
1424 "\nNumber of successful forward propagations: %d\n\n",
1429 /* Main entry point. */
/* The predicate below holds when optimizing (optimize > 0) and
   flag_forward_propagate is set.  */
1434 return optimize > 0 && flag_forward_propagate;
/* NEED_CLEANUP accumulates the results of forward_propagate_into, which
   reports whether cfg cleanup will be needed.  */
1441 bool need_cleanup = false;
1445 /* Go through all the uses. df_uses_create will create new ones at the
1446 end, and we'll go through them as well.
1448 Do not forward propagate addresses into loops until after unrolling.
1449 CSE did so because it was able to fix its own mess, but we are not. */
/* The bound is re-read on every iteration, so uses appended to the
   table while the loop runs are visited too.  */
1451 for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1453 df_ref use = DF_USES_GET (i);
/* Accept every DF_REF_REG_USE, plus any other use whose block has no
   loop father or whose loop father has no outer loop.  */
1455 if (DF_REF_TYPE (use) == DF_REF_REG_USE
1456 || DF_REF_BB (use)->loop_father == NULL
1457 /* The outermost loop is not really a loop. */
1458 || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1459 need_cleanup |= forward_propagate_into (use);
/* Static description of the RTL pass named "fwprop1": timed under
   TV_FWPROP, with no required, provided or destroyed properties, no
   start flags, and TODO_df_finish as its finish flag.  */
1470 const pass_data pass_data_rtl_fwprop =
1472 RTL_PASS, /* type */
1473 "fwprop1", /* name */
1474 OPTGROUP_NONE, /* optinfo_flags */
1475 TV_FWPROP, /* tv_id */
1476 0, /* properties_required */
1477 0, /* properties_provided */
1478 0, /* properties_destroyed */
1479 0, /* todo_flags_start */
1480 TODO_df_finish, /* todo_flags_finish */
/* rtl_opt_pass wrapper built on pass_data_rtl_fwprop: gate () defers to
   gate_fwprop () and execute () to fwprop ().  */
1483 class pass_rtl_fwprop : public rtl_opt_pass
1486 pass_rtl_fwprop (gcc::context *ctxt)
1487 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1490 /* opt_pass methods: */
1491 virtual bool gate (function *) { return gate_fwprop (); }
1492 virtual unsigned int execute (function *) { return fwprop (); }
1494 }; // class pass_rtl_fwprop
/* Factory: return a newly allocated pass_rtl_fwprop constructed with
   CTXT.  */
1499 make_pass_rtl_fwprop (gcc::context *ctxt)
1501 return new pass_rtl_fwprop (ctxt);
/* NEED_CLEANUP accumulates the results of forward_propagate_into, which
   reports whether cfg cleanup will be needed.  */
1508 bool need_cleanup = false;
1512 /* Go through all the uses. df_uses_create will create new ones at the
1513 end, and we'll go through them as well. */
1514 for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1516 df_ref use = DF_USES_GET (i);
/* Accept only uses that are not DF_REF_REG_USE and whose block has a
   loop father that itself has an outer loop.  This is the exact
   negation of the filter in the other driver loop of this file.  */
1518 if (DF_REF_TYPE (use) != DF_REF_REG_USE
1519 && DF_REF_BB (use)->loop_father != NULL
1520 /* The outermost loop is not really a loop. */
1521 && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1522 need_cleanup |= forward_propagate_into (use);
/* Static description of the RTL pass named "fwprop2": timed under
   TV_FWPROP, with no required, provided or destroyed properties, no
   start flags, and TODO_df_finish as its finish flag.  */
1534 const pass_data pass_data_rtl_fwprop_addr =
1536 RTL_PASS, /* type */
1537 "fwprop2", /* name */
1538 OPTGROUP_NONE, /* optinfo_flags */
1539 TV_FWPROP, /* tv_id */
1540 0, /* properties_required */
1541 0, /* properties_provided */
1542 0, /* properties_destroyed */
1543 0, /* todo_flags_start */
1544 TODO_df_finish, /* todo_flags_finish */
/* rtl_opt_pass wrapper built on pass_data_rtl_fwprop_addr: gate ()
   defers to gate_fwprop () and execute () to fwprop_addr ().  */
1547 class pass_rtl_fwprop_addr : public rtl_opt_pass
1550 pass_rtl_fwprop_addr (gcc::context *ctxt)
1551 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1554 /* opt_pass methods: */
1555 virtual bool gate (function *) { return gate_fwprop (); }
1556 virtual unsigned int execute (function *) { return fwprop_addr (); }
1558 }; // class pass_rtl_fwprop_addr
/* Factory: return a newly allocated pass_rtl_fwprop_addr constructed
   with CTXT.  */
1563 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1565 return new pass_rtl_fwprop_addr (ctxt);