1 /* Natural loop analysis code for GNU compiler.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
/* Local prototypes for the static helpers defined in this file.  */
32 struct unmark_altered_insn_data;
33 static void unmark_altered (rtx, rtx, regset);
34 static void blocks_invariant_registers (basic_block *, int, regset);
35 static void unmark_altered_insn (rtx, rtx, struct unmark_altered_insn_data *);
36 static void blocks_single_set_registers (basic_block *, int, rtx *);
37 static int invariant_rtx_wrto_regs_p_helper (rtx *, regset);
38 static bool invariant_rtx_wrto_regs_p (rtx, regset);
39 static rtx test_for_iteration (struct loop_desc *desc, unsigned HOST_WIDE_INT);
40 static bool constant_iterations (struct loop_desc *, unsigned HOST_WIDE_INT *,
42 static bool simple_loop_exit_p (struct loop *, edge, regset,
43 rtx *, struct loop_desc *);
44 static rtx variable_initial_value (rtx, regset, rtx, rtx *, enum machine_mode);
45 static rtx variable_initial_values (edge, rtx, enum machine_mode);
46 static bool simple_condition_p (struct loop *, rtx, regset,
48 static basic_block simple_increment (struct loop *, rtx *, struct loop_desc *);
49 static rtx count_strange_loop_iterations (rtx, rtx, enum rtx_code,
50 int, rtx, enum machine_mode,
52 static unsigned HOST_WIDEST_INT inverse (unsigned HOST_WIDEST_INT, int);
53 static bool fits_in_mode_p (enum machine_mode mode, rtx expr);
55 /* Computes inverse to X modulo (1 << MOD).  X is assumed odd (otherwise no
   inverse exists modulo a power of two).  */
56 static unsigned HOST_WIDEST_INT
57 inverse (unsigned HOST_WIDEST_INT x, int mod)
/* Mask of the low MOD bits.  The two-step shift avoids shifting by the full
   width of HOST_WIDEST_INT, which would be undefined behavior.  */
59 unsigned HOST_WIDEST_INT mask =
60 ((unsigned HOST_WIDEST_INT) 1 << (mod - 1) << 1) - 1;
61 unsigned HOST_WIDEST_INT rslt = 1;
/* Iterate MOD - 1 times, accumulating the product into RSLT and keeping
   only the low MOD bits.  */
64 for (i = 0; i < mod - 1; i++)
66 rslt = (rslt * x) & mask;
73 /* Checks whether BB is executed exactly once in each LOOP iteration. */
75 just_once_each_iteration_p (struct loop *loop, basic_block bb)
77 /* It must be executed at least once each iteration. */
78 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
/* And at most once -- BB must belong directly to LOOP, not to a subloop,
   since a subloop body may run multiple times per outer iteration.  */
82 if (bb->loop_father != loop)
85 /* But this was not enough. We might have some irreducible loop here. */
86 if (bb->flags & BB_IRREDUCIBLE_LOOP)
93 /* Unmarks modified registers; helper to blocks_invariant_registers.
   Called via note_stores: WHAT is the stored-to location, BY the store.
   Clears WHAT's register from REGS, i.e. marks it as not invariant.  */
95 unmark_altered (rtx what, rtx by ATTRIBUTE_UNUSED, regset regs)
/* Look through a SUBREG store to the underlying register.  */
97 if (GET_CODE (what) == SUBREG)
98 what = SUBREG_REG (what);
101 CLEAR_REGNO_REG_SET (regs, REGNO (what));
104 /* Marks registers that are invariant inside blocks BBS. */
106 blocks_invariant_registers (basic_block *bbs, int nbbs, regset regs)
/* Start optimistically with every register marked invariant ...  */
111 for (i = 0; i < max_reg_num (); i++)
112 SET_REGNO_REG_SET (regs, i);
/* ... then walk every insn of every block and clear any register that is
   stored to (see unmark_altered).  */
113 for (i = 0; i < nbbs; i++)
114 for (insn = BB_HEAD (bbs[i]);
115 insn != NEXT_INSN (BB_END (bbs[i]));
116 insn = NEXT_INSN (insn))
118 note_stores (PATTERN (insn),
119 (void (*) (rtx, rtx, void *)) unmark_altered,
123 /* Unmarks modified registers; helper to blocks_single_set_registers. */
/* Carries state for unmark_altered_insn: the usage below shows it holds
   the insn currently being scanned (INSN) and the per-register insn
   array (REGS); the member declarations are not visible in this chunk.  */
124 struct unmark_altered_insn_data
/* note_stores callback: WHAT is the stored-to location.  A register keeps
   its entry in DATA->regs only if the single set recorded for it is the
   very insn being scanned; any other store invalidates it.  */
131 unmark_altered_insn (rtx what, rtx by ATTRIBUTE_UNUSED,
132 struct unmark_altered_insn_data *data)
136 if (GET_CODE (what) == SUBREG)
137 what = SUBREG_REG (what);
/* The recorded set is this very insn -- it stays valid.  */
141 if (data->regs[rn] == data->insn)
143 data->regs[rn] = NULL;
146 /* Marks registers that have just single simple set in BBS; the relevant
147 insn is returned in REGS. */
149 blocks_single_set_registers (basic_block *bbs, int nbbs, rtx *regs)
153 struct unmark_altered_insn_data data;
155 for (i = 0; i < max_reg_num (); i++)
/* First pass: record the (last seen) single_set insn for each register
   destination.  */
158 for (i = 0; i < nbbs; i++)
159 for (insn = BB_HEAD (bbs[i]);
160 insn != NEXT_INSN (BB_END (bbs[i]));
161 insn = NEXT_INSN (insn))
163 rtx set = single_set (insn);
166 if (!REG_P (SET_DEST (set)))
168 regs[REGNO (SET_DEST (set))] = insn;
/* Second pass: invalidate registers that are also stored by some other
   insn (see unmark_altered_insn), leaving only truly single-set regs.  */
172 for (i = 0; i < nbbs; i++)
173 for (insn = BB_HEAD (bbs[i]);
174 insn != NEXT_INSN (BB_END (bbs[i]));
175 insn = NEXT_INSN (insn))
180 note_stores (PATTERN (insn),
181 (void (*) (rtx, rtx, void *)) unmark_altered_insn,
186 /* Helper for invariant_rtx_wrto_regs_p.
   for_each_rtx callback: returns nonzero if *EXPR may vary, i.e. is NOT
   invariant given that the registers in INVARIANT_REGS are invariant.  */
188 invariant_rtx_wrto_regs_p_helper (rtx *expr, regset invariant_regs)
190 switch (GET_CODE (*expr))
/* Volatile unspecs may change on each evaluation.  */
194 case UNSPEC_VOLATILE:
205 return MEM_VOLATILE_P (*expr);
208 /* If the memory is not constant, assume it is modified. If it is
209 constant, we still have to check the address. */
210 return !RTX_UNCHANGING_P (*expr);
/* A register is invariant exactly when it is in INVARIANT_REGS.  */
213 return !REGNO_REG_SET_P (invariant_regs, REGNO (*expr));
220 /* Checks that EXPR is invariant provided that INVARIANT_REGS are invariant. */
222 invariant_rtx_wrto_regs_p (rtx expr, regset invariant_regs)
/* Walk every sub-rtx; any nonzero return from the helper means "varies".  */
224 return !for_each_rtx (&expr, (rtx_function) invariant_rtx_wrto_regs_p_helper,
228 /* Checks whether CONDITION is a simple comparison in that one of operands
229 is register and the other one is invariant in the LOOP. Fills var, lim
230 and cond fields in DESC. */
232 simple_condition_p (struct loop *loop ATTRIBUTE_UNUSED, rtx condition,
233 regset invariant_regs, struct loop_desc *desc)
237 /* Check condition. */
238 switch (GET_CODE (condition))
255 /* Of integers or pointers. */
256 if (GET_MODE_CLASS (GET_MODE (XEXP (condition, 0))) != MODE_INT
257 && GET_MODE_CLASS (GET_MODE (XEXP (condition, 0))) != MODE_PARTIAL_INT)
260 /* One of operands must be a simple register. */
261 op0 = XEXP (condition, 0);
262 op1 = XEXP (condition, 1);
264 /* One of operands must be invariant. */
265 if (invariant_rtx_wrto_regs_p (op0, invariant_regs))
267 /* And the other one must be a register. */
/* The invariant operand is on the left, so swap the comparison to keep
   the register (induction variable) as the first operand.  */
273 desc->cond = swap_condition (GET_CODE (condition));
274 if (desc->cond == UNKNOWN)
279 /* Check the other operand. */
280 if (!invariant_rtx_wrto_regs_p (op1, invariant_regs))
/* Operands already in canonical order: keep the condition as-is.  */
288 desc->cond = GET_CODE (condition);
293 /* Checks whether DESC->var is incremented/decremented exactly once each
294 iteration. Fills in DESC->stride and returns block in that DESC->var is
   modified, or NULL if the increment is not simple (return value not
   visible in this chunk -- inferred from the basic_block return type).  */
297 simple_increment (struct loop *loop, rtx *simple_increment_regs,
298 struct loop_desc *desc)
300 rtx mod_insn, mod_insn1, set, set_src, set_add;
301 basic_block mod_bb, mod_bb1;
303 /* Find insn that modifies var. */
304 mod_insn = simple_increment_regs[REGNO (desc->var)];
307 mod_bb = BLOCK_FOR_INSN (mod_insn);
309 /* Check that it is executed exactly once each iteration. */
310 if (!just_once_each_iteration_p (loop, mod_bb))
313 /* mod_insn must be a simple increment/decrement. */
314 set = single_set (mod_insn);
317 if (!rtx_equal_p (SET_DEST (set), desc->var))
/* Prefer a REG_EQUAL/REG_EQUIV note over the raw SET_SRC when present.  */
320 set_src = find_reg_equal_equiv_note (mod_insn);
322 set_src = SET_SRC (set);
324 /* Check for variables that iterate in narrower mode. */
325 if (GET_CODE (set_src) == SIGN_EXTEND
326 || GET_CODE (set_src) == ZERO_EXTEND)
328 /* If we are sign extending variable that is then compared unsigned
329 or vice versa, there is something weird happening. */
332 && ((desc->cond == LEU
335 || desc->cond == GTU)
336 ^ (GET_CODE (set_src) == ZERO_EXTEND)))
/* The extension must be of a lowpart SUBREG of a register in VAR's mode.  */
339 if (GET_CODE (XEXP (set_src, 0)) != SUBREG
340 || SUBREG_BYTE (XEXP (set_src, 0)) != 0
341 || GET_MODE (SUBREG_REG (XEXP (set_src, 0))) != GET_MODE (desc->var))
344 desc->inner_mode = GET_MODE (XEXP (set_src, 0));
345 desc->extend = GET_CODE (set_src);
346 set_src = SUBREG_REG (XEXP (set_src, 0));
348 if (GET_CODE (set_src) != REG)
351 /* Find where the reg is set. */
352 mod_insn1 = simple_increment_regs[REGNO (set_src)];
356 mod_bb1 = BLOCK_FOR_INSN (mod_insn1);
/* The defining insn must dominate the extension ...  */
357 if (!dominated_by_p (CDI_DOMINATORS, mod_bb, mod_bb1))
/* ... and if both are in the same block, it must come earlier: scan
   backwards from MOD_INSN to the block head looking for MOD_INSN1.  */
359 if (mod_bb1 == mod_bb)
362 mod_insn != PREV_INSN (BB_HEAD (mod_bb));
363 mod_insn = PREV_INSN (mod_insn))
364 if (mod_insn == mod_insn1)
367 if (mod_insn == PREV_INSN (BB_HEAD (mod_bb)))
371 /* Replace the source with the possible place of increment. */
372 set = single_set (mod_insn1);
375 if (!rtx_equal_p (SET_DEST (set), set_src))
378 set_src = find_reg_equal_equiv_note (mod_insn1);
380 set_src = SET_SRC (set);
/* No narrowing extension found: the variable iterates in its own mode.  */
384 desc->inner_mode = GET_MODE (desc->var);
/* The increment must have the shape (plus VAR CONST).  */
388 if (GET_CODE (set_src) != PLUS)
390 if (!rtx_equal_p (XEXP (set_src, 0), desc->var))
393 /* Set desc->stride. */
394 set_add = XEXP (set_src, 1);
395 if (CONSTANT_P (set_add))
396 desc->stride = set_add;
403 /* Tries to find initial value of VAR in INSN. This value must be invariant
404 wrto INVARIANT_REGS. If SET_INSN is not NULL, insn in that var is set is
405 placed here. INNER_MODE is mode in that induction variable VAR iterates. */
407 variable_initial_value (rtx insn, regset invariant_regs,
408 rtx var, rtx *set_insn, enum machine_mode inner_mode)
414 /* Go back through cfg. */
415 bb = BLOCK_FOR_INSN (insn);
/* Scan backwards within the block, dropping clobbered registers from
   INVARIANT_REGS as we cross stores, until we hit a modification of VAR.  */
418 for (; insn != BB_HEAD (bb); insn = PREV_INSN (insn))
421 note_stores (PATTERN (insn),
422 (void (*) (rtx, rtx, void *)) unmark_altered,
424 if (modified_between_p (var, PREV_INSN (insn), NEXT_INSN (insn)))
428 if (insn != BB_HEAD (bb))
430 /* We found place where var is set. */
435 set = single_set (insn);
438 set_dest = SET_DEST (set);
439 if (!rtx_equal_p (set_dest, var))
/* Use a REG_EQUAL/REG_EQUIV note if it is not an EXPR_LIST chain.  */
442 note = find_reg_equal_equiv_note (insn);
443 if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST)
444 val = XEXP (note, 0);
448 /* If we know that the initial value is indeed in range of
449 the inner mode, record the fact even in case the value itself
   is not invariant (continuation of this comment is missing here).  */
451 if ((GET_CODE (val) == SIGN_EXTEND
452 || GET_CODE (val) == ZERO_EXTEND)
453 && GET_MODE (XEXP (val, 0)) == inner_mode)
454 ret = gen_rtx_fmt_e (GET_CODE (val),
456 gen_rtx_fmt_ei (SUBREG,
460 if (!invariant_rtx_wrto_regs_p (val, invariant_regs))
/* Give up when the block has multiple predecessors or is entered from
   the function entry -- we cannot follow a unique path further back.  */
469 if (bb->pred->pred_next || bb->pred->src == ENTRY_BLOCK_PTR)
479 /* Returns list of definitions of initial value of VAR at edge E. INNER_MODE
480 is mode in that induction variable VAR really iterates.
   The result is an EXPR_LIST whose first element is VAR itself, followed by
   progressively earlier equivalent values found by variable_initial_value.  */
482 variable_initial_values (edge e, rtx var, enum machine_mode inner_mode)
485 regset invariant_regs;
486 regset_head invariant_regs_head;
/* Start with all registers considered invariant; variable_initial_value
   prunes this set as it walks backwards.  */
489 invariant_regs = INITIALIZE_REG_SET (invariant_regs_head);
490 for (i = 0; i < max_reg_num (); i++)
491 SET_REGNO_REG_SET (invariant_regs, i);
493 list = alloc_EXPR_LIST (0, copy_rtx (var), NULL);
495 if (e->src == ENTRY_BLOCK_PTR)
/* Repeatedly chase VAR's definition backwards, collecting each
   equivalent initial value found.  */
498 set_insn = BB_END (e->src);
500 && (var = variable_initial_value (set_insn, invariant_regs, var,
501 &set_insn, inner_mode)))
502 list = alloc_EXPR_LIST (0, copy_rtx (var), list);
504 FREE_REG_SET (invariant_regs);
508 /* Counts constant number of iterations of the loop described by DESC;
509 returns false if impossible.  On success stores the count in *NITER and
   whether the loop may run zero times in *MAY_BE_ZERO.  */
511 constant_iterations (struct loop_desc *desc, unsigned HOST_WIDE_INT *niter,
/* Test the exit condition at iteration 0 to see whether the loop can
   exit before the first iteration completes.  */
517 test = test_for_iteration (desc, 0);
518 if (test == const0_rtx)
521 *may_be_zero = false;
525 *may_be_zero = (test != const_true_rtx);
527 /* It would make a little sense to check every with every when we
528 know that all but the first alternative are simply registers. */
/* Try each alternative initial value against the first limit ...  */
529 for (ainit = desc->var_alts; ainit; ainit = XEXP (ainit, 1))
531 alim = XEXP (desc->lim_alts, 0);
532 if (!(expr = count_loop_iterations (desc, XEXP (ainit, 0), alim)))
534 if (GET_CODE (expr) == CONST_INT)
536 *niter = INTVAL (expr);
/* ... then the first initial value against each alternative limit.  */
540 for (alim = XEXP (desc->lim_alts, 1); alim; alim = XEXP (alim, 1))
542 ainit = XEXP (desc->var_alts, 0);
543 if (!(expr = count_loop_iterations (desc, ainit, XEXP (alim, 0))))
545 if (GET_CODE (expr) == CONST_INT)
547 *niter = INTVAL (expr);
555 /* Attempts to determine a number of iterations of a "strange" loop.
556 Its induction variable starts with value INIT, is compared by COND
557 with LIM. If POSTINCR, it is incremented after the test. It is incremented
558 by STRIDE each iteration, has mode MODE but iterates in INNER_MODE.
560 By "strange" we mean loops where induction variable increases in the wrong
561 direction wrto comparison, i.e. for (i = 6; i > 5; i++).  Such a loop
   terminates only by wrapping around the mode boundary.  */
563 count_strange_loop_iterations (rtx init, rtx lim, enum rtx_code cond,
564 int postincr, rtx stride, enum machine_mode mode,
565 enum machine_mode inner_mode)
567 rtx rqmt, n_to_wrap, before_wrap, after_wrap;
568 rtx mode_min, mode_max;
571 /* This could be handled, but it is not important enough to lose time with
   it -- narrower-iterating variables are simply rejected.  */
573 if (mode != inner_mode)
/* For a preincrement loop, the first compared value is INIT + STRIDE.  */
577 init = simplify_gen_binary (PLUS, mode, init, stride);
579 /* If we are able to prove that we don't pass the first test, we are
   done -- continuation of this comment is missing in this chunk.  */
581 rqmt = simplify_relational_operation (cond, mode, init, lim);
582 if (rqmt == const0_rtx)
585 /* And if we don't know we pass it, the things are too complicated for us. */
586 if (rqmt != const_true_rtx)
/* Compute the extreme representable values: signed range here ...  */
595 size = GET_MODE_BITSIZE (mode);
596 mode_min = GEN_INT (-((unsigned HOST_WIDEST_INT) 1 << (size - 1)));
597 mode_max = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1)) - 1);
/* ... and unsigned range here: [0, -1 in MODE].  */
606 mode_min = const0_rtx;
607 mode_max = simplify_gen_binary (MINUS, mode, const0_rtx, const1_rtx);
617 /* This iterates once, as init == lim. */
620 /* The behavior is undefined in signed cases. Never mind, we still
621 try to behave sanely. */
/* Increasing variable: count how many steps until it wraps past MODE_MAX,
   the value just before the wrap, and the first value after the wrap.  */
626 if (INTVAL (stride) <= 0)
628 n_to_wrap = simplify_gen_binary (MINUS, mode, mode_max, copy_rtx (init));
629 n_to_wrap = simplify_gen_binary (UDIV, mode, n_to_wrap, stride);
630 before_wrap = simplify_gen_binary (MULT, mode,
631 copy_rtx (n_to_wrap), stride);
632 before_wrap = simplify_gen_binary (PLUS, mode,
633 before_wrap, copy_rtx (init));
634 after_wrap = simplify_gen_binary (PLUS, mode,
635 before_wrap, stride);
/* If AFTER_WRAP did not fold to a constant, use the conservative
   approximation MODE_MIN + STRIDE - 1.  */
636 if (GET_CODE (after_wrap) != CONST_INT)
638 after_wrap = simplify_gen_binary (PLUS, mode, mode_min, stride);
639 after_wrap = simplify_gen_binary (MINUS, mode, after_wrap, const1_rtx);
/* Decreasing variable: mirror image of the case above, wrapping past
   MODE_MIN; negate STRIDE so the divisions below work on its magnitude.  */
647 if (INTVAL (stride) >= 0)
649 stride = simplify_gen_unary (NEG, mode, stride, mode);
650 n_to_wrap = simplify_gen_binary (MINUS, mode, copy_rtx (init), mode_min);
651 n_to_wrap = simplify_gen_binary (UDIV, mode, n_to_wrap, stride);
652 before_wrap = simplify_gen_binary (MULT, mode,
653 copy_rtx (n_to_wrap), stride);
654 before_wrap = simplify_gen_binary (MINUS, mode,
655 copy_rtx (init), before_wrap);
656 after_wrap = simplify_gen_binary (MINUS, mode,
657 before_wrap, stride);
658 if (GET_CODE (after_wrap) != CONST_INT)
660 after_wrap = simplify_gen_binary (MINUS, mode, mode_max, stride);
661 after_wrap = simplify_gen_binary (PLUS, mode, after_wrap, const1_rtx);
668 /* If this is const_true_rtx and we did not take a conservative approximation
669 of after_wrap above, we might iterate the calculation (but of course we
670 would have to take care about infinite cases). Ignore this for now. */
671 rqmt = simplify_relational_operation (cond, mode, after_wrap, lim);
672 if (rqmt != const0_rtx)
/* The loop exits right after the wrap: N_TO_WRAP steps plus one.  */
675 return simplify_gen_binary (PLUS, mode, n_to_wrap, const1_rtx);
678 /* Checks whether value of EXPR fits into range of MODE.  */
680 fits_in_mode_p (enum machine_mode mode, rtx expr)
682 unsigned HOST_WIDEST_INT val;
685 if (GET_CODE (expr) == CONST_INT)
/* Count the significant bits of the constant by shifting it out.  */
687 for (val = INTVAL (expr); val; val >>= 1)
690 return n_bits <= GET_MODE_BITSIZE (mode);
/* An extension from MODE trivially carries a value that fits in MODE.  */
693 if (GET_CODE (expr) == SIGN_EXTEND
694 || GET_CODE (expr) == ZERO_EXTEND)
695 return GET_MODE (XEXP (expr, 0)) == mode;
700 /* Return RTX expression representing number of iterations of loop as bounded
701 by test described by DESC (in the case loop really has multiple exit
702 edges, fewer iterations may happen in the practice).
704 Return NULL if it is unknown. Additionally the value may be invalid for
705 paradoxical loop (lets define paradoxical loops as loops whose test is
706 failing at -1th iteration, for instance "for (i=5;i<1;i++);").
708 These cases needs to be either cared by copying the loop test in the front
709 of loop or keeping the test in first iteration of loop.
711 When INIT/LIM are set, they are used instead of var/lim of DESC. */
713 count_loop_iterations (struct loop_desc *desc, rtx init, rtx lim)
715 enum rtx_code cond = desc->cond;
716 rtx stride = desc->stride;
717 rtx mod, exp, ainit, bound;
718 rtx overflow_check, mx, mxp;
719 enum machine_mode mode = GET_MODE (desc->var);
720 unsigned HOST_WIDEST_INT s, size, d;
722 /* Give up on floating point modes and friends. It can be possible to do
723 the job for constant loop bounds, but it is probably not worthwhile. */
724 if (!INTEGRAL_MODE_P (mode))
727 init = copy_rtx (init ? init : desc->var);
728 lim = copy_rtx (lim ? lim : desc->lim);
730 /* Ensure that we always handle the condition to stay inside loop. */
732 cond = reverse_condition (cond);
734 if (desc->inner_mode != mode)
736 /* We have a case when the variable in fact iterates in the narrower
737 mode. This has following consequences:
739 For induction variable itself, if !desc->postincr, it does not mean
740 anything too special, since we know the variable is already in range
741 of the inner mode when we compare it (so it is just needed to shorten
742 it into the mode before calculations are done, so that we don't risk
743 wrong results). More complicated case is when desc->postincr; then
744 the first two iterations are special (the first one because the value
745 may be out of range, the second one because after shortening it to the
746 range it may have absolutely any value), and we do not handle this in
747 unrolling. So if we aren't able to prove that the initial value is in
748 the range, we fail in this case.
750 Step is just moduled to fit into inner mode.
752 If lim is out of range, then either the loop is infinite (and then
753 we may unroll however we like to), or exits in the first iteration
754 (this is also ok, since we handle it specially for this case anyway).
755 So we may safely assume that it fits into the inner mode. */
/* Look for any alternative initial value provably in inner-mode range.  */
757 for (ainit = desc->var_alts; ainit; ainit = XEXP (ainit, 1))
758 if (fits_in_mode_p (desc->inner_mode, XEXP (ainit, 0)))
/* Shorten INIT into the inner mode, then extend it back.  */
766 init = simplify_gen_unary (desc->extend,
768 simplify_gen_subreg (desc->inner_mode,
775 stride = simplify_gen_subreg (desc->inner_mode, stride, mode, 0);
776 if (stride == const0_rtx)
780 /* Prepare condition to verify that we do not risk overflow. */
781 if (stride == const1_rtx
782 || stride == constm1_rtx
786 /* Overflow at NE conditions does not occur. EQ condition
787 is weird and is handled in count_strange_loop_iterations.
788 If stride is 1, overflow may occur only for <= and >= conditions,
789 and then they are infinite, so it does not bother us. */
790 overflow_check = const0_rtx;
/* MX is the most extreme value of the variable for which the test still
   holds; overflow occurs iff stepping from MX stays inside the test.  */
794 if (cond == LT || cond == LTU)
795 mx = simplify_gen_binary (MINUS, mode, lim, const1_rtx);
796 else if (cond == GT || cond == GTU)
797 mx = simplify_gen_binary (PLUS, mode, lim, const1_rtx);
800 if (mode != desc->inner_mode)
801 mxp = simplify_gen_subreg (desc->inner_mode, mx, mode, 0);
804 mxp = simplify_gen_binary (PLUS, desc->inner_mode, mxp, stride);
805 if (mode != desc->inner_mode)
806 mxp = simplify_gen_unary (desc->extend, mode, mxp, desc->inner_mode);
807 overflow_check = simplify_gen_relational (cond, SImode, mode, mx, mxp);
810 /* Compute absolute value of the difference of initial and final value. */
811 if (INTVAL (stride) > 0)
813 /* Handle strange tests specially. */
814 if (cond == EQ || cond == GE || cond == GT || cond == GEU
816 return count_strange_loop_iterations (init, lim, cond, desc->postincr,
817 stride, mode, desc->inner_mode);
818 exp = simplify_gen_binary (MINUS, mode, lim, init);
/* Negative stride: the mirror case; also normalize STRIDE to positive.  */
822 if (cond == EQ || cond == LE || cond == LT || cond == LEU
824 return count_strange_loop_iterations (init, lim, cond, desc->postincr,
825 stride, mode, desc->inner_mode);
826 exp = simplify_gen_binary (MINUS, mode, init, lim);
827 stride = simplify_gen_unary (NEG, mode, stride, mode);
830 /* If there is a risk of overflow (i.e. when we increment value satisfying
831 a condition, we may again obtain a value satisfying the condition),
   give up (rest of this comment is missing in this chunk).  */
833 if (overflow_check != const0_rtx)
836 /* Normalize difference so the value is always first examined
837 and later incremented. */
839 exp = simplify_gen_binary (MINUS, mode, exp, stride);
841 /* Determine delta caused by exit condition. */
845 /* NE tests are easy to handle, because we just perform simple
846 arithmetics modulo power of 2. Let's use the fact to compute the
847 number of iterations exactly. We are now in situation when we want to
848 solve an equation stride * i = c (mod size of inner_mode).
849 Let nsd (stride, size of mode) = d. If d does not divide c, the
850 loop is infinite. Otherwise, the number of iterations is
851 (inverse(s/d) * (c/d)) mod (size of mode/d). */
852 size = GET_MODE_BITSIZE (desc->inner_mode);
/* BOUND is the all-ones mask of the inner mode, built with a two-step
   shift to avoid shifting by the full word width.  */
861 bound = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1 ) << 1) - 1);
862 exp = simplify_gen_binary (UDIV, mode, exp, GEN_INT (d));
863 exp = simplify_gen_binary (MULT, mode,
864 exp, GEN_INT (inverse (s, size)));
865 exp = simplify_gen_binary (AND, mode, exp, bound);
877 exp = simplify_gen_binary (PLUS, mode, exp, const1_rtx);
883 if (cond != NE && stride != const1_rtx)
885 /* Number of iterations is now (EXP + STRIDE - 1 / STRIDE),
886 but we need to take care for overflows. */
888 mod = simplify_gen_binary (UMOD, mode, exp, stride);
890 /* This is dirty trick. When we can't compute number of iterations
891 to be constant, we simply ignore the possible overflow, as
892 runtime unroller always use power of 2 amounts and does not
893 care about possible lost bits. */
895 if (GET_CODE (mod) != CONST_INT)
897 rtx stridem1 = simplify_gen_binary (PLUS, mode, stride, constm1_rtx);
898 exp = simplify_gen_binary (PLUS, mode, exp, stridem1);
899 exp = simplify_gen_binary (UDIV, mode, exp, stride);
/* MOD is constant: divide, then round up only when there is a remainder.  */
903 exp = simplify_gen_binary (UDIV, mode, exp, stride);
904 if (mod != const0_rtx)
905 exp = simplify_gen_binary (PLUS, mode, exp, const1_rtx);
/* Dump the computed iteration count for debugging.  */
911 fprintf (rtl_dump_file, "; Number of iterations: ");
912 print_simple_rtl (rtl_dump_file, exp);
913 fprintf (rtl_dump_file, "\n");
919 /* Return simplified RTX expression representing the value of test
920 described of DESC at given iteration of loop.  */
923 test_for_iteration (struct loop_desc *desc, unsigned HOST_WIDE_INT iter)
925 enum rtx_code cond = desc->cond;
926 rtx exp = XEXP (desc->var_alts, 0);
929 /* Give up on floating point modes and friends. It can be possible to do
930 the job for constant loop bounds, but it is probably not worthwhile. */
931 if (!INTEGRAL_MODE_P (GET_MODE (desc->var)))
934 /* Ensure that we always handle the condition to stay inside loop. */
936 cond = reverse_condition (cond);
938 /* Compute the value of induction variable:  initial value plus
   ITER (adjusted for postincrement) times the stride.  */
939 addval = simplify_gen_binary (MULT, GET_MODE (desc->var),
941 gen_int_mode (desc->postincr
943 GET_MODE (desc->var)));
944 exp = simplify_gen_binary (PLUS, GET_MODE (desc->var), exp, addval);
945 /* Test at given condition. */
946 exp = simplify_gen_relational (cond, SImode,
947 GET_MODE (desc->var), exp, desc->lim);
/* Dump the simplified condition for debugging.  */
951 fprintf (rtl_dump_file, "; Conditional to continue loop at "
952 HOST_WIDE_INT_PRINT_UNSIGNED "th iteration: ", iter);
953 print_simple_rtl (rtl_dump_file, exp);
954 fprintf (rtl_dump_file, "\n");
960 /* Tests whether exit at EXIT_EDGE from LOOP is simple. Returns simple loop
961 description joined to it in in DESC. INVARIANT_REGS and SINGLE_SET_REGS
962 are results of blocks_{invariant,single_set}_regs over BODY. */
964 simple_loop_exit_p (struct loop *loop, edge exit_edge,
965 regset invariant_regs, rtx *single_set_regs,
966 struct loop_desc *desc)
968 basic_block mod_bb, exit_bb;
973 exit_bb = exit_edge->src;
/* Remember whether the exit is taken on the fallthru edge; used below to
   set DESC->neg.  */
975 fallthru_out = (exit_edge->flags & EDGE_FALLTHRU);
980 /* It must be tested (at least) once during any iteration. */
981 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb))
984 /* It must end in a simple conditional jump. */
985 if (!any_condjump_p (BB_END (exit_bb)))
992 desc->out_edge = exit_edge;
995 /* Condition must be a simple comparison in that one of operands
996 is register and the other one is invariant. */
997 if (!(condition = get_condition (BB_END (exit_bb), NULL, false)))
1000 if (!simple_condition_p (loop, condition, invariant_regs, desc))
1003 /* Var must be simply incremented or decremented in exactly one insn that
1004 is executed just once every iteration. */
1005 if (!(mod_bb = simple_increment (loop, single_set_regs, desc)))
1008 /* OK, it is simple loop. Now just fill in remaining info. */
/* Postincrement iff the exit test is not dominated by the increment.  */
1009 desc->postincr = !dominated_by_p (CDI_DOMINATORS, exit_bb, mod_bb);
1010 desc->neg = !fallthru_out;
1012 /* Find initial value of var and alternative values for lim. */
1013 e = loop_preheader_edge (loop);
1014 desc->var_alts = variable_initial_values (e, desc->var, desc->inner_mode);
1015 desc->lim_alts = variable_initial_values (e, desc->lim, desc->inner_mode);
1017 /* Number of iterations. */
1019 constant_iterations (desc, &desc->niter, &desc->may_be_zero);
1020 if (!desc->const_iter && !count_loop_iterations (desc, NULL, NULL))
1025 /* Tests whether LOOP is simple for loop. Returns simple loop description
   in DESC (continuation of this comment is missing in this chunk).  */
1028 simple_loop_p (struct loop *loop, struct loop_desc *desc)
1033 struct loop_desc act;
1035 regset invariant_regs;
1036 regset_head invariant_regs_head;
1037 rtx *single_set_regs;
1040 body = get_loop_body (loop);
/* Precompute invariant and single-set register info over the whole body;
   these feed simple_loop_exit_p for each candidate exit edge.  */
1042 invariant_regs = INITIALIZE_REG_SET (invariant_regs_head);
1043 single_set_regs = xmalloc (max_reg_num () * sizeof (rtx));
1045 blocks_invariant_registers (body, loop->num_nodes, invariant_regs);
1046 blocks_single_set_registers (body, loop->num_nodes, single_set_regs);
/* Examine every edge leaving the loop; keep the best simple-exit
   description found.  */
1049 for (i = 0; i < loop->num_nodes; i++)
1051 for (e = body[i]->succ; e; e = e->succ_next)
1052 if (!flow_bb_inside_loop_p (loop, e->dest)
1053 && simple_loop_exit_p (loop, e,
1054 invariant_regs, single_set_regs, &act))
1056 /* Prefer constant iterations; the less the better. */
1059 else if (!act.const_iter
1060 || (desc->const_iter && act.niter >= desc->niter))
/* Count blocks ending in a branch (more than one successor).  */
1065 if (body[i]->succ && body[i]->succ->succ_next)
1068 desc->n_branches = n_branches;
/* Dump the discovered loop description for debugging.  */
1070 if (rtl_dump_file && any)
1072 fprintf (rtl_dump_file, "; Simple loop %i\n", loop->num);
1074 fprintf (rtl_dump_file,
1075 "; does postincrement after loop exit condition\n");
1077 fprintf (rtl_dump_file, "; Induction variable:");
1078 print_simple_rtl (rtl_dump_file, desc->var);
1079 fputc ('\n', rtl_dump_file);
1081 fprintf (rtl_dump_file, "; Initial values:");
1082 print_simple_rtl (rtl_dump_file, desc->var_alts);
1083 fputc ('\n', rtl_dump_file);
1085 fprintf (rtl_dump_file, "; Stride:");
1086 print_simple_rtl (rtl_dump_file, desc->stride);
1087 fputc ('\n', rtl_dump_file);
1089 fprintf (rtl_dump_file, "; Compared with:");
1090 print_simple_rtl (rtl_dump_file, desc->lim);
1091 fputc ('\n', rtl_dump_file);
1093 fprintf (rtl_dump_file, "; Alternative values:");
1094 print_simple_rtl (rtl_dump_file, desc->lim_alts);
1095 fputc ('\n', rtl_dump_file);
1097 fprintf (rtl_dump_file, "; Exit condition:");
1099 fprintf (rtl_dump_file, "(negated)");
1100 fprintf (rtl_dump_file, "%s\n", GET_RTX_NAME (desc->cond));
1102 fprintf (rtl_dump_file, "; Number of branches:");
1103 fprintf (rtl_dump_file, "%d\n", desc->n_branches);
1105 fputc ('\n', rtl_dump_file);
1109 FREE_REG_SET (invariant_regs);
1110 free (single_set_regs);
1114 /* Structure representing edge of a graph.  Note this shadows the CFG
   "edge" type locally; real CFG edges are carried in the DATA field
   (see check_irred).  */
1118 int src, dest; /* Source and destination. */
1119 struct edge *pred_next, *succ_next;
1120 /* Next edge in predecessor and successor lists. */
1121 void *data; /* Data attached to the edge. */
1124 /* Structure representing vertex of a graph. */
1128 struct edge *pred, *succ;
1129 /* Lists of predecessors and successors. */
1130 int component; /* Number of dfs restarts before reaching the
   vertex (continuation of this comment is missing here).  */
1132 int post; /* Postorder number. */
1135 /* Structure representing a graph. */
1139 int n_vertices; /* Number of vertices. */
1140 struct vertex *vertices;
1144 /* Dumps graph G into F.  Isolated vertices (no predecessors and no
   successors) are skipped.  */
1146 extern void dump_graph (FILE *, struct graph *);
1147 void dump_graph (FILE *f, struct graph *g)
1152 for (i = 0; i < g->n_vertices; i++)
1154 if (!g->vertices[i].pred
1155 && !g->vertices[i].succ)
/* Print the vertex, its SCC number, then its in- and out-edges.  */
1158 fprintf (f, "%d (%d)\t<-", i, g->vertices[i].component);
1159 for (e = g->vertices[i].pred; e; e = e->pred_next)
1160 fprintf (f, " %d", e->src);
1163 fprintf (f, "\t->");
1164 for (e = g->vertices[i].succ; e; e = e->succ_next)
1165 fprintf (f, " %d", e->dest);
1170 /* Creates a new graph with N_VERTICES vertices.  Vertices are
   zero-initialized via xcalloc; caller releases with free_graph.  */
1172 static struct graph *
1173 new_graph (int n_vertices)
1175 struct graph *g = xmalloc (sizeof (struct graph));
1177 g->n_vertices = n_vertices;
1178 g->vertices = xcalloc (n_vertices, sizeof (struct vertex));
1183 /* Adds an edge from F to T to graph G, with DATA attached.  The edge is
   pushed onto the head of both the pred list of T and the succ list of F.  */
1186 add_edge (struct graph *g, int f, int t, void *data)
1188 struct edge *e = xmalloc (sizeof (struct edge));
1194 e->pred_next = g->vertices[t].pred;
1195 g->vertices[t].pred = e;
1197 e->succ_next = g->vertices[f].succ;
1198 g->vertices[f].succ = e;
1201 /* Runs dfs search over vertices of G, from NQ vertices in queue QS.
1202 The vertices in postorder are stored into QT. If FORWARD is false,
1203 backward dfs is run. */
1206 dfs (struct graph *g, int *qs, int nq, int *qt, bool forward)
1208 int i, tick = 0, v, comp = 0, top;
/* Explicit DFS stack of edge pointers, one slot per vertex.  */
1210 struct edge **stack = xmalloc (sizeof (struct edge *) * g->n_vertices);
1212 for (i = 0; i < g->n_vertices; i++)
1214 g->vertices[i].component = -1;
1215 g->vertices[i].post = -1;
/* Direction-abstracting accessors so one loop body serves both the
   forward and the backward walk.  */
1218 #define FST_EDGE(V) (forward ? g->vertices[(V)].succ : g->vertices[(V)].pred)
1219 #define NEXT_EDGE(E) (forward ? (E)->succ_next : (E)->pred_next)
1220 #define EDGE_SRC(E) (forward ? (E)->src : (E)->dest)
1221 #define EDGE_DEST(E) (forward ? (E)->dest : (E)->src)
1223 for (i = 0; i < nq; i++)
/* Skip roots already visited by an earlier restart.  */
1226 if (g->vertices[v].post != -1)
/* Each DFS restart from an unvisited root opens a new component.  */
1229 g->vertices[v].component = comp++;
/* Skip edges leading to already-visited vertices.  */
1235 while (e && g->vertices[EDGE_DEST (e)].component != -1)
/* All out-edges exhausted: assign the postorder number.  */
1242 g->vertices[v].post = tick++;
1256 g->vertices[v].component = comp - 1;
1263 /* Marks the edge E in graph G irreducible if it connects two vertices in the
   same strongly connected component (comment continuation missing here).  */
1267 check_irred (struct graph *g, struct edge *e)
/* E->data carries the real CFG edge this graph edge represents.  */
1269 edge real = e->data;
1271 /* All edges should lead from a component with higher number to the
1272 one with lower one. */
1273 if (g->vertices[e->src].component < g->vertices[e->dest].component)
/* Different components: not part of a cycle, nothing to mark.  */
1276 if (g->vertices[e->src].component != g->vertices[e->dest].component)
1279 real->flags |= EDGE_IRREDUCIBLE_LOOP;
1280 if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
1281 real->src->flags |= BB_IRREDUCIBLE_LOOP;
1284 /* Runs CALLBACK for all edges in G.  */
1287 for_each_edge (struct graph *g,
1288 void (callback) (struct graph *, struct edge *))
/* Visit each vertex's successor list; every edge appears there once.  */
1293 for (i = 0; i < g->n_vertices; i++)
1294 for (e = g->vertices[i].succ; e; e = e->succ_next)
1298 /* Releases the memory occupied by G: every edge (via the succ lists,
   where each edge appears exactly once), then the vertex array and G.  */
1301 free_graph (struct graph *g)
/* N caches the next pointer before the current edge is freed.  */
1306 for (i = 0; i < g->n_vertices; i++)
1307 for (e = g->vertices[i].succ; e; e = n)
1316 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
1317 throw away all latch edges and mark blocks inside any remaining cycle.
1318 Everything is a bit complicated due to fact we do not want to do this
1319 for parts of cycles that only "pass" through some loop -- i.e. for
1320 each cycle, we want to mark blocks that belong directly to innermost
1321 loop containing the whole cycle.
1323 LOOPS is the loop tree. */
/* Graph vertex numbering: loops live above the basic-block range;
   blocks are offset by one (room for the entry block at index 0).  */
1325 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
1326 #define BB_REPR(BB) ((BB)->index + 1)
1329 mark_irreducible_loops (struct loops *loops)
1335 int *queue1 = xmalloc ((last_basic_block + loops->num) * sizeof (int));
1336 int *queue2 = xmalloc ((last_basic_block + loops->num) * sizeof (int));
1340 /* Reset the flags. */
1341 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1343 act->flags &= ~BB_IRREDUCIBLE_LOOP;
1344 for (e = act->succ; e; e = e->succ_next)
1345 e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1348 /* Create the edge lists. */
1349 g = new_graph (last_basic_block + loops->num);
1351 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1352 for (e = act->succ; e; e = e->succ_next)
1354 /* Ignore edges to exit. */
1355 if (e->dest == EXIT_BLOCK_PTR)
1358 /* And latch edges. */
1359 if (e->dest->loop_father->header == e->dest
1360 && e->dest->loop_father->latch == act)
1363 /* Edges inside a single loop should be left where they are. Edges
1364 to subloop headers should lead to representative of the subloop,
1365 but from the same place.
1367 Edges exiting loops should lead from representative
1368 of the son of nearest common ancestor of the loops in that
   it leaves (continuation of this comment is missing here).  */
1371 src = BB_REPR (act);
1372 dest = BB_REPR (e->dest);
1374 if (e->dest->loop_father->header == e->dest)
1375 dest = LOOP_REPR (e->dest->loop_father);
1377 if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
1379 depth = find_common_loop (act->loop_father,
1380 e->dest->loop_father)->depth + 1;
1381 if (depth == act->loop_father->depth)
1382 cloop = act->loop_father;
1384 cloop = act->loop_father->pred[depth];
1386 src = LOOP_REPR (cloop);
1389 add_edge (g, src, dest, e);
1392 /* Find the strongly connected components. Use the algorithm of Tarjan --
1393 first determine the postorder dfs numbering in reversed graph, then
1394 run the dfs on the original graph in the order given by decreasing
1395 numbers assigned by the previous pass. */
1397 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1399 queue1[nq++] = BB_REPR (act);
1401 for (i = 1; i < (int) loops->num; i++)
1402 if (loops->parray[i])
1403 queue1[nq++] = LOOP_REPR (loops->parray[i]);
1404 dfs (g, queue1, nq, queue2, false)
1405 for (i = 0; i < nq; i++)
1406 queue1[i] = queue2[nq - i - 1];
1407 dfs (g, queue1, nq, NULL, true);
1409 /* Mark the irreducible loops. */
1410 for_each_edge (g, check_irred);
1416 loops->state |= LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS;
1419 /* Counts number of insns inside LOOP.  */
1421 num_loop_insns (struct loop *loop)
1423 basic_block *bbs, bb;
1424 unsigned i, ninsns = 0;
1427 bbs = get_loop_body (loop);
/* Walk every basic block of the loop body and tally its insns.  Note the
   loop below stops at BB_END, so the last insn of each block is handled
   outside the visible lines.  */
1428 for (i = 0; i < loop->num_nodes; i++)
1432 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
1441 /* Counts number of insns executed on average per iteration LOOP.  */
1443 average_num_loop_insns (struct loop *loop)
1445 basic_block *bbs, bb;
1446 unsigned i, binsns, ninsns, ratio;
1450 bbs = get_loop_body (loop);
1451 for (i = 0; i < loop->num_nodes; i++)
/* BINSNS counts the insns of this block; it is then weighted by the
   block's execution frequency relative to the loop header.  */
1456 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
1460 ratio = loop->header->frequency == 0
1462 : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
1463 ninsns += binsns * ratio;
/* Scale the weighted total back down from BB_FREQ_MAX units.  */
1467 ninsns /= BB_FREQ_MAX;
1469 ninsns = 1; /* To avoid division by zero. */
1474 /* Returns expected number of LOOP iterations.
1475 Compute upper bound on number of iterations in case they do not fit integer
1476 to help loop peeling heuristics. Use exact counts if at all possible. */
1478 expected_loop_iterations (const struct loop *loop)
/* Profile counts available: use the exact execution counts.  */
1482 if (loop->header->count)
1484 gcov_type count_in, count_latch, expected;
/* Split header predecessor counts into the latch (back edge) count and
   the total entry count.  */
1489 for (e = loop->header->pred; e; e = e->pred_next)
1490 if (e->src == loop->latch)
1491 count_latch = e->count;
1493 count_in += e->count;
/* Iterations ~ (latch + entry) / entry, rounded up.  */
1498 expected = (count_latch + count_in - 1) / count_in;
1500 /* Avoid overflows. */
1501 return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
/* No profile: fall back to static edge-frequency estimates.  */
1505 int freq_in, freq_latch;
1510 for (e = loop->header->pred; e; e = e->pred_next)
1511 if (e->src == loop->latch)
1512 freq_latch = EDGE_FREQUENCY (e);
1514 freq_in += EDGE_FREQUENCY (e);
1519 return (freq_latch + freq_in - 1) / freq_in;