From 2d49f8240d0215badf73b1984dade82a824c2ea0 Mon Sep 17 00:00:00 2001 From: rakdver Date: Thu, 18 Mar 2004 16:42:34 +0000 Subject: [PATCH] * doloop.c: Removed. * loop-doloop.c: New file. * Makefile.in (doloop.o): Remove. (loop-doloop.o): New. * cfgloop.h (get_loop_level, doloop_optimize_loops): Declare. * cfgloopanal.c (get_loop_level): New function. * loop-iv.c (iv_number_of_iterations): Handle case when loop is leaved immediatelly. * loop.c (strength_reduce): Do not call doloop optimization. * loop.h (LOOP_BCT): Removed. * passes.c (rest_of_handle_loop_optimize): Do not use LOOP_BCT. (rest_of_handle_loop2): Call doloop_optimize_loops. (rest_of_compilation): Test for optimizations moved to rest_of_handle_loop2. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@79625 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 17 ++ gcc/Makefile.in | 8 +- gcc/cfgloop.h | 2 + gcc/cfgloopanal.c | 17 ++ gcc/doloop.c | 883 ------------------------------------------------------ gcc/loop-doloop.c | 553 ++++++++++++++++++++++++++++++++++ gcc/loop-iv.c | 4 + gcc/loop.c | 17 -- gcc/loop.h | 5 +- gcc/passes.c | 18 +- 10 files changed, 612 insertions(+), 912 deletions(-) delete mode 100644 gcc/doloop.c create mode 100644 gcc/loop-doloop.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6058c6c..94a75a4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2004-03-18 Zdenek Dvorak + + * doloop.c: Removed. + * loop-doloop.c: New file. + * Makefile.in (doloop.o): Remove. + (loop-doloop.o): New. + * cfgloop.h (get_loop_level, doloop_optimize_loops): Declare. + * cfgloopanal.c (get_loop_level): New function. + * loop-iv.c (iv_number_of_iterations): Handle case when loop + is leaved immediatelly. + * loop.c (strength_reduce): Do not call doloop optimization. + * loop.h (LOOP_BCT): Removed. + * passes.c (rest_of_handle_loop_optimize): Do not use LOOP_BCT. + (rest_of_handle_loop2): Call doloop_optimize_loops. + (rest_of_compilation): Test for optimizations moved to + rest_of_handle_loop2. + 2004-03-17 Fariborz Jahanian * config/rs6000/rs6000.c (rs6000_stack_info): correct reg_size diff --git a/gcc/Makefile.in b/gcc/Makefile.in index e815cf3..84103f2 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -849,7 +849,7 @@ OBJS-common = \ cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \ cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \ cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \ - dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o \ + dbxout.o debug.o df.o diagnostic.o dojump.o dominance.o loop-doloop.o \ dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \ expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \ genrtl.o ggc-common.o global.o graph.o gtype-desc.o \ @@ -1702,9 +1702,9 @@ loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h $(L insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \ real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h cfgloop.h \ toplev.h varray.h except.h cselib.h $(OPTABS_H) $(TM_P_H) $(GGC_H) -doloop.o : doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h \ - $(LOOP_H) $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) toplev.h \ - cfgloop.h +loop-doloop.o : loop-doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) flags.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) \ + toplev.h cfgloop.h output.h $(PARAMS_H) unroll.o : unroll.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) insn-config.h \ function.h $(INTEGRATE_H) $(REGS_H) $(RECOG_H) flags.h $(EXPR_H) $(LOOP_H) toplev.h \ hard-reg-set.h varray.h $(BASIC_BLOCK_H) $(TM_P_H) $(PREDICT_H) $(PARAMS_H) \ diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 8a24c15..b233d26 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -247,6 +247,7 @@ extern bool flow_bb_inside_loop_p (const struct loop *, const basic_block); extern struct loop * find_common_loop (struct loop *, struct loop *); extern int num_loop_insns (struct loop *); extern int average_num_loop_insns (struct loop *); +extern unsigned get_loop_level (const struct loop *); /* Loops & cfg manipulation. */ extern basic_block *get_loop_body (const struct loop *); @@ -417,3 +418,4 @@ enum }; extern void unroll_and_peel_loops (struct loops *, int); +extern void doloop_optimize_loops (struct loops *); diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c index 9b3ffa0..63d6f41 100644 --- a/gcc/cfgloopanal.c +++ b/gcc/cfgloopanal.c @@ -457,3 +457,20 @@ expected_loop_iterations (const struct loop *loop) return (freq_latch + freq_in - 1) / freq_in; } } + +/* Returns the maximum level of nesting of subloops of LOOP. */ + +unsigned +get_loop_level (const struct loop *loop) +{ + const struct loop *ploop; + unsigned mx = 0, l; + + for (ploop = loop->inner; ploop; ploop = ploop->next) + { + l = get_loop_level (ploop); + if (l >= mx) + mx = l + 1; + } + return mx; +} diff --git a/gcc/doloop.c b/gcc/doloop.c deleted file mode 100644 index 6d4840a..0000000 --- a/gcc/doloop.c +++ /dev/null @@ -1,883 +0,0 @@ -/* Perform doloop optimizations - Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 - Free Software Foundation, Inc. - Contributed by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "rtl.h" -#include "flags.h" -#include "expr.h" -#include "loop.h" -#include "hard-reg-set.h" -#include "basic-block.h" -#include "toplev.h" -#include "tm_p.h" -#include "cfgloop.h" - - -/* This module is used to modify loops with a determinable number of - iterations to use special low-overhead looping instructions. - - It first validates whether the loop is well behaved and has a - determinable number of iterations (either at compile or run-time). - It then modifies the loop to use a low-overhead looping pattern as - follows: - - 1. A pseudo register is allocated as the loop iteration counter. - - 2. The number of loop iterations is calculated and is stored - in the loop counter. - - 3. At the end of the loop, the jump insn is replaced by the - doloop_end pattern. The compare must remain because it might be - used elsewhere. If the loop-variable or condition register are - used elsewhere, they will be eliminated by flow. - - 4. An optional doloop_begin pattern is inserted at the top of the - loop. -*/ - - -#ifdef HAVE_doloop_end - -static rtx doloop_condition_get (rtx); -static unsigned HOST_WIDE_INT doloop_iterations_max (const struct loop_info *, - enum machine_mode, int); -static int doloop_valid_p (const struct loop *, rtx); -static int doloop_modify (const struct loop *, rtx, rtx, rtx, rtx, rtx); -static int doloop_modify_runtime (const struct loop *, rtx, rtx, rtx, - enum machine_mode, rtx); - - -/* Return the loop termination condition for PATTERN or zero - if it is not a decrement and branch jump insn. */ -static rtx -doloop_condition_get (rtx pattern) -{ - rtx cmp; - rtx inc; - rtx reg; - rtx condition; - - /* The canonical doloop pattern we expect is: - - (parallel [(set (pc) (if_then_else (condition) - (label_ref (label)) - (pc))) - (set (reg) (plus (reg) (const_int -1))) - (additional clobbers and uses)]) - - Some machines (IA-64) make the decrement conditional on - the condition as well, so we don't bother verifying the - actual decrement. In summary, the branch must be the - first entry of the parallel (also required by jump.c), - and the second entry of the parallel must be a set of - the loop counter register. */ - - if (GET_CODE (pattern) != PARALLEL) - return 0; - - cmp = XVECEXP (pattern, 0, 0); - inc = XVECEXP (pattern, 0, 1); - - /* Check for (set (reg) (something)). */ - if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc))) - return 0; - - /* Extract loop counter register. */ - reg = SET_DEST (inc); - - /* Check for (set (pc) (if_then_else (condition) - (label_ref (label)) - (pc))). */ - if (GET_CODE (cmp) != SET - || SET_DEST (cmp) != pc_rtx - || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE - || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF - || XEXP (SET_SRC (cmp), 2) != pc_rtx) - return 0; - - /* Extract loop termination condition. */ - condition = XEXP (SET_SRC (cmp), 0); - - if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE) - || GET_CODE (XEXP (condition, 1)) != CONST_INT) - return 0; - - if (XEXP (condition, 0) == reg) - return condition; - - if (GET_CODE (XEXP (condition, 0)) == PLUS - && XEXP (XEXP (condition, 0), 0) == reg) - return condition; - - /* ??? If a machine uses a funny comparison, we could return a - canonicalised form here. */ - - return 0; -} - - -/* Return an estimate of the maximum number of loop iterations for the - loop specified by LOOP or zero if the loop is not normal. - MODE is the mode of the iteration count and NONNEG is nonzero if - the iteration count has been proved to be non-negative. */ -static unsigned HOST_WIDE_INT -doloop_iterations_max (const struct loop_info *loop_info, - enum machine_mode mode, int nonneg) -{ - unsigned HOST_WIDE_INT n_iterations_max; - enum rtx_code code; - rtx min_value; - rtx max_value; - HOST_WIDE_INT abs_inc; - int neg_inc; - - neg_inc = 0; - abs_inc = INTVAL (loop_info->increment); - if (abs_inc < 0) - { - abs_inc = -abs_inc; - neg_inc = 1; - } - - if (neg_inc) - { - code = swap_condition (loop_info->comparison_code); - min_value = loop_info->final_equiv_value; - max_value = loop_info->initial_equiv_value; - } - else - { - code = loop_info->comparison_code; - min_value = loop_info->initial_equiv_value; - max_value = loop_info->final_equiv_value; - } - - /* Since the loop has a VTOP, we know that the initial test will be - true and thus the value of max_value should be greater than the - value of min_value. Thus the difference should always be positive - and the code must be LT, LE, LTU, LEU, or NE. Otherwise the loop is - not normal, e.g., `for (i = 0; i < 10; i--)'. */ - switch (code) - { - case LTU: - case LEU: - { - unsigned HOST_WIDE_INT umax; - unsigned HOST_WIDE_INT umin; - - if (GET_CODE (min_value) == CONST_INT) - umin = INTVAL (min_value); - else - umin = 0; - - if (GET_CODE (max_value) == CONST_INT) - umax = INTVAL (max_value); - else - umax = ((unsigned) 2 << (GET_MODE_BITSIZE (mode) - 1)) - 1; - - n_iterations_max = umax - umin; - break; - } - - case LT: - case LE: - { - HOST_WIDE_INT smax; - HOST_WIDE_INT smin; - - if (GET_CODE (min_value) == CONST_INT) - smin = INTVAL (min_value); - else - smin = -((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)); - - if (GET_CODE (max_value) == CONST_INT) - smax = INTVAL (max_value); - else - smax = ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)) - 1; - - n_iterations_max = smax - smin; - break; - } - - case NE: - if (GET_CODE (min_value) == CONST_INT - && GET_CODE (max_value) == CONST_INT) - n_iterations_max = INTVAL (max_value) - INTVAL (min_value); - else - /* We need to conservatively assume that we might have the maximum - number of iterations without any additional knowledge. */ - n_iterations_max = ((unsigned) 2 << (GET_MODE_BITSIZE (mode) - 1)) - 1; - break; - - default: - return 0; - } - - n_iterations_max /= abs_inc; - - /* If we know that the iteration count is non-negative then adjust - n_iterations_max if it is so large that it appears negative. */ - if (nonneg - && n_iterations_max > ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1))) - n_iterations_max = ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)) - 1; - - return n_iterations_max; -} - - -/* Return nonzero if the loop specified by LOOP is suitable for - the use of special low-overhead looping instructions. */ -static int -doloop_valid_p (const struct loop *loop, rtx jump_insn) -{ - const struct loop_info *loop_info = LOOP_INFO (loop); - - /* The loop must have a conditional jump at the end. */ - if (! any_condjump_p (jump_insn) - || ! onlyjump_p (jump_insn)) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Invalid jump at loop end.\n"); - return 0; - } - - /* Give up if a loop has been completely unrolled. */ - if (loop_info->n_iterations == loop_info->unroll_number) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Loop completely unrolled.\n"); - return 0; - } - - /* The loop must have a single exit target. A break or return - statement within a loop will generate multiple loop exits. - Another example of a loop that currently generates multiple exit - targets is for (i = 0; i < (foo ? 8 : 4); i++) { }. */ - if (loop_info->has_multiple_exit_targets || loop->exit_count) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Loop has multiple exit targets.\n"); - return 0; - } - - /* An indirect jump may jump out of the loop. */ - if (loop_info->has_indirect_jump) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Indirect jump in function.\n"); - return 0; - } - - /* A called function may clobber any special registers required for - low-overhead looping. */ - if (loop_info->has_call) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Function call in loop.\n"); - return 0; - } - - /* Some targets (eg, PPC) use the count register for branch on table - instructions. ??? This should be a target specific check. */ - if (loop_info->has_tablejump) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Computed branch in the loop.\n"); - return 0; - } - - if (! loop_info->increment) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Could not determine iteration info.\n"); - return 0; - } - - if (GET_CODE (loop_info->increment) != CONST_INT) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Increment not an integer constant.\n"); - return 0; - } - - /* There is no guarantee that a NE loop will terminate if the - absolute increment is not unity. ??? We could compute this - condition at run-time and have an additional jump around the loop - to ensure an infinite loop. */ - if (loop_info->comparison_code == NE - && !loop_info->preconditioned - && INTVAL (loop_info->increment) != -1 - && INTVAL (loop_info->increment) != 1) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: NE loop with non-unity increment.\n"); - return 0; - } - - /* Check for loops that may not terminate under special conditions. */ - if (! loop_info->n_iterations - && ((loop_info->comparison_code == LEU - && INTVAL (loop_info->increment) > 0) - || (loop_info->comparison_code == GEU - && INTVAL (loop_info->increment) < 0) - || (loop_info->comparison_code == LTU - && INTVAL (loop_info->increment) > 1) - || (loop_info->comparison_code == GTU - && INTVAL (loop_info->increment) < -1))) - { - /* If the comparison is LEU and the comparison value is UINT_MAX - then the loop will not terminate. Similarly, if the - comparison code is GEU and the comparison value is 0, the - loop will not terminate. - - If the absolute increment is not 1, the loop can be infinite - even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) - - Note that with LE and GE, the loop behavior is undefined - (C++ standard section 5 clause 5) if an overflow occurs, say - between INT_MAX and INT_MAX + 1. We thus don't have to worry - about these two cases. - - ??? We could compute these conditions at run-time and have a - additional jump around the loop to ensure an infinite loop. - However, it is very unlikely that this is the intended - behavior of the loop and checking for these rare boundary - conditions would pessimize all other code. - - If the loop is executed only a few times an extra check to - restart the loop could use up most of the benefits of using a - count register loop. Note however, that normally, this - restart branch would never execute, so it could be predicted - well by the CPU. We should generate the pessimistic code by - default, and have an option, e.g. -funsafe-loops that would - enable count-register loops in this case. */ - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Possible infinite iteration case ignored.\n"); - } - - return 1; -} - - -/* Modify the loop to use the low-overhead looping insn where LOOP - describes the loop, ITERATIONS is an RTX containing the desired - number of loop iterations, ITERATIONS_MAX is a CONST_INT specifying - the maximum number of loop iterations, and DOLOOP_INSN is the - low-overhead looping insn to emit at the end of the loop. This - returns nonzero if it was successful. */ -static int -doloop_modify (const struct loop *loop, rtx iterations, rtx iterations_max, - rtx doloop_seq, rtx start_label, rtx condition) -{ - rtx counter_reg; - rtx count; - rtx sequence; - rtx jump_insn; - int nonneg = 0; - int decrement_count; - - jump_insn = prev_nonnote_insn (loop->end); - - if (loop_dump_stream) - { - fprintf (loop_dump_stream, "Doloop: Inserting doloop pattern ("); - if (GET_CODE (iterations) == CONST_INT) - fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, - INTVAL (iterations)); - else - fputs ("runtime", loop_dump_stream); - fputs (" iterations).", loop_dump_stream); - } - - /* Emit the label that will delimit the top of the loop. - This has to be done before the delete_insn call below, to prevent - delete_insn from deleting too much. */ - emit_label_after (start_label, loop->top ? loop->top : loop->start); - LABEL_NUSES (start_label)++; - - /* Discard original jump to continue loop. The original compare - result may still be live, so it cannot be discarded explicitly. */ - delete_related_insns (jump_insn); - - counter_reg = XEXP (condition, 0); - if (GET_CODE (counter_reg) == PLUS) - counter_reg = XEXP (counter_reg, 0); - - start_sequence (); - - count = iterations; - decrement_count = 0; - switch (GET_CODE (condition)) - { - case NE: - /* Currently only NE tests against zero and one are supported. */ - if (XEXP (condition, 1) == const0_rtx) - decrement_count = 1; - else if (XEXP (condition, 1) != const1_rtx) - abort (); - break; - - case GE: - /* Currently only GE tests against zero are supported. */ - if (XEXP (condition, 1) != const0_rtx) - abort (); - - /* The iteration count needs decrementing for a GE test. */ - decrement_count = 1; - - /* Determine if the iteration counter will be non-negative. - Note that the maximum value loaded is iterations_max - 1. */ - if ((unsigned HOST_WIDE_INT) INTVAL (iterations_max) - <= ((unsigned) 1 << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1))) - nonneg = 1; - break; - - /* Abort if an invalid doloop pattern has been generated. */ - default: - abort (); - } - - if (decrement_count) - { - if (GET_CODE (count) == CONST_INT) - count = GEN_INT (INTVAL (count) - 1); - else - count = expand_simple_binop (GET_MODE (counter_reg), MINUS, - count, const1_rtx, - 0, 0, OPTAB_LIB_WIDEN); - } - - /* Insert initialization of the count register into the loop header. */ - convert_move (counter_reg, count, 1); - sequence = get_insns (); - end_sequence (); - emit_insn_before (sequence, loop->start); - - /* Some targets (eg, C4x) need to initialize special looping - registers. */ -#ifdef HAVE_doloop_begin - { - rtx init; - - init = gen_doloop_begin (counter_reg, - GET_CODE (iterations) == CONST_INT - ? iterations : const0_rtx, iterations_max, - GEN_INT (loop->level)); - if (init) - { - start_sequence (); - emit_insn (init); - sequence = get_insns (); - end_sequence (); - emit_insn_after (sequence, loop->start); - } - } -#endif - - /* Insert the new low-overhead looping insn. */ - emit_jump_insn_before (doloop_seq, loop->end); - jump_insn = prev_nonnote_insn (loop->end); - JUMP_LABEL (jump_insn) = start_label; - - /* Add a REG_NONNEG note if the actual or estimated maximum number - of iterations is non-negative. */ - if (nonneg) - { - REG_NOTES (jump_insn) - = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn)); - } - return 1; -} - - -/* Handle the more complex case, where the bounds are not known at - compile time. In this case we generate a run_time calculation of - the number of iterations. We rely on the existence of a run-time - guard to ensure that the loop executes at least once, i.e., - initial_value obeys the loop comparison condition. If a guard is - not present, we emit one. The loop to modify is described by LOOP. - ITERATIONS_MAX is a CONST_INT specifying the estimated maximum - number of loop iterations. DOLOOP_INSN is the low-overhead looping - insn to insert. Returns nonzero if loop successfully modified. */ -static int -doloop_modify_runtime (const struct loop *loop, rtx iterations_max, - rtx doloop_seq, rtx start_label, - enum machine_mode mode, rtx condition) -{ - const struct loop_info *loop_info = LOOP_INFO (loop); - HOST_WIDE_INT abs_inc; - HOST_WIDE_INT abs_loop_inc; - int neg_inc; - rtx diff; - rtx sequence; - rtx iterations; - rtx initial_value; - rtx final_value; - rtx increment; - int unsigned_p; - enum rtx_code comparison_code; - - increment = loop_info->increment; - initial_value = loop_info->initial_value; - final_value = loop_info->final_value; - - neg_inc = 0; - abs_inc = INTVAL (increment); - if (abs_inc < 0) - { - abs_inc = -abs_inc; - neg_inc = 1; - } - - comparison_code = loop_info->comparison_code; - unsigned_p = (comparison_code == LTU - || comparison_code == LEU - || comparison_code == GTU - || comparison_code == GEU - || comparison_code == NE); - - /* The number of iterations (prior to any loop unrolling) is given by: - - n = (abs (final - initial) + abs_inc - 1) / abs_inc. - - However, it is possible for the summation to overflow, and a - safer method is: - - n = abs (final - initial) / abs_inc; - n += (abs (final - initial) % abs_inc) != 0; - - But when abs_inc is a power of two, the summation won't overflow - except in cases where the loop never terminates. So we don't - need to use this more costly calculation. - - If the loop has been unrolled, the full calculation is - - t1 = abs_inc * unroll_number; increment per loop - n = (abs (final - initial) + abs_inc - 1) / t1; full loops - n += (abs (final - initial) + abs_inc - 1) % t1) >= abs_inc; - partial loop - which works out to be equivalent to - - n = (abs (final - initial) + t1 - 1) / t1; - - In the case where the loop was preconditioned, a few iterations - may have been executed earlier; but 'initial' was adjusted as they - were executed, so we don't need anything special for that case here. - As above, when t1 is a power of two we don't need to worry about - overflow. - - The division and modulo operations can be avoided by requiring - that the increment is a power of 2 (precondition_loop_p enforces - this requirement). Nevertheless, the RTX_COSTS should be checked - to see if a fast divmod is available. */ - - start_sequence (); - /* abs (final - initial) */ - diff = expand_simple_binop (mode, MINUS, - copy_rtx (neg_inc ? initial_value : final_value), - copy_rtx (neg_inc ? final_value : initial_value), - NULL_RTX, unsigned_p, OPTAB_LIB_WIDEN); - - /* Some code transformations can result in code akin to - - tmp = i + 1; - ... - goto scan_start; - top: - tmp = tmp + 1; - scan_start: - i = tmp; - if (i < n) goto top; - - We'll have already detected this form of loop in scan_loop, - and set loop->top and loop->scan_start appropriately. - - In this situation, we skip the increment the first time through - the loop, which results in an incorrect estimate of the number - of iterations. Adjust the difference to compensate. */ - /* ??? Logically, it would seem this belongs in loop_iterations. - However, this causes regressions e.g. on x86 execute/20011008-3.c, - so I do not believe we've properly characterized the exact nature - of the problem. In the meantime, this fixes execute/20011126-2.c - on ia64 and some Ada front end miscompilation on ppc. */ - - if (loop->scan_start) - { - rtx iteration_var = loop_info->iteration_var; - struct loop_ivs *ivs = LOOP_IVS (loop); - struct iv_class *bl; - - if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT) - bl = REG_IV_CLASS (ivs, REGNO (iteration_var)); - else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT) - { - struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var)); - bl = REG_IV_CLASS (ivs, REGNO (v->src_reg)); - } - else - /* Iteration var must be an induction variable to get here. */ - abort (); - - if (INSN_UID (bl->biv->insn) < max_uid_for_loop - && INSN_LUID (bl->biv->insn) < INSN_LUID (loop->scan_start)) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Basic induction var skips initial incr.\n"); - - diff = expand_simple_binop (mode, PLUS, diff, GEN_INT (abs_inc), - diff, unsigned_p, OPTAB_LIB_WIDEN); - } - } - - abs_loop_inc = abs_inc * loop_info->unroll_number; - if (abs_loop_inc != 1) - { - int shift_count; - - shift_count = exact_log2 (abs_loop_inc); - if (shift_count < 0) - abort (); - - /* (abs (final - initial) + abs_inc * unroll_number - 1) */ - diff = expand_simple_binop (GET_MODE (diff), PLUS, - diff, GEN_INT (abs_loop_inc - 1), - diff, 1, OPTAB_LIB_WIDEN); - - /* (abs (final - initial) + abs_inc * unroll_number - 1) - / (abs_inc * unroll_number) */ - diff = expand_simple_binop (GET_MODE (diff), LSHIFTRT, - diff, GEN_INT (shift_count), - diff, 1, OPTAB_LIB_WIDEN); - } - iterations = diff; - - /* If there is a NOTE_INSN_LOOP_VTOP, we have a `for' or `while' - style loop, with a loop exit test at the start. Thus, we can - assume that the loop condition was true when the loop was - entered. - - `do-while' loops require special treatment since the exit test is - not executed before the start of the loop. We need to determine - if the loop will terminate after the first pass and to limit the - iteration count to one if necessary. */ - if (! loop->vtop) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, "Doloop: Do-while loop.\n"); - - /* A `do-while' loop must iterate at least once. For code like - i = initial; do { ... } while (++i < final); - we will calculate a bogus iteration count if initial > final. - So detect this and set the iteration count to 1. - Note that if the loop has been unrolled, then the loop body - is guaranteed to execute at least once. Also, when the - comparison is NE, our calculated count will be OK. */ - if (loop_info->unroll_number == 1 && comparison_code != NE) - { - rtx label; - - /* Emit insns to test if the loop will immediately - terminate and to set the iteration count to 1 if true. */ - label = gen_label_rtx(); - emit_cmp_and_jump_insns (copy_rtx (initial_value), - copy_rtx (loop_info->comparison_value), - comparison_code, NULL_RTX, mode, 0, - label); - JUMP_LABEL (get_last_insn ()) = label; - LABEL_NUSES (label)++; - emit_move_insn (iterations, const1_rtx); - emit_label (label); - } - } - - sequence = get_insns (); - end_sequence (); - emit_insn_before (sequence, loop->start); - - return doloop_modify (loop, iterations, iterations_max, doloop_seq, - start_label, condition); -} - - -/* This is the main entry point. Process loop described by LOOP - validating that the loop is suitable for conversion to use a low - overhead looping instruction, replacing the jump insn where - suitable. We distinguish between loops with compile-time bounds - and those with run-time bounds. Information from LOOP is used to - compute the number of iterations and to determine whether the loop - is a candidate for this optimization. Returns nonzero if loop - successfully modified. */ -int -doloop_optimize (const struct loop *loop) -{ - struct loop_info *loop_info = LOOP_INFO (loop); - rtx initial_value; - rtx final_value; - rtx increment; - rtx jump_insn; - enum machine_mode mode; - unsigned HOST_WIDE_INT n_iterations; - unsigned HOST_WIDE_INT n_iterations_max; - rtx doloop_seq, doloop_pat, doloop_reg; - rtx iterations; - rtx iterations_max; - rtx start_label; - rtx condition; - - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Processing loop %d, enclosed levels %d.\n", - loop->num, loop->level); - - jump_insn = prev_nonnote_insn (loop->end); - - /* Check that loop is a candidate for a low-overhead looping insn. */ - if (! doloop_valid_p (loop, jump_insn)) - return 0; - - /* Determine if the loop can be safely, and profitably, - preconditioned. While we don't precondition the loop in a loop - unrolling sense, this test ensures that the loop is well behaved - and that the increment is a constant integer. */ - if (! precondition_loop_p (loop, &initial_value, &final_value, - &increment, &mode)) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Cannot precondition loop.\n"); - return 0; - } - - /* Determine or estimate the maximum number of loop iterations. */ - n_iterations = loop_info->n_iterations; - if (n_iterations) - { - /* This is the simple case where the initial and final loop - values are constants. */ - n_iterations_max = n_iterations; - } - else - { - int nonneg = find_reg_note (jump_insn, REG_NONNEG, 0) != 0; - - /* This is the harder case where the initial and final loop - values may not be constants. */ - n_iterations_max = doloop_iterations_max (loop_info, mode, nonneg); - - if (! n_iterations_max) - { - /* We have something like `for (i = 0; i < 10; i--)'. */ - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Not normal loop.\n"); - return 0; - } - } - - /* Account for loop unrolling in the iteration count. This will - have no effect if loop_iterations could not determine the number - of iterations. */ - n_iterations /= loop_info->unroll_number; - n_iterations_max /= loop_info->unroll_number; - - if (n_iterations && n_iterations < 3) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Too few iterations (%ld) to be profitable.\n", - (long int) n_iterations); - return 0; - } - - iterations = GEN_INT (n_iterations); - iterations_max = GEN_INT (n_iterations_max); - - /* Generate looping insn. If the pattern FAILs then give up trying - to modify the loop since there is some aspect the back-end does - not like. */ - start_label = gen_label_rtx (); - doloop_reg = gen_reg_rtx (mode); - doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, - GEN_INT (loop->level), start_label); - if (! doloop_seq && mode != word_mode) - { - PUT_MODE (doloop_reg, word_mode); - doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, - GEN_INT (loop->level), start_label); - } - if (! doloop_seq) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Target unwilling to use doloop pattern!\n"); - return 0; - } - - /* If multiple instructions were created, the last must be the - jump instruction. Also, a raw define_insn may yield a plain - pattern. */ - doloop_pat = doloop_seq; - if (INSN_P (doloop_pat)) - { - while (NEXT_INSN (doloop_pat) != NULL_RTX) - doloop_pat = NEXT_INSN (doloop_pat); - if (GET_CODE (doloop_pat) == JUMP_INSN) - doloop_pat = PATTERN (doloop_pat); - else - doloop_pat = NULL_RTX; - } - - if (! doloop_pat - || ! (condition = doloop_condition_get (doloop_pat))) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "Doloop: Unrecognizable doloop pattern!\n"); - return 0; - } - - if (n_iterations != 0) - /* Handle the simpler case, where we know the iteration count at - compile time. */ - return doloop_modify (loop, iterations, iterations_max, doloop_seq, - start_label, condition); - else - /* Handle the harder case, where we must add additional runtime tests. */ - return doloop_modify_runtime (loop, iterations_max, doloop_seq, - start_label, mode, condition); -} - -#endif /* HAVE_doloop_end */ diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c new file mode 100644 index 0000000..b75b9dc --- /dev/null +++ b/gcc/loop-doloop.c @@ -0,0 +1,553 @@ +/* Perform doloop optimizations + Copyright (C) 2004 Free Software Foundation, Inc. + Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "flags.h" +#include "expr.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "toplev.h" +#include "tm_p.h" +#include "cfgloop.h" +#include "output.h" +#include "params.h" + +/* This module is used to modify loops with a determinable number of + iterations to use special low-overhead looping instructions. + + It first validates whether the loop is well behaved and has a + determinable number of iterations (either at compile or run-time). + It then modifies the loop to use a low-overhead looping pattern as + follows: + + 1. A pseudo register is allocated as the loop iteration counter. + + 2. The number of loop iterations is calculated and is stored + in the loop counter. + + 3. At the end of the loop, the jump insn is replaced by the + doloop_end pattern. The compare must remain because it might be + used elsewhere. If the loop-variable or condition register are + used elsewhere, they will be eliminated by flow. + + 4. An optional doloop_begin pattern is inserted at the top of the + loop. + + TODO The optimization should only performed when either the biv used for exit + condition is unused at all except for the exit test, or if we do not have to + change its value, since otherwise we have to add a new induction variable, + which usually will not pay up (unless the cost of the doloop pattern is + somehow extremely lower than the cost of compare & jump, or unless the bct + register cannot be used for anything else but doloop -- ??? detect these + cases). */ + +#ifdef HAVE_doloop_end + +/* Return the loop termination condition for PATTERN or zero + if it is not a decrement and branch jump insn. */ + +static rtx +doloop_condition_get (rtx pattern) +{ + rtx cmp; + rtx inc; + rtx reg; + rtx condition; + + /* The canonical doloop pattern we expect is: + + (parallel [(set (pc) (if_then_else (condition) + (label_ref (label)) + (pc))) + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) + + Some machines (IA-64) make the decrement conditional on + the condition as well, so we don't bother verifying the + actual decrement. In summary, the branch must be the + first entry of the parallel (also required by jump.c), + and the second entry of the parallel must be a set of + the loop counter register. */ + + if (GET_CODE (pattern) != PARALLEL) + return 0; + + cmp = XVECEXP (pattern, 0, 0); + inc = XVECEXP (pattern, 0, 1); + + /* Check for (set (reg) (something)). */ + if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc))) + return 0; + + /* Extract loop counter register. */ + reg = SET_DEST (inc); + + /* Check for (set (pc) (if_then_else (condition) + (label_ref (label)) + (pc))). */ + if (GET_CODE (cmp) != SET + || SET_DEST (cmp) != pc_rtx + || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE + || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF + || XEXP (SET_SRC (cmp), 2) != pc_rtx) + return 0; + + /* Extract loop termination condition. */ + condition = XEXP (SET_SRC (cmp), 0); + + if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE) + || GET_CODE (XEXP (condition, 1)) != CONST_INT) + return 0; + + if (XEXP (condition, 0) == reg) + return condition; + + if (GET_CODE (XEXP (condition, 0)) == PLUS + && XEXP (XEXP (condition, 0), 0) == reg) + return condition; + + /* ??? If a machine uses a funny comparison, we could return a + canonicalised form here. */ + + return 0; +} + +/* Return nonzero if the loop specified by LOOP is suitable for + the use of special low-overhead looping instructions. DESC + describes the number of iterations of the loop. */ + +static bool +doloop_valid_p (struct loop *loop, struct niter_desc *desc) +{ + basic_block *body = get_loop_body (loop), bb; + rtx insn; + unsigned i; + + /* Check for loops that may not terminate under special conditions. */ + if (!desc->simple_p + || desc->assumptions + || desc->infinite) + { + /* There are some cases that would require a special attention. + For example if the comparison is LEU and the comparison value + is UINT_MAX then the loop will not terminate. Similarly, if the + comparison code is GEU and the comparison value is 0, the + loop will not terminate. + + If the absolute increment is not 1, the loop can be infinite + even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) + + ??? We could compute these conditions at run-time and have a + additional jump around the loop to ensure an infinite loop. + However, it is very unlikely that this is the intended + behavior of the loop and checking for these rare boundary + conditions would pessimize all other code. + + If the loop is executed only a few times an extra check to + restart the loop could use up most of the benefits of using a + count register loop. Note however, that normally, this + restart branch would never execute, so it could be predicted + well by the CPU. We should generate the pessimistic code by + default, and have an option, e.g. -funsafe-loops that would + enable count-register loops in this case. */ + if (dump_file) + fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); + return false; + } + + for (i = 0; i < loop->num_nodes; i++) + { + bb = body[i]; + + for (insn = BB_HEAD (bb); + insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + /* A called function may clobber any special registers required for + low-overhead looping. */ + if (GET_CODE (insn) == CALL_INSN) + { + if (dump_file) + fprintf (dump_file, "Doloop: Function call in loop.\n"); + return false; + } + + /* Some targets (eg, PPC) use the count register for branch on table + instructions. ??? This should be a target specific check. */ + if (GET_CODE (insn) == JUMP_INSN + && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + || GET_CODE (PATTERN (insn)) == ADDR_VEC)) + { + if (dump_file) + fprintf (dump_file, "Doloop: Computed branch in the loop.\n"); + return false; + } + } + } + free (body); + + return true; +} + +/* Adds test of COND jumping to DEST to the end of BB. */ + +static void +add_test (rtx cond, basic_block bb, basic_block dest) +{ + rtx seq, jump, label; + enum machine_mode mode; + rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); + enum rtx_code code = GET_CODE (cond); + + mode = GET_MODE (XEXP (cond, 0)); + if (mode == VOIDmode) + mode = GET_MODE (XEXP (cond, 1)); + + start_sequence (); + op0 = force_operand (op0, NULL_RTX); + op1 = force_operand (op1, NULL_RTX); + label = block_label (dest); + do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label); + + jump = get_last_insn (); + JUMP_LABEL (jump) = label; + + /* The jump is supposed to handle an unlikely special case. */ + REG_NOTES (jump) + = gen_rtx_EXPR_LIST (REG_BR_PROB, + GEN_INT (0), REG_NOTES (jump)); + + LABEL_NUSES (label)++; + + seq = get_insns (); + end_sequence (); + emit_insn_after (seq, BB_END (bb)); +} + +/* Modify the loop to use the low-overhead looping insn where LOOP + describes the loop, DESC describes the number of iterations of the + loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the + end of the loop. CONDITION is the condition separated from the + DOLOOP_SEQ. */ + +static void +doloop_modify (struct loop *loop, struct niter_desc *desc, + rtx doloop_seq, rtx condition) +{ + rtx counter_reg; + rtx count, tmp, noloop = NULL_RTX; + rtx sequence; + rtx jump_insn; + rtx jump_label; + int nonneg = 0, irr; + bool increment_count; + basic_block loop_end = desc->out_edge->src; + + jump_insn = BB_END (loop_end); + + if (dump_file) + { + fprintf (dump_file, "Doloop: Inserting doloop pattern ("); + if (desc->const_iter) + fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); + else + fputs ("runtime", dump_file); + fputs (" iterations).\n", dump_file); + } + + /* Discard original jump to continue loop. The original compare + result may still be live, so it cannot be discarded explicitly. */ + delete_insn (jump_insn); + + counter_reg = XEXP (condition, 0); + if (GET_CODE (counter_reg) == PLUS) + counter_reg = XEXP (counter_reg, 0); + + count = desc->niter_expr; + increment_count = false; + switch (GET_CODE (condition)) + { + case NE: + /* Currently only NE tests against zero and one are supported. */ + if (XEXP (condition, 1) == const1_rtx) + { + increment_count = true; + noloop = const1_rtx; + } + else if (XEXP (condition, 1) == const0_rtx) + noloop = const0_rtx; + else + abort (); + break; + + case GE: + /* Currently only GE tests against zero are supported. */ + if (XEXP (condition, 1) != const0_rtx) + abort (); + + noloop = constm1_rtx; + + /* The iteration count does not need incrementing for a GE test. */ + increment_count = false; + + /* Determine if the iteration counter will be non-negative. + Note that the maximum value loaded is iterations_max - 1. */ + if (desc->niter_max + <= ((unsigned HOST_WIDEST_INT) 1 + << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1))) + nonneg = 1; + break; + + /* Abort if an invalid doloop pattern has been generated. */ + default: + abort (); + } + + if (increment_count) + count = simplify_gen_binary (PLUS, desc->mode, count, const1_rtx); + + /* Insert initialization of the count register into the loop header. */ + start_sequence (); + tmp = force_operand (count, counter_reg); + convert_move (counter_reg, tmp, 1); + sequence = get_insns (); + end_sequence (); + emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); + + if (desc->noloop_assumptions) + { + rtx ass = desc->noloop_assumptions; + basic_block preheader = loop_preheader_edge (loop)->src; + basic_block set_zero + = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); + basic_block new_preheader + = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); + basic_block bb; + edge te; + gcov_type cnt; + + /* Expand the condition testing the assumptions and if it does not pass, + reset the count register to 0. */ + add_test (XEXP (ass, 0), preheader, set_zero); + preheader->succ->flags &= ~EDGE_FALLTHRU; + cnt = preheader->succ->count; + preheader->succ->probability = 0; + preheader->succ->count = 0; + irr = preheader->succ->flags & EDGE_IRREDUCIBLE_LOOP; + te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr); + te->probability = REG_BR_PROB_BASE; + te->count = cnt; + set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); + + set_zero->count = 0; + set_zero->frequency = 0; + + for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1)) + { + bb = loop_split_edge_with (te, NULL_RTX); + te = bb->succ; + add_test (XEXP (ass, 0), bb, set_zero); + make_edge (bb, set_zero, irr); + } + + start_sequence (); + convert_move (counter_reg, noloop, 0); + sequence = get_insns (); + end_sequence (); + emit_insn_after (sequence, BB_END (set_zero)); + } + + /* Some targets (eg, C4x) need to initialize special looping + registers. */ +#ifdef HAVE_doloop_begin + { + rtx init; + unsigned level = get_loop_level (loop) + 1; + init = gen_doloop_begin (counter_reg, + desc->const_iter ? desc->niter_expr : const0_rtx, + desc->niter_max, + GEN_INT (level)); + if (init) + { + start_sequence (); + emit_insn (init); + sequence = get_insns (); + end_sequence (); + emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); + } + } +#endif + + /* Insert the new low-overhead looping insn. */ + emit_jump_insn_after (doloop_seq, BB_END (loop_end)); + jump_insn = BB_END (loop_end); + jump_label = block_label (desc->in_edge->dest); + JUMP_LABEL (jump_insn) = jump_label; + LABEL_NUSES (jump_label)++; + + /* Ensure the right fallthru edge is marked, for case we have reversed + the condition. */ + desc->in_edge->flags &= ~EDGE_FALLTHRU; + desc->out_edge->flags |= EDGE_FALLTHRU; + + /* Add a REG_NONNEG note if the actual or estimated maximum number + of iterations is non-negative. */ + if (nonneg) + { + REG_NOTES (jump_insn) + = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn)); + } +} + +/* Process loop described by LOOP validating that the loop is suitable for + conversion to use a low overhead looping instruction, replacing the jump + insn where suitable. Returns true if the loop was successfully + modified. */ + +static bool +doloop_optimize (struct loop *loop) +{ + enum machine_mode mode; + rtx doloop_seq, doloop_pat, doloop_reg; + rtx iterations; + rtx iterations_max; + rtx start_label; + rtx condition; + unsigned level, est_niter; + struct niter_desc *desc; + + if (dump_file) + fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); + + iv_analysis_loop_init (loop); + + /* Find the simple exit of a LOOP. */ + desc = get_simple_loop_desc (loop); + + /* Check that loop is a candidate for a low-overhead looping insn. */ + if (!doloop_valid_p (loop, desc)) + { + if (dump_file) + fprintf (dump_file, + "Doloop: The loop is not suitable.\n"); + return false; + } + mode = desc->mode; + + est_niter = 3; + if (desc->const_iter) + est_niter = desc->niter; + /* If the estimate on number of iterations is reliable (comes from profile + feedback), use it. Do not use it normally, since the expected number + of iterations of an unrolled loop is 2. */ + if (loop->header->count) + est_niter = expected_loop_iterations (loop); + + if (est_niter < 3) + { + if (dump_file) + fprintf (dump_file, + "Doloop: Too few iterations (%u) to be profitable.\n", + est_niter); + return false; + } + + iterations = desc->const_iter ? desc->niter_expr : const0_rtx; + iterations_max = GEN_INT (desc->niter_max); + level = get_loop_level (loop) + 1; + + /* Generate looping insn. If the pattern FAILs then give up trying + to modify the loop since there is some aspect the back-end does + not like. */ + start_label = block_label (desc->in_edge->dest); + doloop_reg = gen_reg_rtx (mode); + doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, + GEN_INT (level), start_label); + if (! doloop_seq && mode != word_mode) + { + PUT_MODE (doloop_reg, word_mode); + doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, + GEN_INT (level), start_label); + } + if (! doloop_seq) + { + if (dump_file) + fprintf (dump_file, + "Doloop: Target unwilling to use doloop pattern!\n"); + return false; + } + + /* If multiple instructions were created, the last must be the + jump instruction. Also, a raw define_insn may yield a plain + pattern. */ + doloop_pat = doloop_seq; + if (INSN_P (doloop_pat)) + { + while (NEXT_INSN (doloop_pat) != NULL_RTX) + doloop_pat = NEXT_INSN (doloop_pat); + if (GET_CODE (doloop_pat) == JUMP_INSN) + doloop_pat = PATTERN (doloop_pat); + else + doloop_pat = NULL_RTX; + } + + if (! doloop_pat + || ! (condition = doloop_condition_get (doloop_pat))) + { + if (dump_file) + fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); + return false; + } + + doloop_modify (loop, desc, doloop_seq, condition); + return true; +} + +/* This is the main entry point. Process all LOOPS using doloop_optimize. */ + +void +doloop_optimize_loops (struct loops *loops) +{ + unsigned i; + struct loop *loop; + + for (i = 1; i < loops->num; i++) + { + loop = loops->parray[i]; + if (!loop) + continue; + + doloop_optimize (loop); + } + + iv_analysis_done (); + +#ifdef ENABLE_CHECKING + verify_dominators (CDI_DOMINATORS); + verify_loop_structure (loops); +#endif +} +#endif /* HAVE_doloop_end */ + diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c index 97cfdf7..face41d 100644 --- a/gcc/loop-iv.c +++ b/gcc/loop-iv.c @@ -2281,6 +2281,10 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, simplify_using_initial_values (loop, IOR, &desc->infinite); simplify_using_initial_values (loop, NIL, &desc->niter_expr); + if (desc->noloop_assumptions + && XEXP (desc->noloop_assumptions, 0) == const_true_rtx) + goto zero_iter; + if (GET_CODE (desc->niter_expr) == CONST_INT) { unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr); diff --git a/gcc/loop.c b/gcc/loop.c index 5d458cb..1b488fe 100644 --- a/gcc/loop.c +++ b/gcc/loop.c @@ -5352,23 +5352,6 @@ strength_reduce (struct loop *loop, int flags) && unrolled_insn_copies <= insn_count)) unroll_loop (loop, insn_count, 1); -#ifdef HAVE_doloop_end - if (HAVE_doloop_end && (flags & LOOP_BCT) && flag_branch_on_count_reg) - doloop_optimize (loop); -#endif /* HAVE_doloop_end */ - - /* In case number of iterations is known, drop branch prediction note - in the branch. Do that only in second loop pass, as loop unrolling - may change the number of iterations performed. */ - if (flags & LOOP_BCT) - { - unsigned HOST_WIDE_INT n - = loop_info->n_iterations / loop_info->unroll_number; - if (n > 1) - predict_insn (prev_nonnote_insn (loop->end), PRED_LOOP_ITERATIONS, - REG_BR_PROB_BASE - REG_BR_PROB_BASE / n); - } - if (loop_dump_stream) fprintf (loop_dump_stream, "\n"); diff --git a/gcc/loop.h b/gcc/loop.h index 2a7f3ec..bd88bb8 100644 --- a/gcc/loop.h +++ b/gcc/loop.h @@ -26,9 +26,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA /* Flags passed to loop_optimize. */ #define LOOP_UNROLL 1 -#define LOOP_BCT 2 -#define LOOP_PREFETCH 4 -#define LOOP_AUTO_UNROLL 8 +#define LOOP_PREFETCH 2 +#define LOOP_AUTO_UNROLL 4 /* Get the loop info pointer of a loop. */ #define LOOP_INFO(LOOP) ((struct loop_info *) (LOOP)->aux) diff --git a/gcc/passes.c b/gcc/passes.c index c730949..24a2d23 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -1456,7 +1456,7 @@ rest_of_handle_loop_optimize (tree decl, rtx insns) reg_scan (insns, max_reg_num (), 1); } cleanup_barriers (); - loop_optimize (insns, dump_file, do_unroll | LOOP_BCT | do_prefetch); + loop_optimize (insns, dump_file, do_unroll | do_prefetch); /* Loop can create trivially dead instructions. */ delete_trivially_dead_insns (insns, max_reg_num ()); @@ -1476,6 +1476,12 @@ rest_of_handle_loop2 (tree decl, rtx insns) struct loops *loops; basic_block bb; + if (!flag_unswitch_loops + && !flag_peel_loops + && !flag_unroll_loops + && !flag_branch_on_count_reg) + return; + timevar_push (TV_LOOP); open_dump_file (DFI_loop2, decl); if (dump_file) @@ -1498,6 +1504,11 @@ rest_of_handle_loop2 (tree decl, rtx insns) (flag_unroll_loops ? UAP_UNROLL : 0) | (flag_unroll_all_loops ? UAP_UNROLL_ALL : 0)); +#ifdef HAVE_doloop_end + if (flag_branch_on_count_reg && HAVE_doloop_end) + doloop_optimize_loops (loops); +#endif /* HAVE_doloop_end */ + loop_optimizer_finalize (loops, dump_file); } @@ -1776,10 +1787,7 @@ rest_of_compilation (tree decl) if (flag_tracer) rest_of_handle_tracer (decl, insns); - if (optimize > 0 - && (flag_unswitch_loops - || flag_peel_loops - || flag_unroll_loops)) + if (optimize > 0) rest_of_handle_loop2 (decl, insns); if (flag_web) -- 2.7.4