From f9cce2dcaaf2f076df995c819b410d07d8636c04 Mon Sep 17 00:00:00 2001 From: rakdver Date: Tue, 17 Feb 2004 16:41:44 +0000 Subject: [PATCH] * loop-iv.c: New file. * Makefile.in (loop-iv.o): New. * basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros. * cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order, num_loop_branches): New functions. * cfgloop.h (get_loop_body_in_dom_order, num_loop_branches, iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value, find_simple_exit, iv_number_of_iterations, iv_analysis_done, get_simple_loop_desc, free_simple_loop_desc): Declare. (simple_loop_desc): New inline function. (struct rtx_iv, struct niter_desc): New. * cfgloopmanip.c (loopify): Specify semantics more precisely. * expr.c (force_operand): Handle subregs of expressions created by loop unroller. * loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move parts of the initialization to toplev.c * loop-unroll.c (loop_exit_at_end_p): New. (unroll_and_peel_loops): Call iv_analysis_done. (decide_peel_once_rolling, decide_peel_completely, decide_unroll_stupid, decide_unroll_constant_iterations, decide_unroll_runtime_iterations, decide_peel_simple, peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations, unroll_loop_runtime_iterations): Use new simple loop analysis. * loop-unswitch.c (compare_and_jump_seq): New. (may_unswitch_on_p): Renamed to ... (may_unswitch_on): Use new iv analysis. (reversed_condition): Export. (unswitch_single_loop, unswitch_loop): Use new iv analysis. * predict.c (estimate_probability): Use new simple loop analysis. * rtl.h (get_mode_bounds, reversed_condition,compare_and_jump_seq, canon_condition, simplify_using_condition): Declare. * stor-layout.c (get_mode_bounds): New. * toplev.c (rest_of_handle_loop2): Some parts of initialization/finalization moved here from loop-init.c. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@77951 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 37 + gcc/Makefile.in | 4 +- gcc/basic-block.h | 11 + gcc/cfgloop.c | 77 ++ gcc/cfgloop.h | 110 +++ gcc/cfgloopmanip.c | 14 +- gcc/expr.c | 14 + gcc/loop-init.c | 26 +- gcc/loop-iv.c | 2465 +++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/loop-unroll.c | 356 +++++--- gcc/loop-unswitch.c | 230 +++-- gcc/predict.c | 9 +- gcc/rtl.h | 11 + gcc/stor-layout.c | 23 + gcc/toplev.c | 11 + 15 files changed, 3174 insertions(+), 224 deletions(-) create mode 100644 gcc/loop-iv.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 795460c..de24deb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,40 @@ +2004-02-17 Zdenek Dvorak + + * loop-iv.c: New file. + * Makefile.in (loop-iv.o): New. + * basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros. + * cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order, + num_loop_branches): New functions. + * cfgloop.h (get_loop_body_in_dom_order, num_loop_branches, + iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value, + find_simple_exit, iv_number_of_iterations, iv_analysis_done, + get_simple_loop_desc, free_simple_loop_desc): Declare. + (simple_loop_desc): New inline function. + (struct rtx_iv, struct niter_desc): New. + * cfgloopmanip.c (loopify): Specify semantics more precisely. + * expr.c (force_operand): Handle subregs of expressions created by + loop unroller. + * loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move + parts of the initialization to toplev.c + * loop-unroll.c (loop_exit_at_end_p): New. + (unroll_and_peel_loops): Call iv_analysis_done. + (decide_peel_once_rolling, decide_peel_completely, + decide_unroll_stupid, decide_unroll_constant_iterations, + decide_unroll_runtime_iterations, decide_peel_simple, + peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations, + unroll_loop_runtime_iterations): Use new simple loop analysis. + * loop-unswitch.c (compare_and_jump_seq): New. + (may_unswitch_on_p): Renamed to ... + (may_unswitch_on): Use new iv analysis. + (reversed_condition): Export. + (unswitch_single_loop, unswitch_loop): Use new iv analysis. + * predict.c (estimate_probability): Use new simple loop analysis. + * rtl.h (get_mode_bounds, reversed_condition,compare_and_jump_seq, + canon_condition, simplify_using_condition): Declare. + * stor-layout.c (get_mode_bounds): New. + * toplev.c (rest_of_handle_loop2): Some parts of + initialization/finalization moved here from loop-init.c. + 2004-02-17 Kazu Hirata * config/h8300/h8300.h (FIXED_REGISTERS): Add the soft frame diff --git a/gcc/Makefile.in b/gcc/Makefile.in index d30c4fa..06a70f2 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -848,7 +848,7 @@ OBJS-common = \ cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \ cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \ dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o \ - dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o \ + dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \ expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \ genrtl.o ggc-common.o global.o graph.o gtype-desc.o \ haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \ @@ -1719,6 +1719,8 @@ cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \ $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h flags.h cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H) +loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \ + $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H) cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h output.h coretypes.h $(TM_H) loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 240edfd..3f1775d 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -288,6 +288,17 @@ extern varray_type basic_block_info; #define FOR_EACH_BB_REVERSE(BB) \ FOR_BB_BETWEEN (BB, EXIT_BLOCK_PTR->prev_bb, ENTRY_BLOCK_PTR, prev_bb) +/* For iterating over insns in basic block. */ +#define FOR_BB_INSNS(BB, INSN) \ + for ((INSN) = BB_HEAD (BB); \ + (INSN) != NEXT_INSN (BB_END (BB)); \ + (INSN) = NEXT_INSN (INSN)) + +#define FOR_BB_INSNS_REVERSE(BB, INSN) \ + for ((INSN) = BB_END (BB); \ + (INSN) != PREV_INSN (BB_HEAD (BB)); \ + (INSN) = PREV_INSN (INSN)) + /* Cycles through _all_ basic blocks, even the fake ones (entry and exit block). */ diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index 6fa8b17..2be4b7c 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -959,6 +959,62 @@ get_loop_body (const struct loop *loop) return tovisit; } +/* Fills dominance descendants inside LOOP of the basic block BB into + array TOVISIT from index *TV. */ + +static void +fill_sons_in_loop (const struct loop *loop, basic_block bb, + basic_block *tovisit, int *tv) +{ + basic_block son, postpone = NULL; + + tovisit[(*tv)++] = bb; + for (son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + { + if (!flow_bb_inside_loop_p (loop, son)) + continue; + + if (dominated_by_p (CDI_DOMINATORS, loop->latch, son)) + { + postpone = son; + continue; + } + fill_sons_in_loop (loop, son, tovisit, tv); + } + + if (postpone) + fill_sons_in_loop (loop, postpone, tovisit, tv); +} + +/* Gets body of a LOOP (that must be different from the outermost loop) + sorted by dominance relation. Additionally, if a basic block s dominates + the latch, then only blocks dominated by s are be after it. */ + +basic_block * +get_loop_body_in_dom_order (const struct loop *loop) +{ + basic_block *tovisit; + int tv; + + if (!loop->num_nodes) + abort (); + + tovisit = xcalloc (loop->num_nodes, sizeof (basic_block)); + + if (loop->latch == EXIT_BLOCK_PTR) + abort (); + + tv = 0; + fill_sons_in_loop (loop, loop->header, tovisit, &tv); + + if (tv != (int) loop->num_nodes) + abort (); + + return tovisit; +} + /* Gets exit edges of a LOOP, returning their number in N_EDGES. */ edge * get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges) @@ -988,6 +1044,27 @@ get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges) return edges; } +/* Counts the number of conditional branches inside LOOP. */ + +unsigned +num_loop_branches (const struct loop *loop) +{ + unsigned i, n; + basic_block * body; + + if (loop->latch == EXIT_BLOCK_PTR) + abort (); + + body = get_loop_body (loop); + n = 0; + for (i = 0; i < loop->num_nodes; i++) + if (body[i]->succ && body[i]->succ->succ_next) + n++; + free (body); + + return n; +} + /* Adds basic block BB to LOOP. */ void add_bb_to_loop (basic_block bb, struct loop *loop) diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 4d8c67d..58184b5 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -278,7 +278,9 @@ extern int average_num_loop_insns (struct loop *); /* Loops & cfg manipulation. */ extern basic_block *get_loop_body (const struct loop *); +extern basic_block *get_loop_body_in_dom_order (const struct loop *); extern edge *get_loop_exit_edges (const struct loop *, unsigned *); +extern unsigned num_loop_branches (const struct loop *); extern edge loop_preheader_edge (const struct loop *); extern edge loop_latch_edge (const struct loop *); @@ -322,6 +324,114 @@ extern void unloop (struct loops *, struct loop *); extern bool remove_path (struct loops *, edge); extern edge split_loop_bb (basic_block, rtx); +/* Induction variable analysis. */ + +/* The description of induction variable. The things are a bit complicated + due to need to handle subregs and extends. The value of the object described + by it can be obtained as follows (all computations are done in extend_mode): + + Value in i-th iteration is + delta + mult * extend_{extend_mode} (subreg_{mode} (base + i * step)). + + If first_special is true, the value in the first iteration is + delta + mult * base + + If extend = NIL, first_special must be false, delta 0, mult 1 and value is + subreg_{mode} (base + i * step) + + The get_iv_value function can be used to obtain these expressions. + + ??? Add a third mode field that would specify the mode in that inner + computation is done, which would enable it to be different from the + outer one? */ + +struct rtx_iv +{ + /* Its base and step (mode of base and step is supposed to be extend_mode, + see the description above). */ + rtx base, step; + + /* The type of extend applied to it (SIGN_EXTEND, ZERO_EXTEND or NIL). */ + enum rtx_code extend; + + /* Operations applied in the extended mode. */ + rtx delta, mult; + + /* The mode it is extended to. */ + enum machine_mode extend_mode; + + /* The mode the variable iterates in. */ + enum machine_mode mode; + + /* Whether we have already filled the remaining fields. */ + unsigned analysed : 1; + + /* Whether the first iteration needs to be handled specially. */ + unsigned first_special : 1; +}; + +/* This should replace struct loop_desc. We keep this just so that we are + able to compare the results. */ + +struct niter_desc +{ + /* The edge out of the loop. */ + edge out_edge; + + /* The other edge leading from the condition. */ + edge in_edge; + + /* True if we are able to say anything about number of iterations of the + loop. */ + bool simple_p; + + /* True if the loop iterates the constant number of times. */ + bool const_iter; + + /* Number of iterations if constant. */ + unsigned HOST_WIDEST_INT niter; + + /* Upper bound on the number of iterations. */ + unsigned HOST_WIDEST_INT niter_max; + + /* Assumptions under that the rest of the information is valid. */ + rtx assumptions; + + /* Assumptions under that the loop ends before reaching the latch, + even if value of niter_expr says otherwise. */ + rtx noloop_assumptions; + + /* Condition under that the loop is infinite. */ + rtx infinite; + + /* Whether the comparison is signed. */ + bool signed_p; + + /* The mode in that niter_expr should be computed. */ + enum machine_mode mode; + + /* The number of iterations of the loop. */ + rtx niter_expr; +}; + +extern void iv_analysis_loop_init (struct loop *); +extern rtx iv_get_reaching_def (rtx, rtx); +extern bool iv_analyse (rtx, rtx, struct rtx_iv *); +extern rtx get_iv_value (struct rtx_iv *, rtx); +extern void find_simple_exit (struct loop *, struct niter_desc *); +extern void iv_number_of_iterations (struct loop *, rtx, rtx, + struct niter_desc *); +extern void iv_analysis_done (void); + +extern struct niter_desc *get_simple_loop_desc (struct loop *loop); +extern void free_simple_loop_desc (struct loop *loop); + +static inline struct niter_desc * +simple_loop_desc (struct loop *loop) +{ + return loop->aux; +} + /* Loop optimizer initialization. */ extern struct loops *loop_optimizer_init (FILE *); extern void loop_optimizer_finalize (struct loops *, FILE *); diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 3b8bcec..35444ee 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -480,11 +480,13 @@ scale_loop_frequencies (struct loop *loop, int num, int den) accordingly. Everything between them plus LATCH_EDGE destination must be dominated by HEADER_EDGE destination, and back-reachable from LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB, - SWITCH_BB->succ to original destination of LATCH_EDGE and - SWITCH_BB->succ->succ_next to original destination of HEADER_EDGE. + FALLTHRU_EDGE (SWITCH_BB) to original destination of HEADER_EDGE and + BRANCH_EDGE (SWITCH_BB) to original destination of LATCH_EDGE. Returns newly created loop. */ + struct loop * -loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block switch_bb) +loopify (struct loops *loops, edge latch_edge, edge header_edge, + basic_block switch_bb) { basic_block succ_bb = latch_edge->dest; basic_block pred_bb = header_edge->src; @@ -509,13 +511,15 @@ loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block swi /* Redirect edges. */ loop_redirect_edge (latch_edge, loop->header); + loop_redirect_edge (BRANCH_EDGE (switch_bb), succ_bb); + loop_redirect_edge (header_edge, switch_bb); - loop_redirect_edge (switch_bb->succ->succ_next, loop->header); - loop_redirect_edge (switch_bb->succ, succ_bb); + loop_redirect_edge (FALLTHRU_EDGE (switch_bb), loop->header); /* Update dominators. */ set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb); set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb); + set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb); /* Compute new loop. */ diff --git a/gcc/expr.c b/gcc/expr.c index a6d222e..223a36b 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -5588,6 +5588,20 @@ force_operand (rtx value, rtx target) rtx subtarget = get_subtarget (target); enum rtx_code code = GET_CODE (value); + /* Check for subreg applied to an expression produced by loop optimizer. */ + if (code == SUBREG + && GET_CODE (SUBREG_REG (value)) != REG + && GET_CODE (SUBREG_REG (value)) != MEM) + { + value = simplify_gen_subreg (GET_MODE (value), + force_reg (GET_MODE (SUBREG_REG (value)), + force_operand (SUBREG_REG (value), + NULL_RTX)), + GET_MODE (SUBREG_REG (value)), + SUBREG_BYTE (value)); + code = GET_CODE (value); + } + /* Check for a PIC address load. */ if ((code == PLUS || code == MINUS) && XEXP (value, 0) == pic_offset_table_rtx diff --git a/gcc/loop-init.c b/gcc/loop-init.c index 0b882d9..19d53e1 100644 --- a/gcc/loop-init.c +++ b/gcc/loop-init.c @@ -36,9 +36,6 @@ loop_optimizer_init (FILE *dumpfile) struct loops *loops = xcalloc (1, sizeof (struct loops)); edge e; - /* Initialize structures for layout changes. */ - cfg_layout_initialize (); - /* Avoid annoying special cases of edges going to exit block. */ for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next) @@ -49,18 +46,11 @@ loop_optimizer_init (FILE *dumpfile) if (flow_loops_find (loops, LOOP_TREE) <= 1) { - basic_block bb; - /* No loops. */ flow_loops_free (loops); free_dominance_info (CDI_DOMINATORS); free (loops); - /* Make chain. */ - FOR_EACH_BB (bb) - if (bb->next_bb != EXIT_BLOCK_PTR) - bb->rbi->next = bb->next_bb; - cfg_layout_finalize (); return NULL; } @@ -94,13 +84,14 @@ loop_optimizer_init (FILE *dumpfile) void loop_optimizer_finalize (struct loops *loops, FILE *dumpfile) { - basic_block bb; + unsigned i; - /* Finalize layout changes. */ - /* Make chain. */ - FOR_EACH_BB (bb) - if (bb->next_bb != EXIT_BLOCK_PTR) - bb->rbi->next = bb->next_bb; + if (!loops) + return; + + for (i = 1; i < loops->num; i++) + if (loops->parray[i]) + free_simple_loop_desc (loops->parray[i]); /* Another dump. */ flow_loops_dump (loops, dumpfile, NULL, 1); @@ -110,9 +101,6 @@ loop_optimizer_finalize (struct loops *loops, FILE *dumpfile) free_dominance_info (CDI_DOMINATORS); free (loops); - /* Finalize changes. */ - cfg_layout_finalize (); - /* Checking. */ #ifdef ENABLE_CHECKING verify_flow_info (); diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c new file mode 100644 index 0000000..efdcc73 --- /dev/null +++ b/gcc/loop-iv.c @@ -0,0 +1,2465 @@ +/* Rtl-level induction variable analysis. + Copyright (C) 2004 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +/* This is just a very simplistic analysis of induction variables of the loop. + The major use is for determining the number of iterations of a loop for + loop unrolling, doloop optimization and branch prediction. For this we + are only interested in bivs and a fairly limited set of givs that are + needed in the exit condition. We also only compute the iv information on + demand. + + The interesting registers are determined. A register is interesting if + + -- it is set only in the blocks that dominate the latch of the current loop + -- all its sets are simple -- i.e. in the form we understand + + We also number the insns sequentially in each basic block. For a use of the + interesting reg, it is now easy to find a reaching definition (there may be + only one). + + Induction variable is then simply analysed by walking the use-def + chains. + + Usage: + + iv_analysis_loop_init (loop); + insn = iv_get_reaching_def (where, reg); + if (iv_analyse (insn, reg, &iv)) + { + ... + } + iv_analysis_done (); */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "cfgloop.h" +#include "expr.h" +#include "output.h" + +/* The insn information. */ + +struct insn_info +{ + /* Id of the insn. */ + unsigned luid; + + /* The previous definition of the register defined by the single + set in the insn. */ + rtx prev_def; + + /* The description of the iv. */ + struct rtx_iv iv; +}; + +static struct insn_info *insn_info; + +/* The last definition of register. */ + +static rtx *last_def; + +/* The bivs. */ + +static struct rtx_iv *bivs; + +/* Maximal insn number for that there is place in insn_info array. */ + +static unsigned max_insn_no; + +/* Maximal register number for that there is place in bivs and last_def + arrays. */ + +static unsigned max_reg_no; + +/* Dumps information about IV to FILE. */ + +extern void dump_iv_info (FILE *, struct rtx_iv *); +void +dump_iv_info (FILE *file, struct rtx_iv *iv) +{ + if (!iv->base) + { + fprintf (file, "not simple"); + return; + } + + if (iv->step == const0_rtx) + { + fprintf (file, "invariant "); + print_rtl (file, iv->base); + return; + } + + print_rtl (file, iv->base); + fprintf (file, " + "); + print_rtl (file, iv->step); + fprintf (file, " * iteration"); + fprintf (file, " (in %s)", GET_MODE_NAME (iv->mode)); + + if (iv->mode != iv->extend_mode) + fprintf (file, " %s to %s", + rtx_name[iv->extend], + GET_MODE_NAME (iv->extend_mode)); + + if (iv->mult != const1_rtx) + { + fprintf (file, " * "); + print_rtl (file, iv->mult); + } + if (iv->delta != const0_rtx) + { + fprintf (file, " + "); + print_rtl (file, iv->delta); + } + if (iv->first_special) + fprintf (file, " (first special)"); +} + +/* Assigns luids to insns in basic block BB. */ + +static void +assign_luids (basic_block bb) +{ + unsigned i = 0, uid; + rtx insn; + + FOR_BB_INSNS (bb, insn) + { + uid = INSN_UID (insn); + insn_info[uid].luid = i++; + insn_info[uid].prev_def = NULL_RTX; + insn_info[uid].iv.analysed = false; + } +} + +/* Generates a subreg to get the least significant part of EXPR (in mode + INNER_MODE) to OUTER_MODE. */ + +static rtx +lowpart_subreg (enum machine_mode outer_mode, rtx expr, + enum machine_mode inner_mode) +{ + return simplify_gen_subreg (outer_mode, expr, inner_mode, + subreg_lowpart_offset (outer_mode, inner_mode)); +} + +/* Checks whether REG is a well-behaved register. */ + +static bool +simple_reg_p (rtx reg) +{ + unsigned r; + + if (GET_CODE (reg) == SUBREG) + { + if (!subreg_lowpart_p (reg)) + return false; + reg = SUBREG_REG (reg); + } + + if (!REG_P (reg)) + return false; + + r = REGNO (reg); + if (HARD_REGISTER_NUM_P (r)) + return false; + + if (GET_MODE_CLASS (GET_MODE (reg)) != MODE_INT) + return false; + + if (last_def[r] == const0_rtx) + return false; + + return true; +} + +/* Checks whether assignment LHS = RHS is simple enough for us to process. */ + +static bool +simple_set_p (rtx lhs, rtx rhs) +{ + rtx op0, op1; + + if (!REG_P (lhs) + || !simple_reg_p (lhs)) + return false; + + if (CONSTANT_P (rhs)) + return true; + + switch (GET_CODE (rhs)) + { + case SUBREG: + case REG: + return simple_reg_p (rhs); + + case SIGN_EXTEND: + case ZERO_EXTEND: + case NEG: + return simple_reg_p (XEXP (rhs, 0)); + + case PLUS: + case MINUS: + case MULT: + op0 = XEXP (rhs, 0); + op1 = XEXP (rhs, 1); + + if (!simple_reg_p (op0) + && !CONSTANT_P (op0)) + return false; + + if (!simple_reg_p (op1) + && !CONSTANT_P (op1)) + return false; + + if (GET_CODE (rhs) == MULT + && !CONSTANT_P (op0) + && !CONSTANT_P (op1)) + return false; + + return true; + + default: + return false; + } +} + +/* Mark single SET in INSN. */ + +static rtx +mark_single_set (rtx insn, rtx set) +{ + rtx def = SET_DEST (set), src; + unsigned regno, uid; + + src = find_reg_equal_equiv_note (insn); + if (!src) + src = SET_SRC (set); + + if (!simple_set_p (SET_DEST (set), src)) + return NULL_RTX; + + regno = REGNO (def); + uid = INSN_UID (insn); + + bivs[regno].analysed = false; + insn_info[uid].prev_def = last_def[regno]; + last_def[regno] = insn; + + return def; +} + +/* Invalidate register REG unless it is equal to EXCEPT. */ + +static void +kill_sets (rtx reg, rtx by ATTRIBUTE_UNUSED, void *except) +{ + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + if (!REG_P (reg)) + return; + if (reg == except) + return; + + last_def[REGNO (reg)] = const0_rtx; +} + +/* Marks sets in basic block BB. If DOM is true, BB dominates the loop + latch. */ + +static void +mark_sets (basic_block bb, bool dom) +{ + rtx insn, set, def; + + FOR_BB_INSNS (bb, insn) + { + if (!INSN_P (insn)) + continue; + + if (dom + && (set = single_set (insn))) + def = mark_single_set (insn, set); + else + def = NULL_RTX; + + note_stores (PATTERN (insn), kill_sets, def); + } +} + +/* Prepare the data for an induction variable analysis of a LOOP. */ + +void +iv_analysis_loop_init (struct loop *loop) +{ + basic_block *body = get_loop_body_in_dom_order (loop); + unsigned b; + + if ((unsigned) get_max_uid () >= max_insn_no) + { + /* Add some reserve for insns and registers produced in optimizations. */ + max_insn_no = get_max_uid () + 100; + if (insn_info) + free (insn_info); + insn_info = xmalloc (max_insn_no * sizeof (struct insn_info)); + } + + if ((unsigned) max_reg_num () >= max_reg_no) + { + max_reg_no = max_reg_num () + 100; + if (last_def) + free (last_def); + last_def = xmalloc (max_reg_no * sizeof (rtx)); + if (bivs) + free (bivs); + bivs = xmalloc (max_reg_no * sizeof (struct rtx_iv)); + } + + memset (last_def, 0, max_reg_num () * sizeof (rtx)); + + for (b = 0; b < loop->num_nodes; b++) + { + assign_luids (body[b]); + mark_sets (body[b], just_once_each_iteration_p (loop, body[b])); + } + + free (body); +} + +/* Gets definition of REG reaching the INSN. If REG is not simple, const0_rtx + is returned. If INSN is before the first def in the loop, NULL_RTX is + returned. */ + +rtx +iv_get_reaching_def (rtx insn, rtx reg) +{ + unsigned regno, luid, auid; + rtx ainsn; + basic_block bb, abb; + + if (GET_CODE (reg) == SUBREG) + { + if (!subreg_lowpart_p (reg)) + return const0_rtx; + reg = SUBREG_REG (reg); + } + if (!REG_P (reg)) + return NULL_RTX; + + regno = REGNO (reg); + if (!last_def[regno] + || last_def[regno] == const0_rtx) + return last_def[regno]; + + bb = BLOCK_FOR_INSN (insn); + luid = insn_info[INSN_UID (insn)].luid; + + ainsn = last_def[regno]; + while (1) + { + abb = BLOCK_FOR_INSN (ainsn); + + if (dominated_by_p (CDI_DOMINATORS, bb, abb)) + break; + + auid = INSN_UID (ainsn); + ainsn = insn_info[auid].prev_def; + + if (!ainsn) + return NULL_RTX; + } + + while (1) + { + abb = BLOCK_FOR_INSN (ainsn); + if (abb != bb) + return ainsn; + + auid = INSN_UID (ainsn); + if (luid > insn_info[auid].luid) + return ainsn; + + ainsn = insn_info[auid].prev_def; + if (!ainsn) + return NULL_RTX; + } +} + +/* Sets IV to invariant CST in MODE. Always returns true (just for + consistency with other iv manipulation functions that may fail). */ + +static bool +iv_constant (struct rtx_iv *iv, rtx cst, enum machine_mode mode) +{ + if (mode == VOIDmode) + mode = GET_MODE (cst); + + iv->analysed = true; + iv->mode = mode; + iv->base = cst; + iv->step = const0_rtx; + iv->first_special = false; + iv->extend = NIL; + iv->extend_mode = iv->mode; + iv->delta = const0_rtx; + iv->mult = const1_rtx; + + return true; +} + +/* Evaluates application of subreg to MODE on IV. */ + +static bool +iv_subreg (struct rtx_iv *iv, enum machine_mode mode) +{ + if (iv->extend_mode == mode) + return true; + + if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (iv->mode)) + return false; + + iv->extend = NIL; + iv->mode = mode; + + iv->base = simplify_gen_binary (PLUS, iv->extend_mode, iv->delta, + simplify_gen_binary (MULT, iv->extend_mode, + iv->base, iv->mult)); + iv->step = simplify_gen_binary (MULT, iv->extend_mode, iv->step, iv->mult); + iv->mult = const1_rtx; + iv->delta = const0_rtx; + iv->first_special = false; + + return true; +} + +/* Evaluates application of EXTEND to MODE on IV. */ + +static bool +iv_extend (struct rtx_iv *iv, enum rtx_code extend, enum machine_mode mode) +{ + if (mode != iv->extend_mode) + return false; + + if (iv->extend != NIL + && iv->extend != extend) + return false; + + iv->extend = extend; + + return true; +} + +/* Evaluates negation of IV. */ + +static bool +iv_neg (struct rtx_iv *iv) +{ + if (iv->extend == NIL) + { + iv->base = simplify_gen_unary (NEG, iv->extend_mode, + iv->base, iv->extend_mode); + iv->step = simplify_gen_unary (NEG, iv->extend_mode, + iv->step, iv->extend_mode); + } + else + { + iv->delta = simplify_gen_unary (NEG, iv->extend_mode, + iv->delta, iv->extend_mode); + iv->mult = simplify_gen_unary (NEG, iv->extend_mode, + iv->mult, iv->extend_mode); + } + + return true; +} + +/* Evaluates addition or subtraction (according to OP) of IV1 to IV0. */ + +static bool +iv_add (struct rtx_iv *iv0, struct rtx_iv *iv1, enum rtx_code op) +{ + enum machine_mode mode; + rtx arg; + + /* Extend the constant to extend_mode of the other operand if neccesary. */ + if (iv0->extend == NIL + && iv0->mode == iv0->extend_mode + && iv0->step == const0_rtx + && GET_MODE_SIZE (iv0->extend_mode) < GET_MODE_SIZE (iv1->extend_mode)) + { + iv0->extend_mode = iv1->extend_mode; + iv0->base = simplify_gen_unary (ZERO_EXTEND, iv0->extend_mode, + iv0->base, iv0->mode); + } + if (iv1->extend == NIL + && iv1->mode == iv1->extend_mode + && iv1->step == const0_rtx + && GET_MODE_SIZE (iv1->extend_mode) < GET_MODE_SIZE (iv0->extend_mode)) + { + iv1->extend_mode = iv0->extend_mode; + iv1->base = simplify_gen_unary (ZERO_EXTEND, iv1->extend_mode, + iv1->base, iv1->mode); + } + + mode = iv0->extend_mode; + if (mode != iv1->extend_mode) + return false; + + if (iv0->extend == NIL && iv1->extend == NIL) + { + if (iv0->mode != iv1->mode) + return false; + + iv0->base = simplify_gen_binary (op, mode, iv0->base, iv1->base); + iv0->step = simplify_gen_binary (op, mode, iv0->step, iv1->step); + + return true; + } + + /* Handle addition of constant. */ + if (iv1->extend == NIL + && iv1->mode == mode + && iv1->step == const0_rtx) + { + iv0->delta = simplify_gen_binary (op, mode, iv0->delta, iv1->base); + return true; + } + + if (iv0->extend == NIL + && iv0->mode == mode + && iv0->step == const0_rtx) + { + arg = iv0->base; + *iv0 = *iv1; + if (op == MINUS + && !iv_neg (iv0)) + return false; + + iv0->delta = simplify_gen_binary (PLUS, mode, iv0->delta, arg); + return true; + } + + return false; +} + +/* Evaluates multiplication of IV by constant CST. */ + +static bool +iv_mult (struct rtx_iv *iv, rtx mby) +{ + enum machine_mode mode = iv->extend_mode; + + if (GET_MODE (mby) != VOIDmode + && GET_MODE (mby) != mode) + return false; + + if (iv->extend == NIL) + { + iv->base = simplify_gen_binary (MULT, mode, iv->base, mby); + iv->step = simplify_gen_binary (MULT, mode, iv->step, mby); + } + else + { + iv->delta = simplify_gen_binary (MULT, mode, iv->delta, mby); + iv->mult = simplify_gen_binary (MULT, mode, iv->mult, mby); + } + + return true; +} + +/* The recursive part of get_biv_step. Gets the value of the single value + defined in INSN wrto initial value of REG inside loop, in shape described + at get_biv_step. */ + +static bool +get_biv_step_1 (rtx insn, rtx reg, + rtx *inner_step, enum machine_mode *inner_mode, + enum rtx_code *extend, enum machine_mode outer_mode, + rtx *outer_step) +{ + rtx set, lhs, rhs, op0 = NULL_RTX, op1 = NULL_RTX; + rtx next, nextr, def_insn, tmp; + enum rtx_code code; + + set = single_set (insn); + rhs = find_reg_equal_equiv_note (insn); + if (!rhs) + rhs = SET_SRC (set); + lhs = SET_DEST (set); + + code = GET_CODE (rhs); + switch (code) + { + case SUBREG: + case REG: + next = rhs; + break; + + case PLUS: + case MINUS: + op0 = XEXP (rhs, 0); + op1 = XEXP (rhs, 1); + + if (code == PLUS && CONSTANT_P (op0)) + { + tmp = op0; op0 = op1; op1 = tmp; + } + + if (!simple_reg_p (op0) + || !CONSTANT_P (op1)) + return false; + + if (GET_MODE (rhs) != outer_mode) + { + /* ppc64 uses expressions like + + (set x:SI (plus:SI (subreg:SI y:DI) 1)). + + this is equivalent to + + (set x':DI (plus:DI y:DI 1)) + (set x:SI (subreg:SI (x':DI)). */ + if (GET_CODE (op0) != SUBREG) + return false; + if (GET_MODE (SUBREG_REG (op0)) != outer_mode) + return false; + } + + next = op0; + break; + + case SIGN_EXTEND: + case ZERO_EXTEND: + if (GET_MODE (rhs) != outer_mode) + return false; + + op0 = XEXP (rhs, 0); + if (!simple_reg_p (op0)) + return false; + + next = op0; + break; + + default: + return false; + } + + if (GET_CODE (next) == SUBREG) + { + if (!subreg_lowpart_p (next)) + return false; + + nextr = SUBREG_REG (next); + if (GET_MODE (nextr) != outer_mode) + return false; + } + else + nextr = next; + + def_insn = iv_get_reaching_def (insn, nextr); + if (def_insn == const0_rtx) + return false; + + if (!def_insn) + { + if (!rtx_equal_p (nextr, reg)) + return false; + + *inner_step = const0_rtx; + *extend = NIL; + *inner_mode = outer_mode; + *outer_step = const0_rtx; + } + else if (!get_biv_step_1 (def_insn, reg, + inner_step, inner_mode, extend, outer_mode, + outer_step)) + return false; + + if (GET_CODE (next) == SUBREG) + { + enum machine_mode amode = GET_MODE (next); + + if (GET_MODE_SIZE (amode) > GET_MODE_SIZE (*inner_mode)) + return false; + + *inner_mode = amode; + *inner_step = simplify_gen_binary (PLUS, outer_mode, + *inner_step, *outer_step); + *outer_step = const0_rtx; + *extend = NIL; + } + + switch (code) + { + case REG: + case SUBREG: + break; + + case PLUS: + case MINUS: + if (*inner_mode == outer_mode + /* See comment in previous switch. */ + || GET_MODE (rhs) != outer_mode) + *inner_step = simplify_gen_binary (code, outer_mode, + *inner_step, op1); + else + *outer_step = simplify_gen_binary (code, outer_mode, + *outer_step, op1); + break; + + case SIGN_EXTEND: + case ZERO_EXTEND: + if (GET_MODE (op0) != *inner_mode + || *extend != NIL + || *outer_step != const0_rtx) + abort (); + + *extend = code; + break; + + default: + abort (); + } + + return true; +} + +/* Gets the operation on register REG inside loop, in shape + + OUTER_STEP + EXTEND_{OUTER_MODE} (SUBREG_{INNER_MODE} (REG + INNER_STEP)) + + If the operation cannot be described in this shape, return false. */ + +static bool +get_biv_step (rtx reg, rtx *inner_step, enum machine_mode *inner_mode, + enum rtx_code *extend, enum machine_mode *outer_mode, + rtx *outer_step) +{ + *outer_mode = GET_MODE (reg); + + if (!get_biv_step_1 (last_def[REGNO (reg)], reg, + inner_step, inner_mode, extend, *outer_mode, + outer_step)) + return false; + + if (*inner_mode != *outer_mode + && *extend == NIL) + abort (); + + if (*inner_mode == *outer_mode + && *extend != NIL) + abort (); + + if (*inner_mode == *outer_mode + && *outer_step != const0_rtx) + abort (); + + return true; +} + +/* Determines whether DEF is a biv and if so, stores its description + to *IV. */ + +static bool +iv_analyse_biv (rtx def, struct rtx_iv *iv) +{ + unsigned regno; + rtx inner_step, outer_step; + enum machine_mode inner_mode, outer_mode; + enum rtx_code extend; + + if (rtl_dump_file) + { + fprintf (rtl_dump_file, "Analysing "); + print_rtl (rtl_dump_file, def); + fprintf (rtl_dump_file, " for bivness.\n"); + } + + if (!REG_P (def)) + { + if (!CONSTANT_P (def)) + return false; + + return iv_constant (iv, def, VOIDmode); + } + + regno = REGNO (def); + if (last_def[regno] == const0_rtx) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, " not simple.\n"); + return false; + } + + if (last_def[regno] && bivs[regno].analysed) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, " already analysed.\n"); + + *iv = bivs[regno]; + return iv->base != NULL_RTX; + } + + if (!last_def[regno]) + { + iv_constant (iv, def, VOIDmode); + goto end; + } + + iv->analysed = true; + if (!get_biv_step (def, &inner_step, &inner_mode, &extend, + &outer_mode, &outer_step)) + { + iv->base = NULL_RTX; + goto end; + } + + /* Loop transforms base to es (base + inner_step) + outer_step, + where es means extend of subreg between inner_mode and outer_mode. + The corresponding induction variable is + + es ((base - outer_step) + i * (inner_step + outer_step)) + outer_step */ + + iv->base = simplify_gen_binary (MINUS, outer_mode, def, outer_step); + iv->step = simplify_gen_binary (PLUS, outer_mode, inner_step, outer_step); + iv->mode = inner_mode; + iv->extend_mode = outer_mode; + iv->extend = extend; + iv->mult = const1_rtx; + iv->delta = outer_step; + iv->first_special = inner_mode != outer_mode; + +end: + if (rtl_dump_file) + { + fprintf (rtl_dump_file, " "); + dump_iv_info (rtl_dump_file, iv); + fprintf (rtl_dump_file, "\n"); + } + + bivs[regno] = *iv; + + return iv->base != NULL_RTX; +} + +/* Analyses operand OP of INSN and stores the result to *IV. */ + +static bool +iv_analyse_op (rtx insn, rtx op, struct rtx_iv *iv) +{ + rtx def_insn; + unsigned regno; + bool inv = CONSTANT_P (op); + + if (rtl_dump_file) + { + fprintf (rtl_dump_file, "Analysing operand "); + print_rtl (rtl_dump_file, op); + fprintf (rtl_dump_file, " of insn "); + print_rtl_single (rtl_dump_file, insn); + } + + if (GET_CODE (op) == SUBREG) + { + if (!subreg_lowpart_p (op)) + return false; + + if (!iv_analyse_op (insn, SUBREG_REG (op), iv)) + return false; + + return iv_subreg (iv, GET_MODE (op)); + } + + if (!inv) + { + regno = REGNO (op); + if (!last_def[regno]) + inv = true; + else if (last_def[regno] == const0_rtx) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, " not simple.\n"); + return false; + } + } + + if (inv) + { + iv_constant (iv, op, VOIDmode); + + if (rtl_dump_file) + { + fprintf (rtl_dump_file, " "); + dump_iv_info (rtl_dump_file, iv); + fprintf (rtl_dump_file, "\n"); + } + return true; + } + + def_insn = iv_get_reaching_def (insn, op); + if (def_insn == const0_rtx) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, " not simple.\n"); + return false; + } + + return iv_analyse (def_insn, op, iv); +} + +/* Analyses iv DEF defined in INSN and stores the result to *IV. */ + +bool +iv_analyse (rtx insn, rtx def, struct rtx_iv *iv) +{ + unsigned uid; + rtx set, rhs, mby = NULL_RTX, tmp; + rtx op0 = NULL_RTX, op1 = NULL_RTX; + struct rtx_iv iv0, iv1; + enum machine_mode amode; + enum rtx_code code; + + if (insn == const0_rtx) + return false; + + if (GET_CODE (def) == SUBREG) + { + if (!subreg_lowpart_p (def)) + return false; + + if (!iv_analyse (insn, SUBREG_REG (def), iv)) + return false; + + return iv_subreg (iv, GET_MODE (def)); + } + + if (!insn) + return iv_analyse_biv (def, iv); + + if (rtl_dump_file) + { + fprintf (rtl_dump_file, "Analysing def of "); + print_rtl (rtl_dump_file, def); + fprintf (rtl_dump_file, " in insn "); + print_rtl_single (rtl_dump_file, insn); + } + + uid = INSN_UID (insn); + if (insn_info[uid].iv.analysed) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, " already analysed.\n"); + *iv = insn_info[uid].iv; + return iv->base != NULL_RTX; + } + + iv->mode = VOIDmode; + iv->base = NULL_RTX; + iv->step = NULL_RTX; + + set = single_set (insn); + rhs = find_reg_equal_equiv_note (insn); + if (!rhs) + rhs = SET_SRC (set); + code = GET_CODE (rhs); + + if (CONSTANT_P (rhs)) + { + op0 = rhs; + amode = GET_MODE (def); + } + else + { + switch (code) + { + case SUBREG: + if (!subreg_lowpart_p (rhs)) + goto end; + op0 = rhs; + break; + + case REG: + op0 = rhs; + break; + + case SIGN_EXTEND: + case ZERO_EXTEND: + case NEG: + op0 = XEXP (rhs, 0); + break; + + case PLUS: + case MINUS: + op0 = XEXP (rhs, 0); + op1 = XEXP (rhs, 1); + break; + + case MULT: + op0 = XEXP (rhs, 0); + mby = XEXP (rhs, 1); + if (!CONSTANT_P (mby)) + { + if (!CONSTANT_P (op0)) + abort (); + tmp = op0; + op0 = mby; + mby = tmp; + } + break; + + default: + abort (); + } + + amode = GET_MODE (rhs); + } + + if (op0) + { + if (!iv_analyse_op (insn, op0, &iv0)) + goto end; + + if (iv0.mode == VOIDmode) + { + iv0.mode = amode; + iv0.extend_mode = amode; + } + } + + if (op1) + { + if (!iv_analyse_op (insn, op1, &iv1)) + goto end; + + if (iv1.mode == VOIDmode) + { + iv1.mode = amode; + iv1.extend_mode = amode; + } + } + + switch (code) + { + case SIGN_EXTEND: + case ZERO_EXTEND: + if (!iv_extend (&iv0, code, amode)) + goto end; + break; + + case NEG: + if (!iv_neg (&iv0)) + goto end; + break; + + case PLUS: + case MINUS: + if (!iv_add (&iv0, &iv1, code)) + goto end; + break; + + case MULT: + if (!iv_mult (&iv0, mby)) + goto end; + break; + + default: + break; + } + + *iv = iv0; + +end: + iv->analysed = true; + insn_info[uid].iv = *iv; + + if (rtl_dump_file) + { + print_rtl (rtl_dump_file, def); + fprintf (rtl_dump_file, " in insn "); + print_rtl_single (rtl_dump_file, insn); + fprintf (rtl_dump_file, " is "); + dump_iv_info (rtl_dump_file, iv); + fprintf (rtl_dump_file, "\n"); + } + + return iv->base != NULL_RTX; +} + +/* Calculates value of IV at ITERATION-th iteration. */ + +rtx +get_iv_value (struct rtx_iv *iv, rtx iteration) +{ + rtx val; + + /* We would need to generate some if_then_else patterns, and so far + it is not needed anywhere. */ + if (iv->first_special) + abort (); + + if (iv->step != const0_rtx && iteration != const0_rtx) + val = simplify_gen_binary (PLUS, iv->extend_mode, iv->base, + simplify_gen_binary (MULT, iv->extend_mode, + iv->step, iteration)); + else + val = iv->base; + + if (iv->extend_mode == iv->mode) + return val; + + val = lowpart_subreg (iv->mode, val, iv->extend_mode); + + if (iv->extend == NIL) + return val; + + val = simplify_gen_unary (iv->extend, iv->extend_mode, val, iv->mode); + val = simplify_gen_binary (PLUS, iv->extend_mode, iv->delta, + simplify_gen_binary (MULT, iv->extend_mode, + iv->mult, val)); + + return val; +} + +/* Free the data for an induction variable analysis. */ + +void +iv_analysis_done (void) +{ + max_insn_no = 0; + max_reg_no = 0; + if (insn_info) + { + free (insn_info); + insn_info = NULL; + } + if (last_def) + { + free (last_def); + last_def = NULL; + } + if (bivs) + { + free (bivs); + bivs = NULL; + } +} + +/* Computes inverse to X modulo (1 << MOD). */ + +static unsigned HOST_WIDEST_INT +inverse (unsigned HOST_WIDEST_INT x, int mod) +{ + unsigned HOST_WIDEST_INT mask = + ((unsigned HOST_WIDEST_INT) 1 << (mod - 1) << 1) - 1; + unsigned HOST_WIDEST_INT rslt = 1; + int i; + + for (i = 0; i < mod - 1; i++) + { + rslt = (rslt * x) & mask; + x = (x * x) & mask; + } + + return rslt; +} + +/* Tries to estimate the maximum number of iterations. */ + +static unsigned HOST_WIDEST_INT +determine_max_iter (struct niter_desc *desc) +{ + rtx niter = desc->niter_expr; + rtx mmin, mmax, left, right; + unsigned HOST_WIDEST_INT nmax, inc; + + if (GET_CODE (niter) == AND + && GET_CODE (XEXP (niter, 0)) == CONST_INT) + { + nmax = INTVAL (XEXP (niter, 0)); + if (!(nmax & (nmax + 1))) + { + desc->niter_max = nmax; + return nmax; + } + } + + get_mode_bounds (desc->mode, desc->signed_p, &mmin, &mmax); + nmax = INTVAL (mmax) - INTVAL (mmin); + + if (GET_CODE (niter) == UDIV) + { + if (GET_CODE (XEXP (niter, 1)) != CONST_INT) + { + desc->niter_max = nmax; + return nmax; + } + inc = INTVAL (XEXP (niter, 1)); + niter = XEXP (niter, 0); + } + else + inc = 1; + + if (GET_CODE (niter) == PLUS) + { + left = XEXP (niter, 0); + right = XEXP (niter, 0); + + if (GET_CODE (right) == CONST_INT) + right = GEN_INT (-INTVAL (right)); + } + else if (GET_CODE (niter) == MINUS) + { + left = XEXP (niter, 0); + right = XEXP (niter, 0); + } + else + { + left = niter; + right = mmin; + } + + if (GET_CODE (left) == CONST_INT) + mmax = left; + if (GET_CODE (right) == CONST_INT) + mmin = right; + nmax = INTVAL (mmax) - INTVAL (mmin); + + desc->niter_max = nmax / inc; + return nmax / inc; +} + +/* Checks whether register *REG is in set ALT. Callback for for_each_rtx. */ + +static int +altered_reg_used (rtx *reg, void *alt) +{ + if (!REG_P (*reg)) + return 0; + + return REGNO_REG_SET_P (alt, REGNO (*reg)); +} + +/* Marks registers altered by EXPR in set ALT. */ + +static void +mark_altered (rtx expr, rtx by ATTRIBUTE_UNUSED, void *alt) +{ + if (GET_CODE (expr) == SUBREG) + expr = SUBREG_REG (expr); + if (!REG_P (expr)) + return; + + SET_REGNO_REG_SET (alt, REGNO (expr)); +} + +/* Checks whether RHS is simple enough to process. */ + +static bool +simple_rhs_p (rtx rhs) +{ + rtx op0, op1; + + if (CONSTANT_P (rhs) + || REG_P (rhs)) + return true; + + switch (GET_CODE (rhs)) + { + case PLUS: + case MINUS: + op0 = XEXP (rhs, 0); + op1 = XEXP (rhs, 1); + /* Allow reg + const sets only. */ + if (REG_P (op0) && CONSTANT_P (op1)) + return true; + if (REG_P (op1) && CONSTANT_P (op0)) + return true; + + return false; + + default: + return false; + } +} + +/* Simplifies *EXPR using assignment in INSN. ALTERED is the set of registers + altered so far. */ + +static void +simplify_using_assignment (rtx insn, rtx *expr, regset altered) +{ + rtx set = single_set (insn); + rtx lhs, rhs; + bool ret = false; + + if (set) + { + lhs = SET_DEST (set); + if (GET_CODE (lhs) != REG + || altered_reg_used (&lhs, altered)) + ret = true; + } + else + ret = true; + + note_stores (PATTERN (insn), mark_altered, altered); + if (GET_CODE (insn) == CALL_INSN) + { + int i; + + /* Kill all call clobbered registers. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) + SET_REGNO_REG_SET (altered, i); + } + + if (ret) + return; + + rhs = find_reg_equal_equiv_note (insn); + if (!rhs) + rhs = SET_SRC (set); + + if (!simple_rhs_p (rhs)) + return; + + if (for_each_rtx (&rhs, altered_reg_used, altered)) + return; + + *expr = simplify_replace_rtx (*expr, lhs, rhs); +} + +/* Checks whether A implies B. */ + +static bool +implies_p (rtx a, rtx b) +{ + rtx op0, op1, r; + + if (GET_CODE (a) == EQ) + { + op0 = XEXP (a, 0); + op1 = XEXP (a, 1); + + if (REG_P (op0)) + { + r = simplify_replace_rtx (b, op0, op1); + if (r == const_true_rtx) + return true; + } + + if (REG_P (op1)) + { + r = simplify_replace_rtx (b, op1, op0); + if (r == const_true_rtx) + return true; + } + } + + return false; +} + +/* Canonicalizes COND so that + + (1) Ensure that operands are ordered according to + swap_commutative_operands_p. + (2) (LE x const) will be replaced with (LT x ) and similarly + for GE, GEU, and LEU. */ + +rtx +canon_condition (rtx cond) +{ + rtx tem; + rtx op0, op1; + enum rtx_code code; + enum machine_mode mode; + + code = GET_CODE (cond); + op0 = XEXP (cond, 0); + op1 = XEXP (cond, 1); + + if (swap_commutative_operands_p (op0, op1)) + { + code = swap_condition (code); + tem = op0; + op0 = op1; + op1 = tem; + } + + mode = GET_MODE (op0); + if (mode == VOIDmode) + mode = GET_MODE (op1); + if (mode == VOIDmode) + abort (); + + if (GET_CODE (op1) == CONST_INT + && GET_MODE_CLASS (mode) != MODE_CC + && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) + { + HOST_WIDE_INT const_val = INTVAL (op1); + unsigned HOST_WIDE_INT uconst_val = const_val; + unsigned HOST_WIDE_INT max_val + = (unsigned HOST_WIDE_INT) GET_MODE_MASK (mode); + + switch (code) + { + case LE: + if ((unsigned HOST_WIDE_INT) const_val != max_val >> 1) + code = LT, op1 = gen_int_mode (const_val + 1, GET_MODE (op0)); + break; + + /* When cross-compiling, const_val might be sign-extended from + BITS_PER_WORD to HOST_BITS_PER_WIDE_INT */ + case GE: + if ((HOST_WIDE_INT) (const_val & max_val) + != (((HOST_WIDE_INT) 1 + << (GET_MODE_BITSIZE (GET_MODE (op0)) - 1)))) + code = GT, op1 = gen_int_mode (const_val - 1, mode); + break; + + case LEU: + if (uconst_val < max_val) + code = LTU, op1 = gen_int_mode (uconst_val + 1, mode); + break; + + case GEU: + if (uconst_val != 0) + code = GTU, op1 = gen_int_mode (uconst_val - 1, mode); + break; + + default: + break; + } + } + + if (op0 != XEXP (cond, 0) + || op1 != XEXP (cond, 1) + || code != GET_CODE (cond) + || GET_MODE (cond) != SImode) + cond = gen_rtx_fmt_ee (code, SImode, op0, op1); + + return cond; +} + +/* Tries to use the fact that COND holds to simplify EXPR. ALTERED is the + set of altered regs. */ + +void +simplify_using_condition (rtx cond, rtx *expr, regset altered) +{ + rtx rev, reve, exp = *expr; + + if (GET_RTX_CLASS (GET_CODE (*expr)) != '<') + return; + + /* If some register gets altered later, we do not really speak about its + value at the time of comparison. */ + if (altered + && for_each_rtx (&cond, altered_reg_used, altered)) + return; + + rev = reversed_condition (cond); + reve = reversed_condition (exp); + + cond = canon_condition (cond); + exp = canon_condition (exp); + if (rev) + rev = canon_condition (rev); + if (reve) + reve = canon_condition (reve); + + if (rtx_equal_p (exp, cond)) + { + *expr = const_true_rtx; + return; + } + + + if (rev && rtx_equal_p (exp, rev)) + { + *expr = const0_rtx; + return; + } + + if (implies_p (cond, exp)) + { + *expr = const_true_rtx; + return; + } + + if (reve && implies_p (cond, reve)) + { + *expr = const0_rtx; + return; + } + + /* A proof by contradiction. If *EXPR implies (not cond), *EXPR must + be false. */ + if (rev && implies_p (exp, rev)) + { + *expr = const0_rtx; + return; + } + + /* Similarly, If (not *EXPR) implies (not cond), *EXPR must be true. */ + if (rev && reve && implies_p (reve, rev)) + { + *expr = const_true_rtx; + return; + } + + /* We would like to have some other tests here. TODO. */ + + return; +} + +/* Use relationship between A and *B to eventually eliminate *B. + OP is the operation we consider. */ + +static void +eliminate_implied_condition (enum rtx_code op, rtx a, rtx *b) +{ + if (op == AND) + { + /* If A implies *B, we may replace *B by true. */ + if (implies_p (a, *b)) + *b = const_true_rtx; + } + else if (op == IOR) + { + /* If *B implies A, we may replace *B by false. */ + if (implies_p (*b, a)) + *b = const0_rtx; + } + else + abort (); +} + +/* Eliminates the conditions in TAIL that are implied by HEAD. OP is the + operation we consider. */ + +static void +eliminate_implied_conditions (enum rtx_code op, rtx *head, rtx tail) +{ + rtx elt; + + for (elt = tail; elt; elt = XEXP (elt, 1)) + eliminate_implied_condition (op, *head, &XEXP (elt, 0)); + for (elt = tail; elt; elt = XEXP (elt, 1)) + eliminate_implied_condition (op, XEXP (elt, 0), head); +} + +/* Simplifies *EXPR using initial values at the start of the LOOP. If *EXPR + is a list, its elements are assumed to be combined using OP. */ + +static void +simplify_using_initial_values (struct loop *loop, enum rtx_code op, rtx *expr) +{ + rtx head, tail, insn; + rtx neutral, aggr; + regset altered; + regset_head altered_head; + edge e; + + if (!*expr) + return; + + if (CONSTANT_P (*expr)) + return; + + if (GET_CODE (*expr) == EXPR_LIST) + { + head = XEXP (*expr, 0); + tail = XEXP (*expr, 1); + + eliminate_implied_conditions (op, &head, tail); + + if (op == AND) + { + neutral = const_true_rtx; + aggr = const0_rtx; + } + else if (op == IOR) + { + neutral = const0_rtx; + aggr = const_true_rtx; + } + else + abort (); + + simplify_using_initial_values (loop, NIL, &head); + if (head == aggr) + { + XEXP (*expr, 0) = aggr; + XEXP (*expr, 1) = NULL_RTX; + return; + } + else if (head == neutral) + { + *expr = tail; + simplify_using_initial_values (loop, op, expr); + return; + } + simplify_using_initial_values (loop, op, &tail); + + if (tail && XEXP (tail, 0) == aggr) + { + *expr = tail; + return; + } + + XEXP (*expr, 0) = head; + XEXP (*expr, 1) = tail; + return; + } + + if (op != NIL) + abort (); + + e = loop_preheader_edge (loop); + if (e->src == ENTRY_BLOCK_PTR) + return; + + altered = INITIALIZE_REG_SET (altered_head); + + while (1) + { + insn = BB_END (e->src); + if (any_condjump_p (insn)) + { + /* FIXME -- slightly wrong -- what if compared register + gets altered between start of the condition and insn? */ + rtx cond = get_condition (BB_END (e->src), NULL, false); + + if (cond && (e->flags & EDGE_FALLTHRU)) + cond = reversed_condition (cond); + if (cond) + { + simplify_using_condition (cond, expr, altered); + if (CONSTANT_P (*expr)) + { + FREE_REG_SET (altered); + return; + } + } + } + + FOR_BB_INSNS_REVERSE (e->src, insn) + { + if (!INSN_P (insn)) + continue; + + simplify_using_assignment (insn, expr, altered); + if (CONSTANT_P (*expr)) + { + FREE_REG_SET (altered); + return; + } + } + + e = e->src->pred; + if (e->pred_next + || e->src == ENTRY_BLOCK_PTR) + break; + } + + FREE_REG_SET (altered); +} + +/* Transforms invariant IV into MODE. Adds assumptions based on the fact + that IV occurs as left operands of comparison COND and its signedness + is SIGNED_P to DESC. */ + +static void +shorten_into_mode (struct rtx_iv *iv, enum machine_mode mode, + enum rtx_code cond, bool signed_p, struct niter_desc *desc) +{ + rtx mmin, mmax, cond_over, cond_under; + + get_mode_bounds (mode, signed_p, &mmin, &mmax); + cond_under = simplify_gen_relational (LT, SImode, iv->extend_mode, + iv->base, mmin); + cond_over = simplify_gen_relational (GT, SImode, iv->extend_mode, + iv->base, mmax); + + switch (cond) + { + case LE: + case LT: + case LEU: + case LTU: + if (cond_under != const0_rtx) + desc->infinite = + alloc_EXPR_LIST (0, cond_under, desc->infinite); + if (cond_over != const0_rtx) + desc->noloop_assumptions = + alloc_EXPR_LIST (0, cond_over, desc->noloop_assumptions); + break; + + case GE: + case GT: + case GEU: + case GTU: + if (cond_over != const0_rtx) + desc->infinite = + alloc_EXPR_LIST (0, cond_over, desc->infinite); + if (cond_under != const0_rtx) + desc->noloop_assumptions = + alloc_EXPR_LIST (0, cond_under, desc->noloop_assumptions); + break; + + case NE: + if (cond_over != const0_rtx) + desc->infinite = + alloc_EXPR_LIST (0, cond_over, desc->infinite); + if (cond_under != const0_rtx) + desc->infinite = + alloc_EXPR_LIST (0, cond_under, desc->infinite); + break; + + default: + abort (); + } + + iv->mode = mode; + iv->extend = signed_p ? SIGN_EXTEND : ZERO_EXTEND; +} + +/* Transforms IV0 and IV1 compared by COND so that they are both compared as + subregs of the same mode if possible (sometimes it is neccesary to add + some assumptions to DESC). */ + +static bool +canonicalize_iv_subregs (struct rtx_iv *iv0, struct rtx_iv *iv1, + enum rtx_code cond, struct niter_desc *desc) +{ + enum machine_mode comp_mode; + bool signed_p; + + /* If the ivs behave specially in the first iteration, or are + added/multiplied after extending, we ignore them. */ + if (iv0->first_special || iv0->mult != const1_rtx || iv0->delta != const0_rtx) + return false; + if (iv1->first_special || iv1->mult != const1_rtx || iv1->delta != const0_rtx) + return false; + + /* If there is some extend, it must match signedness of the comparison. */ + switch (cond) + { + case LE: + case LT: + if (iv0->extend == ZERO_EXTEND + || iv1->extend == ZERO_EXTEND) + return false; + signed_p = true; + break; + + case LEU: + case LTU: + if (iv0->extend == SIGN_EXTEND + || iv1->extend == SIGN_EXTEND) + return false; + signed_p = false; + break; + + case NE: + if (iv0->extend != NIL + && iv1->extend != NIL + && iv0->extend != iv1->extend) + return false; + + signed_p = false; + if (iv0->extend != NIL) + signed_p = iv0->extend == SIGN_EXTEND; + if (iv1->extend != NIL) + signed_p = iv1->extend == SIGN_EXTEND; + break; + + default: + abort (); + } + + /* Values of both variables should be computed in the same mode. These + might indeed be different, if we have comparison like + + (compare (subreg:SI (iv0)) (subreg:SI (iv1))) + + and iv0 and iv1 are both ivs iterating in SI mode, but calculated + in different modes. This does not seem impossible to handle, but + it hardly ever occurs in practice. + + The only exception is the case when one of operands is invariant. + For example pentium 3 generates comparisons like + (lt (subreg:HI (reg:SI)) 100). Here we assign HImode to 100, but we + definitely do not want this prevent the optimization. */ + comp_mode = iv0->extend_mode; + if (GET_MODE_BITSIZE (comp_mode) < GET_MODE_BITSIZE (iv1->extend_mode)) + comp_mode = iv1->extend_mode; + + if (iv0->extend_mode != comp_mode) + { + if (iv0->mode != iv0->extend_mode + || iv0->step != const0_rtx) + return false; + + iv0->base = simplify_gen_unary (signed_p ? SIGN_EXTEND : ZERO_EXTEND, + comp_mode, iv0->base, iv0->mode); + iv0->extend_mode = comp_mode; + } + + if (iv1->extend_mode != comp_mode) + { + if (iv1->mode != iv1->extend_mode + || iv1->step != const0_rtx) + return false; + + iv1->base = simplify_gen_unary (signed_p ? SIGN_EXTEND : ZERO_EXTEND, + comp_mode, iv1->base, iv1->mode); + iv1->extend_mode = comp_mode; + } + + /* Check that both ivs belong to a range of a single mode. If one of the + operands is an invariant, we may need to shorten it into the common + mode. */ + if (iv0->mode == iv0->extend_mode + && iv0->step == const0_rtx + && iv0->mode != iv1->mode) + shorten_into_mode (iv0, iv1->mode, cond, signed_p, desc); + + if (iv1->mode == iv1->extend_mode + && iv1->step == const0_rtx + && iv0->mode != iv1->mode) + shorten_into_mode (iv1, iv0->mode, swap_condition (cond), signed_p, desc); + + if (iv0->mode != iv1->mode) + return false; + + desc->mode = iv0->mode; + desc->signed_p = signed_p; + + return true; +} + +/* Computes number of iterations of the CONDITION in INSN in LOOP and stores + the result into DESC. Very similar to determine_number_of_iterations + (basically its rtl version), complicated by things like subregs. */ + +void +iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, + struct niter_desc *desc) +{ + rtx op0, op1, delta, step, bound, may_xform, def_insn, tmp, tmp0, tmp1; + struct rtx_iv iv0, iv1, tmp_iv; + rtx assumption; + enum rtx_code cond; + enum machine_mode mode, comp_mode; + rtx mmin, mmax; + unsigned HOST_WIDEST_INT s, size, d; + HOST_WIDEST_INT up, down, inc; + int was_sharp = false; + + /* The meaning of these assumptions is this: + if !assumptions + then the rest of information does not have to be valid + if noloop_assumptions then the loop does not roll + if infinite then this exit is never used */ + + desc->assumptions = NULL_RTX; + desc->noloop_assumptions = NULL_RTX; + desc->infinite = NULL_RTX; + desc->simple_p = true; + + desc->const_iter = false; + desc->niter_expr = NULL_RTX; + desc->niter_max = 0; + + cond = GET_CODE (condition); + if (GET_RTX_CLASS (cond) != '<') + abort (); + + mode = GET_MODE (XEXP (condition, 0)); + if (mode == VOIDmode) + mode = GET_MODE (XEXP (condition, 1)); + /* The constant comparisons should be folded. */ + if (mode == VOIDmode) + abort (); + + /* We only handle integers or pointers. */ + if (GET_MODE_CLASS (mode) != MODE_INT + && GET_MODE_CLASS (mode) != MODE_PARTIAL_INT) + goto fail; + + op0 = XEXP (condition, 0); + def_insn = iv_get_reaching_def (insn, op0); + if (!iv_analyse (def_insn, op0, &iv0)) + goto fail; + if (iv0.extend_mode == VOIDmode) + iv0.mode = iv0.extend_mode = mode; + + op1 = XEXP (condition, 1); + def_insn = iv_get_reaching_def (insn, op1); + if (!iv_analyse (def_insn, op1, &iv1)) + goto fail; + if (iv1.extend_mode == VOIDmode) + iv1.mode = iv1.extend_mode = mode; + + if (GET_MODE_BITSIZE (iv0.extend_mode) > HOST_BITS_PER_WIDE_INT + || GET_MODE_BITSIZE (iv1.extend_mode) > HOST_BITS_PER_WIDE_INT) + goto fail; + + /* Check condition and normalize it. */ + + switch (cond) + { + case GE: + case GT: + case GEU: + case GTU: + tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv; + cond = swap_condition (cond); + break; + case NE: + case LE: + case LEU: + case LT: + case LTU: + break; + default: + goto fail; + } + + /* Handle extends. This is relatively nontrivial, so we only try in some + easy cases, when we can canonicalize the ivs (possibly by adding some + assumptions) to shape subreg (base + i * step). This function also fills + in desc->mode and desc->signed_p. */ + + if (!canonicalize_iv_subregs (&iv0, &iv1, cond, desc)) + goto fail; + + comp_mode = iv0.extend_mode; + mode = iv0.mode; + size = GET_MODE_BITSIZE (mode); + get_mode_bounds (mode, (cond == LE || cond == LT), &mmin, &mmax); + + if (GET_CODE (iv0.step) != CONST_INT || GET_CODE (iv1.step) != CONST_INT) + goto fail; + + /* We can take care of the case of two induction variables chasing each other + if the test is NE. I have never seen a loop using it, but still it is + cool. */ + if (iv0.step != const0_rtx && iv1.step != const0_rtx) + { + if (cond != NE) + goto fail; + + iv0.step = simplify_gen_binary (MINUS, comp_mode, iv0.step, iv1.step); + iv1.step = const0_rtx; + } + + /* This is either infinite loop or the one that ends immediately, depending + on initial values. Unswitching should remove this kind of conditions. */ + if (iv0.step == const0_rtx && iv1.step == const0_rtx) + goto fail; + + /* Ignore loops of while (i-- < 10) type. */ + if (cond != NE + && (INTVAL (iv0.step) < 0 || INTVAL (iv1.step) > 0)) + goto fail; + + /* Some more condition normalization. We must record some assumptions + due to overflows. */ + switch (cond) + { + case LT: + case LTU: + /* We want to take care only of non-sharp relationals; this is easy, + as in cases the overflow would make the transformation unsafe + the loop does not roll. Seemingly it would make more sense to want + to take care of sharp relationals instead, as NE is more similar to + them, but the problem is that here the transformation would be more + difficult due to possibly infinite loops. */ + if (iv0.step == const0_rtx) + { + tmp = lowpart_subreg (mode, iv0.base, comp_mode); + assumption = simplify_gen_relational (EQ, SImode, mode, tmp, mmax); + if (assumption == const_true_rtx) + goto zero_iter; + iv0.base = simplify_gen_binary (PLUS, comp_mode, + iv0.base, const1_rtx); + } + else + { + tmp = lowpart_subreg (mode, iv1.base, comp_mode); + assumption = simplify_gen_relational (EQ, SImode, mode, tmp, mmin); + if (assumption == const_true_rtx) + goto zero_iter; + iv1.base = simplify_gen_binary (PLUS, comp_mode, + iv1.base, constm1_rtx); + } + + if (assumption != const0_rtx) + desc->noloop_assumptions = + alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions); + cond = (cond == LT) ? LE : LEU; + + /* It will be useful to be able to tell the difference once more in + LE -> NE reduction. */ + was_sharp = true; + break; + default: ; + } + + /* Take care of trivially infinite loops. */ + if (cond != NE) + { + if (iv0.step == const0_rtx) + { + tmp = lowpart_subreg (mode, iv0.base, comp_mode); + if (rtx_equal_p (tmp, mmin)) + { + desc->infinite = + alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX); + return; + } + } + else + { + tmp = lowpart_subreg (mode, iv1.base, comp_mode); + if (rtx_equal_p (tmp, mmax)) + { + desc->infinite = + alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX); + return; + } + } + } + + /* If we can we want to take care of NE conditions instead of size + comparisons, as they are much more friendly (most importantly + this takes care of special handling of loops with step 1). We can + do it if we first check that upper bound is greater or equal to + lower bound, their difference is constant c modulo step and that + there is not an overflow. */ + if (cond != NE) + { + if (iv0.step == const0_rtx) + step = simplify_gen_unary (NEG, comp_mode, iv1.step, comp_mode); + else + step = iv0.step; + delta = simplify_gen_binary (MINUS, comp_mode, iv1.base, iv0.base); + delta = lowpart_subreg (mode, delta, comp_mode); + delta = simplify_gen_binary (UMOD, mode, delta, step); + may_xform = const0_rtx; + + if (GET_CODE (delta) == CONST_INT) + { + if (was_sharp && INTVAL (delta) == INTVAL (step) - 1) + { + /* A special case. We have transformed condition of type + for (i = 0; i < 4; i += 4) + into + for (i = 0; i <= 3; i += 4) + obviously if the test for overflow during that transformation + passed, we cannot overflow here. Most importantly any + loop with sharp end condition and step 1 falls into this + cathegory, so handling this case specially is definitely + worth the troubles. */ + may_xform = const_true_rtx; + } + else if (iv0.step == const0_rtx) + { + bound = simplify_gen_binary (PLUS, comp_mode, mmin, step); + bound = simplify_gen_binary (MINUS, comp_mode, bound, delta); + bound = lowpart_subreg (mode, bound, comp_mode); + tmp = lowpart_subreg (mode, iv0.base, comp_mode); + may_xform = simplify_gen_relational (cond, SImode, mode, + bound, tmp); + } + else + { + bound = simplify_gen_binary (MINUS, comp_mode, mmax, step); + bound = simplify_gen_binary (PLUS, comp_mode, bound, delta); + bound = lowpart_subreg (mode, bound, comp_mode); + tmp = lowpart_subreg (mode, iv1.base, comp_mode); + may_xform = simplify_gen_relational (cond, SImode, mode, + tmp, bound); + } + } + + if (may_xform != const0_rtx) + { + /* We perform the transformation always provided that it is not + completely senseless. This is OK, as we would need this assumption + to determine the number of iterations anyway. */ + if (may_xform != const_true_rtx) + desc->assumptions = alloc_EXPR_LIST (0, may_xform, + desc->assumptions); + + /* We are going to lose some information about upper bound on + number of iterations in this step, so record the information + here. */ + inc = INTVAL (iv0.step) - INTVAL (iv1.step); + if (GET_CODE (iv1.base) == CONST_INT) + up = INTVAL (iv1.base); + else + up = INTVAL (mmax) - inc; + down = INTVAL (GET_CODE (iv0.base) == CONST_INT ? iv0.base : mmin); + desc->niter_max = (up - down) / inc + 1; + + if (iv0.step == const0_rtx) + { + iv0.base = simplify_gen_binary (PLUS, comp_mode, iv0.base, delta); + iv0.base = simplify_gen_binary (MINUS, comp_mode, iv0.base, step); + } + else + { + iv1.base = simplify_gen_binary (MINUS, comp_mode, iv1.base, delta); + iv1.base = simplify_gen_binary (PLUS, comp_mode, iv1.base, step); + } + + tmp0 = lowpart_subreg (mode, iv0.base, comp_mode); + tmp1 = lowpart_subreg (mode, iv1.base, comp_mode); + assumption = simplify_gen_relational (reverse_condition (cond), + SImode, mode, tmp0, tmp1); + if (assumption == const_true_rtx) + goto zero_iter; + else if (assumption != const0_rtx) + desc->noloop_assumptions = + alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions); + cond = NE; + } + } + + /* Count the number of iterations. */ + if (cond == NE) + { + /* Everything we do here is just arithmetics modulo size of mode. This + makes us able to do more involved computations of number of iterations + than in other cases. First transform the condition into shape + s * i <> c, with s positive. */ + iv1.base = simplify_gen_binary (MINUS, comp_mode, iv1.base, iv0.base); + iv0.base = const0_rtx; + iv0.step = simplify_gen_binary (MINUS, comp_mode, iv0.step, iv1.step); + iv1.step = const0_rtx; + if (INTVAL (iv0.step) < 0) + { + iv0.step = simplify_gen_unary (NEG, comp_mode, iv0.step, mode); + iv1.base = simplify_gen_unary (NEG, comp_mode, iv1.base, mode); + } + iv0.step = lowpart_subreg (mode, iv0.step, comp_mode); + + /* Let nsd (s, size of mode) = d. If d does not divide c, the loop + is infinite. Otherwise, the number of iterations is + (inverse(s/d) * (c/d)) mod (size of mode/d). */ + s = INTVAL (iv0.step); d = 1; + while (s % 2 != 1) + { + s /= 2; + d *= 2; + size--; + } + bound = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1 ) << 1) - 1); + + tmp1 = lowpart_subreg (mode, iv1.base, comp_mode); + tmp = simplify_gen_binary (UMOD, mode, tmp1, GEN_INT (d)); + assumption = simplify_gen_relational (NE, SImode, mode, tmp, const0_rtx); + desc->infinite = alloc_EXPR_LIST (0, assumption, desc->infinite); + + tmp = simplify_gen_binary (UDIV, mode, tmp1, GEN_INT (d)); + tmp = simplify_gen_binary (MULT, mode, + tmp, GEN_INT (inverse (s, size))); + desc->niter_expr = simplify_gen_binary (AND, mode, tmp, bound); + } + else + { + if (iv1.step == const0_rtx) + /* Condition in shape a + s * i <= b + We must know that b + s does not overflow and a <= b + s and then we + can compute number of iterations as (b + s - a) / s. (It might + seem that we in fact could be more clever about testing the b + s + overflow condition using some information about b - a mod s, + but it was already taken into account during LE -> NE transform). */ + { + step = iv0.step; + tmp0 = lowpart_subreg (mode, iv0.base, comp_mode); + tmp1 = lowpart_subreg (mode, iv1.base, comp_mode); + + bound = simplify_gen_binary (MINUS, mode, mmax, step); + assumption = simplify_gen_relational (cond, SImode, mode, + tmp1, bound); + desc->assumptions = + alloc_EXPR_LIST (0, assumption, desc->assumptions); + + tmp = simplify_gen_binary (PLUS, comp_mode, iv1.base, iv0.step); + tmp = lowpart_subreg (mode, tmp, comp_mode); + assumption = simplify_gen_relational (reverse_condition (cond), + SImode, mode, tmp0, tmp); + + delta = simplify_gen_binary (PLUS, mode, tmp1, step); + delta = simplify_gen_binary (MINUS, mode, delta, tmp0); + } + else + { + /* Condition in shape a <= b - s * i + We must know that a - s does not overflow and a - s <= b and then + we can again compute number of iterations as (b - (a - s)) / s. */ + step = simplify_gen_unary (NEG, mode, iv1.step, mode); + tmp0 = lowpart_subreg (mode, iv0.base, comp_mode); + tmp1 = lowpart_subreg (mode, iv1.base, comp_mode); + + bound = simplify_gen_binary (MINUS, mode, mmin, step); + assumption = simplify_gen_relational (cond, SImode, mode, + bound, tmp0); + desc->assumptions = + alloc_EXPR_LIST (0, assumption, desc->assumptions); + + tmp = simplify_gen_binary (PLUS, comp_mode, iv0.base, iv1.step); + tmp = lowpart_subreg (mode, tmp, comp_mode); + assumption = simplify_gen_relational (reverse_condition (cond), + SImode, mode, + tmp, tmp1); + delta = simplify_gen_binary (MINUS, mode, tmp0, step); + delta = simplify_gen_binary (MINUS, mode, tmp1, delta); + } + if (assumption == const_true_rtx) + goto zero_iter; + else if (assumption != const0_rtx) + desc->noloop_assumptions = + alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions); + delta = simplify_gen_binary (UDIV, mode, delta, step); + desc->niter_expr = delta; + } + + simplify_using_initial_values (loop, AND, &desc->assumptions); + if (desc->assumptions + && XEXP (desc->assumptions, 0) == const0_rtx) + goto fail; + simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions); + simplify_using_initial_values (loop, IOR, &desc->infinite); + simplify_using_initial_values (loop, NIL, &desc->niter_expr); + + /* Rerun the simplification. Consider code (created by copying loop headers) + + i = 0; + + if (0 < n) + { + do + { + i++; + } while (i < n); + } + + The first pass determines that i = 0, the second pass uses it to eliminate + noloop assumption. */ + + simplify_using_initial_values (loop, AND, &desc->assumptions); + if (desc->assumptions + && XEXP (desc->assumptions, 0) == const0_rtx) + goto fail; + simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions); + simplify_using_initial_values (loop, IOR, &desc->infinite); + simplify_using_initial_values (loop, NIL, &desc->niter_expr); + + if (GET_CODE (desc->niter_expr) == CONST_INT) + { + unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr); + + desc->const_iter = true; + desc->niter_max = desc->niter = val & GET_MODE_MASK (desc->mode); + } + else if (!desc->niter_max) + desc->niter_max = determine_max_iter (desc); + + return; + +fail: + desc->simple_p = false; + return; + +zero_iter: + desc->const_iter = true; + desc->niter = 0; + desc->niter_max = 0; + desc->niter_expr = const0_rtx; + return; +} + +/* Checks whether E is a simple exit from LOOP and stores its description + into DESC. TODO Should replace cfgloopanal.c:simple_loop_exit_p. */ + +static void +check_simple_exit (struct loop *loop, edge e, struct niter_desc *desc) +{ + basic_block exit_bb; + rtx condition, at; + edge ei; + + exit_bb = e->src; + desc->simple_p = false; + + /* It must belong directly to the loop. */ + if (exit_bb->loop_father != loop) + return; + + /* It must be tested (at least) once during any iteration. */ + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb)) + return; + + /* It must end in a simple conditional jump. */ + if (!any_condjump_p (BB_END (exit_bb))) + return; + + ei = exit_bb->succ; + if (ei == e) + ei = ei->succ_next; + + desc->out_edge = e; + desc->in_edge = ei; + + /* Test whether the condition is suitable. */ + if (!(condition = get_condition (BB_END (ei->src), &at, false))) + return; + + if (ei->flags & EDGE_FALLTHRU) + { + condition = reversed_condition (condition); + if (!condition) + return; + } + + /* Check that we are able to determine number of iterations and fill + in information about it. */ + iv_number_of_iterations (loop, at, condition, desc); +} + +/* Finds a simple exit of LOOP and stores its description into DESC. + TODO Should replace cfgloopanal.c:simple_loop_p. */ + +void +find_simple_exit (struct loop *loop, struct niter_desc *desc) +{ + unsigned i; + basic_block *body; + edge e; + struct niter_desc act; + bool any = false; + + desc->simple_p = false; + body = get_loop_body (loop); + + for (i = 0; i < loop->num_nodes; i++) + { + for (e = body[i]->succ; e; e = e->succ_next) + { + if (flow_bb_inside_loop_p (loop, e->dest)) + continue; + + check_simple_exit (loop, e, &act); + if (!act.simple_p) + continue; + + /* Prefer constant iterations; the less the better. */ + if (!any) + any = true; + else if (!act.const_iter + || (desc->const_iter && act.niter >= desc->niter)) + continue; + *desc = act; + } + } + + if (rtl_dump_file) + { + if (desc->simple_p) + { + fprintf (rtl_dump_file, "Loop %d is simple:\n", loop->num); + fprintf (rtl_dump_file, " simple exit %d -> %d\n", + desc->out_edge->src->index, + desc->out_edge->dest->index); + if (desc->assumptions) + { + fprintf (rtl_dump_file, " assumptions: "); + print_rtl (rtl_dump_file, desc->assumptions); + fprintf (rtl_dump_file, "\n"); + } + if (desc->noloop_assumptions) + { + fprintf (rtl_dump_file, " does not roll if: "); + print_rtl (rtl_dump_file, desc->noloop_assumptions); + fprintf (rtl_dump_file, "\n"); + } + if (desc->infinite) + { + fprintf (rtl_dump_file, " infinite if: "); + print_rtl (rtl_dump_file, desc->infinite); + fprintf (rtl_dump_file, "\n"); + } + + fprintf (rtl_dump_file, " number of iterations: "); + print_rtl (rtl_dump_file, desc->niter_expr); + fprintf (rtl_dump_file, "\n"); + + fprintf (rtl_dump_file, " upper bound: "); + fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter_max); + fprintf (rtl_dump_file, "\n"); + } + else + fprintf (rtl_dump_file, "Loop %d is not simple.\n", loop->num); + } + + free (body); +} + +/* Creates a simple loop description of LOOP if it was not computed + already. */ + +struct niter_desc * +get_simple_loop_desc (struct loop *loop) +{ + struct niter_desc *desc = simple_loop_desc (loop); + + if (desc) + return desc; + + desc = xmalloc (sizeof (struct niter_desc)); + iv_analysis_loop_init (loop); + find_simple_exit (loop, desc); + loop->aux = desc; + + return desc; +} + +/* Releases simple loop description for LOOP. */ + +void +free_simple_loop_desc (struct loop *loop) +{ + struct niter_desc *desc = simple_loop_desc (loop); + + if (!desc) + return; + + free (desc); + loop->aux = NULL; +} diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c index 6c796af..b093a7d 100644 --- a/gcc/loop-unroll.c +++ b/gcc/loop-unroll.c @@ -85,7 +85,7 @@ void unroll_and_peel_loops (struct loops *loops, int flags) { struct loop *loop, *next; - int check; + bool check; /* First perform complete loop peeling (it is almost surely a win, and affects parameters for further decision a lot). */ @@ -110,7 +110,7 @@ unroll_and_peel_loops (struct loops *loops, int flags) else next = loop->outer; - check = 1; + check = true; /* And perform the appropriate transformations. */ switch (loop->lpt_decision.decision) { @@ -130,7 +130,7 @@ unroll_and_peel_loops (struct loops *loops, int flags) unroll_loop_stupid (loops, loop); break; case LPT_NONE: - check = 0; + check = false; break; default: abort (); @@ -144,6 +144,29 @@ unroll_and_peel_loops (struct loops *loops, int flags) } loop = next; } + + iv_analysis_done (); +} + +/* Check whether exit of the LOOP is at the end of loop body. */ + +static bool +loop_exit_at_end_p (struct loop *loop) +{ + struct niter_desc *desc = get_simple_loop_desc (loop); + rtx insn; + + if (desc->in_edge->dest != loop->latch) + return false; + + /* Check that the latch is empty. */ + FOR_BB_INSNS (loop->latch, insn) + { + if (INSN_P (insn)) + return false; + } + + return true; } /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */ @@ -168,10 +191,9 @@ peel_loops_completely (struct loops *loops, int flags) next = loop->outer; loop->lpt_decision.decision = LPT_NONE; - loop->has_desc = 0; if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n", + fprintf (rtl_dump_file, "\n;; *** Considering loop %d for complete peeling ***\n", loop->num); loop->ninsns = num_loop_insns (loop); @@ -216,7 +238,7 @@ decide_unrolling_and_peeling (struct loops *loops, int flags) loop->lpt_decision.decision = LPT_NONE; if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num); + fprintf (rtl_dump_file, "\n;; *** Considering loop %d ***\n", loop->num); /* Do not peel cold areas. */ if (!maybe_hot_bb_p (loop->header)) @@ -269,8 +291,10 @@ decide_unrolling_and_peeling (struct loops *loops, int flags) static void decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) { + struct niter_desc *desc; + if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n"); + fprintf (rtl_dump_file, "\n;; Considering peeling once rolling loop\n"); /* Is the loop small enough? */ if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) @@ -281,11 +305,13 @@ decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) } /* Check for simple loops. */ - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; + desc = get_simple_loop_desc (loop); /* Check number of iterations. */ - if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0) + if (!desc->simple_p + || desc->assumptions + || !desc->const_iter + || desc->niter != 0) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n"); @@ -303,9 +329,10 @@ static void decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) { unsigned npeel; + struct niter_desc *desc; if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering peeling completely\n"); + fprintf (rtl_dump_file, "\n;; Considering peeling completely\n"); /* Skip non-innermost loops. */ if (loop->inner) @@ -346,26 +373,24 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) } /* Check for simple loops. */ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check number of iterations. */ - if (!loop->simple || !loop->desc.const_iter) + if (!desc->simple_p + || desc->assumptions + || !desc->const_iter) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n"); return; } - if (loop->desc.niter > npeel - 1) + if (desc->niter > npeel - 1) { if (rtl_dump_file) { fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much ("); - fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter); + fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel); } return; @@ -397,8 +422,8 @@ peel_loop_completely (struct loops *loops, struct loop *loop) sbitmap wont_exit; unsigned HOST_WIDE_INT npeel; unsigned n_remove_edges, i; - edge *remove_edges; - struct loop_desc *desc = &loop->desc; + edge *remove_edges, ei; + struct niter_desc *desc = get_simple_loop_desc (loop); npeel = desc->niter; @@ -407,7 +432,7 @@ peel_loop_completely (struct loops *loops, struct loop *loop) wont_exit = sbitmap_alloc (npeel + 1); sbitmap_ones (wont_exit); RESET_BIT (wont_exit, 0); - if (desc->may_be_zero) + if (desc->noloop_assumptions) RESET_BIT (wont_exit, 1); remove_edges = xcalloc (npeel, sizeof (edge)); @@ -427,19 +452,24 @@ peel_loop_completely (struct loops *loops, struct loop *loop) free (remove_edges); } + ei = desc->in_edge; + free_simple_loop_desc (loop); + /* Now remove the unreachable part of the last iteration and cancel the loop. */ - remove_path (loops, desc->in_edge); + remove_path (loops, ei); if (rtl_dump_file) fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel); } /* Decide whether to unroll LOOP iterating constant number of times and how much. */ + static void decide_unroll_constant_iterations (struct loop *loop, int flags) { - unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i; + unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; + struct niter_desc *desc; if (!(flags & UAP_UNROLL)) { @@ -448,7 +478,8 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) } if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n"); + fprintf (rtl_dump_file, + "\n;; Considering unrolling loop with constant number of iterations\n"); /* nunroll = total number of copies of the original loop body in unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ @@ -468,14 +499,10 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) } /* Check for simple loops. */ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check number of iterations. */ - if (!loop->simple || !loop->desc.const_iter) + if (!desc->simple_p || !desc->const_iter || desc->assumptions) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n"); @@ -483,7 +510,7 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) } /* Check whether the loop rolls enough to consider. */ - if (loop->desc.niter < 2 * nunroll) + if (desc->niter < 2 * nunroll) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n"); @@ -497,16 +524,17 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) best_copies = 2 * nunroll + 10; i = 2 * nunroll + 2; - if ((unsigned) i - 1 >= loop->desc.niter) - i = loop->desc.niter - 2; + if (i - 1 >= desc->niter) + i = desc->niter - 2; for (; i >= nunroll - 1; i--) { - unsigned exit_mod = loop->desc.niter % (i + 1); + unsigned exit_mod = desc->niter % (i + 1); - if (loop->desc.postincr) + if (!loop_exit_at_end_p (loop)) n_copies = exit_mod + i + 1; - else if (exit_mod != (unsigned) i || loop->desc.may_be_zero) + else if (exit_mod != (unsigned) i + || desc->noloop_assumptions != NULL_RTX) n_copies = exit_mod + i + 2; else n_copies = i + 1; @@ -524,6 +552,11 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; loop->lpt_decision.times = best_unroll; + + if (rtl_dump_file) + fprintf (rtl_dump_file, + ";; Decided to unroll the constant times rolling loop, %d times.\n", + loop->lpt_decision.times); } /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1 @@ -554,11 +587,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) unsigned n_remove_edges, i; edge *remove_edges; unsigned max_unroll = loop->lpt_decision.times; - struct loop_desc *desc = &loop->desc; + struct niter_desc *desc = get_simple_loop_desc (loop); + bool exit_at_end = loop_exit_at_end_p (loop); niter = desc->niter; - if (niter <= (unsigned) max_unroll + 1) + if (niter <= max_unroll + 1) abort (); /* Should not get here (such loop should be peeled instead). */ exit_mod = niter % (max_unroll + 1); @@ -569,9 +603,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge)); n_remove_edges = 0; - if (desc->postincr) + if (!exit_at_end) { - /* Counter is incremented after the exit test; leave exit test + /* The exit is not at the end of the loop; leave exit test in the first copy, so that the loops that start with test of exit condition have continuous body after unrolling. */ @@ -580,15 +614,22 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) /* Peel exit_mod iterations. */ RESET_BIT (wont_exit, 0); - if (desc->may_be_zero) + if (desc->noloop_assumptions) RESET_BIT (wont_exit, 1); - if (exit_mod - && !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), - loops, exit_mod, - wont_exit, desc->out_edge, remove_edges, &n_remove_edges, - DLTHE_FLAG_UPDATE_FREQ)) - abort (); + if (exit_mod) + { + if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), + loops, exit_mod, + wont_exit, desc->out_edge, + remove_edges, &n_remove_edges, + DLTHE_FLAG_UPDATE_FREQ)) + abort (); + + desc->noloop_assumptions = NULL_RTX; + desc->niter -= exit_mod; + desc->niter_max -= exit_mod; + } SET_BIT (wont_exit, 1); } @@ -602,12 +643,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) /* We know that niter >= max_unroll + 2; so we do not need to care of case when we would exit before reaching the loop. So just peel - exit_mod + 1 iterations. - */ - if (exit_mod != (unsigned) max_unroll || desc->may_be_zero) + exit_mod + 1 iterations. */ + if (exit_mod != max_unroll + || desc->noloop_assumptions) { RESET_BIT (wont_exit, 0); - if (desc->may_be_zero) + if (desc->noloop_assumptions) RESET_BIT (wont_exit, 1); if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), @@ -616,6 +657,10 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) DLTHE_FLAG_UPDATE_FREQ)) abort (); + desc->niter -= exit_mod + 1; + desc->niter_max -= exit_mod + 1; + desc->noloop_assumptions = NULL_RTX; + SET_BIT (wont_exit, 0); SET_BIT (wont_exit, 1); } @@ -632,6 +677,27 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) free (wont_exit); + if (exit_at_end) + { + basic_block exit_block = desc->in_edge->src->rbi->copy; + /* Find a new in and out edge; they are in the last copy we have made. */ + + if (exit_block->succ->dest == desc->out_edge->dest) + { + desc->out_edge = exit_block->succ; + desc->in_edge = exit_block->succ->succ_next; + } + else + { + desc->out_edge = exit_block->succ->succ_next; + desc->in_edge = exit_block->succ; + } + } + + desc->niter /= max_unroll + 1; + desc->niter_max /= max_unroll + 1; + desc->niter_expr = GEN_INT (desc->niter); + /* Remove the edges. */ for (i = 0; i < n_remove_edges; i++) remove_path (loops, remove_edges[i]); @@ -647,6 +713,7 @@ static void decide_unroll_runtime_iterations (struct loop *loop, int flags) { unsigned nunroll, nunroll_by_av, i; + struct niter_desc *desc; if (!(flags & UAP_UNROLL)) { @@ -655,7 +722,8 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) } if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n"); + fprintf (rtl_dump_file, + "\n;; Considering unrolling loop with runtime computable number of iterations\n"); /* nunroll = total number of copies of the original loop body in unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ @@ -675,21 +743,18 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) } /* Check for simple loops. */ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check simpleness. */ - if (!loop->simple) + if (!desc->simple_p || desc->assumptions) { if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n"); + fprintf (rtl_dump_file, + ";; Unable to prove that the number of iterations can be counted in runtime\n"); return; } - if (loop->desc.const_iter) + if (desc->const_iter) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Loop iterates constant times\n"); @@ -706,10 +771,16 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) /* Success; now force nunroll to be power of 2, as we are unable to cope with overflows in computation of number of iterations. */ - for (i = 1; 2 * i <= nunroll; i *= 2); + for (i = 1; 2 * i <= nunroll; i *= 2) + continue; loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; loop->lpt_decision.times = i - 1; + + if (rtl_dump_file) + fprintf (rtl_dump_file, + ";; Decided to unroll the runtime computable times rolling loop, %d times.\n", + loop->lpt_decision.times); } /* Unroll LOOP for that we are able to count number of iterations in runtime @@ -746,7 +817,7 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) static void unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) { - rtx niter, init_code, branch_code, jump, label; + rtx old_niter, niter, init_code, branch_code, tmp; unsigned i, j, p; basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch; unsigned n_dom_bbs; @@ -756,7 +827,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) edge *remove_edges, e; bool extra_zero_check, last_may_exit; unsigned max_unroll = loop->lpt_decision.times; - struct loop_desc *desc = &loop->desc; + struct niter_desc *desc = get_simple_loop_desc (loop); + bool exit_at_end = loop_exit_at_end_p (loop); /* Remember blocks whose dominators will have to be updated. */ dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block)); @@ -777,7 +849,7 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) } free (body); - if (desc->postincr) + if (!exit_at_end) { /* Leave exit in first copy (for explanation why see comment in unroll_loop_constant_iterations). */ @@ -798,15 +870,15 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) /* Get expression for number of iterations. */ start_sequence (); - niter = count_loop_iterations (desc, NULL, NULL); - if (!niter) - abort (); - niter = force_operand (niter, NULL); + old_niter = niter = gen_reg_rtx (desc->mode); + tmp = force_operand (copy_rtx (desc->niter_expr), niter); + if (tmp != niter) + emit_move_insn (niter, tmp); /* Count modulo by ANDing it with max_unroll; we use the fact that the number of unrollings is a power of two, and thus this is correct even if there is overflow in the computation. */ - niter = expand_simple_binop (GET_MODE (desc->var), AND, + niter = expand_simple_binop (desc->mode, AND, niter, GEN_INT (max_unroll), NULL_RTX, 0, OPTAB_LIB_WIDEN); @@ -824,10 +896,11 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) /* Peel the first copy of loop body (almost always we must leave exit test here; the only exception is when we have extra zero check and the number - of iterations is reliable (i.e. comes out of NE condition). Also record - the place of (possible) extra zero check. */ + of iterations is reliable. Also record the place of (possible) extra + zero check. */ sbitmap_zero (wont_exit); - if (extra_zero_check && desc->cond == NE) + if (extra_zero_check + && !desc->noloop_assumptions) SET_BIT (wont_exit, 1); ezc_swtch = loop_preheader_edge (loop)->src; if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), @@ -857,20 +930,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) p = REG_BR_PROB_BASE / (i + 2); preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); - label = block_label (preheader); - start_sequence (); - do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0, - GET_MODE (desc->var), NULL_RTX, NULL_RTX, - label); - jump = get_last_insn (); - JUMP_LABEL (jump) = label; - REG_NOTES (jump) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (p), REG_NOTES (jump)); - - LABEL_NUSES (label)++; - branch_code = get_insns (); - end_sequence (); + branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, + block_label (preheader), p, NULL_RTX); swtch = loop_split_edge_with (swtch->pred, branch_code); set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); @@ -886,20 +947,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) p = REG_BR_PROB_BASE / (max_unroll + 1); swtch = ezc_swtch; preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); - label = block_label (preheader); - start_sequence (); - do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0, - GET_MODE (desc->var), NULL_RTX, NULL_RTX, - label); - jump = get_last_insn (); - JUMP_LABEL (jump) = label; - REG_NOTES (jump) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (p), REG_NOTES (jump)); - - LABEL_NUSES (label)++; - branch_code = get_insns (); - end_sequence (); + branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, + block_label (preheader), p, NULL_RTX); swtch = loop_split_edge_with (swtch->succ, branch_code); set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); @@ -925,11 +974,45 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) free (wont_exit); + if (exit_at_end) + { + basic_block exit_block = desc->in_edge->src->rbi->copy; + /* Find a new in and out edge; they are in the last copy we have made. */ + + if (exit_block->succ->dest == desc->out_edge->dest) + { + desc->out_edge = exit_block->succ; + desc->in_edge = exit_block->succ->succ_next; + } + else + { + desc->out_edge = exit_block->succ->succ_next; + desc->in_edge = exit_block->succ; + } + } + /* Remove the edges. */ for (i = 0; i < n_remove_edges; i++) remove_path (loops, remove_edges[i]); free (remove_edges); + /* We must be careful when updating the number of iterations due to + preconditioning and the fact that the value must be valid at entry + of the loop. After passing through the above code, we see that + the correct new number of iterations is this: */ + if (desc->const_iter) + abort (); + desc->niter_expr = + simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1)); + desc->niter_max /= max_unroll + 1; + if (exit_at_end) + { + desc->niter_expr = + simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); + desc->noloop_assumptions = NULL_RTX; + desc->niter_max--; + } + if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n", @@ -941,6 +1024,7 @@ static void decide_peel_simple (struct loop *loop, int flags) { unsigned npeel; + struct niter_desc *desc; if (!(flags & UAP_PEEL)) { @@ -949,7 +1033,7 @@ decide_peel_simple (struct loop *loop, int flags) } if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering simply peeling loop\n"); + fprintf (rtl_dump_file, "\n;; Considering simply peeling loop\n"); /* npeel = number of iterations to peel. */ npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns; @@ -965,14 +1049,10 @@ decide_peel_simple (struct loop *loop, int flags) } /* Check for simple loops. */ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check number of iterations. */ - if (loop->simple && loop->desc.const_iter) + if (desc->simple_p && !desc->assumptions && desc->const_iter) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Loop iterates constant times\n"); @@ -981,7 +1061,7 @@ decide_peel_simple (struct loop *loop, int flags) /* Do not simply peel loops with branches inside -- it increases number of mispredicts. */ - if (loop->desc.n_branches > 1) + if (num_loop_branches (loop) > 1) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Not peeling, contains branches\n"); @@ -1016,6 +1096,10 @@ decide_peel_simple (struct loop *loop, int flags) /* Success. */ loop->lpt_decision.decision = LPT_PEEL_SIMPLE; loop->lpt_decision.times = npeel; + + if (rtl_dump_file) + fprintf (rtl_dump_file, ";; Decided to simply peel the loop, %d times.\n", + loop->lpt_decision.times); } /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: @@ -1037,6 +1121,7 @@ peel_loop_simple (struct loops *loops, struct loop *loop) { sbitmap wont_exit; unsigned npeel = loop->lpt_decision.times; + struct niter_desc *desc = get_simple_loop_desc (loop); wont_exit = sbitmap_alloc (npeel + 1); sbitmap_zero (wont_exit); @@ -1048,6 +1133,23 @@ peel_loop_simple (struct loops *loops, struct loop *loop) free (wont_exit); + if (desc->simple_p) + { + if (desc->const_iter) + { + desc->niter -= npeel; + desc->niter_expr = GEN_INT (desc->niter); + desc->noloop_assumptions = NULL_RTX; + } + else + { + /* We cannot just update niter_expr, as its value might be clobbered + inside loop. We could handle this by counting the number into + temporary just like we do in runtime unrolling, but it does not + seem worthwhile. */ + free_simple_loop_desc (loop); + } + } if (rtl_dump_file) fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel); } @@ -1057,6 +1159,7 @@ static void decide_unroll_stupid (struct loop *loop, int flags) { unsigned nunroll, nunroll_by_av, i; + struct niter_desc *desc; if (!(flags & UAP_UNROLL_ALL)) { @@ -1065,7 +1168,7 @@ decide_unroll_stupid (struct loop *loop, int flags) } if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n"); + fprintf (rtl_dump_file, "\n;; Considering unrolling loop stupidly\n"); /* nunroll = total number of copies of the original loop body in unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ @@ -1085,14 +1188,10 @@ decide_unroll_stupid (struct loop *loop, int flags) } /* Check for simple loops. */ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check simpleness. */ - if (loop->simple) + if (desc->simple_p && !desc->assumptions) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; The loop is simple\n"); @@ -1101,7 +1200,7 @@ decide_unroll_stupid (struct loop *loop, int flags) /* Do not unroll loops with branches inside -- it increases number of mispredicts. */ - if (loop->desc.n_branches > 1) + if (num_loop_branches (loop) > 1) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n"); @@ -1109,7 +1208,8 @@ decide_unroll_stupid (struct loop *loop, int flags) } /* If we have profile feedback, check whether the loop rolls. */ - if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) + if (loop->header->count + && expected_loop_iterations (loop) < 2 * nunroll) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n"); @@ -1119,10 +1219,16 @@ decide_unroll_stupid (struct loop *loop, int flags) /* Success. Now force nunroll to be power of 2, as it seems that this improves results (partially because of better alignments, partially because of some dark magic). */ - for (i = 1; 2 * i <= nunroll; i *= 2); + for (i = 1; 2 * i <= nunroll; i *= 2) + continue; loop->lpt_decision.decision = LPT_UNROLL_STUPID; loop->lpt_decision.times = i - 1; + + if (rtl_dump_file) + fprintf (rtl_dump_file, + ";; Decided to unroll the loop stupidly, %d times.\n", + loop->lpt_decision.times); } /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: @@ -1147,6 +1253,7 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop) { sbitmap wont_exit; unsigned nunroll = loop->lpt_decision.times; + struct niter_desc *desc = get_simple_loop_desc (loop); wont_exit = sbitmap_alloc (nunroll + 1); sbitmap_zero (wont_exit); @@ -1158,6 +1265,17 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop) free (wont_exit); + if (desc->simple_p) + { + /* We indeed may get here provided that there are nontrivial assumptions + for a loop to be really simple. We could update the counts, but the + problem is that we are unable to decide which exit will be taken + (not really true in case the number of iterations is constant, + but noone will do anything with this information, so we do not + worry about it). */ + desc->simple_p = false; + } + if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n", nunroll, num_loop_insns (loop)); diff --git a/gcc/loop-unswitch.c b/gcc/loop-unswitch.c index 40f0e83..ebbabe8 100644 --- a/gcc/loop-unswitch.c +++ b/gcc/loop-unswitch.c @@ -79,11 +79,63 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA with handling this case. */ static struct loop *unswitch_loop (struct loops *, struct loop *, - basic_block); + basic_block, rtx, rtx); static void unswitch_single_loop (struct loops *, struct loop *, rtx, int); -static bool may_unswitch_on_p (basic_block, struct loop *, - basic_block *); -static rtx reversed_condition (rtx); +static rtx may_unswitch_on (basic_block, struct loop *, rtx *); + +/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if + true, with probability PROB. If CINSN is not NULL, it is the insn to copy + in order to create a jump. */ + +rtx +compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob, + rtx cinsn) +{ + rtx seq, jump, cond; + enum machine_mode mode; + + mode = GET_MODE (op0); + if (mode == VOIDmode) + mode = GET_MODE (op1); + + start_sequence (); + if (GET_MODE_CLASS (mode) == MODE_CC) + { + /* A hack -- there seems to be no easy generic way how to make a + conditional jump from a ccmode comparison. */ + if (!cinsn) + abort (); + cond = XEXP (SET_SRC (pc_set (cinsn)), 0); + if (GET_CODE (cond) != comp + || !rtx_equal_p (op0, XEXP (cond, 0)) + || !rtx_equal_p (op1, XEXP (cond, 1))) + abort (); + emit_jump_insn (copy_insn (PATTERN (cinsn))); + jump = get_last_insn (); + JUMP_LABEL (jump) = JUMP_LABEL (cinsn); + LABEL_NUSES (JUMP_LABEL (jump))++; + redirect_jump (jump, label, 0); + } + else + { + if (cinsn) + abort (); + + op0 = force_operand (op0, NULL_RTX); + op1 = force_operand (op1, NULL_RTX); + do_compare_rtx_and_jump (op0, op1, comp, 0, + mode, NULL_RTX, NULL_RTX, label); + jump = get_last_insn (); + JUMP_LABEL (jump) = label; + LABEL_NUSES (label)++; + } + REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob), + REG_NOTES (jump)); + seq = get_insns (); + end_sequence (); + + return seq; +} /* Main entry point. Perform loop unswitching on all suitable LOOPS. */ void @@ -111,48 +163,82 @@ unswitch_loops (struct loops *loops) verify_loop_structure (loops); #endif } + + iv_analysis_done (); } /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its - basic blocks (for what it means see comments below). List of basic blocks - inside LOOP is provided in BODY to save time. */ -static bool -may_unswitch_on_p (basic_block bb, struct loop *loop, basic_block *body) + basic blocks (for what it means see comments below). In case condition + compares loop invariant cc mode register, return the jump in CINSN. */ + +static rtx +may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn) { - rtx test; + rtx test, at, insn, op[2]; + struct rtx_iv iv; unsigned i; + enum machine_mode mode; /* BB must end in a simple conditional jump. */ if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next) - return false; + return NULL_RTX; if (!any_condjump_p (BB_END (bb))) - return false; + return NULL_RTX; /* With branches inside loop. */ if (!flow_bb_inside_loop_p (loop, bb->succ->dest) || !flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest)) - return false; + return NULL_RTX; /* It must be executed just once each iteration (because otherwise we are unable to update dominator/irreducible loop information correctly). */ if (!just_once_each_iteration_p (loop, bb)) - return false; + return NULL_RTX; - /* Condition must be invariant. We use just a stupid test of invariantness - of the condition: all used regs must not be modified inside loop body. */ - test = get_condition (BB_END (bb), NULL, true); + /* Condition must be invariant. */ + test = get_condition (BB_END (bb), &at, true); if (!test) - return false; + return NULL_RTX; + + for (i = 0; i < 2; i++) + { + op[i] = XEXP (test, i); + + if (CONSTANT_P (op[i])) + continue; + + insn = iv_get_reaching_def (at, op[i]); + if (!iv_analyse (insn, op[i], &iv)) + return NULL_RTX; + if (iv.step != const0_rtx + || iv.first_special) + return NULL_RTX; + + op[i] = get_iv_value (&iv, const0_rtx); + } + + mode = GET_MODE (op[0]); + if (mode == VOIDmode) + mode = GET_MODE (op[1]); + if (GET_MODE_CLASS (mode) == MODE_CC) + { + if (at != BB_END (bb)) + return NULL_RTX; - for (i = 0; i < loop->num_nodes; i++) - if (modified_between_p (test, BB_HEAD (body[i]), NEXT_INSN (BB_END (body[i])))) - return false; + *cinsn = BB_END (bb); + if (!rtx_equal_p (op[0], XEXP (test, 0)) + || !rtx_equal_p (op[1], XEXP (test, 1))) + return NULL_RTX; - return true; + return test; + } + + return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode, + op[0], op[1])); } /* Reverses CONDition; returns NULL if we cannot. */ -static rtx +rtx reversed_condition (rtx cond) { enum rtx_code reversed; @@ -173,13 +259,10 @@ static void unswitch_single_loop (struct loops *loops, struct loop *loop, rtx cond_checked, int num) { - basic_block *bbs, bb; + basic_block *bbs; struct loop *nloop; unsigned i; - int true_first; - rtx cond, rcond, conds, rconds, acond, split_before; - int always_true; - int always_false; + rtx cond, rcond, conds, rconds, acond, cinsn = NULL_RTX; int repeat; edge e; @@ -237,8 +320,9 @@ unswitch_single_loop (struct loops *loops, struct loop *loop, /* Find a bb to unswitch on. */ bbs = get_loop_body (loop); + iv_analysis_loop_init (loop); for (i = 0; i < loop->num_nodes; i++) - if (may_unswitch_on_p (bbs[i], loop, bbs)) + if ((cond = may_unswitch_on (bbs[i], loop, &cinsn))) break; if (i == loop->num_nodes) @@ -247,39 +331,26 @@ unswitch_single_loop (struct loops *loops, struct loop *loop, return; } - if (!(cond = get_condition (BB_END (bbs[i]), &split_before, true))) - abort (); rcond = reversed_condition (cond); + if (rcond) + rcond = canon_condition (rcond); /* Check whether the result can be predicted. */ - always_true = 0; - always_false = 0; for (acond = cond_checked; acond; acond = XEXP (acond, 1)) - { - if (rtx_equal_p (cond, XEXP (acond, 0))) - { - always_true = 1; - break; - } - if (rtx_equal_p (rcond, XEXP (acond, 0))) - { - always_false = 1; - break; - } - } + simplify_using_condition (XEXP (acond, 0), &cond, NULL); - if (always_true) + if (cond == const_true_rtx) { /* Remove false path. */ - for (e = bbs[i]->succ; !(e->flags & EDGE_FALLTHRU); e = e->succ_next); + e = FALLTHRU_EDGE (bbs[i]); remove_path (loops, e); free (bbs); repeat = 1; } - else if (always_false) + else if (cond == const0_rtx) { /* Remove true path. */ - for (e = bbs[i]->succ; e->flags & EDGE_FALLTHRU; e = e->succ_next); + e = BRANCH_EDGE (bbs[i]); remove_path (loops, e); free (bbs); repeat = 1; @@ -293,21 +364,17 @@ unswitch_single_loop (struct loops *loops, struct loop *loop, else rconds = cond_checked; - /* Separate condition in a single basic block. */ - bb = split_loop_bb (bbs[i], PREV_INSN (split_before))->dest; - free (bbs); - true_first = !(bb->succ->flags & EDGE_FALLTHRU); if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unswitching loop\n"); /* Unswitch the loop on this condition. */ - nloop = unswitch_loop (loops, loop, bb); + nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn); if (!nloop) abort (); /* Invoke itself on modified loops. */ - unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num + 1); - unswitch_single_loop (loops, loop, true_first ? rconds : conds, num + 1); + unswitch_single_loop (loops, nloop, rconds, num + 1); + unswitch_single_loop (loops, loop, conds, num + 1); free_EXPR_LIST_node (conds); if (rcond) @@ -316,17 +383,21 @@ unswitch_single_loop (struct loops *loops, struct loop *loop, /* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON. We only support unswitching of innermost loops. UNSWITCH_ON must be executed in every - iteration, i.e. it must dominate LOOP latch, and should only contain code - for the condition we unswitch on. Returns NULL if impossible, new - loop otherwise. */ + iteration, i.e. it must dominate LOOP latch. COND is the condition + determining which loop is entered. Returns NULL if impossible, new loop + otherwise. The new loop is entered if COND is true. If CINSN is not + NULL, it is the insn in that COND is compared. */ + static struct loop * -unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on) +unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on, + rtx cond, rtx cinsn) { - edge entry, latch_edge; + edge entry, latch_edge, true_edge, false_edge, e; basic_block switch_bb, unswitch_on_alt, src; struct loop *nloop; sbitmap zero_bitmap; - int irred_flag; + int irred_flag, prob; + rtx seq; /* Some sanity checking. */ if (!flow_bb_inside_loop_p (loop, unswitch_on)) @@ -343,12 +414,6 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on) if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest)) abort (); - /* Will we be able to perform redirection? */ - if (!any_condjump_p (BB_END (unswitch_on))) - return NULL; - if (!cfg_layout_can_duplicate_bb_p (unswitch_on)) - return NULL; - entry = loop_preheader_edge (loop); /* Make a copy. */ @@ -365,10 +430,24 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on) /* Record the block with condition we unswitch on. */ unswitch_on_alt = unswitch_on->rbi->copy; + true_edge = BRANCH_EDGE (unswitch_on_alt); + false_edge = FALLTHRU_EDGE (unswitch_on); + latch_edge = loop->latch->rbi->copy->succ; + + /* Create a block with the condition. */ + prob = true_edge->probability; + switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); + seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond), + block_label (true_edge->dest), + prob, cinsn); + emit_insn_after (seq, BB_END (switch_bb)); + e = make_edge (switch_bb, true_edge->dest, 0); + e->probability = prob; + e->count = latch_edge->count * prob / REG_BR_PROB_BASE; + e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU); + e->probability = false_edge->probability; + e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE; - /* Make a copy of the block containing the condition; we will use - it as switch to decide which loop we want to use. */ - switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL); if (irred_flag) { switch_bb->flags |= BB_IRREDUCIBLE_LOOP; @@ -381,19 +460,14 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on) switch_bb->succ->flags &= ~EDGE_IRREDUCIBLE_LOOP; switch_bb->succ->succ_next->flags &= ~EDGE_IRREDUCIBLE_LOOP; } - unswitch_on->rbi->copy = unswitch_on_alt; /* Loopify from the copy of LOOP body, constructing the new loop. */ - for (latch_edge = loop->latch->rbi->copy->succ; - latch_edge->dest != loop->header; - latch_edge = latch_edge->succ_next); nloop = loopify (loops, latch_edge, loop->header->rbi->copy->pred, switch_bb); - /* Remove branches that are now unreachable in new loops. We rely on the - fact that cfg_layout_duplicate_bb reverses list of edges. */ - remove_path (loops, unswitch_on->succ); - remove_path (loops, unswitch_on_alt->succ); + /* Remove branches that are now unreachable in new loops. */ + remove_path (loops, true_edge); + remove_path (loops, false_edge); /* One of created loops do not have to be subloop of the outer loop now, so fix its placement in loop data structure. */ diff --git a/gcc/predict.c b/gcc/predict.c index 50580bd..44ee10c 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -406,13 +406,16 @@ estimate_probability (struct loops *loops_info) unsigned j; int exits; struct loop *loop = loops_info->parray[i]; - struct loop_desc desc; + struct niter_desc desc; unsigned HOST_WIDE_INT niter; flow_loop_scan (loop, LOOP_EXIT_EDGES); exits = loop->num_exits; - if (simple_loop_p (loop, &desc) && desc.const_iter) + iv_analysis_loop_init (loop); + find_simple_exit (loop, &desc); + + if (desc.simple_p && desc.const_iter) { int prob; niter = desc.niter + 1; @@ -472,6 +475,8 @@ estimate_probability (struct loops *loops_info) free (bbs); } + iv_analysis_done (); + /* Attempt to predict conditional jumps using a number of heuristics. */ FOR_EACH_BB (bb) { diff --git a/gcc/rtl.h b/gcc/rtl.h index 0302466..c2c2a4b 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2361,4 +2361,15 @@ extern void tracer (void); /* In var-tracking.c */ extern void variable_tracking_main (void); +/* In stor-layout.c. */ +extern void get_mode_bounds (enum machine_mode, int, rtx *, rtx *); + +/* In loop-unswitch.c */ +extern rtx reversed_condition (rtx); +extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx); + +/* In loop-iv.c */ +extern rtx canon_condition (rtx); +extern void simplify_using_condition (rtx, rtx *, struct bitmap_head_def *); + #endif /* ! GCC_RTL_H */ diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c index 1272a0c..98420fb 100644 --- a/gcc/stor-layout.c +++ b/gcc/stor-layout.c @@ -2118,4 +2118,27 @@ get_best_mode (int bitsize, int bitpos, unsigned int align, return mode; } +/* Gets minimal and maximal values for MODE (signed or unsigned depending on + SIGN). */ + +void +get_mode_bounds (enum machine_mode mode, int sign, rtx *mmin, rtx *mmax) +{ + int size = GET_MODE_BITSIZE (mode); + + if (size > HOST_BITS_PER_WIDE_INT) + abort (); + + if (sign) + { + *mmin = GEN_INT (-((unsigned HOST_WIDE_INT) 1 << (size - 1))); + *mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1)) - 1); + } + else + { + *mmin = const0_rtx; + *mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1) << 1) - 1); + } +} + #include "gt-stor-layout.h" diff --git a/gcc/toplev.c b/gcc/toplev.c index 763c5d0..c470cf0 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -3034,11 +3034,16 @@ static void rest_of_handle_loop2 (tree decl, rtx insns) { struct loops *loops; + basic_block bb; + timevar_push (TV_LOOP); open_dump_file (DFI_loop2, decl); if (rtl_dump_file) dump_flow_info (rtl_dump_file); + /* Initialize structures for layout changes. */ + cfg_layout_initialize (); + loops = loop_optimizer_init (rtl_dump_file); if (loops) @@ -3056,6 +3061,12 @@ rest_of_handle_loop2 (tree decl, rtx insns) loop_optimizer_finalize (loops, rtl_dump_file); } + /* Finalize layout changes. */ + FOR_EACH_BB (bb) + if (bb->next_bb != EXIT_BLOCK_PTR) + bb->rbi->next = bb->next_bb; + cfg_layout_finalize (); + cleanup_cfg (CLEANUP_EXPENSIVE); delete_trivially_dead_insns (insns, max_reg_num ()); reg_scan (insns, max_reg_num (), 0); -- 2.7.4