From a6629703d0608f506dfc32c20e017d6b0c6ce535 Mon Sep 17 00:00:00 2001 From: rakdver Date: Wed, 19 May 2004 17:53:45 +0000 Subject: [PATCH] PR c++/15463 * loop-iv.c (iv_number_of_iterations): Use trunc_int_for_mode on result of inverse. PR rtl-optimization/15274 * loop-iv.c (determine_max_iter, shorten_into_mode, iv_number_of_iterations): Handle constants correctly. * rtl.h (get_mode_bounds): Declaration changed. * stor-layout.c (get_mode_bounds): Return a constant suitable for the target mode. PR rtl-optimization/14692 * loop-unswitch.c (may_unswitch_on): Try folding the result. (unswitch_single_loop): Work correctly when may_unswitch_on returns a folded constant. * loop-iv.c (implies_p): Handle A < B ==> A + 1 <= B. * simplify-rtx.c (simplify_const_relational_operation): Optimize comparisons with mode bounds. * function.c (struct temp_slot): Add new field prev. (free_after_compilation, init_temp_slots): Free new fields. (cut_slot_from_list, insert_slot_to_list, temp_slots_at_level, max_slot_level, move_slot_to_level, make_slot_available): New functions. (assign_stack_temp_for_type, combine_temp_slots, find_temp_slot_from_address, preserve_temp_slots, preserve_rtl_expr_result, free_temp_slots, free_temps_for_rtl_expr, pop_temp_slots): Work with the new structure of lists. (mark_all_temps_used): Removed. * function.h (struct function): Field x_temp_slots replaced by x_used_temp_slots and x_avail_temp_slots. (temp_slots): Replaced by ... (used_temp_slots, avail_temp_slots): New. * tree.h (mark_all_temps_used): Declaration removed. * loop-iv.c (mark_single_set, get_biv_step_1, iv_analyze, simplify_using_assignment): Take the expression out of the expr_list wrapper. * loop-iv.c (iv_number_of_iterations): Improve classification of infinite loops. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@82028 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 46 +++++++ gcc/function.c | 365 +++++++++++++++++++++++++++++++++------------------- gcc/function.h | 10 +- gcc/loop-iv.c | 120 +++++++++++++---- gcc/loop-unswitch.c | 26 ++-- gcc/rtl.h | 3 +- gcc/simplify-rtx.c | 84 ++++++++---- gcc/stor-layout.c | 20 ++- gcc/tree.h | 1 - 9 files changed, 470 insertions(+), 205 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ca2069d..673dae6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,49 @@ +2004-05-19 Zdenek Dvorak + + PR c++/15463 + * loop-iv.c (iv_number_of_iterations): Use trunc_int_for_mode on + result of inverse. + + PR rtl-optimization/15274 + * loop-iv.c (determine_max_iter, shorten_into_mode, + iv_number_of_iterations): Handle constants correctly. + * rtl.h (get_mode_bounds): Declaration changed. + * stor-layout.c (get_mode_bounds): Return a constant suitable for + the target mode. + + PR rtl-optimization/14692 + * loop-unswitch.c (may_unswitch_on): Try folding the result. + (unswitch_single_loop): Work correctly when may_unswitch_on + returns a folded constant. + + * loop-iv.c (implies_p): Handle A < B ==> A + 1 <= B. + * simplify-rtx.c (simplify_const_relational_operation): Optimize + comparisons with mode bounds. + + * function.c (struct temp_slot): Add new field prev. + (free_after_compilation, init_temp_slots): Free new fields. + (cut_slot_from_list, insert_slot_to_list, + temp_slots_at_level, max_slot_level, move_slot_to_level, + make_slot_available): New functions. + (assign_stack_temp_for_type, combine_temp_slots, + find_temp_slot_from_address, preserve_temp_slots, + preserve_rtl_expr_result, free_temp_slots, + free_temps_for_rtl_expr, pop_temp_slots): Work with + the new structure of lists. + (mark_all_temps_used): Removed. + * function.h (struct function): Field x_temp_slots + replaced by x_used_temp_slots and x_avail_temp_slots. + (temp_slots): Replaced by ... 
+ (used_temp_slots, avail_temp_slots): New. + * tree.h (mark_all_temps_used): Declaration removed. + + * loop-iv.c (mark_single_set, get_biv_step_1, iv_analyze, + simplify_using_assignment): Take the expression out of + the expr_list wrapper. + + * loop-iv.c (iv_number_of_iterations): Improve clasification of + infinite loops. + 2004-05-19 Roger Sayle * doc/tm.texi (TARGET_RTX_COSTS): Document that instruction diff --git a/gcc/function.c b/gcc/function.c index 322227b..4c535c7 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -168,6 +168,9 @@ struct temp_slot GTY(()) { /* Points to next temporary slot. */ struct temp_slot *next; + /* Points to previous temporary slot. */ + struct temp_slot *prev; + /* The rtx to used to reference the slot. */ rtx slot; /* The rtx used to represent the address if not the address of the @@ -427,7 +430,8 @@ free_after_compilation (struct function *f) f->varasm = NULL; f->machine = NULL; - f->x_temp_slots = NULL; + f->x_avail_temp_slots = NULL; + f->x_used_temp_slots = NULL; f->arg_offset_rtx = NULL; f->return_rtx = NULL; f->internal_arg_pointer = NULL; @@ -605,6 +609,82 @@ assign_stack_local (enum machine_mode mode, HOST_WIDE_INT size, int align) { return assign_stack_local_1 (mode, size, align, cfun); } + + +/* Removes temporary slot TEMP from LIST. */ + +static void +cut_slot_from_list (struct temp_slot *temp, struct temp_slot **list) +{ + if (temp->next) + temp->next->prev = temp->prev; + if (temp->prev) + temp->prev->next = temp->next; + else + *list = temp->next; + + temp->prev = temp->next = NULL; +} + +/* Inserts temporary slot TEMP to LIST. */ + +static void +insert_slot_to_list (struct temp_slot *temp, struct temp_slot **list) +{ + temp->next = *list; + if (*list) + (*list)->prev = temp; + temp->prev = NULL; + *list = temp; +} + +/* Returns the list of used temp slots at LEVEL. 
*/ + +static struct temp_slot ** +temp_slots_at_level (int level) +{ + level++; + + if (!used_temp_slots) + VARRAY_GENERIC_PTR_INIT (used_temp_slots, 3, "used_temp_slots"); + + while (level >= (int) VARRAY_ACTIVE_SIZE (used_temp_slots)) + VARRAY_PUSH_GENERIC_PTR (used_temp_slots, NULL); + + return (struct temp_slot **) &VARRAY_GENERIC_PTR (used_temp_slots, level); +} + +/* Returns the maximal temporary slot level. */ + +static int +max_slot_level (void) +{ + if (!used_temp_slots) + return -1; + + return VARRAY_ACTIVE_SIZE (used_temp_slots) - 1; +} + +/* Moves temporary slot TEMP to LEVEL. */ + +static void +move_slot_to_level (struct temp_slot *temp, int level) +{ + cut_slot_from_list (temp, temp_slots_at_level (temp->level)); + insert_slot_to_list (temp, temp_slots_at_level (level)); + temp->level = level; +} + +/* Make temporary slot TEMP available. */ + +static void +make_slot_available (struct temp_slot *temp) +{ + cut_slot_from_list (temp, temp_slots_at_level (temp->level)); + insert_slot_to_list (temp, &avail_temp_slots); + temp->in_use = 0; + temp->level = -1; +} /* Allocate a temporary stack slot and record it for possible later reuse. @@ -628,7 +708,7 @@ assign_stack_temp_for_type (enum machine_mode mode, HOST_WIDE_INT size, int keep tree type) { unsigned int align; - struct temp_slot *p, *best_p = 0; + struct temp_slot *p, *best_p = 0, *selected = NULL, **pp; rtx slot; /* If SIZE is -1 it means that somebody tried to allocate a temporary @@ -650,24 +730,30 @@ assign_stack_temp_for_type (enum machine_mode mode, HOST_WIDE_INT size, int keep /* Try to find an available, already-allocated temporary of the proper mode which meets the size and alignment requirements. Choose the smallest one with the closest alignment. */ - for (p = temp_slots; p; p = p->next) - if (p->align >= align && p->size >= size && GET_MODE (p->slot) == mode - && ! 
p->in_use - && objects_must_conflict_p (p->type, type) - && (best_p == 0 || best_p->size > p->size - || (best_p->size == p->size && best_p->align > p->align))) - { - if (p->align == align && p->size == size) - { - best_p = 0; - break; - } - best_p = p; - } + for (p = avail_temp_slots; p; p = p->next) + { + if (p->align >= align && p->size >= size && GET_MODE (p->slot) == mode + && objects_must_conflict_p (p->type, type) + && (best_p == 0 || best_p->size > p->size + || (best_p->size == p->size && best_p->align > p->align))) + { + if (p->align == align && p->size == size) + { + selected = p; + cut_slot_from_list (selected, &avail_temp_slots); + best_p = 0; + break; + } + best_p = p; + } + } /* Make our best, if any, the one to use. */ if (best_p) { + selected = best_p; + cut_slot_from_list (selected, &avail_temp_slots); + /* If there are enough aligned bytes left over, make them into a new temp_slot so that the extra bytes don't get wasted. Do this only for BLKmode slots, so that we can be sure of the alignment. */ @@ -690,8 +776,7 @@ assign_stack_temp_for_type (enum machine_mode mode, HOST_WIDE_INT size, int keep p->address = 0; p->rtl_expr = 0; p->type = best_p->type; - p->next = temp_slots; - temp_slots = p; + insert_slot_to_list (p, &avail_temp_slots); stack_slot_list = gen_rtx_EXPR_LIST (VOIDmode, p->slot, stack_slot_list); @@ -700,12 +785,10 @@ assign_stack_temp_for_type (enum machine_mode mode, HOST_WIDE_INT size, int keep best_p->full_size = rounded_size; } } - - p = best_p; } /* If we still didn't find one, make a new temporary. 
*/ - if (p == 0) + if (selected == 0) { HOST_WIDE_INT frame_offset_old = frame_offset; @@ -750,10 +833,11 @@ assign_stack_temp_for_type (enum machine_mode mode, HOST_WIDE_INT size, int keep p->full_size = frame_offset - frame_offset_old; #endif p->address = 0; - p->next = temp_slots; - temp_slots = p; + + selected = p; } + p = selected; p->in_use = 1; p->addr_taken = 0; p->rtl_expr = seq_rtl_expr; @@ -775,6 +859,8 @@ assign_stack_temp_for_type (enum machine_mode mode, HOST_WIDE_INT size, int keep p->keep = keep; } + pp = temp_slots_at_level (p->level); + insert_slot_to_list (p, pp); /* Create a new MEM rtx to avoid clobbering MEM flags of old slots. */ slot = gen_rtx_MEM (mode, XEXP (p->slot, 0)); @@ -888,8 +974,7 @@ assign_temp (tree type_or_decl, int keep, int memory_required, void combine_temp_slots (void) { - struct temp_slot *p, *q; - struct temp_slot *prev_p, *prev_q; + struct temp_slot *p, *q, *next, *next_q; int num_slots; /* We can't combine slots, because the information about which slot @@ -900,52 +985,50 @@ combine_temp_slots (void) /* If there are a lot of temp slots, don't do anything unless high levels of optimization. */ if (! flag_expensive_optimizations) - for (p = temp_slots, num_slots = 0; p; p = p->next, num_slots++) + for (p = avail_temp_slots, num_slots = 0; p; p = p->next, num_slots++) if (num_slots > 100 || (num_slots > 10 && optimize == 0)) return; - for (p = temp_slots, prev_p = 0; p; p = prev_p ? prev_p->next : temp_slots) + for (p = avail_temp_slots; p; p = next) { int delete_p = 0; - if (! p->in_use && GET_MODE (p->slot) == BLKmode) - for (q = p->next, prev_q = p; q; q = prev_q->next) - { - int delete_q = 0; - if (! q->in_use && GET_MODE (q->slot) == BLKmode) - { - if (p->base_offset + p->full_size == q->base_offset) - { - /* Q comes after P; combine Q into P. 
*/ - p->size += q->size; - p->full_size += q->full_size; - delete_q = 1; - } - else if (q->base_offset + q->full_size == p->base_offset) - { - /* P comes after Q; combine P into Q. */ - q->size += p->size; - q->full_size += p->full_size; - delete_p = 1; - break; - } - } - /* Either delete Q or advance past it. */ - if (delete_q) - prev_q->next = q->next; - else - prev_q = q; - } - /* Either delete P or advance past it. */ - if (delete_p) + next = p->next; + + if (GET_MODE (p->slot) != BLKmode) + continue; + + for (q = p->next; q; q = next_q) { - if (prev_p) - prev_p->next = p->next; - else - temp_slots = p->next; + int delete_q = 0; + + next_q = q->next; + + if (GET_MODE (q->slot) != BLKmode) + continue; + + if (p->base_offset + p->full_size == q->base_offset) + { + /* Q comes after P; combine Q into P. */ + p->size += q->size; + p->full_size += q->full_size; + delete_q = 1; + } + else if (q->base_offset + q->full_size == p->base_offset) + { + /* P comes after Q; combine P into Q. */ + q->size += p->size; + q->full_size += p->full_size; + delete_p = 1; + break; + } + if (delete_q) + cut_slot_from_list (q, &avail_temp_slots); } - else - prev_p = p; + + /* Either delete P or advance past it. */ + if (delete_p) + cut_slot_from_list (p, &avail_temp_slots); } } @@ -956,26 +1039,25 @@ find_temp_slot_from_address (rtx x) { struct temp_slot *p; rtx next; + int i; - for (p = temp_slots; p; p = p->next) - { - if (! 
p->in_use) - continue; - - else if (XEXP (p->slot, 0) == x - || p->address == x - || (GET_CODE (x) == PLUS - && XEXP (x, 0) == virtual_stack_vars_rtx - && GET_CODE (XEXP (x, 1)) == CONST_INT - && INTVAL (XEXP (x, 1)) >= p->base_offset - && INTVAL (XEXP (x, 1)) < p->base_offset + p->full_size)) - return p; - - else if (p->address != 0 && GET_CODE (p->address) == EXPR_LIST) - for (next = p->address; next; next = XEXP (next, 1)) - if (XEXP (next, 0) == x) - return p; - } + for (i = max_slot_level (); i >= 0; i--) + for (p = *temp_slots_at_level (i); p; p = p->next) + { + if (XEXP (p->slot, 0) == x + || p->address == x + || (GET_CODE (x) == PLUS + && XEXP (x, 0) == virtual_stack_vars_rtx + && GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) >= p->base_offset + && INTVAL (XEXP (x, 1)) < p->base_offset + p->full_size)) + return p; + + else if (p->address != 0 && GET_CODE (p->address) == EXPR_LIST) + for (next = p->address; next; next = XEXP (next, 1)) + if (XEXP (next, 0) == x) + return p; + } /* If we have a sum involving a register, see if it points to a temp slot. */ @@ -1078,15 +1160,19 @@ mark_temp_addr_taken (rtx x) void preserve_temp_slots (rtx x) { - struct temp_slot *p = 0; + struct temp_slot *p = 0, *next; /* If there is no result, we still might have some objects whose address were taken, so we need to make sure they stay around. */ if (x == 0) { - for (p = temp_slots; p; p = p->next) - if (p->in_use && p->level == temp_slot_level && p->addr_taken) - p->level--; + for (p = *temp_slots_at_level (temp_slot_level); p; p = next) + { + next = p->next; + + if (p->addr_taken) + move_slot_to_level (p, temp_slot_level - 1); + } return; } @@ -1103,9 +1189,13 @@ preserve_temp_slots (rtx x) taken. 
*/ if (p == 0 && (GET_CODE (x) != MEM || CONSTANT_P (XEXP (x, 0)))) { - for (p = temp_slots; p; p = p->next) - if (p->in_use && p->level == temp_slot_level && p->addr_taken) - p->level--; + for (p = *temp_slots_at_level (temp_slot_level); p; p = next) + { + next = p->next; + + if (p->addr_taken) + move_slot_to_level (p, temp_slot_level - 1); + } return; } @@ -1122,20 +1212,28 @@ preserve_temp_slots (rtx x) if (p->level == temp_slot_level) { - for (q = temp_slots; q; q = q->next) - if (q != p && q->addr_taken && q->level == p->level) - q->level--; + for (q = *temp_slots_at_level (temp_slot_level); q; q = next) + { + next = q->next; - p->level--; + if (p != q && q->addr_taken) + move_slot_to_level (q, temp_slot_level - 1); + } + + move_slot_to_level (p, temp_slot_level - 1); p->addr_taken = 0; } return; } /* Otherwise, preserve all non-kept slots at this level. */ - for (p = temp_slots; p; p = p->next) - if (p->in_use && p->level == temp_slot_level && ! p->keep) - p->level--; + for (p = *temp_slots_at_level (temp_slot_level); p; p = next) + { + next = p->next; + + if (!p->keep) + move_slot_to_level (p, temp_slot_level - 1); + } } /* X is the result of an RTL_EXPR. If it is a temporary slot associated @@ -1158,7 +1256,7 @@ preserve_rtl_expr_result (rtx x) p = find_temp_slot_from_address (XEXP (x, 0)); if (p != 0) { - p->level = MIN (p->level, temp_slot_level); + move_slot_to_level (p, MIN (p->level, temp_slot_level)); p->rtl_expr = 0; } @@ -1175,12 +1273,15 @@ preserve_rtl_expr_result (rtx x) void free_temp_slots (void) { - struct temp_slot *p; + struct temp_slot *p, *next; + + for (p = *temp_slots_at_level (temp_slot_level); p; p = next) + { + next = p->next; - for (p = temp_slots; p; p = p->next) - if (p->in_use && p->level == temp_slot_level && ! 
p->keep - && p->rtl_expr == 0) - p->in_use = 0; + if (!p->keep && p->rtl_expr == 0) + make_slot_available (p); + } combine_temp_slots (); } @@ -1190,37 +1291,26 @@ free_temp_slots (void) void free_temps_for_rtl_expr (tree t) { - struct temp_slot *p; - - for (p = temp_slots; p; p = p->next) - if (p->rtl_expr == t) - { - /* If this slot is below the current TEMP_SLOT_LEVEL, then it - needs to be preserved. This can happen if a temporary in - the RTL_EXPR was addressed; preserve_temp_slots will move - the temporary into a higher level. */ - if (temp_slot_level <= p->level) - p->in_use = 0; - else - p->rtl_expr = NULL_TREE; - } - - combine_temp_slots (); -} + struct temp_slot *p, *next; -/* Mark all temporaries ever allocated in this function as not suitable - for reuse until the current level is exited. */ - -void -mark_all_temps_used (void) -{ - struct temp_slot *p; - - for (p = temp_slots; p; p = p->next) + for (p = *temp_slots_at_level (temp_slot_level); p; p = next) { - p->in_use = p->keep = 1; - p->level = MIN (p->level, temp_slot_level); + next = p->next; + + if (p->rtl_expr == t) + { + /* If this slot is below the current TEMP_SLOT_LEVEL, then it + needs to be preserved. This can happen if a temporary in + the RTL_EXPR was addressed; preserve_temp_slots will move + the temporary into a higher level. */ + if (temp_slot_level <= p->level) + make_slot_available (p); + else + p->rtl_expr = NULL_TREE; + } } + + combine_temp_slots (); } /* Push deeper into the nesting level for stack temporaries. 
*/ @@ -1237,11 +1327,15 @@ push_temp_slots (void) void pop_temp_slots (void) { - struct temp_slot *p; + struct temp_slot *p, *next; - for (p = temp_slots; p; p = p->next) - if (p->in_use && p->level == temp_slot_level && p->rtl_expr == 0) - p->in_use = 0; + for (p = *temp_slots_at_level (temp_slot_level); p; p = next) + { + next = p->next; + + if (p->rtl_expr == 0) + make_slot_available (p); + } combine_temp_slots (); @@ -1254,7 +1348,8 @@ void init_temp_slots (void) { /* We have not allocated any temporaries yet. */ - temp_slots = 0; + avail_temp_slots = 0; + used_temp_slots = 0; temp_slot_level = 0; var_temp_slot_level = 0; target_temp_slot_level = 0; diff --git a/gcc/function.h b/gcc/function.h index 4c7537e..1a9c4c9 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -322,8 +322,11 @@ struct function GTY(()) element in this vector is one less than MAX_PARM_REG, above. */ rtx * GTY ((length ("%h.x_max_parm_reg"))) x_parm_reg_stack_loc; - /* List of all temporaries allocated, both available and in use. */ - struct temp_slot *x_temp_slots; + /* List of all used temporaries allocated, by level. */ + struct varray_head_tag * GTY((param_is (struct temp_slot))) x_used_temp_slots; + + /* List of available temp slots. */ + struct temp_slot *x_avail_temp_slots; /* Current nesting level for temporaries. 
*/ int x_temp_slot_level; @@ -563,7 +566,8 @@ extern int trampolines_created; #define rtl_expr_chain (cfun->x_rtl_expr_chain) #define last_parm_insn (cfun->x_last_parm_insn) #define function_call_count (cfun->x_function_call_count) -#define temp_slots (cfun->x_temp_slots) +#define used_temp_slots (cfun->x_used_temp_slots) +#define avail_temp_slots (cfun->x_avail_temp_slots) #define temp_slot_level (cfun->x_temp_slot_level) #define target_temp_slot_level (cfun->x_target_temp_slot_level) #define var_temp_slot_level (cfun->x_var_temp_slot_level) diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c index face41d..0e416c4 100644 --- a/gcc/loop-iv.c +++ b/gcc/loop-iv.c @@ -254,7 +254,9 @@ mark_single_set (rtx insn, rtx set) unsigned regno, uid; src = find_reg_equal_equiv_note (insn); - if (!src) + if (src) + src = XEXP (src, 0); + else src = SET_SRC (set); if (!simple_set_p (SET_DEST (set), src)) @@ -603,7 +605,9 @@ get_biv_step_1 (rtx insn, rtx reg, set = single_set (insn); rhs = find_reg_equal_equiv_note (insn); - if (!rhs) + if (rhs) + rhs = XEXP (rhs, 0); + else rhs = SET_SRC (set); lhs = SET_DEST (set); @@ -979,7 +983,9 @@ iv_analyze (rtx insn, rtx def, struct rtx_iv *iv) set = single_set (insn); rhs = find_reg_equal_equiv_note (insn); - if (!rhs) + if (rhs) + rhs = XEXP (rhs, 0); + else rhs = SET_SRC (set); code = GET_CODE (rhs); @@ -1203,7 +1209,7 @@ determine_max_iter (struct niter_desc *desc) } } - get_mode_bounds (desc->mode, desc->signed_p, &mmin, &mmax); + get_mode_bounds (desc->mode, desc->signed_p, desc->mode, &mmin, &mmax); nmax = INTVAL (mmax) - INTVAL (mmin); if (GET_CODE (niter) == UDIV) @@ -1337,7 +1343,9 @@ simplify_using_assignment (rtx insn, rtx *expr, regset altered) return; rhs = find_reg_equal_equiv_note (insn); - if (!rhs) + if (rhs) + rhs = XEXP (rhs, 0); + else rhs = SET_SRC (set); if (!simple_rhs_p (rhs)) @@ -1354,7 +1362,8 @@ simplify_using_assignment (rtx insn, rtx *expr, regset altered) static bool implies_p (rtx a, rtx b) { - rtx op0, op1, r; + rtx 
op0, op1, opb0, opb1, r; + enum machine_mode mode; if (GET_CODE (a) == EQ) { @@ -1376,6 +1385,45 @@ implies_p (rtx a, rtx b) } } + /* A < B implies A + 1 <= B. */ + if ((GET_CODE (a) == GT || GET_CODE (a) == LT) + && (GET_CODE (b) == GE || GET_CODE (b) == LE)) + { + op0 = XEXP (a, 0); + op1 = XEXP (a, 1); + opb0 = XEXP (b, 0); + opb1 = XEXP (b, 1); + + if (GET_CODE (a) == GT) + { + r = op0; + op0 = op1; + op1 = r; + } + + if (GET_CODE (b) == GE) + { + r = opb0; + opb0 = opb1; + opb1 = r; + } + + mode = GET_MODE (op0); + if (mode != GET_MODE (opb0)) + mode = VOIDmode; + else if (mode == VOIDmode) + { + mode = GET_MODE (op1); + if (mode != GET_MODE (opb1)) + mode = VOIDmode; + } + + if (mode != VOIDmode + && rtx_equal_p (op1, opb1) + && simplify_gen_binary (MINUS, mode, opb0, op0) == const1_rtx) + return true; + } + return false; } @@ -1696,7 +1744,7 @@ shorten_into_mode (struct rtx_iv *iv, enum machine_mode mode, { rtx mmin, mmax, cond_over, cond_under; - get_mode_bounds (mode, signed_p, &mmin, &mmax); + get_mode_bounds (mode, signed_p, iv->extend_mode, &mmin, &mmax); cond_under = simplify_gen_relational (LT, SImode, iv->extend_mode, iv->base, mmin); cond_over = simplify_gen_relational (GT, SImode, iv->extend_mode, @@ -1870,11 +1918,11 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, { rtx op0, op1, delta, step, bound, may_xform, def_insn, tmp, tmp0, tmp1; struct rtx_iv iv0, iv1, tmp_iv; - rtx assumption; + rtx assumption, may_not_xform; enum rtx_code cond; enum machine_mode mode, comp_mode; - rtx mmin, mmax; - unsigned HOST_WIDEST_INT s, size, d; + rtx mmin, mmax, mode_mmin, mode_mmax; + unsigned HOST_WIDEST_INT s, size, d, inv; HOST_WIDEST_INT up, down, inc; int was_sharp = false; @@ -1959,7 +2007,9 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, comp_mode = iv0.extend_mode; mode = iv0.mode; size = GET_MODE_BITSIZE (mode); - get_mode_bounds (mode, (cond == LE || cond == LT), &mmin, &mmax); + get_mode_bounds (mode, 
(cond == LE || cond == LT), comp_mode, &mmin, &mmax); + mode_mmin = lowpart_subreg (mode, mmin, comp_mode); + mode_mmax = lowpart_subreg (mode, mmax, comp_mode); if (GET_CODE (iv0.step) != CONST_INT || GET_CODE (iv1.step) != CONST_INT) goto fail; @@ -2001,7 +2051,8 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, if (iv0.step == const0_rtx) { tmp = lowpart_subreg (mode, iv0.base, comp_mode); - assumption = simplify_gen_relational (EQ, SImode, mode, tmp, mmax); + assumption = simplify_gen_relational (EQ, SImode, mode, tmp, + mode_mmax); if (assumption == const_true_rtx) goto zero_iter; iv0.base = simplify_gen_binary (PLUS, comp_mode, @@ -2010,7 +2061,8 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, else { tmp = lowpart_subreg (mode, iv1.base, comp_mode); - assumption = simplify_gen_relational (EQ, SImode, mode, tmp, mmin); + assumption = simplify_gen_relational (EQ, SImode, mode, tmp, + mode_mmin); if (assumption == const_true_rtx) goto zero_iter; iv1.base = simplify_gen_binary (PLUS, comp_mode, @@ -2035,7 +2087,7 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, if (iv0.step == const0_rtx) { tmp = lowpart_subreg (mode, iv0.base, comp_mode); - if (rtx_equal_p (tmp, mmin)) + if (rtx_equal_p (tmp, mode_mmin)) { desc->infinite = alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX); @@ -2045,7 +2097,7 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, else { tmp = lowpart_subreg (mode, iv1.base, comp_mode); - if (rtx_equal_p (tmp, mmax)) + if (rtx_equal_p (tmp, mode_mmax)) { desc->infinite = alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX); @@ -2070,6 +2122,7 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, delta = lowpart_subreg (mode, delta, comp_mode); delta = simplify_gen_binary (UMOD, mode, delta, step); may_xform = const0_rtx; + may_not_xform = const_true_rtx; if (GET_CODE (delta) == CONST_INT) { @@ -2094,6 +2147,9 @@ iv_number_of_iterations (struct loop *loop, 
rtx insn, rtx condition, tmp = lowpart_subreg (mode, iv0.base, comp_mode); may_xform = simplify_gen_relational (cond, SImode, mode, bound, tmp); + may_not_xform = simplify_gen_relational (reverse_condition (cond), + SImode, mode, + bound, tmp); } else { @@ -2103,6 +2159,9 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, tmp = lowpart_subreg (mode, iv1.base, comp_mode); may_xform = simplify_gen_relational (cond, SImode, mode, tmp, bound); + may_not_xform = simplify_gen_relational (reverse_condition (cond), + SImode, mode, + tmp, bound); } } @@ -2112,8 +2171,18 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, completely senseless. This is OK, as we would need this assumption to determine the number of iterations anyway. */ if (may_xform != const_true_rtx) - desc->assumptions = alloc_EXPR_LIST (0, may_xform, - desc->assumptions); + { + /* If the step is a power of two and the final value we have + computed overflows, the cycle is infinite. Otherwise it + is nontrivial to compute the number of iterations. */ + s = INTVAL (step); + if ((s & (s - 1)) == 0) + desc->infinite = alloc_EXPR_LIST (0, may_not_xform, + desc->infinite); + else + desc->assumptions = alloc_EXPR_LIST (0, may_xform, + desc->assumptions); + } /* We are going to lose some information about upper bound on number of iterations in this step, so record the information @@ -2122,8 +2191,10 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, if (GET_CODE (iv1.base) == CONST_INT) up = INTVAL (iv1.base); else - up = INTVAL (mmax) - inc; - down = INTVAL (GET_CODE (iv0.base) == CONST_INT ? iv0.base : mmin); + up = INTVAL (mode_mmax) - inc; + down = INTVAL (GET_CODE (iv0.base) == CONST_INT + ? 
iv0.base + : mode_mmin); desc->niter_max = (up - down) / inc + 1; if (iv0.step == const0_rtx) @@ -2186,8 +2257,9 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, desc->infinite = alloc_EXPR_LIST (0, assumption, desc->infinite); tmp = simplify_gen_binary (UDIV, mode, tmp1, GEN_INT (d)); - tmp = simplify_gen_binary (MULT, mode, - tmp, GEN_INT (inverse (s, size))); + inv = inverse (s, size); + inv = trunc_int_for_mode (inv, mode); + tmp = simplify_gen_binary (MULT, mode, tmp, GEN_INT (inv)); desc->niter_expr = simplify_gen_binary (AND, mode, tmp, bound); } else @@ -2204,7 +2276,8 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, tmp0 = lowpart_subreg (mode, iv0.base, comp_mode); tmp1 = lowpart_subreg (mode, iv1.base, comp_mode); - bound = simplify_gen_binary (MINUS, mode, mmax, step); + bound = simplify_gen_binary (MINUS, mode, mode_mmax, + lowpart_subreg (mode, step, comp_mode)); assumption = simplify_gen_relational (cond, SImode, mode, tmp1, bound); desc->assumptions = @@ -2227,7 +2300,8 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition, tmp0 = lowpart_subreg (mode, iv0.base, comp_mode); tmp1 = lowpart_subreg (mode, iv1.base, comp_mode); - bound = simplify_gen_binary (MINUS, mode, mmin, step); + bound = simplify_gen_binary (MINUS, mode, mode_mmin, + lowpart_subreg (mode, step, comp_mode)); assumption = simplify_gen_relational (cond, SImode, mode, bound, tmp0); desc->assumptions = diff --git a/gcc/loop-unswitch.c b/gcc/loop-unswitch.c index bedf5f8..8f1f54f 100644 --- a/gcc/loop-unswitch.c +++ b/gcc/loop-unswitch.c @@ -174,7 +174,7 @@ unswitch_loops (struct loops *loops) static rtx may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn) { - rtx test, at, insn, op[2]; + rtx test, at, insn, op[2], stest; struct rtx_iv iv; unsigned i; enum machine_mode mode; @@ -233,6 +233,12 @@ may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn) return test; } + stest = simplify_gen_relational 
(GET_CODE (test), SImode, + mode, op[0], op[1]); + if (stest == const0_rtx + || stest == const_true_rtx) + return stest; + return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode, op[0], op[1])); } @@ -262,7 +268,7 @@ unswitch_single_loop (struct loops *loops, struct loop *loop, basic_block *bbs; struct loop *nloop; unsigned i; - rtx cond, rcond, conds, rconds, acond, cinsn = NULL_RTX; + rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn = NULL_RTX; int repeat; edge e; @@ -331,13 +337,17 @@ unswitch_single_loop (struct loops *loops, struct loop *loop, return; } - rcond = reversed_condition (cond); - if (rcond) - rcond = canon_condition (rcond); + if (cond != const0_rtx + && cond != const_true_rtx) + { + rcond = reversed_condition (cond); + if (rcond) + rcond = canon_condition (rcond); - /* Check whether the result can be predicted. */ - for (acond = cond_checked; acond; acond = XEXP (acond, 1)) - simplify_using_condition (XEXP (acond, 0), &cond, NULL); + /* Check whether the result can be predicted. */ + for (acond = cond_checked; acond; acond = XEXP (acond, 1)) + simplify_using_condition (XEXP (acond, 0), &cond, NULL); + } if (cond == const_true_rtx) { diff --git a/gcc/rtl.h b/gcc/rtl.h index 89f6683..cb6850a 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2447,7 +2447,8 @@ extern void tracer (void); extern void variable_tracking_main (void); /* In stor-layout.c. */ -extern void get_mode_bounds (enum machine_mode, int, rtx *, rtx *); +extern void get_mode_bounds (enum machine_mode, int, enum machine_mode, + rtx *, rtx *); /* In loop-unswitch.c */ extern rtx reversed_condition (rtx); diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 680ecaf..0bef69e 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -2892,6 +2892,63 @@ simplify_const_relational_operation (enum rtx_code code, /* Otherwise, there are some code-specific tests we can make. */ else { + /* Optimize comparisons with upper and lower bounds. 
*/ + if (INTEGRAL_MODE_P (mode) + && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) + { + rtx mmin, mmax; + int sign; + + if (code == GEU + || code == LEU + || code == GTU + || code == LTU) + sign = 0; + else + sign = 1; + + get_mode_bounds (mode, sign, mode, &mmin, &mmax); + + tem = NULL_RTX; + switch (code) + { + case GEU: + case GE: + /* x >= min is always true. */ + if (rtx_equal_p (trueop1, mmin)) + tem = const_true_rtx; + else + break; + + case LEU: + case LE: + /* x <= max is always true. */ + if (rtx_equal_p (trueop1, mmax)) + tem = const_true_rtx; + break; + + case GTU: + case GT: + /* x > max is always false. */ + if (rtx_equal_p (trueop1, mmax)) + tem = const0_rtx; + break; + + case LTU: + case LT: + /* x < min is always false. */ + if (rtx_equal_p (trueop1, mmin)) + tem = const0_rtx; + break; + + default: + break; + } + if (tem == const0_rtx + || tem == const_true_rtx) + return tem; + } + switch (code) { case EQ: @@ -2904,33 +2961,6 @@ simplify_const_relational_operation (enum rtx_code code, return const_true_rtx; break; - case GEU: - /* Unsigned values are never negative. */ - if (trueop1 == const0_rtx) - return const_true_rtx; - break; - - case LTU: - if (trueop1 == const0_rtx) - return const0_rtx; - break; - - case LEU: - /* Unsigned values are never greater than the largest - unsigned value. */ - if (GET_CODE (trueop1) == CONST_INT - && (unsigned HOST_WIDE_INT) INTVAL (trueop1) == GET_MODE_MASK (mode) - && INTEGRAL_MODE_P (mode)) - return const_true_rtx; - break; - - case GTU: - if (GET_CODE (trueop1) == CONST_INT - && (unsigned HOST_WIDE_INT) INTVAL (trueop1) == GET_MODE_MASK (mode) - && INTEGRAL_MODE_P (mode)) - return const0_rtx; - break; - case LT: /* Optimize abs(x) < 0.0. 
*/ if (trueop1 == CONST0_RTX (mode) && !HONOR_SNANS (mode)) diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c index 5c87a63..e1137f5 100644 --- a/gcc/stor-layout.c +++ b/gcc/stor-layout.c @@ -2160,26 +2160,32 @@ get_best_mode (int bitsize, int bitpos, unsigned int align, } /* Gets minimal and maximal values for MODE (signed or unsigned depending on - SIGN). */ + SIGN). The returned constants are made to be usable in TARGET_MODE. */ void -get_mode_bounds (enum machine_mode mode, int sign, rtx *mmin, rtx *mmax) +get_mode_bounds (enum machine_mode mode, int sign, + enum machine_mode target_mode, + rtx *mmin, rtx *mmax) { - int size = GET_MODE_BITSIZE (mode); + unsigned size = GET_MODE_BITSIZE (mode); + unsigned HOST_WIDE_INT min_val, max_val; if (size > HOST_BITS_PER_WIDE_INT) abort (); if (sign) { - *mmin = GEN_INT (-((unsigned HOST_WIDE_INT) 1 << (size - 1))); - *mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1)) - 1); + min_val = -((unsigned HOST_WIDE_INT) 1 << (size - 1)); + max_val = ((unsigned HOST_WIDE_INT) 1 << (size - 1)) - 1; } else { - *mmin = const0_rtx; - *mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1) << 1) - 1); + min_val = 0; + max_val = ((unsigned HOST_WIDE_INT) 1 << (size - 1) << 1) - 1; } + + *mmin = GEN_INT (trunc_int_for_mode (min_val, target_mode)); + *mmax = GEN_INT (trunc_int_for_mode (max_val, target_mode)); } #include "gt-stor-layout.h" diff --git a/gcc/tree.h b/gcc/tree.h index feebe73..991c9fe 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -3539,7 +3539,6 @@ extern void put_var_into_stack (tree, int); extern void flush_addressof (tree); extern void setjmp_vars_warning (tree); extern void setjmp_args_warning (void); -extern void mark_all_temps_used (void); extern void init_temp_slots (void); extern void combine_temp_slots (void); extern void free_temp_slots (void); -- 2.7.4