From c8a2b4ff9b521b6b455caf9b45137c7153738a64 Mon Sep 17 00:00:00 2001 From: jakub Date: Thu, 31 Oct 2013 13:57:05 +0000 Subject: [PATCH] * tree.c (tree_ctz): New function. * tree.h (tree_ctz): New prototype. * tree-ssanames.h (get_range_info, get_nonzero_bits): Change first argument from tree to const_tree. * tree-ssanames.c (get_range_info, get_nonzero_bits): Likewise. * tree-vectorizer.h (vect_generate_tmps_on_preheader): New prototype. * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader): No longer static. * expr.c (highest_pow2_factor): Reimplemented using tree_ctz. * tree-vect-loop.c (vect_analyze_loop_operations, vect_transform_loop): Don't force scalar loop for bound just because number of iterations is unknown, only do it if it is not known to be a multiple of vectorization_factor. * builtins.c (get_object_alignment_2): Use tree_ctz on offset. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@204257 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 15 ++++++ gcc/builtins.c | 48 +++---------------- gcc/expr.c | 76 ++++-------------------------- gcc/tree-ssanames.c | 4 +- gcc/tree-ssanames.h | 5 +- gcc/tree-vect-loop-manip.c | 2 +- gcc/tree-vect-loop.c | 21 +++++---- gcc/tree-vectorizer.h | 2 + gcc/tree.c | 113 +++++++++++++++++++++++++++++++++++++++++++++ gcc/tree.h | 1 + 10 files changed, 165 insertions(+), 122 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0e072c4..551b303 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,20 @@ 2013-10-31 Jakub Jelinek + * tree.c (tree_ctz): New function. + * tree.h (tree_ctz): New prototype. + * tree-ssanames.h (get_range_info, get_nonzero_bits): Change + first argument from tree to const_tree. + * tree-ssanames.c (get_range_info, get_nonzero_bits): Likewise. + * tree-vectorizer.h (vect_generate_tmps_on_preheader): New prototype. + * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader): No longer + static. + * expr.c (highest_pow2_factor): Reimplemented using tree_ctz. + * tree-vect-loop.c (vect_analyze_loop_operations, + vect_transform_loop): Don't force scalar loop for bound just because + number of iterations is unknown, only do it if it is not known to be + a multiple of vectorization_factor. + * builtins.c (get_object_alignment_2): Use tree_ctz on offset. + * gimple-pretty-print.c (dump_ssaname_info): Print newline also in case of VR_VARYING. Print get_nonzero_bits if not all ones. * tree-ssanames.h (struct range_info_def): Add nonzero_bits field. diff --git a/gcc/builtins.c b/gcc/builtins.c index 1b08545..f84789e 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -314,7 +314,7 @@ get_object_alignment_2 (tree exp, unsigned int *alignp, tree offset; enum machine_mode mode; int unsignedp, volatilep; - unsigned int inner, align = BITS_PER_UNIT; + unsigned int align = BITS_PER_UNIT; bool known_alignment = false; /* Get the innermost object and the constant (bitpos) and possibly @@ -423,50 +423,16 @@ get_object_alignment_2 (tree exp, unsigned int *alignp, /* If there is a non-constant offset part extract the maximum alignment that can prevail. */ - inner = ~0U; - while (offset) + if (offset) { - tree next_offset; - - if (TREE_CODE (offset) == PLUS_EXPR) - { - next_offset = TREE_OPERAND (offset, 0); - offset = TREE_OPERAND (offset, 1); - } - else - next_offset = NULL; - if (host_integerp (offset, 1)) - { - /* Any overflow in calculating offset_bits won't change - the alignment. */ - unsigned offset_bits - = ((unsigned) tree_low_cst (offset, 1) * BITS_PER_UNIT); - - if (offset_bits) - inner = MIN (inner, (offset_bits & -offset_bits)); - } - else if (TREE_CODE (offset) == MULT_EXPR - && host_integerp (TREE_OPERAND (offset, 1), 1)) - { - /* Any overflow in calculating offset_factor won't change - the alignment. */ - unsigned offset_factor - = ((unsigned) tree_low_cst (TREE_OPERAND (offset, 1), 1) - * BITS_PER_UNIT); - - if (offset_factor) - inner = MIN (inner, (offset_factor & -offset_factor)); - } - else + int trailing_zeros = tree_ctz (offset); + if (trailing_zeros < HOST_BITS_PER_INT) { - inner = MIN (inner, BITS_PER_UNIT); - break; + unsigned int inner = (1U << trailing_zeros) * BITS_PER_UNIT; + if (inner) + align = MIN (align, inner); } - offset = next_offset; } - /* Alignment is innermost object alignment adjusted by the constant - and non-constant offset parts. */ - align = MIN (align, inner); *alignp = align; *bitposp = bitpos & (*alignp - 1); diff --git a/gcc/expr.c b/gcc/expr.c index 89e3979..551a660 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -7283,74 +7283,14 @@ safe_from_p (const_rtx x, tree exp, int top_p) unsigned HOST_WIDE_INT highest_pow2_factor (const_tree exp) { - unsigned HOST_WIDE_INT c0, c1; - - switch (TREE_CODE (exp)) - { - case INTEGER_CST: - /* We can find the lowest bit that's a one. If the low - HOST_BITS_PER_WIDE_INT bits are zero, return BIGGEST_ALIGNMENT. - We need to handle this case since we can find it in a COND_EXPR, - a MIN_EXPR, or a MAX_EXPR. If the constant overflows, we have an - erroneous program, so return BIGGEST_ALIGNMENT to avoid any - later ICE. */ - if (TREE_OVERFLOW (exp)) - return BIGGEST_ALIGNMENT; - else - { - /* Note: tree_low_cst is intentionally not used here, - we don't care about the upper bits. */ - c0 = TREE_INT_CST_LOW (exp); - c0 &= -c0; - return c0 ? c0 : BIGGEST_ALIGNMENT; - } - break; - - case PLUS_EXPR: case MINUS_EXPR: case MIN_EXPR: case MAX_EXPR: - c0 = highest_pow2_factor (TREE_OPERAND (exp, 0)); - c1 = highest_pow2_factor (TREE_OPERAND (exp, 1)); - return MIN (c0, c1); - - case MULT_EXPR: - c0 = highest_pow2_factor (TREE_OPERAND (exp, 0)); - c1 = highest_pow2_factor (TREE_OPERAND (exp, 1)); - return c0 * c1; - - case ROUND_DIV_EXPR: case TRUNC_DIV_EXPR: case FLOOR_DIV_EXPR: - case CEIL_DIV_EXPR: - if (integer_pow2p (TREE_OPERAND (exp, 1)) - && host_integerp (TREE_OPERAND (exp, 1), 1)) - { - c0 = highest_pow2_factor (TREE_OPERAND (exp, 0)); - c1 = tree_low_cst (TREE_OPERAND (exp, 1), 1); - return MAX (1, c0 / c1); - } - break; - - case BIT_AND_EXPR: - /* The highest power of two of a bit-and expression is the maximum of - that of its operands. We typically get here for a complex LHS and - a constant negative power of two on the RHS to force an explicit - alignment, so don't bother looking at the LHS. */ - return highest_pow2_factor (TREE_OPERAND (exp, 1)); - - CASE_CONVERT: - case SAVE_EXPR: - return highest_pow2_factor (TREE_OPERAND (exp, 0)); - - case COMPOUND_EXPR: - return highest_pow2_factor (TREE_OPERAND (exp, 1)); - - case COND_EXPR: - c0 = highest_pow2_factor (TREE_OPERAND (exp, 1)); - c1 = highest_pow2_factor (TREE_OPERAND (exp, 2)); - return MIN (c0, c1); - - default: - break; - } - - return 1; + unsigned HOST_WIDE_INT ret; + int trailing_zeros = tree_ctz (exp); + if (trailing_zeros >= HOST_BITS_PER_WIDE_INT) + return BIGGEST_ALIGNMENT; + ret = (unsigned HOST_WIDE_INT) 1 << trailing_zeros; + if (ret > BIGGEST_ALIGNMENT) + return BIGGEST_ALIGNMENT; + return ret; } /* Similar, except that the alignment requirements of TARGET are diff --git a/gcc/tree-ssanames.c b/gcc/tree-ssanames.c index 00bd436..7635891 100644 --- a/gcc/tree-ssanames.c +++ b/gcc/tree-ssanames.c @@ -221,7 +221,7 @@ set_range_info (tree name, double_int min, double_int max) is used to determine if MIN and MAX are valid values. */ enum value_range_type -get_range_info (tree name, double_int *min, double_int *max) +get_range_info (const_tree name, double_int *min, double_int *max) { enum value_range_type range_type; gcc_assert (!POINTER_TYPE_P (TREE_TYPE (name))); @@ -271,7 +271,7 @@ set_nonzero_bits (tree name, double_int mask) NAME, or double_int_minus_one if unknown. */ double_int -get_nonzero_bits (tree name) +get_nonzero_bits (const_tree name) { if (POINTER_TYPE_P (TREE_TYPE (name))) { diff --git a/gcc/tree-ssanames.h b/gcc/tree-ssanames.h index 1d02d96..d0a6542 100644 --- a/gcc/tree-ssanames.h +++ b/gcc/tree-ssanames.h @@ -72,9 +72,10 @@ enum value_range_type { VR_UNDEFINED, VR_RANGE, VR_ANTI_RANGE, VR_VARYING }; /* Sets the value range to SSA. */ extern void set_range_info (tree, double_int, double_int); /* Gets the value range from SSA. */ -extern enum value_range_type get_range_info (tree, double_int *, double_int *); +extern enum value_range_type get_range_info (const_tree, double_int *, + double_int *); extern void set_nonzero_bits (tree, double_int); -extern double_int get_nonzero_bits (tree); +extern double_int get_nonzero_bits (const_tree); extern void init_ssanames (struct function *, int); extern void fini_ssanames (void); extern void ssanames_print_statistics (void); diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 62eea7d..00a3661 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1437,7 +1437,7 @@ vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq) and places them at the loop preheader edge or in COND_EXPR_STMT_LIST if that is non-NULL. */ -static void +void vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_ptr, tree *ratio_mult_vf_name_ptr, diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 296b7b2..1da87c7 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1586,9 +1586,9 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) return false; } - if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0 - || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) + if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) + || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) + < exact_log2 (vectorization_factor))) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.\n"); @@ -5656,15 +5656,20 @@ vect_transform_loop (loop_vec_info loop_vinfo) will remain scalar and will compute the remaining (n%VF) iterations. (VF is the vectorization factor). */ - if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) - || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) + < exact_log2 (vectorization_factor) + || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, th, check_profitability); - else + else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); + else + { + tree ni_name, ratio_mult_vf; + vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, &ratio_mult_vf, + &ratio, NULL); + } /* 1) Make sure the loop header has exactly two entries 2) Make sure we have a preheader basic block. */ diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 8b7b345..a2f482d 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -901,6 +901,8 @@ extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree); extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, edge); extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); +extern void vect_generate_tmps_on_preheader (loop_vec_info, tree *, tree *, + tree *, gimple_seq); extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *, unsigned int, bool); extern void vect_do_peeling_for_alignment (loop_vec_info, unsigned int, bool); diff --git a/gcc/tree.c b/gcc/tree.c index 094459a..332751a 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -2214,6 +2214,119 @@ tree_floor_log2 (const_tree expr) : floor_log2 (low)); } +/* Return number of known trailing zero bits in EXPR, or, if the value of + EXPR is known to be zero, the precision of it's type. */ + +unsigned int +tree_ctz (const_tree expr) +{ + if (!INTEGRAL_TYPE_P (TREE_TYPE (expr)) + && !POINTER_TYPE_P (TREE_TYPE (expr))) + return 0; + + unsigned int ret1, ret2, prec = TYPE_PRECISION (TREE_TYPE (expr)); + switch (TREE_CODE (expr)) + { + case INTEGER_CST: + ret1 = tree_to_double_int (expr).trailing_zeros (); + return MIN (ret1, prec); + case SSA_NAME: + ret1 = get_nonzero_bits (expr).trailing_zeros (); + return MIN (ret1, prec); + case PLUS_EXPR: + case MINUS_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case MIN_EXPR: + case MAX_EXPR: + ret1 = tree_ctz (TREE_OPERAND (expr, 0)); + if (ret1 == 0) + return ret1; + ret2 = tree_ctz (TREE_OPERAND (expr, 1)); + return MIN (ret1, ret2); + case POINTER_PLUS_EXPR: + ret1 = tree_ctz (TREE_OPERAND (expr, 0)); + ret2 = tree_ctz (TREE_OPERAND (expr, 1)); + /* Second operand is sizetype, which could be in theory + wider than pointer's precision. Make sure we never + return more than prec. */ + ret2 = MIN (ret2, prec); + return MIN (ret1, ret2); + case BIT_AND_EXPR: + ret1 = tree_ctz (TREE_OPERAND (expr, 0)); + ret2 = tree_ctz (TREE_OPERAND (expr, 1)); + return MAX (ret1, ret2); + case MULT_EXPR: + ret1 = tree_ctz (TREE_OPERAND (expr, 0)); + ret2 = tree_ctz (TREE_OPERAND (expr, 1)); + return MIN (ret1 + ret2, prec); + case LSHIFT_EXPR: + ret1 = tree_ctz (TREE_OPERAND (expr, 0)); + if (host_integerp (TREE_OPERAND (expr, 1), 1) + && ((unsigned HOST_WIDE_INT) tree_low_cst (TREE_OPERAND (expr, 1), 1) + < (unsigned HOST_WIDE_INT) prec)) + { + ret2 = tree_low_cst (TREE_OPERAND (expr, 1), 1); + return MIN (ret1 + ret2, prec); + } + return ret1; + case RSHIFT_EXPR: + if (host_integerp (TREE_OPERAND (expr, 1), 1) + && ((unsigned HOST_WIDE_INT) tree_low_cst (TREE_OPERAND (expr, 1), 1) + < (unsigned HOST_WIDE_INT) prec)) + { + ret1 = tree_ctz (TREE_OPERAND (expr, 0)); + ret2 = tree_low_cst (TREE_OPERAND (expr, 1), 1); + if (ret1 > ret2) + return ret1 - ret2; + } + return 0; + case TRUNC_DIV_EXPR: + case CEIL_DIV_EXPR: + case FLOOR_DIV_EXPR: + case ROUND_DIV_EXPR: + case EXACT_DIV_EXPR: + if (TREE_CODE (TREE_OPERAND (expr, 1)) == INTEGER_CST + && tree_int_cst_sgn (TREE_OPERAND (expr, 1)) == 1) + { + int l = tree_log2 (TREE_OPERAND (expr, 1)); + if (l >= 0) + { + ret1 = tree_ctz (TREE_OPERAND (expr, 0)); + ret2 = l; + if (ret1 > ret2) + return ret1 - ret2; + } + } + return 0; + CASE_CONVERT: + ret1 = tree_ctz (TREE_OPERAND (expr, 0)); + if (ret1 && ret1 == TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (expr, 0)))) + ret1 = prec; + return MIN (ret1, prec); + case SAVE_EXPR: + return tree_ctz (TREE_OPERAND (expr, 0)); + case COND_EXPR: + ret1 = tree_ctz (TREE_OPERAND (expr, 1)); + if (ret1 == 0) + return 0; + ret2 = tree_ctz (TREE_OPERAND (expr, 2)); + return MIN (ret1, ret2); + case COMPOUND_EXPR: + return tree_ctz (TREE_OPERAND (expr, 1)); + case ADDR_EXPR: + ret1 = get_pointer_alignment (CONST_CAST_TREE (expr)); + if (ret1 > BITS_PER_UNIT) + { + ret1 = ctz_hwi (ret1 / BITS_PER_UNIT); + return MIN (ret1, prec); + } + return 0; + default: + return 0; + } +} + /* Return 1 if EXPR is the real constant zero. Trailing zeroes matter for decimal float constants, so don't return 1 for them. */ diff --git a/gcc/tree.h b/gcc/tree.h index 33aea7f..920ad07 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -4564,6 +4564,7 @@ extern void get_type_static_bounds (const_tree, mpz_t, mpz_t); extern bool variably_modified_type_p (tree, tree); extern int tree_log2 (const_tree); extern int tree_floor_log2 (const_tree); +extern unsigned int tree_ctz (const_tree); extern int simple_cst_equal (const_tree, const_tree); extern hashval_t iterative_hash_expr (const_tree, hashval_t); extern hashval_t iterative_hash_exprs_commutative (const_tree, -- 2.7.4