From be2e5c02b5b04c1e18f090ce4e71b25970d9779d Mon Sep 17 00:00:00 2001 From: matz Date: Tue, 19 Jan 2010 16:05:57 +0000 Subject: [PATCH] PR tree-optimization/41783 * tree-data-ref.c (toplevel): Include flags.h. (dump_data_dependence_relation): Also dump the inputs if the result will be unknown. (split_constant_offset_1): Look through some conversions. * tree-predcom.c (determine_roots_comp): Restart a new chain if the offset from last element is too large. (ref_at_iteration): Deal also with MISALIGNED_INDIRECT_REF. (reassociate_to_the_same_stmt): Handle vector registers. * tree-vect-data-refs.c (vect_equal_offsets): Handle unary operations (e.g. conversions). * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Add wide_prolog_niters argument, emit widening instructions. (vect_do_peeling_for_alignment): Adjust caller, use widened variant of the iteration count. * Makefile.in (tree-data-ref.o): Add $(FLAGS_H). testsuite/ * gfortran.dg/vect/fast-math-mgrid-resid.f: New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@156043 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 25 ++++++++++-- gcc/Makefile.in | 2 +- gcc/testsuite/ChangeLog | 5 +++ .../gfortran.dg/vect/fast-math-mgrid-resid.f | 44 ++++++++++++++++++++++ gcc/tree-data-ref.c | 32 ++++++++++++++++ gcc/tree-predcom.c | 24 ++++++++++-- gcc/tree-vect-data-refs.c | 19 ++++++---- gcc/tree-vect-loop-manip.c | 22 +++++++++-- 8 files changed, 154 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1b5a0d3..ca67d10 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,10 +1,29 @@ +2010-01-19 Michael Matz + + PR tree-optimization/41783 + * tree-data-ref.c (toplevel): Include flags.h. + (dump_data_dependence_relation): Also dump the inputs if the + result will be unknown. + (split_constant_offset_1): Look through some conversions. 
+ * tree-predcom.c (determine_roots_comp): Restart a new chain if + the offset from last element is too large. + (ref_at_iteration): Deal also with MISALIGNED_INDIRECT_REF. + (reassociate_to_the_same_stmt): Handle vector registers. + * tree-vect-data-refs.c (vect_equal_offsets): Handle unary operations + (e.g. conversions). + * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Add + wide_prolog_niters argument, emit widening instructions. + (vect_do_peeling_for_alignment): Adjust caller, use widened + variant of the iteration count. + * Makefile.in (tree-data-ref.o): Add $(FLAGS_H). + 2010-01-19 Ramana Radhakrishnan - PR target/38697 - * config/arm/neon-testgen.m (emit_automatics): New parameter + PR target/38697 + * config/arm/neon-testgen.m (emit_automatics): New parameter features. Adjust for Fixed_return_reg feature. (test_intrinsic): Call emit_automatics with new feature. - * config/arm/neon.ml: Update copyright years. + * config/arm/neon.ml: Update copyright years. (features): New Fixed_return_reg feature. (ops): Update feature for Vget_low. 
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index d6a57c4..7c08ea2 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2548,7 +2548,7 @@ tree-scalar-evolution.o: tree-scalar-evolution.c $(CONFIG_H) $(SYSTEM_H) \ $(TIMEVAR_H) $(CFGLOOP_H) $(SCEV_H) $(TREE_PASS_H) $(FLAGS_H) \ gt-tree-scalar-evolution.h tree-data-ref.o: tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ - $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \ + $(GGC_H) $(FLAGS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ $(TREE_DATA_REF_H) $(TREE_PASS_H) langhooks.h sese.o: sese.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8c02782..58666a6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2010-01-19 Michael Matz + + PR tree-optimization/41783 + * gfortran.dg/vect/fast-math-mgrid-resid.f: New. + 2010-01-19 Ramana Radhakrishnan PR target/38697. diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f new file mode 100644 index 0000000..2d1844b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f @@ -0,0 +1,44 @@ +! { dg-do compile } +! { dg-require-effective-target vect_double } +! { dg-options "-O3 -ffast-math -fpredictive-commoning -ftree-vectorize -fdump-tree-optimized" } + +******* RESID COMPUTES THE RESIDUAL: R = V - AU +* +* THIS SIMPLE IMPLEMENTATION COSTS 27A + 4M PER RESULT, WHERE +* A AND M DENOTE THE COSTS OF ADDITION (OR SUBTRACTION) AND +* MULTIPLICATION, RESPECTIVELY. BY USING SEVERAL TWO-DIMENSIONAL +* BUFFERS ONE CAN REDUCE THIS COST TO 13A + 4M IN THE GENERAL +* CASE, OR 10A + 3M WHEN THE COEFFICIENT A(1) IS ZERO. 
+* + SUBROUTINE RESID(U,V,R,N,A) + INTEGER N + REAL*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3) + INTEGER I3, I2, I1 +C + DO 600 I3=2,N-1 + DO 600 I2=2,N-1 + DO 600 I1=2,N-1 + 600 R(I1,I2,I3)=V(I1,I2,I3) + > -A(0)*( U(I1, I2, I3 ) ) + > -A(1)*( U(I1-1,I2, I3 ) + U(I1+1,I2, I3 ) + > + U(I1, I2-1,I3 ) + U(I1, I2+1,I3 ) + > + U(I1, I2, I3-1) + U(I1, I2, I3+1) ) + > -A(2)*( U(I1-1,I2-1,I3 ) + U(I1+1,I2-1,I3 ) + > + U(I1-1,I2+1,I3 ) + U(I1+1,I2+1,I3 ) + > + U(I1, I2-1,I3-1) + U(I1, I2+1,I3-1) + > + U(I1, I2-1,I3+1) + U(I1, I2+1,I3+1) + > + U(I1-1,I2, I3-1) + U(I1-1,I2, I3+1) + > + U(I1+1,I2, I3-1) + U(I1+1,I2, I3+1) ) + > -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1) + > + U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1) + > + U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1) + > + U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) ) +C + RETURN + END +! we want to check that predictive commoning did something on the +! vectorized loop, which means we have to have exactly 13 vector +! additions. +! { dg-final { scan-tree-dump-times "vect_var\[^\\n\]*\\+ " 13 "optimized" } } +! { dg-final { cleanup-tree-dump "vect" } } +! { dg-final { cleanup-tree-dump "optimized" } } diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index dbdd323..9f5a623 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "ggc.h" +#include "flags.h" #include "tree.h" /* These RTL headers are needed for basic-block.h. 
*/ @@ -380,6 +381,19 @@ dump_data_dependence_relation (FILE *outf, if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) { + if (ddr) + { + dra = DDR_A (ddr); + drb = DDR_B (ddr); + if (dra) + dump_data_reference (outf, dra); + else + fprintf (outf, " (nil)\n"); + if (drb) + dump_data_reference (outf, drb); + else + fprintf (outf, " (nil)\n"); + } fprintf (outf, " (don't know)\n)\n"); return; } @@ -631,6 +645,24 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, return split_constant_offset_1 (type, var0, subcode, var1, var, off); } + CASE_CONVERT: + { + /* We must not introduce undefined overflow, and we must not change the value. + Hence we're okay if the inner type doesn't overflow to start with + (pointer or signed), the outer type also is an integer or pointer + and the outer precision is at least as large as the inner. */ + tree itype = TREE_TYPE (op0); + if ((POINTER_TYPE_P (itype) + || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype))) + && TYPE_PRECISION (type) >= TYPE_PRECISION (itype) + && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type))) + { + split_constant_offset (op0, &var0, off); + *var = fold_convert (type, var0); + return true; + } + return false; + } default: return false; diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index 78d45b8..f31c392 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -1180,6 +1180,7 @@ determine_roots_comp (struct loop *loop, unsigned i; dref a; chain_p chain = NULL; + double_int last_ofs = double_int_zero; /* Invariants are handled specially. 
*/ if (comp->comp_step == RS_INVARIANT) @@ -1194,13 +1195,20 @@ determine_roots_comp (struct loop *loop, for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++) { - if (!chain || !DR_IS_READ (a->ref)) + if (!chain || !DR_IS_READ (a->ref) + || double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), + double_int_add (a->offset, + double_int_neg (last_ofs))) <= 0) { if (nontrivial_chain_p (chain)) - VEC_safe_push (chain_p, heap, *chains, chain); + { + add_looparound_copies (loop, chain); + VEC_safe_push (chain_p, heap, *chains, chain); + } else release_chain (chain); chain = make_rooted_chain (a); + last_ofs = a->offset; continue; } @@ -1338,9 +1346,11 @@ ref_at_iteration (struct loop *loop, tree ref, int iter) else if (!INDIRECT_REF_P (ref)) return unshare_expr (ref); - if (TREE_CODE (ref) == INDIRECT_REF) + if (INDIRECT_REF_P (ref)) { - ret = build1 (INDIRECT_REF, TREE_TYPE (ref), NULL_TREE); + /* Take care for INDIRECT_REF and MISALIGNED_INDIRECT_REF at + the same time. */ + ret = copy_node (ref); idx = TREE_OPERAND (ref, 0); idx_p = &TREE_OPERAND (ret, 0); } @@ -2205,11 +2215,17 @@ reassociate_to_the_same_stmt (tree name1, tree name2) /* Insert the new statement combining NAME1 and NAME2 before S1, and combine it with the rhs of S1. 
*/ var = create_tmp_var (type, "predreastmp"); + if (TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE) + DECL_GIMPLE_REG_P (var) = 1; add_referenced_var (var); new_name = make_ssa_name (var, NULL); new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2); var = create_tmp_var (type, "predreastmp"); + if (TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE) + DECL_GIMPLE_REG_P (var) = 1; add_referenced_var (var); tmp_name = make_ssa_name (var, NULL); diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 8991853..37ae9b5 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -294,7 +294,7 @@ vect_update_interleaving_chain (struct data_reference *drb, static bool vect_equal_offsets (tree offset1, tree offset2) { - bool res0, res1; + bool res; STRIP_NOPS (offset1); STRIP_NOPS (offset2); @@ -303,16 +303,19 @@ vect_equal_offsets (tree offset1, tree offset2) return true; if (TREE_CODE (offset1) != TREE_CODE (offset2) - || !BINARY_CLASS_P (offset1) - || !BINARY_CLASS_P (offset2)) + || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) return false; - res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0), - TREE_OPERAND (offset2, 0)); - res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1), - TREE_OPERAND (offset2, 1)); + res = vect_equal_offsets (TREE_OPERAND (offset1, 0), + TREE_OPERAND (offset2, 0)); - return (res0 && res1); + if (!res || !BINARY_CLASS_P (offset1)) + return res; + + res = vect_equal_offsets (TREE_OPERAND (offset1, 1), + TREE_OPERAND (offset2, 1)); + + return res; } diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index a0d3ce5..f4056b0 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1961,7 +1961,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, use TYPE_VECTOR_SUBPARTS. 
*/ static tree -vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) +vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, + tree *wide_prolog_niters) { struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); @@ -2045,6 +2046,19 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) add_referenced_var (var); stmts = NULL; iters_name = force_gimple_operand (iters, &stmts, false, var); + if (types_compatible_p (sizetype, niters_type)) + *wide_prolog_niters = iters_name; + else + { + gimple_seq seq = NULL; + tree wide_iters = fold_convert (sizetype, iters); + var = create_tmp_var (sizetype, "prolog_loop_niters"); + add_referenced_var (var); + *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false, + var); + if (seq) + gimple_seq_add_seq (&stmts, seq); + } /* Insert stmt on loop preheader edge. */ if (stmts) @@ -2115,6 +2129,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo) struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree niters_of_prolog_loop, ni_name; tree n_iters; + tree wide_prolog_niters; struct loop *new_loop; unsigned int th = 0; int min_profitable_iters; @@ -2125,7 +2140,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo) initialize_original_copy_tables (); ni_name = vect_build_loop_niters (loop_vinfo, NULL); - niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name); + niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name, + &wide_prolog_niters); /* Get profitability threshold for vectorized loop. */ @@ -2150,7 +2166,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo) TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); /* Update the init conditions of the access functions of all data refs. 
*/ - vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop); + vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters); /* After peeling we have to reset scalar evolution analyzer. */ scev_reset (); -- 2.7.4