From 5e37ea0ef1bb8d9a27e6502a82b7b14f30d6d6b6 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Thu, 2 Dec 2010 16:53:16 +0000 Subject: [PATCH] Fix PR45199: do not aggregate memory accesses to the same array for -ftree-loop-distribute-patterns 2010-11-30 Sebastian Pop PR tree-optimization/45199 * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New. (stores_zero_from_loop): Call mem_write_stride_of_same_size_as_unit_type_p. * tree-data-ref.h (stride_of_unit_type_p): New. * tree-loop-distribution.c (generate_memset_zero): Simplified. Call stride_of_unit_type_p. (build_rdg_partition_for_component): Do not call rdg_flag_similar_memory_accesses when flag_tree_loop_distribute_patterns is set. * gcc.dg/tree-ssa/ldist-15.c: New. * gcc.dg/tree-ssa/ldist-16.c: New. * gfortran.dg/ldist-pr45199.f: New. From-SVN: r167380 --- gcc/ChangeLog | 13 +++++++++ gcc/testsuite/ChangeLog | 11 ++++++-- gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c | 23 ++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c | 21 +++++++++++++++ gcc/testsuite/gfortran.dg/ldist-pr45199.f | 27 +++++++++++++++++++ gcc/tree-data-ref.c | 24 ++++++++++++++++- gcc/tree-data-ref.h | 11 ++++++++ gcc/tree-loop-distribution.c | 44 +++++++++++-------------------- 8 files changed, 142 insertions(+), 32 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c create mode 100644 gcc/testsuite/gfortran.dg/ldist-pr45199.f diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3241bbd..c72c693 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2010-12-02 Sebastian Pop + + PR tree-optimization/45199 + * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New. + (stores_zero_from_loop): Call + mem_write_stride_of_same_size_as_unit_type_p. + * tree-data-ref.h (stride_of_unit_type_p): New. + * tree-loop-distribution.c (generate_memset_zero): Simplified. + Call stride_of_unit_type_p. + (build_rdg_partition_for_component): Do not call + rdg_flag_similar_memory_accesses when + flag_tree_loop_distribute_patterns is set. + 2010-12-02 Richard Guenther * tree-vect-loop.c (vect_analyze_scalar_cycles_1): Disregard diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 43f1768..3eeee83 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2010-12-02 Sebastian Pop + + PR tree-optimization/45199 + * gcc.dg/tree-ssa/ldist-15.c: New. + * gcc.dg/tree-ssa/ldist-16.c: New. + * gfortran.dg/ldist-pr45199.f: New. + 2010-12-02 Richard Guenther PR tree-optimization/46723 @@ -23,8 +30,8 @@ 2010-12-02 Nicola Pero * objc.dg/exceptions-6.m: New. - * obj-c++.dg/exceptions-6.mm: New. - + * obj-c++.dg/exceptions-6.mm: New. + 2010-12-01 Jan Hubicka * gcc.c-torture/execute/bcp-1.c: Make ready for -fuse-linker-plugin diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c new file mode 100644 index 0000000..7ce3b95 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ldist-details" } */ + +int x[1000]; + +void foo (int n) +{ + int i; + + for (i = 0; i < n; ++i) + { + x[2*i] = 0; + x[2*i + 1] = 1; + } +} + +/* We should not apply loop distribution as it is not beneficial from + a data locality point of view. Also it is not possible to generate + a memset (0) as the write has a stride of 2. */ + +/* { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } */ +/* { dg-final { scan-tree-dump-not "__builtin_memset" "ldist" } } */ +/* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c new file mode 100644 index 0000000..61e8e56 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ldist-details" } */ + +int x[1000]; + +void foo (int n) +{ + int i; + + for (i = 0; i < n; ++i) + { + x[i] = 0; + x[2*i + 1] = 1; + } +} + +/* We should apply loop distribution and generate a memset (0). */ + +/* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_memset" 2 "ldist" } } */ +/* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gfortran.dg/ldist-pr45199.f b/gcc/testsuite/gfortran.dg/ldist-pr45199.f new file mode 100644 index 0000000..6f65501 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/ldist-pr45199.f @@ -0,0 +1,27 @@ +! { dg-do compile } +! { dg-options "-O3 -fdump-tree-ldist-details" } + + parameter(numlev=3,numoblev=1000) + integer i_otyp(numoblev,numlev), i_styp(numoblev,numlev) + logical l_numob(numoblev,numlev) + do ixe=1,numoblev + do iye=1,numlev + i_otyp(ixe,iye)=0 + i_styp(ixe,iye)=0 + l_numob(ixe,iye)=.false. + enddo + enddo + do i=1,m + do j=1,n + if (l_numob(i,j)) then + write(20,'(7I4,F12.2,4F16.10)') i_otyp(i,j),i_styp(i,j) + endif + enddo + enddo + end + +! GCC should apply memset zero loop distribution and it should not ICE. + +! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } } +! { dg-final { scan-tree-dump-times "__builtin_memset" 18 "ldist" } } +! { dg-final { cleanup-tree-dump "ldist" } } diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 3cee320..094d168 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -4974,6 +4974,27 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) free (bbs); } +/* Returns true when STMT is an assignment that contains a data + reference on its LHS with a stride of the same size as its unit + type. */ + +static bool +mem_write_stride_of_same_size_as_unit_type_p (gimple stmt) +{ + struct data_reference *dr = XCNEW (struct data_reference); + tree op0 = gimple_assign_lhs (stmt); + bool res; + + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; + + res = dr_analyze_innermost (dr) + && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)); + + free_data_ref (dr); + return res; +} + /* Initialize STMTS with all the statements of LOOP that contain a store to memory of the form "A[i] = 0". */ @@ -4994,7 +5015,8 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) && is_gimple_assign (stmt) && gimple_assign_rhs_code (stmt) == INTEGER_CST && (op = gimple_assign_rhs1 (stmt)) - && (integer_zerop (op) || real_zerop (op))) + && (integer_zerop (op) || real_zerop (op)) + && mem_write_stride_of_same_size_as_unit_type_p (stmt)) VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si)); free (bbs); diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index 844a2ae..d929f31 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -603,6 +603,17 @@ void remove_similar_memory_refs (VEC (gimple, heap) **); bool rdg_defs_used_in_other_loops_p (struct graph *, int); bool have_similar_memory_accesses (gimple, gimple); +/* Returns true when STRIDE is equal in absolute value to the size of + the unit type of TYPE. */ + +static inline bool +stride_of_unit_type_p (tree stride, tree type) +{ + return tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (stride), + stride), + TYPE_SIZE_UNIT (type)); +} + /* Determines whether RDG vertices V1 and V2 access to similar memory locations, in which case they have to be in the same partition. */ diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 007c4f3..357f51f 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -258,42 +258,27 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter, if (!dr_analyze_innermost (dr)) goto end; - /* Test for a positive stride, iterating over every element. */ - if (integer_zerop (size_binop (MINUS_EXPR, - fold_convert (sizetype, DR_STEP (dr)), - TYPE_SIZE_UNIT (TREE_TYPE (op0))))) - { - addr_base = fold_convert_loc (loc, sizetype, - size_binop_loc (loc, PLUS_EXPR, - DR_OFFSET (dr), - DR_INIT (dr))); - addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, - TREE_TYPE (DR_BASE_ADDRESS (dr)), - DR_BASE_ADDRESS (dr), addr_base); - - nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); - } + if (!stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))) + goto end; + + nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); + addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); + addr_base = fold_convert_loc (loc, sizetype, addr_base); /* Test for a negative stride, iterating over every element. */ - else if (integer_zerop (size_binop (PLUS_EXPR, - TYPE_SIZE_UNIT (TREE_TYPE (op0)), - fold_convert (sizetype, DR_STEP (dr))))) + if (integer_zerop (size_binop (PLUS_EXPR, + TYPE_SIZE_UNIT (TREE_TYPE (op0)), + fold_convert (sizetype, DR_STEP (dr))))) { - nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); - - addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); - addr_base = fold_convert_loc (loc, sizetype, addr_base); addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, fold_convert_loc (loc, sizetype, nb_bytes)); addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, TYPE_SIZE_UNIT (TREE_TYPE (op0))); - addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, - TREE_TYPE (DR_BASE_ADDRESS (dr)), - DR_BASE_ADDRESS (dr), addr_base); } - else - goto end; + addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, + TREE_TYPE (DR_BASE_ADDRESS (dr)), + DR_BASE_ADDRESS (dr), addr_base); mem = force_gimple_operand (addr_base, &stmts, true, NULL); gimple_seq_add_seq (&stmt_list, stmts); @@ -781,8 +766,9 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c, and determine those vertices that have some memory affinity with the current nodes in the component: these are stores to the same arrays, i.e. we're taking care of cache locality. */ - rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, - other_stores); + if (!flag_tree_loop_distribute_patterns) + rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, + other_stores); rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes); -- 2.7.4