From f689d33df9a7a6dcddeaf4036c3eae14b77eb3f3 Mon Sep 17 00:00:00 2001 From: rguenth Date: Wed, 6 Jun 2012 09:45:27 +0000 Subject: [PATCH] 2012-06-06 Richard Guenther PR tree-optimization/53081 * tree-data-ref.h (adjacent_store_dr_p): Rename to ... (adjacent_dr_p): ... this and make it work for reads, too. * tree-loop-distribution.c (enum partition_kind): Add PKIND_MEMCPY. (struct partition_s): Change main_stmt to main_dr, add secondary_dr member. (build_size_arg_loc): Change to date data-reference and not gimplify here. (build_addr_arg_loc): New function split out from ... (generate_memset_builtin): ... here. Use it and simplify. (generate_memcpy_builtin): New function. (generate_code_for_partition): Adjust. (classify_partition): Streamline pattern detection. Detect memcpy. (ldist_gen): Adjust. (tree_loop_distribution): Adjust seed statements for memcpy recognition. * gcc.dg/tree-ssa/ldist-20.c: New testcase. * gcc.dg/tree-ssa/loop-19.c: Add -fno-tree-loop-distribute-patterns. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@188261 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 20 +++ gcc/testsuite/ChangeLog | 6 + gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c | 37 +++++ gcc/testsuite/gcc.dg/tree-ssa/loop-19.c | 2 +- gcc/tree-data-ref.h | 5 +- gcc/tree-loop-distribution.c | 267 ++++++++++++++++++++----------- 6 files changed, 235 insertions(+), 102 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 30d502e..29daf15 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2012-06-06 Richard Guenther + + PR tree-optimization/53081 + * tree-data-ref.h (adjacent_store_dr_p): Rename to ... + (adjacent_dr_p): ... this and make it work for reads, too. + * tree-loop-distribution.c (enum partition_kind): Add PKIND_MEMCPY. + (struct partition_s): Change main_stmt to main_dr, add + secondary_dr member. + (build_size_arg_loc): Change to date data-reference and not + gimplify here. + (build_addr_arg_loc): New function split out from ... + (generate_memset_builtin): ... here. Use it and simplify. + (generate_memcpy_builtin): New function. + (generate_code_for_partition): Adjust. + (classify_partition): Streamline pattern detection. Detect + memcpy. + (ldist_gen): Adjust. + (tree_loop_distribution): Adjust seed statements for memcpy + recognition. + 2012-06-06 Matt Turner * config/arm/mmintrin.h (_mm_empty): New. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c25aadb..a219f84 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2012-06-06 Richard Guenther + + PR tree-optimization/53081 + * gcc.dg/tree-ssa/ldist-20.c: New testcase. + * gcc.dg/tree-ssa/loop-19.c: Add -fno-tree-loop-distribute-patterns. + 2012-06-05 Michael Meissner * gcc.target/powerpc/pr53487.c: New test. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c new file mode 100644 index 0000000..95ae2c0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */ + +void foo(char *); +void my_memcpy (void *q, unsigned int n) +{ + unsigned i; + char p[1024]; + for (i = 0; i < n; ++i) + ((char *)p)[i] = ((char *)q)[i]; + foo(p); +} + +struct S { int i; int j; }; + +void my_memcpy2 (void *q, unsigned int n) +{ + unsigned i; + char p[1024]; + for (i = 0; i < n; ++i) + ((struct S *)p)[i] = ((struct S *)q)[i]; + foo(p); +} + +char p[1024]; +void my_memmove (unsigned int n) +{ + unsigned i; + for (i = 0; i < n; ++i) + p[i] = p[i+1]; + foo(p); +} + + +/* { dg-final { scan-tree-dump-times "generated memcpy" 2 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "generated memmove" 1 "ldist" } } */ +/* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c index 80f2e60..36a2688 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c @@ -6,7 +6,7 @@ /* { dg-do compile { target { i?86-*-* || { x86_64-*-* || powerpc_hard_double } } } } */ /* { dg-require-effective-target nonpic } */ -/* { dg-options "-O3 -fno-prefetch-loop-arrays -fdump-tree-optimized" } */ +/* { dg-options "-O3 -fno-tree-loop-distribute-patterns -fno-prefetch-loop-arrays -fdump-tree-optimized" } */ # define N 2000000 double a[N],c[N]; diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index 853dd81..db33e32 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -615,11 +615,8 @@ bool rdg_defs_used_in_other_loops_p (struct graph *, int); with a stride equal to its unit type size. */ static inline bool -adjacent_store_dr_p (struct data_reference *dr) +adjacent_dr_p (struct data_reference *dr) { - if (!DR_IS_WRITE (dr)) - return false; - /* If this is a bitfield store bail out. */ if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 9f66608..58ed12b 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -52,15 +52,16 @@ along with GCC; see the file COPYING3. If not see #include "tree-scalar-evolution.h" #include "tree-pass.h" -enum partition_kind { PKIND_NORMAL, PKIND_MEMSET }; +enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY }; typedef struct partition_s { bitmap stmts; bool has_writes; enum partition_kind kind; - /* Main statement a kind != PKIND_NORMAL partition is about. */ - gimple main_stmt; + /* data-references a kind != PKIND_NORMAL partition is about. */ + data_reference_p main_dr; + data_reference_p secondary_dr; } *partition_t; DEF_VEC_P (partition_t); @@ -313,40 +314,53 @@ generate_loops_for_partition (struct loop *loop, partition_t partition, free (bbs); } -/* Build the size argument for a memset call. */ +/* Build the size argument for a memory operation call. */ -static inline tree -build_size_arg_loc (location_t loc, tree nb_iter, tree op, - gimple_seq *stmt_list) +static tree +build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter) { - gimple_seq stmts; - tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node, - fold_convert_loc (loc, size_type_node, nb_iter), - fold_convert_loc (loc, size_type_node, - TYPE_SIZE_UNIT (TREE_TYPE (op)))); - x = force_gimple_operand (x, &stmts, true, NULL); - gimple_seq_add_seq (stmt_list, stmts); - - return x; + tree size; + size = fold_build2_loc (loc, MULT_EXPR, sizetype, + fold_convert_loc (loc, sizetype, nb_iter), + TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); + return fold_convert_loc (loc, size_type_node, size); +} + +/* Build an address argument for a memory operation call. */ + +static tree +build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes) +{ + tree addr_base; + + addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); + addr_base = fold_convert_loc (loc, sizetype, addr_base); + + /* Test for a negative stride, iterating over every element. */ + if (tree_int_cst_sgn (DR_STEP (dr)) == -1) + { + addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, + fold_convert_loc (loc, sizetype, nb_bytes)); + addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, + TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); + } + + return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base); } /* Generate a call to memset for PARTITION in LOOP. */ static void -generate_memset_builtin (struct loop *loop, struct graph *rdg, - partition_t partition) +generate_memset_builtin (struct loop *loop, partition_t partition) { gimple_stmt_iterator gsi; gimple stmt, fn_call; - tree op0, nb_iter, mem, fn, addr_base, nb_bytes; - gimple_seq stmt_list = NULL, stmts; - struct data_reference *dr = XCNEW (struct data_reference); + tree nb_iter, mem, fn, nb_bytes; location_t loc; tree val; - stmt = partition->main_stmt; + stmt = DR_STMT (partition->main_dr); loc = gimple_location (stmt); - op0 = gimple_assign_lhs (stmt); if (gimple_bb (stmt) == loop->latch) nb_iter = number_of_latch_executions (loop); else @@ -355,25 +369,12 @@ generate_memset_builtin (struct loop *loop, struct graph *rdg, /* The new statements will be placed before LOOP. */ gsi = gsi_last_bb (loop_preheader_edge (loop)->src); - dr = VEC_index (data_reference_p, - RDG_DATAREFS (rdg, rdg_vertex_for_stmt (rdg, stmt)), 0); - nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); - addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); - addr_base = fold_convert_loc (loc, sizetype, addr_base); - - /* Test for a negative stride, iterating over every element. */ - if (tree_int_cst_sgn (DR_STEP (dr)) == -1) - { - addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, - fold_convert_loc (loc, sizetype, nb_bytes)); - addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, - TYPE_SIZE_UNIT (TREE_TYPE (op0))); - } - - addr_base = fold_build_pointer_plus_loc (loc, - DR_BASE_ADDRESS (dr), addr_base); - mem = force_gimple_operand (addr_base, &stmts, true, NULL); - gimple_seq_add_seq (&stmt_list, stmts); + nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); + nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); + mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); /* This exactly matches the pattern recognition in classify_partition. */ val = gimple_assign_rhs1 (stmt); @@ -393,15 +394,14 @@ generate_memset_builtin (struct loop *loop, struct graph *rdg, tree tem = create_tmp_reg (integer_type_node, NULL); tem = make_ssa_name (tem, NULL); cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE); - gimple_seq_add_stmt (&stmt_list, cstmt); + gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING); val = tem; } } fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); - gimple_seq_add_stmt (&stmt_list, fn_call); - gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING); + gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -415,6 +415,54 @@ generate_memset_builtin (struct loop *loop, struct graph *rdg, } } +/* Generate a call to memcpy for PARTITION in LOOP. */ + +static void +generate_memcpy_builtin (struct loop *loop, partition_t partition) +{ + gimple_stmt_iterator gsi; + gimple stmt, fn_call; + tree nb_iter, dest, src, fn, nb_bytes; + location_t loc; + enum built_in_function kind; + + stmt = DR_STMT (partition->main_dr); + loc = gimple_location (stmt); + if (gimple_bb (stmt) == loop->latch) + nb_iter = number_of_latch_executions (loop); + else + nb_iter = number_of_exit_cond_executions (loop); + + /* The new statements will be placed before LOOP. */ + gsi = gsi_last_bb (loop_preheader_edge (loop)->src); + + nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); + nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); + src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes); + if (ptr_derefs_may_alias_p (dest, src)) + kind = BUILT_IN_MEMMOVE; + else + kind = BUILT_IN_MEMCPY; + + dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + fn = build_fold_addr_expr (builtin_decl_implicit (kind)); + fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); + gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + if (kind == BUILT_IN_MEMCPY) + fprintf (dump_file, "generated memcpy\n"); + else + fprintf (dump_file, "generated memmove\n"); + } +} + /* Remove and destroy the loop LOOP. */ static void @@ -466,13 +514,21 @@ destroy_loop (struct loop *loop) /* Generates code for PARTITION. */ static void -generate_code_for_partition (struct loop *loop, struct graph *rdg, +generate_code_for_partition (struct loop *loop, partition_t partition, bool copy_p) { switch (partition->kind) { case PKIND_MEMSET: - generate_memset_builtin (loop, rdg, partition); + generate_memset_builtin (loop, partition); + /* If this is the last partition for which we generate code, we have + to destroy the loop. */ + if (!copy_p) + destroy_loop (loop); + break; + + case PKIND_MEMCPY: + generate_memcpy_builtin (loop, partition); /* If this is the last partition for which we generate code, we have to destroy the loop. */ if (!copy_p) @@ -849,9 +905,11 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) bitmap_iterator bi; unsigned i; tree nb_iter; + data_reference_p single_load, single_store; partition->kind = PKIND_NORMAL; - partition->main_stmt = NULL; + partition->main_dr = NULL; + partition->secondary_dr = NULL; if (!flag_tree_loop_distribute_patterns) return; @@ -880,10 +938,14 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) } } - /* Detect memset. */ + /* Detect memset and memcpy. */ + single_load = NULL; + single_store = NULL; EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) { gimple stmt = RDG_STMT (rdg, i); + data_reference_p dr; + unsigned j; if (gimple_code (stmt) == GIMPLE_PHI) continue; @@ -892,41 +954,68 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) if (!gimple_vuse (stmt)) continue; - /* Exactly one store. */ - if (gimple_assign_single_p (stmt) - && !is_gimple_reg (gimple_assign_lhs (stmt))) + /* Otherwise just regular loads/stores. */ + if (!gimple_assign_single_p (stmt)) + return; + + /* But exactly one store and/or load. */ + for (j = 0; + VEC_iterate (data_reference_p, RDG_DATAREFS (rdg, i), j, dr); ++j) { - tree rhs; - if (partition->main_stmt != NULL) - return; - partition->main_stmt = stmt; - rhs = gimple_assign_rhs1 (stmt); - if (!(integer_zerop (rhs) - || integer_all_onesp (rhs) - || real_zerop (rhs) - || (TREE_CODE (rhs) == CONSTRUCTOR - && !TREE_CLOBBER_P (rhs)) - || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) - && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) - == TYPE_MODE (unsigned_char_type_node))))) - return; - if (TREE_CODE (rhs) == SSA_NAME - && !SSA_NAME_IS_DEFAULT_DEF (rhs) - && flow_bb_inside_loop_p - (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) - return; - if (VEC_length (data_reference_p, RDG_DATAREFS (rdg, i)) != 1) - return; - if (!adjacent_store_dr_p (VEC_index (data_reference_p, - RDG_DATAREFS (rdg, i), 0))) - return; + if (DR_IS_READ (dr)) + { + if (single_load != NULL) + return; + single_load = dr; + } + else + { + if (single_store != NULL) + return; + single_store = dr; + } } - else - return; } - if (partition->main_stmt != NULL) - partition->kind = PKIND_MEMSET; + if (single_store && !single_load) + { + gimple stmt = DR_STMT (single_store); + tree rhs = gimple_assign_rhs1 (stmt); + if (!(integer_zerop (rhs) + || integer_all_onesp (rhs) + || real_zerop (rhs) + || (TREE_CODE (rhs) == CONSTRUCTOR + && !TREE_CLOBBER_P (rhs)) + || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) + && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) + == TYPE_MODE (unsigned_char_type_node))))) + return; + if (TREE_CODE (rhs) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (rhs) + && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) + return; + if (!adjacent_dr_p (single_store)) + return; + partition->kind = PKIND_MEMSET; + partition->main_dr = single_store; + } + else if (single_store && single_load) + { + gimple store = DR_STMT (single_store); + gimple load = DR_STMT (single_load); + /* Direct aggregate copy or via an SSA name temporary. */ + if (load != store + && gimple_assign_lhs (load) != gimple_assign_rhs1 (store)) + return; + if (!adjacent_dr_p (single_store) + || !adjacent_dr_p (single_load) + || !operand_equal_p (DR_STEP (single_store), + DR_STEP (single_load), 0)) + return; + partition->kind = PKIND_MEMCPY; + partition->main_dr = single_store; + partition->secondary_dr = single_load; + } } /* For a data reference REF, return the declaration of its base @@ -1259,7 +1348,7 @@ ldist_gen (struct loop *loop, struct graph *rdg, dump_rdg_partitions (dump_file, partitions); FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) - generate_code_for_partition (loop, rdg, partition, i < nbp - 1); + generate_code_for_partition (loop, partition, i < nbp - 1); ldist_done: @@ -1392,22 +1481,6 @@ tree_loop_distribution (void) || is_gimple_reg (gimple_assign_lhs (stmt))) continue; - /* If we are only performing pattern detection restrict - what we try to distribute to stores from constants. */ - if (!flag_tree_loop_distribution) - { - tree rhs = gimple_assign_rhs1 (stmt); - if (!is_gimple_min_invariant (rhs) - && TREE_CODE (rhs) != CONSTRUCTOR - && TREE_CODE (rhs) != SSA_NAME) - continue; - if (TREE_CODE (rhs) == SSA_NAME - && !SSA_NAME_IS_DEFAULT_DEF (rhs) - && flow_bb_inside_loop_p - (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) - continue; - } - VEC_safe_push (gimple, heap, work_list, stmt); } } -- 2.7.4