From 162719b305a713b80afc9df7016f5dfc8d095515 Mon Sep 17 00:00:00 2001 From: hubicka Date: Sun, 28 Jan 2007 19:38:39 +0000 Subject: [PATCH] * expr.c (emit_block_move_via_movmem, emit_block_move_via_libcall): Add variant handling histograms; add wrapper. (clear_storage_via_libcall): Export. (emit_block_move_hints): Break out from ...; add histograms. (emit_block_move): ... this one. (clear_storage_hints): Break out from ...; add histograms. (clear_storage): ... this one. (set_storage_via_setmem): Handle histogram. * expr.h (emit_block_move_via_libcall, emit_block_move_hints): Declare. (clear_storage_hints, clear_storage_via_libcall): Declare. (set_storage_via_setmem): Update prototype. * doc/md.texi (movmem, setmem): Document new arguments. * value-prof.c (dump_histogram_value, tree_find_values_to_profile): Add new histograms. (stringop_block_profile): New global function. (tree_stringops_values_to_profile): Profile block size and alignment. * value-prof.h (enum hist_type): Add HIST_TYPE_AVERAGE and HIST_TYPE_IOR. (struct profile_hooks): Add gen_average_profiler and gen_ior_profiler. (stringop_block_profile): Declare. * builtins.c: Include value-prof.h. (expand_builtin_memcpy, expand_builtin_memset): Pass block profile. * gcov-io.h (GCOV_COUNTER_NAMES): Add new counter. (GCOV_COUNTER_AVERAGE, GCOV_COUNTER_IOR): New constants. (GCOV_COUNTERS, GCOV_LAST_VALUE_COUNTER): Update. * profile.c (instrument_values): Add new counters. * cfgexpand.c (expand_gimple_basic_block): Propagate histograms to calls. * tree-profile.c (tree_average_profiler_fn, tree_ior_profiler_fn): New. (tree_init_edge_profiler): Build new profilers. (tree_gen_average_profiler, tree_gen_ior_profiler): New. (pass_tree_profile): Add dump. (tree_profile_hooks): Update. * Makefile.in (LIBGCOV): Add new constants. * libgcov.c (__gcov_merge_ior, __gcov_average_profiler, __gcov_ior_profiler): New. * i386.md (movmem/setmem expanders): Add new optional arguments. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@121270 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 41 ++++++++++++++ gcc/Makefile.in | 3 +- gcc/builtins.c | 33 ++++++++---- gcc/cfgexpand.c | 9 ++-- gcc/config/i386/i386.md | 20 ++++--- gcc/doc/md.texi | 10 ++++ gcc/expr.c | 53 ++++++++++++++---- gcc/expr.h | 7 ++- gcc/gcov-io.h | 15 ++++-- gcc/libgcov.c | 36 +++++++++++++ gcc/profile.c | 16 ++++++ gcc/testsuite/ChangeLog | 4 ++ gcc/testsuite/gcc.dg/tree-prof/val-prof-6.c | 20 +++++++ gcc/tree-profile.c | 73 ++++++++++++++++++++++--- gcc/value-prof.c | 84 ++++++++++++++++++++++++++++- gcc/value-prof.h | 11 +++- 16 files changed, 390 insertions(+), 45 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-prof/val-prof-6.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 94702ad..1e8f296 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,44 @@ +2007-01-28 Jan Hubicka + + * expr.c (emit_block_move_via_movmem, emit_block_move_via_libcall): Add + variant handling histograms; add wrapper. + (clear_storage_via_libcall): Export. + (emit_block_move_hints): Break out from ...; add histograms. + (emit_block_move): ... this one. + (clear_storage_hints): Break out from ...; add histograms. + (clear_storage): ... this one. + (set_storage_via_memset): Handle histogram. + * expr.h (emit_block_move_via_libcall, emit_block_move_hints): Declare. + (clear_storage_hints, clear_storage_via_libcall): Declare. + (set_storage_via_setmem): Update prototype. + * doc/md.texi (movmem, setmem): Document new arguments. + + * value-prof.c (dump_histogram_value, tree_find_values_to_profile): Add + new histograms. + (stringop_block_profile): New global function. + (tree_stringops_values_to_profile): Profile block size and alignment. + * value-prof.h (enum hist_type): add HIST_TYPE_AVERAGE and + HIST_TYPE_IOR. + (struct profile_hooks): Add gen_average_profiler and gen_ior_profiler. + (stringop_block_profile): Declare. + * builtins.c: Include value-prof.h. 
+ (expand_builtin_memcpy, expand_builtin_memset): Pass block profile. + * gcov-ui.h (GCOV_COUNTER_NAMES): Add new counter. + (GCOV_COUNTER_AVERAGE, GCOV_COUNTER_IOR): New constants. + (GCOV_COUNTERS, GCOV_LAST_VALUE_COUNTER): Update. + * profile.c (instrument_values): Add new counters. + * cfgexpand.c (expand_gimple_basic_block): Propagate histograms to + calls. + * tree-profile.c (tree_average_profiler_fn, tree_ior_profiler_fn): New. + (tree_init_edge_profiler): Build new profilers. + (tree_gen_average_profiler, tree_gen_ior_profiler): New. + (pass_tree_profile): Add dump. + (tree_profile_hooks): Update. + * Makefile.in (LIBGCOV): Add new constants. + * libgcov.c (__gcov_merge_ior, __gcov_average_profiler, + __gcov_ior_profiler): New. + * i386.md (movmem/setmem expanders): Add new optional arguments. + 2007-01-28 David Edelsohn * doc/md.texi (Standard Pattern Names): Document blockage pattern. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 01d190d..dff2173 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1200,7 +1200,8 @@ LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single _gcov_merge_delta \ _gcov_fork _gcov_execl _gcov_execlp _gcov_execle \ _gcov_execv _gcov_execvp _gcov_execve \ _gcov_interval_profiler _gcov_pow2_profiler _gcov_one_value_profiler \ - _gcov_indirect_call_profiler + _gcov_indirect_call_profiler _gcov_average_profiler _gcov_ior_profiler \ + _gcov_merge_ior FPBIT_FUNCS = _pack_sf _unpack_sf _addsub_sf _mul_sf _div_sf \ _fpcmp_parts_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \ diff --git a/gcc/builtins.c b/gcc/builtins.c index d122379..7947907 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -49,6 +49,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "basic-block.h" #include "tree-mudflap.h" #include "tree-flow.h" +#include "value-prof.h" #ifndef PAD_VARARGS_DOWN #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN @@ -3099,6 +3100,8 @@ expand_builtin_memcpy (tree exp, rtx target, enum machine_mode mode) rtx dest_mem, 
src_mem, dest_addr, len_rtx; tree result = fold_builtin_memory_op (arglist, TREE_TYPE (TREE_TYPE (fndecl)), false, /*endp=*/0); + HOST_WIDE_INT expected_size = -1; + unsigned int expected_align = 0; if (result) { @@ -3119,7 +3122,10 @@ expand_builtin_memcpy (tree exp, rtx target, enum machine_mode mode) operation in-line. */ if (src_align == 0) return 0; - + + stringop_block_profile (exp, &expected_align, &expected_size); + if (expected_align < dest_align) + expected_align = dest_align; dest_mem = get_memory_rtx (dest, len); set_mem_align (dest_mem, dest_align); len_rtx = expand_normal (len); @@ -3146,9 +3152,10 @@ expand_builtin_memcpy (tree exp, rtx target, enum machine_mode mode) set_mem_align (src_mem, src_align); /* Copy word part most expediently. */ - dest_addr = emit_block_move (dest_mem, src_mem, len_rtx, - CALL_EXPR_TAILCALL (exp) - ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL); + dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, + CALL_EXPR_TAILCALL (exp) + ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL, + expected_align, expected_size); if (dest_addr == 0) { @@ -3640,6 +3647,8 @@ expand_builtin_memset (tree arglist, rtx target, enum machine_mode mode, char c; unsigned int dest_align; rtx dest_mem, dest_addr, len_rtx; + HOST_WIDE_INT expected_size = -1; + unsigned int expected_align = 0; dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT); @@ -3648,6 +3657,10 @@ expand_builtin_memset (tree arglist, rtx target, enum machine_mode mode, if (dest_align == 0) return 0; + stringop_block_profile (orig_exp, &expected_align, &expected_size); + if (expected_align < dest_align) + expected_align = dest_align; + /* If the LEN parameter is zero, return DEST. 
*/ if (integer_zerop (len)) { @@ -3687,7 +3700,8 @@ expand_builtin_memset (tree arglist, rtx target, enum machine_mode mode, builtin_memset_gen_str, val_rtx, dest_align, 0); } else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx, - dest_align)) + dest_align, expected_align, + expected_size)) goto do_libcall; dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX); @@ -3707,7 +3721,8 @@ expand_builtin_memset (tree arglist, rtx target, enum machine_mode mode, store_by_pieces (dest_mem, tree_low_cst (len, 1), builtin_memset_read_str, &c, dest_align, 0); else if (!set_storage_via_setmem (dest_mem, len_rtx, GEN_INT (c), - dest_align)) + dest_align, expected_align, + expected_size)) goto do_libcall; dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX); @@ -3716,9 +3731,10 @@ expand_builtin_memset (tree arglist, rtx target, enum machine_mode mode, } set_mem_align (dest_mem, dest_align); - dest_addr = clear_storage (dest_mem, len_rtx, - CALL_EXPR_TAILCALL (orig_exp) - ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL); + dest_addr = clear_storage_hints (dest_mem, len_rtx, + CALL_EXPR_TAILCALL (orig_exp) + ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL, + expected_align, expected_size); if (dest_addr == 0) { diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index b91af8e..52ac211 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -1516,9 +1516,12 @@ expand_gimple_basic_block (basic_block bb) /* For the benefit of calls.c, converting all this to rtl, we need to record the call expression, not just the outer modify statement. 
*/ - if (call && call != stmt - && (region = lookup_stmt_eh_region (stmt)) > 0) - add_stmt_to_eh_region (call, region); + if (call && call != stmt) + { + if ((region = lookup_stmt_eh_region (stmt)) > 0) + add_stmt_to_eh_region (call, region); + gimple_duplicate_stmt_histograms (cfun, call, cfun, stmt); + } if (call && CALL_EXPR_TAILCALL (call)) { bool can_fallthru; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 56cf81d..21d1c2b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -17591,11 +17591,13 @@ [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:BLK 1 "memory_operand" "")) (use (match_operand:SI 2 "nonmemory_operand" "")) - (use (match_operand:SI 3 "const_int_operand" ""))] + (use (match_operand:SI 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] "" { if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3], - operands[3], constm1_rtx)) + operands[4], operands[5])) DONE; else FAIL; @@ -17605,11 +17607,13 @@ [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:BLK 1 "memory_operand" "")) (use (match_operand:DI 2 "nonmemory_operand" "")) - (use (match_operand:DI 3 "const_int_operand" ""))] + (use (match_operand:DI 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] "TARGET_64BIT" { if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3], - operands[3], constm1_rtx)) + operands[4], operands[5])) DONE; else FAIL; @@ -17867,12 +17871,14 @@ [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:SI 1 "nonmemory_operand" "")) (use (match_operand 2 "const_int_operand" "")) - (use (match_operand 3 "const_int_operand" ""))] + (use (match_operand 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] "" { if 
(ix86_expand_setmem (operands[0], operands[1], operands[2], operands[3], - operands[3], constm1_rtx)) + operands[4], operands[5])) DONE; else FAIL; @@ -17889,7 +17895,7 @@ { if (ix86_expand_setmem (operands[0], operands[1], operands[2], operands[3], - operands[3], constm1_rtx)) + operands[4], operands[5])) DONE; else FAIL; diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 5745ec4..54ce9f5 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3971,6 +3971,11 @@ destination, in the form of a @code{const_int} rtx. Thus, if the compiler knows that both source and destination are word-aligned, it may provide the value 4 for this operand. +Optional operands 5 and 6 specify expected alignment and size of block +respectively. The expected alignment differs from alignment in operand 4 +in a way that the blocks are not required to be aligned according to it in +all cases. Expected size, when unknown, is set to @code{(const_int -1)}. + Descriptions of multiple @code{movmem@var{m}} patterns can only be beneficial if the patterns for smaller modes have fewer restrictions on their first, second and fourth operands. Note that the mode @var{m} @@ -4003,6 +4008,11 @@ of a @code{const_int} rtx. Thus, if the compiler knows that the destination is word-aligned, it may provide the value 4 for this operand. +Optional operands 5 and 6 specify expected alignment and size of block +respectively. The expected alignment differs from alignment in operand 4 +in a way that the blocks are not required to be aligned according to it in +all cases. Expected size, when unknown, is set to @code{(const_int -1)}. + The use for multiple @code{setmem@var{m}} is as for @code{movmem@var{m}}. 
@cindex @code{cmpstrn@var{m}} instruction pattern diff --git a/gcc/expr.c b/gcc/expr.c index ec0306f..6e60909 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -126,7 +126,7 @@ static unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT, static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode, struct move_by_pieces *); static bool block_move_libcall_safe_for_call_parm (void); -static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned); +static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT); static tree emit_block_move_libcall_fn (int); static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned); static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode); @@ -1147,7 +1147,8 @@ move_by_pieces_1 (rtx (*genfun) (rtx, ...), enum machine_mode mode, 0 otherwise. */ rtx -emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method) +emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, + unsigned int expected_align, HOST_WIDE_INT expected_size) { bool may_use_call; rtx retval = 0; @@ -1202,7 +1203,8 @@ emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method) if (GET_CODE (size) == CONST_INT && MOVE_BY_PIECES_P (INTVAL (size), align)) move_by_pieces (x, y, INTVAL (size), align, 0); - else if (emit_block_move_via_movmem (x, y, size, align)) + else if (emit_block_move_via_movmem (x, y, size, align, + expected_align, expected_size)) ; else if (may_use_call) retval = emit_block_move_via_libcall (x, y, size, @@ -1216,6 +1218,12 @@ emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method) return retval; } +rtx +emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method) +{ + return emit_block_move_hints (x, y, size, method, 0, -1); +} + /* A subroutine of emit_block_move. Returns true if calling the block move libcall will not clobber any parameters which may have already been placed on the stack. 
*/ @@ -1266,12 +1274,16 @@ block_move_libcall_safe_for_call_parm (void) return true if successful. */ static bool -emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align) +emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, + unsigned int expected_align, HOST_WIDE_INT expected_size) { rtx opalign = GEN_INT (align / BITS_PER_UNIT); int save_volatile_ok = volatile_ok; enum machine_mode mode; + if (expected_align < align) + expected_align = align; + /* Since this is a move insn, we don't care about volatility. */ volatile_ok = 1; @@ -1315,7 +1327,12 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align) that it doesn't fail the expansion because it thinks emitting the libcall would be more efficient. */ - pat = GEN_FCN ((int) code) (x, y, op2, opalign); + if (insn_data[(int) code].n_operands == 4) + pat = GEN_FCN ((int) code) (x, y, op2, opalign); + else + pat = GEN_FCN ((int) code) (x, y, op2, opalign, + GEN_INT (expected_align), + GEN_INT (expected_size)); if (pat) { emit_insn (pat); @@ -2495,7 +2512,8 @@ store_by_pieces_2 (rtx (*genfun) (rtx, ...), enum machine_mode mode, its length in bytes. 
*/ rtx -clear_storage (rtx object, rtx size, enum block_op_methods method) +clear_storage_hints (rtx object, rtx size, enum block_op_methods method, + unsigned int expected_align, HOST_WIDE_INT expected_size) { enum machine_mode mode = GET_MODE (object); unsigned int align; @@ -2535,7 +2553,8 @@ clear_storage (rtx object, rtx size, enum block_op_methods method) if (GET_CODE (size) == CONST_INT && CLEAR_BY_PIECES_P (INTVAL (size), align)) clear_by_pieces (object, INTVAL (size), align); - else if (set_storage_via_setmem (object, size, const0_rtx, align)) + else if (set_storage_via_setmem (object, size, const0_rtx, align, + expected_align, expected_size)) ; else return set_storage_via_libcall (object, size, const0_rtx, @@ -2544,6 +2563,13 @@ clear_storage (rtx object, rtx size, enum block_op_methods method) return NULL; } +rtx +clear_storage (rtx object, rtx size, enum block_op_methods method) +{ + return clear_storage_hints (object, size, method, 0, -1); +} + + /* A subroutine of clear_storage. Expand a call to memset. Return the return value of memset, 0 otherwise. */ @@ -2645,7 +2671,8 @@ clear_storage_libcall_fn (int for_call) /* Expand a setmem pattern; return true if successful. 
*/ bool -set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align) +set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align, + unsigned int expected_align, HOST_WIDE_INT expected_size) { /* Try the most limited insn first, because there's no point including more than one in the machine description unless @@ -2654,6 +2681,9 @@ set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align) rtx opalign = GEN_INT (align / BITS_PER_UNIT); enum machine_mode mode; + if (expected_align < align) + expected_align = align; + for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) { @@ -2694,7 +2724,12 @@ set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align) opchar = copy_to_mode_reg (char_mode, opchar); } - pat = GEN_FCN ((int) code) (object, opsize, opchar, opalign); + if (insn_data[(int) code].n_operands == 4) + pat = GEN_FCN ((int) code) (object, opsize, opchar, opalign); + else + pat = GEN_FCN ((int) code) (object, opsize, opchar, opalign, + GEN_INT (expected_align), + GEN_INT (expected_size)); if (pat) { emit_insn (pat); diff --git a/gcc/expr.h b/gcc/expr.h index beb8ea3..32a0a51 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -378,6 +378,8 @@ extern void init_block_clear_fn (const char *); extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods); extern rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool); +extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods, + unsigned int, HOST_WIDE_INT); /* Copy all or part of a value X into registers starting at REGNO. The number of registers to be filled is NREGS. */ @@ -424,11 +426,14 @@ extern void use_group_regs (rtx *, rtx); /* Write zeros through the storage of OBJECT. If OBJECT has BLKmode, SIZE is its length in bytes. 
*/ extern rtx clear_storage (rtx, rtx, enum block_op_methods); +extern rtx clear_storage_hints (rtx, rtx, enum block_op_methods, + unsigned int, HOST_WIDE_INT); /* The same, but always output an library call. */ rtx set_storage_via_libcall (rtx, rtx, rtx, bool); /* Expand a setmem pattern; return true if successful. */ -extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int); +extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int, + unsigned int, HOST_WIDE_INT); /* Determine whether the LEN bytes can be moved by using several move instructions. Return nonzero if a call to move_by_pieces should diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h index e819eb3..3ea5d41 100644 --- a/gcc/gcov-io.h +++ b/gcc/gcov-io.h @@ -329,16 +329,21 @@ typedef HOST_WIDEST_INT gcov_type; consecutive values of expression. */ #define GCOV_COUNTER_V_INDIR 5 /* The most common indirect address */ -#define GCOV_LAST_VALUE_COUNTER 5 /* The last of counters used for value +#define GCOV_COUNTER_AVERAGE 6 /* Sum of the profiled values together + with the number of samples. */ +#define GCOV_COUNTER_IOR 7 /* Bitwise inclusive OR of all the + profiled values. */ +#define GCOV_LAST_VALUE_COUNTER 7 /* The last of counters used for value profiling. */ -#define GCOV_COUNTERS 6 +#define GCOV_COUNTERS 8 /* Number of counters used for value profiling. */ #define GCOV_N_VALUE_COUNTERS \ (GCOV_LAST_VALUE_COUNTER - GCOV_FIRST_VALUE_COUNTER + 1) /* A list of human readable names of the counters */ -#define GCOV_COUNTER_NAMES {"arcs", "interval", "pow2", "single", "delta", "indirect_call"} +#define GCOV_COUNTER_NAMES {"arcs", "interval", "pow2", "single", \ + "delta","indirect_call", "average", "ior"} /* Names of merge functions for counters. 
*/ #define GCOV_MERGE_FUNCTIONS {"__gcov_merge_add", \ @@ -346,7 +351,9 @@ typedef HOST_WIDEST_INT gcov_type; "__gcov_merge_add", \ "__gcov_merge_single", \ "__gcov_merge_delta", \ - "__gcov_merge_single" } + "__gcov_merge_single", \ + "__gcov_merge_add", \ + "__gcov_merge_ior"} /* Convert a counter index to a tag. */ #define GCOV_TAG_FOR_COUNTER(COUNT) \ diff --git a/gcc/libgcov.c b/gcc/libgcov.c index 880686e..04fc3b2 100644 --- a/gcc/libgcov.c +++ b/gcc/libgcov.c @@ -614,6 +614,18 @@ __gcov_merge_add (gcov_type *counters, unsigned n_counters) } #endif /* L_gcov_merge_add */ +#ifdef L_gcov_merge_ior +/* The profile merging function that ORs the counters together. It is given + an array COUNTERS of N_COUNTERS old counters and it reads the same number + of counters from the gcov file. */ +void +__gcov_merge_ior (gcov_type *counters, unsigned n_counters) +{ + for (; n_counters; counters++, n_counters--) + *counters |= gcov_read_counter (); +} +#endif + #ifdef L_gcov_merge_single /* The profile merging function for choosing the most common value. It is given an array COUNTERS of N_COUNTERS old counters and it @@ -770,6 +782,30 @@ __gcov_indirect_call_profiler (gcov_type* counter, gcov_type value, } #endif + +#ifdef L_gcov_average_profiler +/* Add VALUE to the running sum in COUNTERS[0] and count the sample in + COUNTERS[1]. FIXME: Perhaps we want to saturate up. */ + +void +__gcov_average_profiler (gcov_type *counters, gcov_type value) +{ + counters[0] += value; + counters[1] ++; +} +#endif + +#ifdef L_gcov_ior_profiler +/* Accumulate VALUE into the counter pointed to by COUNTERS using a + bitwise inclusive OR. */ + +void +__gcov_ior_profiler (gcov_type *counters, gcov_type value) +{ + *counters |= value; +} +#endif + #ifdef L_gcov_fork /* A wrapper for the fork function. Flushes the accumulated profiling data, so that they are not counted twice. 
*/ diff --git a/gcc/profile.c b/gcc/profile.c index ef6b326..f833a1a 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -196,6 +196,14 @@ instrument_values (histogram_values values) t = GCOV_COUNTER_V_INDIR; break; + case HIST_TYPE_AVERAGE: + t = GCOV_COUNTER_AVERAGE; + break; + + case HIST_TYPE_IOR: + t = GCOV_COUNTER_IOR; + break; + default: gcc_unreachable (); } @@ -224,6 +232,14 @@ instrument_values (histogram_values values) (profile_hooks->gen_ic_profiler) (hist, t, 0); break; + case HIST_TYPE_AVERAGE: + (profile_hooks->gen_average_profiler) (hist, t, 0); + break; + + case HIST_TYPE_IOR: + (profile_hooks->gen_ior_profiler) (hist, t, 0); + break; + default: gcc_unreachable (); } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0d59da3..4482ad8 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2007-01-28 Jan Hubicka + + * gcc.dg/tree-prof/val-prof-6.c: New test. + 2007-01-28 Roger Sayle * gcc.dg/large-size-array-3.c: Correct test case (portability). 
diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-6.c b/gcc/testsuite/gcc.dg/tree-prof/val-prof-6.c new file mode 100644 index 0000000..c439fcf --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-6.c @@ -0,0 +1,20 @@ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +char a[1000]; +char b[1000]; +int size=1000; +__attribute__ ((noinline)) +t(int size) +{ + __builtin_memcpy(a,b,size); +} +int +main() +{ + int i; + for (i=0; i < size; i++) + t(i); + return 0; +} +/* { dg-final-use { scan-tree-dump "Average value sum:499500" "optimized"} } */ +/* { dg-final-use { scan-tree-dump "IOR value" "optimized"} } */ +/* { dg-final-use { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c index 2a4ec2a..b5088f0 100644 --- a/gcc/tree-profile.c +++ b/gcc/tree-profile.c @@ -52,6 +52,8 @@ static GTY(()) tree tree_interval_profiler_fn; static GTY(()) tree tree_pow2_profiler_fn; static GTY(()) tree tree_one_value_profiler_fn; static GTY(()) tree tree_indirect_call_profiler_fn; +static GTY(()) tree tree_average_profiler_fn; +static GTY(()) tree tree_ior_profiler_fn; static GTY(()) tree ic_void_ptr_var; @@ -101,6 +103,7 @@ tree_init_edge_profiler (void) tree one_value_profiler_fn_type; tree gcov_type_ptr; tree ic_profiler_fn_type; + tree average_profiler_fn_type; if (!gcov_type_node) { @@ -145,6 +148,16 @@ tree_init_edge_profiler (void) tree_indirect_call_profiler_fn = build_fn_decl ("__gcov_indirect_call_profiler", ic_profiler_fn_type); + /* void (*) (gcov_type *, gcov_type) */ + average_profiler_fn_type + = build_function_type_list (void_type_node, + gcov_type_ptr, gcov_type_node, NULL_TREE); + tree_average_profiler_fn + = build_fn_decl ("__gcov_average_profiler", + average_profiler_fn_type); + tree_ior_profiler_fn + = build_fn_decl ("__gcov_ior_profiler", + average_profiler_fn_type); } } @@ -354,6 +367,48 @@ tree_gen_const_delta_profiler (histogram_value value ATTRIBUTE_UNUSED, gcc_unreachable (); } +/* Output instructions as GIMPLE 
trees to increment the average histogram + counter. VALUE is the expression whose value is profiled. TAG is the + tag of the section for counters, BASE is offset of the counter position. */ + +static void +tree_gen_average_profiler (histogram_value value, unsigned tag, unsigned base) +{ + tree stmt = value->hvalue.stmt; + block_stmt_iterator bsi = bsi_for_stmt (stmt); + tree ref = tree_coverage_counter_ref (tag, base), ref_ptr; + tree args, call, val; + + ref_ptr = force_gimple_operand_bsi (&bsi, + build_addr (ref, current_function_decl), + true, NULL_TREE); + val = prepare_instrumented_value (&bsi, value); + args = tree_cons (NULL_TREE, ref_ptr, tree_cons (NULL_TREE, val, NULL_TREE)); + call = build_function_call_expr (tree_average_profiler_fn, args); + bsi_insert_before (&bsi, call, BSI_SAME_STMT); +} + +/* Output instructions as GIMPLE trees to increment the ior histogram + counter. VALUE is the expression whose value is profiled. TAG is the + tag of the section for counters, BASE is offset of the counter position. */ + +static void +tree_gen_ior_profiler (histogram_value value, unsigned tag, unsigned base) +{ + tree stmt = value->hvalue.stmt; + block_stmt_iterator bsi = bsi_for_stmt (stmt); + tree ref = tree_coverage_counter_ref (tag, base), ref_ptr; + tree args, call, val; + + ref_ptr = force_gimple_operand_bsi (&bsi, + build_addr (ref, current_function_decl), + true, NULL_TREE); + val = prepare_instrumented_value (&bsi, value); + args = tree_cons (NULL_TREE, ref_ptr, tree_cons (NULL_TREE, val, NULL_TREE)); + call = build_function_call_expr (tree_ior_profiler_fn, args); + bsi_insert_before (&bsi, call, BSI_SAME_STMT); +} + /* Return 1 if tree-based profiling is in effect, else 0. If it is, set up hooks for tree-based profiling. Gate for pass_tree_profile. 
*/ @@ -408,19 +463,21 @@ struct tree_opt_pass pass_tree_profile = PROP_gimple_leh | PROP_cfg, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - TODO_verify_stmts, /* todo_flags_finish */ + TODO_verify_stmts | TODO_dump_func, /* todo_flags_finish */ 0 /* letter */ }; struct profile_hooks tree_profile_hooks = { - tree_init_edge_profiler, /* init_edge_profiler */ - tree_gen_edge_profiler, /* gen_edge_profiler */ - tree_gen_interval_profiler, /* gen_interval_profiler */ - tree_gen_pow2_profiler, /* gen_pow2_profiler */ - tree_gen_one_value_profiler, /* gen_one_value_profiler */ - tree_gen_const_delta_profiler,/* gen_const_delta_profiler */ - tree_gen_ic_profiler, /* gen_ic_profiler */ + tree_init_edge_profiler, /* init_edge_profiler */ + tree_gen_edge_profiler, /* gen_edge_profiler */ + tree_gen_interval_profiler, /* gen_interval_profiler */ + tree_gen_pow2_profiler, /* gen_pow2_profiler */ + tree_gen_one_value_profiler, /* gen_one_value_profiler */ + tree_gen_const_delta_profiler, /* gen_const_delta_profiler */ + tree_gen_ic_profiler, /* gen_ic_profiler */ + tree_gen_average_profiler, /* gen_average_profiler */ + tree_gen_ior_profiler /* gen_ior_profiler */ }; #include "gt-tree-profile.h" diff --git a/gcc/value-prof.c b/gcc/value-prof.c index f23fd68..4734355 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -248,6 +248,28 @@ dump_histogram_value (FILE *dump_file, histogram_value hist) fprintf (dump_file, ".\n"); break; + case HIST_TYPE_AVERAGE: + fprintf (dump_file, "Average value "); + if (hist->hvalue.counters) + { + fprintf (dump_file, "sum:"HOST_WIDEST_INT_PRINT_DEC + " times:"HOST_WIDEST_INT_PRINT_DEC, + (HOST_WIDEST_INT) hist->hvalue.counters[0], + (HOST_WIDEST_INT) hist->hvalue.counters[1]); + } + fprintf (dump_file, ".\n"); + break; + + case HIST_TYPE_IOR: + fprintf (dump_file, "IOR value "); + if (hist->hvalue.counters) + { + fprintf (dump_file, "ior:"HOST_WIDEST_INT_PRINT_DEC, + (HOST_WIDEST_INT) 
hist->hvalue.counters[0]); + } + fprintf (dump_file, ".\n"); + break; + case HIST_TYPE_CONST_DELTA: fprintf (dump_file, "Constant delta "); if (hist->hvalue.counters) @@ -1404,6 +1426,64 @@ tree_stringops_transform (block_stmt_iterator *bsi) return true; } +/* Use the AVERAGE and IOR histograms recorded for STMT, if any, to set + *EXPECTED_SIZE to the rounded average block size (-1 when unknown) and + *EXPECTED_ALIGN to the largest power-of-two alignment shared by all + profiled values, scaled by BITS_PER_UNIT (0 when unknown). Histograms + that are found are removed from STMT. */ +void +stringop_block_profile (tree stmt, unsigned int *expected_align, + HOST_WIDE_INT *expected_size) +{ + histogram_value histogram; + histogram = gimple_histogram_value_of_type (cfun, stmt, HIST_TYPE_AVERAGE); + if (!histogram) + *expected_size = -1; + else if (!histogram->hvalue.counters[1]) + { + /* Never executed in the training run; avoid dividing by zero. */ + *expected_size = -1; + gimple_remove_histogram_value (cfun, stmt, histogram); + } + else + { + gcov_type size; + /* counters[0] is the sum of the sizes, counters[1] the number of + samples; round the average to nearest. */ + size = ((histogram->hvalue.counters[0] + + histogram->hvalue.counters[1] / 2) + / histogram->hvalue.counters[1]); + /* Even if we can hold bigger value in SIZE, INT_MAX + is safe "infinity" for code generation strategies. */ + if (size > INT_MAX) + size = INT_MAX; + *expected_size = size; + gimple_remove_histogram_value (cfun, stmt, histogram); + } + histogram = gimple_histogram_value_of_type (cfun, stmt, HIST_TYPE_IOR); + if (!histogram) + *expected_align = 0; + else if (!histogram->hvalue.counters[0]) + { + /* No bits were recorded, so nothing is known about the alignment. */ + *expected_align = 0; + gimple_remove_histogram_value (cfun, stmt, histogram); + } + else + { + gcov_type count; + int alignment; + + count = histogram->hvalue.counters[0]; + alignment = 1; + while (!(count & alignment) + && (alignment * 2 * BITS_PER_UNIT)) + alignment <<= 1; + *expected_align = alignment * BITS_PER_UNIT; + gimple_remove_histogram_value (cfun, stmt, histogram); + } +} + struct value_prof_hooks { /* Find list of values for which we want to measure histograms. 
*/ void (*find_values_to_profile) (histogram_values *); @@ -1513,6 +1574,7 @@ tree_stringops_values_to_profile (tree stmt, histogram_values *values) tree fndecl; tree arglist; tree blck_size; + tree dest; enum built_in_function fcode; if (!call) @@ -1526,15 +1588,25 @@ tree_stringops_values_to_profile (tree stmt, histogram_values *values) if (!interesting_stringop_to_profile_p (fndecl, arglist)) return; + dest = TREE_VALUE (arglist); if (fcode == BUILT_IN_BZERO) blck_size = TREE_VALUE (TREE_CHAIN (arglist)); else blck_size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); if (TREE_CODE (blck_size) != INTEGER_CST) + { + VEC_safe_push (histogram_value, heap, *values, + gimple_alloc_histogram_value (cfun, HIST_TYPE_SINGLE_VALUE, + stmt, blck_size)); + VEC_safe_push (histogram_value, heap, *values, + gimple_alloc_histogram_value (cfun, HIST_TYPE_AVERAGE, + stmt, blck_size)); + } + if (TREE_CODE (blck_size) != INTEGER_CST) VEC_safe_push (histogram_value, heap, *values, - gimple_alloc_histogram_value (cfun, HIST_TYPE_SINGLE_VALUE, - stmt, blck_size)); + gimple_alloc_histogram_value (cfun, HIST_TYPE_IOR, + stmt, dest)); } /* Find values inside STMT for that we want to measure histograms and adds @@ -1588,6 +1660,14 @@ tree_find_values_to_profile (histogram_values *values) hist->n_counters = 3; break; + case HIST_TYPE_AVERAGE: + hist->n_counters = 2; /* Sum of values and sample count. */ + break; + + case HIST_TYPE_IOR: + hist->n_counters = 1; /* Single IOR accumulator. */ + break; + default: gcc_unreachable (); } diff --git a/gcc/value-prof.h b/gcc/value-prof.h index 78c9e88..4447b14 100644 --- a/gcc/value-prof.h +++ b/gcc/value-prof.h @@ -31,8 +31,10 @@ enum hist_type always constant. */ HIST_TYPE_CONST_DELTA, /* Tries to identify the (almost) always constant difference between two evaluations of a value. 
*/ - HIST_TYPE_INDIR_CALL /* Tries to identify the function that is (almost) + HIST_TYPE_INDIR_CALL, /* Tries to identify the function that is (almost) called in indirect call */ + HIST_TYPE_AVERAGE, /* Compute average value (sum of all values). */ + HIST_TYPE_IOR /* Used to compute expected alignment. */ }; #define COUNTER_FOR_HIST_TYPE(TYPE) ((int) (TYPE) + GCOV_FIRST_VALUE_COUNTER) @@ -99,6 +101,12 @@ struct profile_hooks { /* Insert code to find the most common indirect call */ void (*gen_ic_profiler) (histogram_value, unsigned, unsigned); + + /* Insert code to find the average value of an expression. */ + void (*gen_average_profiler) (histogram_value, unsigned, unsigned); + + /* Insert code to ior value of an expression. */ + void (*gen_ior_profiler) (histogram_value, unsigned, unsigned); }; histogram_value gimple_histogram_value (struct function *, tree); @@ -111,6 +119,7 @@ void gimple_remove_stmt_histograms (struct function *, tree); void gimple_duplicate_stmt_histograms (struct function *, tree, struct function *, tree); void verify_histograms (void); void free_histograms (void); +void stringop_block_profile (tree, unsigned int *, HOST_WIDE_INT *); /* In profile.c. */ extern void init_branch_prob (void); -- 2.7.4