From 79a497cd1dd6b3f6b0d9195bdbff2eb13c8919dd Mon Sep 17 00:00:00 2001 From: Janis Johnson Date: Fri, 10 May 2002 20:48:32 +0000 Subject: [PATCH] loop.c: (PREFETCH_EXTREME_DIFFERENCE, PREFETCH_BEFORE_LOOP): New. * loop.c: (PREFETCH_EXTREME_DIFFERENCE, PREFETCH_BEFORE_LOOP): New. (PREFETCH_CONDITIONAL): Renamed from PREFETCH_NOT_ALWAYS. (struct prefetch_info): Fix spelling of member bytes_accessed. (emit_prefetch_instructions): Make dump messages more regular; restructure code to add more dump messages; use new macros for heuristics. (There are no code generation changes in any of this). From-SVN: r53371 --- gcc/ChangeLog | 9 +++ gcc/loop.c | 200 ++++++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 140 insertions(+), 69 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a4ef701..bc20fd3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2002-05-10 Janis Johnson + + * loop.c: (PREFETCH_EXTREME_DIFFERENCE, PREFETCH_BEFORE_LOOP): New. + (PREFETCH_CONDITIONAL): Renamed from PREFETCH_NOT_ALWAYS. + (struct prefetch_info): Fix spelling of member bytes_accessed. + (emit_prefetch_instructions): Make dump messages more regular; + restructure code to add more dump messages; use new macros for + heuristics. (There are no code generation changes in any of this). + 2002-05-10 David S. Miller * rtl.h (INSN_ANNULLED_BRANCH_P): Accept INSN too, update comment. diff --git a/gcc/loop.c b/gcc/loop.c index 5f18773..3735d79 100644 --- a/gcc/loop.c +++ b/gcc/loop.c @@ -124,14 +124,26 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #define PREFETCH_EXTREME_STRIDE 4096 #endif +/* Define a limit to how far apart indices can be and still be merged + into a single prefetch. */ +#ifndef PREFETCH_EXTREME_DIFFERENCE +#define PREFETCH_EXTREME_DIFFERENCE 4096 +#endif + +/* Issue prefetch instructions before the loop to fetch data to be used + in the first few loop iterations. */ +#ifndef PREFETCH_BEFORE_LOOP +#define PREFETCH_BEFORE_LOOP 1 +#endif + /* Do not handle reversed order prefetches (negative stride). */ #ifndef PREFETCH_NO_REVERSE_ORDER #define PREFETCH_NO_REVERSE_ORDER 1 #endif -/* Prefetch even if the GIV is not always executed. */ -#ifndef PREFETCH_NOT_ALWAYS -#define PREFETCH_NOT_ALWAYS 0 +/* Prefetch even if the GIV is in conditional code. */ +#ifndef PREFETCH_CONDITIONAL +#define PREFETCH_CONDITIONAL 0 #endif /* If the loop requires more prefetches than the target can process in @@ -3569,7 +3581,7 @@ struct prefetch_info HOST_WIDE_INT index; HOST_WIDE_INT stride; /* Prefetch stride in bytes in each iteration. */ - unsigned int bytes_accesed; /* Sum of sizes of all acceses to this + unsigned int bytes_accessed; /* Sum of sizes of all acceses to this prefetch area in one iteration. */ unsigned int total_bytes; /* Total bytes loop will access in this block. This is set only for loops with known @@ -3781,18 +3793,19 @@ emit_prefetch_instructions (loop) if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call) { if (loop_dump_stream) - fprintf (loop_dump_stream, "Prefetch: ignoring loop - has call.\n"); + fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n"); return; } + /* Don't prefetch in loops known to have few iterations. */ if (PREFETCH_NO_LOW_LOOPCNT && LOOP_INFO (loop)->n_iterations && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT) { if (loop_dump_stream) fprintf (loop_dump_stream, - "Prefetch: ignoring loop - not enought iterations.\n"); + "Prefetch: ignoring loop: not enough iterations.\n"); return; } @@ -3813,14 +3826,13 @@ emit_prefetch_instructions (loop) BIVs that are executed multiple times; such BIVs ought to be handled in the nested loop. We accept not_every_iteration BIVs, since these only result in larger strides and make our - heuristics more conservative. - ??? What does the last sentence mean? */ + heuristics more conservative. */ if (GET_CODE (biv->add_val) != CONST_INT) { if (loop_dump_stream) { fprintf (loop_dump_stream, - "Prefetch: biv %i ignored: non-constant addition at insn %i:", + "Prefetch: ignoring biv %d: non-constant addition at insn %d:", REGNO (biv->src_reg), INSN_UID (biv->insn)); print_rtl (loop_dump_stream, biv->add_val); fprintf (loop_dump_stream, "\n"); @@ -3833,7 +3845,7 @@ emit_prefetch_instructions (loop) if (loop_dump_stream) { fprintf (loop_dump_stream, - "Prefetch: biv %i ignored: maybe_multiple at insn %i:", + "Prefetch: ignoring biv %d: maybe_multiple at insn %i:", REGNO (biv->src_reg), INSN_UID (biv->insn)); print_rtl (loop_dump_stream, biv->add_val); fprintf (loop_dump_stream, "\n"); @@ -3854,41 +3866,64 @@ emit_prefetch_instructions (loop) rtx temp; HOST_WIDE_INT index = 0; int add = 1; - HOST_WIDE_INT stride; + HOST_WIDE_INT stride = 0; + int stride_sign = 1; struct check_store_data d; + const char *ignore_reason = NULL; int size = GET_MODE_SIZE (GET_MODE (iv)); - /* There are several reasons why an induction variable is not - interesting to us. */ - if (iv->giv_type != DEST_ADDR - /* We are interested only in constant stride memory references - in order to be able to compute density easily. */ - || GET_CODE (iv->mult_val) != CONST_INT - /* Don't handle reversed order prefetches, since they are usually - ineffective. Later we may be able to reverse such BIVs. */ - || (PREFETCH_NO_REVERSE_ORDER - && (stride = INTVAL (iv->mult_val) * basestride) < 0) - /* Prefetching of accesses with such an extreme stride is probably - not worthwhile, either. */ - || (PREFETCH_NO_EXTREME_STRIDE - && stride > PREFETCH_EXTREME_STRIDE) + /* See whether an induction variable is interesting to us and if + not, report the reason. */ + if (iv->giv_type != DEST_ADDR) + ignore_reason = "giv is not a destination address"; + + /* We are interested only in constant stride memory references + in order to be able to compute density easily. */ + else if (GET_CODE (iv->mult_val) != CONST_INT) + ignore_reason = "stride is not constant"; + + else + { + stride = INTVAL (iv->mult_val) * basestride; + if (stride < 0) + { + stride = -stride; + stride_sign = -1; + } + + /* On some targets, reversed order prefetches are not + worthwhile. */ + if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0) + ignore_reason = "reversed order stride"; + + /* Prefetch of accesses with an extreme stride might not be + worthwhile, either. */ + else if (PREFETCH_NO_EXTREME_STRIDE + && stride > PREFETCH_EXTREME_STRIDE) + ignore_reason = "extreme stride"; + /* Ignore GIVs with varying add values; we can't predict the - value for the next iteration. */ - || !loop_invariant_p (loop, iv->add_val) + value for the next iteration. */ + else if (!loop_invariant_p (loop, iv->add_val)) + ignore_reason = "giv has varying add value"; + /* Ignore GIVs in the nested loops; they ought to have been - handled already. */ - || iv->maybe_multiple) + handled already. */ + else if (iv->maybe_multiple) + ignore_reason = "giv is in nested loop"; + } + + if (ignore_reason != NULL) { if (loop_dump_stream) - fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %i\n", - INSN_UID (iv->insn)); + fprintf (loop_dump_stream, + "Prefetch: ignoring giv at %d: %s.\n", + INSN_UID (iv->insn), ignore_reason); continue; } /* Determine the pointer to the basic array we are examining. It is the sum of the BIV's initial value and the GIV's add_val. */ - index = 0; - address = copy_rtx (iv->add_val); temp = copy_rtx (bl->initial_value); @@ -3901,7 +3936,7 @@ emit_prefetch_instructions (loop) /* When the GIV is not always executed, we might be better off by not dirtying the cache pages. */ - if (PREFETCH_NOT_ALWAYS || iv->always_executed) + if (PREFETCH_CONDITIONAL || iv->always_executed) note_stores (PATTERN (iv->insn), check_store, &d); /* Attempt to find another prefetch to the same array and see if we @@ -3914,13 +3949,14 @@ emit_prefetch_instructions (loop) just with small difference in constant indexes), merge the prefetches. Just do the later and the earlier will get prefetched from previous iteration. - 4096 is artificial threshold. It should not be too small, + The artificial threshold should not be too small, but also not bigger than small portion of memory usually traversed by single loop. */ - if (index >= info[i].index && index - info[i].index < 4096) + if (index >= info[i].index + && index - info[i].index < PREFETCH_EXTREME_DIFFERENCE) { info[i].write |= d.mem_write; - info[i].bytes_accesed += size; + info[i].bytes_accessed += size; info[i].index = index; info[i].giv = iv; info[i].class = bl; @@ -3929,10 +3965,11 @@ emit_prefetch_instructions (loop) break; } - if (index < info[i].index && info[i].index - index < 4096) + if (index < info[i].index + && info[i].index - index < PREFETCH_EXTREME_DIFFERENCE) { info[i].write |= d.mem_write; - info[i].bytes_accesed += size; + info[i].bytes_accessed += size; add = 0; break; } @@ -3947,7 +3984,7 @@ emit_prefetch_instructions (loop) info[num_prefetches].stride = stride; info[num_prefetches].base_address = address; info[num_prefetches].write = d.mem_write; - info[num_prefetches].bytes_accesed = size; + info[num_prefetches].bytes_accessed = size; num_prefetches++; if (num_prefetches >= MAX_PREFETCHES) { @@ -3962,8 +3999,10 @@ emit_prefetch_instructions (loop) for (i = 0; i < num_prefetches; i++) { - /* Attempt to calculate the number of bytes fetched by the loop. - Avoid overflow. */ + int density; + + /* Attempt to calculate the total number of bytes fetched by all + iterations of the loop. Avoid overflow. */ if (LOOP_INFO (loop)->n_iterations && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride) >= LOOP_INFO (loop)->n_iterations)) @@ -3971,19 +4010,29 @@ emit_prefetch_instructions (loop) else info[i].total_bytes = 0xffffffff; - /* Prefetch is worthwhile only when the loads/stores are dense. */ - if (PREFETCH_ONLY_DENSE_MEM - && info[i].bytes_accesed * 256 / info[i].stride > PREFETCH_DENSE_MEM - && (info[i].total_bytes / PREFETCH_BLOCK - >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN)) - { - info[i].prefetch_before_loop = 1; - info[i].prefetch_in_loop - = (info[i].total_bytes / PREFETCH_BLOCK - > PREFETCH_BLOCKS_BEFORE_LOOP_MAX); - } + density = info[i].bytes_accessed * 100 / info[i].stride; + + /* Prefetch might be worthwhile only when the loads/stores are dense. */ + if (PREFETCH_ONLY_DENSE_MEM) + if (density * 256 > PREFETCH_DENSE_MEM * 100 + && (info[i].total_bytes / PREFETCH_BLOCK + >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN)) + { + info[i].prefetch_before_loop = 1; + info[i].prefetch_in_loop + = (info[i].total_bytes / PREFETCH_BLOCK + > PREFETCH_BLOCKS_BEFORE_LOOP_MAX); + } + else + { + info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0; + if (loop_dump_stream) + fprintf (loop_dump_stream, + "Prefetch: ignoring giv at %d: %d%% density is too low.\n", + INSN_UID (info[i].giv->insn), density); + } else - info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0; + info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1; if (info[i].prefetch_in_loop) { @@ -3999,23 +4048,30 @@ emit_prefetch_instructions (loop) { for (i = 0; i < num_prefetches; i++) { - fprintf (loop_dump_stream, "Prefetch insn %i address: ", + if (info[i].prefetch_in_loop == 0 + && info[i].prefetch_before_loop == 0) + continue; + fprintf (loop_dump_stream, "Prefetch insn: %d", INSN_UID (info[i].giv->insn)); - print_rtl (loop_dump_stream, info[i].base_address); - fprintf (loop_dump_stream, " Index: "); - fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, info[i].index); - fprintf (loop_dump_stream, " stride: "); - fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, info[i].stride); fprintf (loop_dump_stream, - " density: %i%% total_bytes: %u%sin loop: %s before: %s\n", - (int) (info[i].bytes_accesed * 100 / info[i].stride), - info[i].total_bytes, - info[i].write ? " read/write " : " read only ", + "; in loop: %s; before: %s; %s\n", info[i].prefetch_in_loop ? "yes" : "no", - info[i].prefetch_before_loop ? "yes" : "no"); + info[i].prefetch_before_loop ? "yes" : "no", + info[i].write ? "read/write" : "read only"); + fprintf (loop_dump_stream, + " density: %d%%; bytes_accessed: %u; total_bytes: %u\n", + (int) (info[i].bytes_accessed * 100 / info[i].stride), + info[i].bytes_accessed, info[i].total_bytes); + fprintf (loop_dump_stream, " index: "); + fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, info[i].index); + fprintf (loop_dump_stream, "; stride: "); + fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, info[i].stride); + fprintf (loop_dump_stream, "; address: "); + print_rtl (loop_dump_stream, info[i].base_address); + fprintf (loop_dump_stream, "\n"); } - fprintf (loop_dump_stream, "Real prefetches needed: %i (write: %i)\n", + fprintf (loop_dump_stream, "Real prefetches needed: %d (write: %d)\n", num_real_prefetches, num_real_write_prefetches); } @@ -4024,8 +4080,14 @@ emit_prefetch_instructions (loop) ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches; - if (!ahead) - return; + if (ahead == 0) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "Prefetch: ignoring loop: ahead is zero; %d < %d\n", + SIMULTANEOUS_PREFETCHES, num_real_prefetches); + return; + } for (i = 0; i < num_prefetches; i++) { @@ -4077,7 +4139,7 @@ emit_prefetch_instructions (loop) } } - if (info[i].prefetch_before_loop) + if (PREFETCH_BEFORE_LOOP && info[i].prefetch_before_loop) { int y; -- 2.7.4