From dc236397e4d96647c5d72fd9b6d503ca0b187bee Mon Sep 17 00:00:00 2001 From: Michael Matz Date: Fri, 8 Dec 2017 17:41:58 +0000 Subject: [PATCH] re PR tree-optimization/83323 (186.crafty miscompares) Fix PR83323 * gimple-loop-jam (unroll_jam_possible_p): Correct test for head-controlled loops and loop BBs. * common.opt (funroll-and-jam): Remove, instead ... (floop-unroll-and-jam): ... reuse this option. * opts.c (default_options_table): Use OPT_floop_unroll_and_jam. * doc/invoke.texi (-funroll-and-jam): Move docu to ... (-floop-unroll-and-jam): ... this option. testsuite/ * gcc.dg/pr83323.c: New test. * gcc.dg/unroll-and-jam.c: Use -floop-unroll-and-jam. From-SVN: r255514 --- gcc/ChangeLog | 11 ++++++++++ gcc/common.opt | 8 ++----- gcc/doc/invoke.texi | 8 +++---- gcc/gimple-loop-jam.c | 41 +++++++++++++++++------------------ gcc/opts.c | 2 +- gcc/testsuite/ChangeLog | 6 +++++ gcc/testsuite/gcc.dg/pr83323.c | 23 ++++++++++++++++++++ gcc/testsuite/gcc.dg/unroll-and-jam.c | 2 +- 8 files changed, 67 insertions(+), 34 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr83323.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c0d1662..35c27be 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2017-12-08 Michael Matz + + Fix PR tree-optimization/83323 + * gimple-loop-jam (unroll_jam_possible_p): Correct test for + head-controlled loops and loop BBs. + * common.opt (funroll-and-jam): Remove, instead ... + (floop-unroll-and-jam): ... reuse this option. + * opts.c (default_options_table): Use OPT_floop_unroll_and_jam. + * doc/invoke.texi (-funroll-and-jam): Move docu to ... + (-floop-unroll-and-jam): ... this option. + 2017-12-08 Jakub Jelinek * ipa-polymorphic-call.c (noncall_stmt_may_be_vtbl_ptr_store): Fix diff --git a/gcc/common.opt b/gcc/common.opt index 6fab2ab..57b3cd7 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1512,8 +1512,8 @@ Common Alias(floop-nest-optimize) Enable loop nest transforms. Same as -floop-nest-optimize. floop-unroll-and-jam -Common Alias(floop-nest-optimize) -Enable loop nest transforms. Same as -floop-nest-optimize. +Common Report Var(flag_unroll_jam) Optimization +Perform unroll-and-jam on loops. fgnu-tm Common Report Var(flag_tm) @@ -2695,10 +2695,6 @@ fsplit-loops Common Report Var(flag_split_loops) Optimization Perform loop splitting. -funroll-and-jam -Common Report Var(flag_unroll_jam) Optimization -Perform unroll-and-jam on loops. - funwind-tables Common Report Var(flag_unwind_tables) Optimization Just generate unwind tables for exception handling. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 50740c5..1413095 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -437,7 +437,7 @@ Objective-C and Objective-C++ Dialects}. -ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol -ftree-switch-conversion -ftree-tail-merge @gol -ftree-ter -ftree-vectorize -ftree-vrp -funconstrained-commons @gol --funit-at-a-time -funroll-all-loops -funroll-loops -funroll-and-jam @gol +-funit-at-a-time -funroll-all-loops -funroll-loops @gol -funsafe-math-optimizations -funswitch-loops @gol -fipa-ra -fvariable-expansion-in-unroller -fvect-cost-model -fvpt @gol -fweb -fwhole-program -fwpa -fuse-linker-plugin @gol @@ -8511,11 +8511,9 @@ at @option{-O} and higher. @item -ftree-loop-linear @itemx -floop-strip-mine @itemx -floop-block -@itemx -floop-unroll-and-jam @opindex ftree-loop-linear @opindex floop-strip-mine @opindex floop-block -@opindex floop-unroll-and-jam Perform loop nest optimizations. Same as @option{-floop-nest-optimize}. To use this code transformation, GCC has to be configured with @option{--with-isl} to enable the Graphite loop @@ -9789,8 +9787,8 @@ for one side of the iteration space and false for the other. Move branches with loop invariant conditions out of the loop, with duplicates of the loop on both branches (modified according to result of the condition). -@item -funroll-and-jam -@opindex funroll-and-jam +@item -floop-unroll-and-jam +@opindex floop-unroll-and-jam Apply unroll and jam transoformations on feasible loops. In a loop nest this unrolls the outer loop by some factor and fuses the resulting multiple inner loops. diff --git a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c index 32f813b..8ed1bef 100644 --- a/gcc/gimple-loop-jam.c +++ b/gcc/gimple-loop-jam.c @@ -152,7 +152,7 @@ merge_loop_tree (struct loop *loop, struct loop *old) free (bbs); } -/* BB exits the outer loop of an unroll-and-jam situation. +/* BB is part of the outer loop of an unroll-and-jam situation. Check if any statements therein would prevent the transformation. */ static bool @@ -160,9 +160,10 @@ bb_prevents_fusion_p (basic_block bb) { gimple_stmt_iterator gsi; /* BB is duplicated by outer unrolling and then all N-1 first copies - move into the body of the fused inner loop. The last copy remains - the exit block of the outer loop and is still outside the inner loop - also after fusion. We can't allow this for some effects of BB: + move into the body of the fused inner loop. If BB exits the outer loop + the last copy still doess so, and the first N-1 copies are cancelled + by loop unrolling, so also after fusion it's the exit block. + But there might be other reasons that prevent fusion: * stores or unknown side-effects prevent fusion * loads don't * computations into SSA names: these aren't problematic. Their @@ -204,6 +205,19 @@ unroll_jam_possible_p (struct loop *outer, struct loop *loop) if (outer->inner != loop || loop->next) return false; + /* Prevent head-controlled inner loops, that we usually have. + The guard block would need to be accepted + (invariant condition either entering or skipping the loop), + without also accepting arbitrary control flow. When unswitching + ran before us (as with -O3) this won't be a problem because its + outer loop unswitching will have moved out the invariant condition. + + If we do that we need to extend fuse_loops() to cope with this + by threading through the (still invariant) copied condition + between the two loop copies. */ + if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header)) + return false; + /* The number of iterations of the inner loop must be loop invariant with respect to the outer loop. */ if (!number_of_iterations_exit (loop, single_exit (loop), &niter, @@ -218,23 +232,8 @@ unroll_jam_possible_p (struct loop *outer, struct loop *loop) n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun)); for (i = 0; i < n; i++) - { - if (bbs[i]->loop_father == outer - && bbs[i] != outer->latch && bbs[i] != outer->header - && (!loop_exits_from_bb_p (outer, bbs[i]) - || bb_prevents_fusion_p (bbs[i]))) - break; - /* XXX Note that the above disallows head-controlled inner loops, - that we usually have. The guard block would need to be accepted - (invariant condition either entering or skipping the loop), - without also accepting arbitrary control flow. When unswitching - ran before us (as with -O3) this won't be a problem because its - outer loop unswitching will have moved out the invariant condition. - - If we do that we need to extend fuse_loops() to cope with this - by threading through the (still invariant) copied condition - between the two loop copies. */ - } + if (bbs[i]->loop_father == outer && bb_prevents_fusion_p (bbs[i])) + break; free (bbs); if (i != n) return false; diff --git a/gcc/opts.c b/gcc/opts.c index 98fbf53..a157b5f 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -536,7 +536,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 }, - { OPT_LEVELS_3_PLUS, OPT_funroll_and_jam, NULL, 1 }, + { OPT_LEVELS_3_PLUS, OPT_floop_unroll_and_jam, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 }, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6a68ea6..396e26a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-12-08 Michael Matz + + Fix PR tree-optimization/83323 + * gcc.dg/pr83323.c: New test. + * gcc.dg/unroll-and-jam.c: Use -floop-unroll-and-jam. + 2017-12-08 Richard Biener PR middle-end/81782 diff --git a/gcc/testsuite/gcc.dg/pr83323.c b/gcc/testsuite/gcc.dg/pr83323.c new file mode 100644 index 0000000..6111745 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr83323.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -floop-unroll-and-jam --param unroll-jam-min-percent=0" } */ +int x[1024], y[1024]; + +void __attribute__((noipa)) foo () +{ + for (int i = 0; i < 1024; ++i) + { + x[i] = 0; + for (int j = 0; j < 1024; ++j) + if (!y[j]) + x[i] = 1; + } +} + +int main() +{ + y[1023] = 1; + foo (); + if (x[1] != 1) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/unroll-and-jam.c b/gcc/testsuite/gcc.dg/unroll-and-jam.c index 59d60ab..70910d3 100644 --- a/gcc/testsuite/gcc.dg/unroll-and-jam.c +++ b/gcc/testsuite/gcc.dg/unroll-and-jam.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O3 -funroll-and-jam --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */ +/* { dg-options "-O3 -floop-unroll-and-jam --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */ /* { dg-require-effective-target int32plus } */ #include -- 2.7.4