PR tree-optimization/38785
authormkuvyrkov <mkuvyrkov@138bc75d-0d04-0410-961f-82ee72b054a4>
Sat, 28 Apr 2012 01:56:54 +0000 (01:56 +0000)
committermkuvyrkov <mkuvyrkov@138bc75d-0d04-0410-961f-82ee72b054a4>
Sat, 28 Apr 2012 01:56:54 +0000 (01:56 +0000)
* common.opt (ftree-partial-pre): New option.
* doc/invoke.texi: Document it.
* opts.c (default_options_table): Initialize flag_tree_partial_pre.
* tree-ssa-pre.c (do_partial_partial_insertion): Insert only if it will
benefit speed path.
(execute_pre): Use flag_tree_partial_pre.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@186928 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/common.opt
gcc/doc/invoke.texi
gcc/opts.c
gcc/tree-ssa-pre.c

index 914801a..713c23f 100644 (file)
@@ -1,3 +1,15 @@
+2012-04-28  Joern Rennecke  <joern.rennecke@embecosm.com>
+           Steven Bosscher  <steven@gcc.gnu.org>
+           Maxim Kuvyrkov  <maxim@codesourcery.com>
+
+       PR tree-optimization/38785
+       * common.opt (ftree-partial-pre): New option.
+       * doc/invoke.texi: Document it.
+       * opts.c (default_options_table): Initialize flag_tree_partial_pre.
+       * tree-ssa-pre.c (do_partial_partial_insertion): Insert only if it will
+       benefit speed path.
+       (execute_pre): Use flag_tree_partial_pre.
+
 2012-04-27  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>
 
        PR target/52999
index 37e806a..14c88bf 100644 (file)
@@ -2033,6 +2033,10 @@ ftree-pre
 Common Report Var(flag_tree_pre) Optimization
 Enable SSA-PRE optimization on trees
 
+ftree-partial-pre
+Common Report Var(flag_tree_partial_pre) Optimization
+In SSA-PRE optimization on trees, enable partial-partial redundancy elimination
+
 ftree-pta
 Common Report Var(flag_tree_pta) Init(1) Optimization
 Perform function-local points-to analysis on trees.
index 280fac3..bd7ea3b 100644 (file)
@@ -410,7 +410,8 @@ Objective-C and Objective-C++ Dialects}.
 -ftree-loop-if-convert-stores -ftree-loop-im @gol
 -ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
 -ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
--ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol
+-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
+-ftree-reassoc @gol
 -ftree-sink -ftree-sra -ftree-switch-conversion -ftree-tail-merge @gol
 -ftree-ter -ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
 -funit-at-a-time -funroll-all-loops -funroll-loops @gol
@@ -6294,8 +6295,8 @@ invoking @option{-O2} on programs that use computed gotos.
 Optimize yet more.  @option{-O3} turns on all optimizations specified
 by @option{-O2} and also turns on the @option{-finline-functions},
 @option{-funswitch-loops}, @option{-fpredictive-commoning},
-@option{-fgcse-after-reload}, @option{-ftree-vectorize} and
-@option{-fipa-cp-clone} options.
+@option{-fgcse-after-reload}, @option{-ftree-vectorize},
+@option{-ftree-partial-pre} and @option{-fipa-cp-clone} options.
 
 @item -O0
 @opindex O0
@@ -7090,6 +7091,11 @@ at @option{-O} and higher.
 Perform partial redundancy elimination (PRE) on trees.  This flag is
 enabled by default at @option{-O2} and @option{-O3}.
 
+@item -ftree-partial-pre
+@opindex ftree-partial-pre
+Make partial redundancy elimination (PRE) more aggressive.  This flag is
+enabled by default at @option{-O3}.
+
 @item -ftree-forwprop
 @opindex ftree-forwprop
 Perform forward propagation on trees.  This flag is enabled by default
index 4e8b3c0..ab2de8f 100644 (file)
@@ -499,6 +499,7 @@ static const struct default_options default_options_table[] =
     { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_ftree_vectorize, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
+    { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
 
     /* -Ofast adds optimizations to -O3.  */
     { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 },
index e3e55ef..a89856a 100644 (file)
@@ -3774,20 +3774,51 @@ do_partial_partial_insertion (basic_block block, basic_block dom)
                }
              else
                avail[bprime->index] = edoubleprime;
-
            }
 
          /* If we can insert it, it's not the same value
             already existing along every predecessor, and
             it's defined by some predecessor, it is
             partially redundant.  */
-         if (!cant_insert && by_all && dbg_cnt (treepre_insert))
+         if (!cant_insert && by_all)
            {
-             pre_stats.pa_insert++;
-             if (insert_into_preds_of_block (block, get_expression_id (expr),
-                                             avail))
-               new_stuff = true;
-           }
+             edge succ;
+             bool do_insertion = false;
+
+             /* Insert only if we can remove a later expression on a path
+                that we want to optimize for speed.
+                The phi node that we will be inserting in BLOCK is not free,
+                and inserting it for the sake of !optimize_for_speed successor
+                may cause regressions on the speed path.  */
+             FOR_EACH_EDGE (succ, ei, block->succs)
+               {
+                 if (bitmap_set_contains_value (PA_IN (succ->dest), val))
+                   {
+                     if (optimize_edge_for_speed_p (succ))
+                       do_insertion = true;
+                   }
+               }
+
+             if (!do_insertion)
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   {
+                     fprintf (dump_file, "Skipping partial partial redundancy "
+                              "for expression ");
+                     print_pre_expr (dump_file, expr);
+                     fprintf (dump_file, " (%04d), not partially anticipated "
+                              "on any to be optimized for speed edges\n", val);
+                   }
+               }
+             else if (dbg_cnt (treepre_insert))
+               {
+                 pre_stats.pa_insert++;
+                 if (insert_into_preds_of_block (block,
+                                                 get_expression_id (expr),
+                                                 avail))
+                   new_stuff = true;
+               }          
+           } 
          free (avail);
        }
     }
@@ -4948,7 +4979,8 @@ execute_pre (bool do_fre)
 {
   unsigned int todo = 0;
 
-  do_partial_partial = optimize > 2 && optimize_function_for_speed_p (cfun);
+  do_partial_partial =
+    flag_tree_partial_pre && optimize_function_for_speed_p (cfun);
 
   /* This has to happen before SCCVN runs because
      loop_optimizer_init may create new phis, etc.  */