gcc:
authorbonzini <bonzini@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 17 May 2005 09:55:44 +0000 (09:55 +0000)
committerbonzini <bonzini@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 17 May 2005 09:55:44 +0000 (09:55 +0000)
2005-05-17  Paolo Bonzini  <bonzini@gnu.org>

* Makefile.in: Add tree-ssa-math-opts.c.
* expr.c (expand_expr_real_1) <case RDIV_EXPR>: Never emit as a*(1/b).
* fold-const.c (distribute_real_division): New.
(fold_binary) <case PLUS_EXPR, case MINUS_EXPR>: Use it.
* tree-pass.h (pass_cse_reciprocals): New.
* tree-optimize.c (init_tree_optimization_passes): Run it.
* tree-ssa-math-opts.c: New file.
* doc/passes.texi: Document the new pass.

gcc/testsuite:
2005-05-17  Paolo Bonzini  <bonzini@gnu.org>

* gcc.dg/fold-div-1.c, gcc.dg/recip-1.c, gcc.dg/recip-2.c: New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@99826 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/Makefile.in
gcc/doc/passes.texi
gcc/expr.c
gcc/fold-const.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/fold-div-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/recip-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/recip-2.c [new file with mode: 0644]
gcc/tree-optimize.c
gcc/tree-pass.h

index 449a1b4..a456ec1 100644 (file)
@@ -1,3 +1,14 @@
+2005-05-17  Paolo Bonzini  <bonzini@gnu.org>
+
+       * Makefile.in: Add tree-ssa-math-opts.c.
+       * expr.c (expand_expr_real_1) <case RDIV_EXPR>: Never emit as a*(1/b).
+       * fold-const.c (distribute_real_division): New.
+       (fold_binary) <case PLUS_EXPR, case MINUS_EXPR>: Use it.
+       * tree-pass.h (pass_cse_reciprocals): New.
+       * tree-optimize.c (init_tree_optimization_passes): Run it.
+       * tree-ssa-math-opts.c: New file.
+       * doc/passes.texi: Document the new pass.
+
 2005-05-17  Richard Guenther  <rguenth@gcc.gnu.org>
 
        PR middle-end/21595
@@ -21,7 +32,7 @@
 2005-05-17  Hans-Peter Nilsson  <hp@axis.com>
 
        * config/cris/cris.md: Unquote preparation and output statements.
-       (BWD, WD, BW): New, mode-macros.
+       (BWD, WD, BW): New, mode-macros.
        (S, s, m, mm, nbitsm1): New, mode-attrs.
        (szext, shift, shiftrt, ncond, ocond, rcond): New, code-macros.
        (u, su, shlr, slr, ncond, ocond, rcond, rCC, oCC, roCC): New,
index 8d0d15c..f9ed2e8 100644 (file)
@@ -934,7 +934,7 @@ OBJS-common = \
  tree-phinodes.o tree-ssanames.o tree-sra.o tree-complex.o tree-ssa-loop.o \
  tree-ssa-loop-niter.o tree-ssa-loop-manip.o tree-ssa-threadupdate.o      \
  tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o              \
- tree-ssa-loop-ivcanon.o tree-ssa-propagate.o                             \
+ tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-math-opts.o        \
  tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o                   \
  alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o                   \
  cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o            \
@@ -1803,6 +1803,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
    $(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
    $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
    hard-reg-set.h
+tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
+   $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H)
 tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
    $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
    function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
index 8e12366..ad6110e 100644 (file)
@@ -354,7 +354,7 @@ This pass transforms tail recursion into a loop.  It is located in
 
 This pass sinks stores and assignments down the flowgraph closer to it's
 use point.  The pass is located in @file{tree-ssa-sink.c} and is
-described by @code{pass_sink_code}
+described by @code{pass_sink_code}.
 
 @item Partial redundancy elimination
 
@@ -362,6 +362,12 @@ This pass eliminates partially redundant computations, as well as
 performing load motion.  The pass is located in @file{tree-ssa-pre.c}
 and is described by @code{pass_pre}.
 
+Just before partial redundancy elimination, if
+@option{-funsafe-math-optimizations} is on, GCC tries to convert
+divisions to multiplications by the reciprocal.  The pass is located
+in @file{tree-ssa-math-opts.c} and is described by
+@code{pass_cse_reciprocal}.
+
 @item Loop optimization
 
 The main driver of the pass is placed in @file{tree-ssa-loop.c}
index 12fa129..459c248 100644 (file)
@@ -7806,18 +7806,6 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
       return expand_divmod (0, code, mode, op0, op1, target, unsignedp);
 
     case RDIV_EXPR:
-      /* Emit a/b as a*(1/b).  Later we may manage CSE the reciprocal saving
-         expensive divide.  If not, combine will rebuild the original
-         computation.  */
-      if (flag_unsafe_math_optimizations && optimize && !optimize_size
-         && TREE_CODE (type) == REAL_TYPE
-         && !real_onep (TREE_OPERAND (exp, 0)))
-        return expand_expr (build2 (MULT_EXPR, type, TREE_OPERAND (exp, 0),
-                                   build2 (RDIV_EXPR, type,
-                                           build_real (type, dconst1),
-                                           TREE_OPERAND (exp, 1))),
-                           target, tmode, modifier);
-
       goto binop;
 
     case TRUNC_MOD_EXPR:
index deb8780..050d45c 100644 (file)
@@ -3103,6 +3103,46 @@ distribute_bit_expr (enum tree_code code, tree type, tree arg0, tree arg1)
   return fold_build2 (TREE_CODE (arg0), type, common,
                      fold_build2 (code, type, left, right));
 }
+
+/* Knowing that ARG0 and ARG1 are both RDIV_EXPRs, simplify a binary operation
+   with code CODE.  This optimization is unsafe.  */
+static tree
+distribute_real_division (enum tree_code code, tree type, tree arg0, tree arg1)
+{
+  bool mul0 = TREE_CODE (arg0) == MULT_EXPR;
+  bool mul1 = TREE_CODE (arg1) == MULT_EXPR;
+
+  /* (A / C) +- (B / C) -> (A +- B) / C.  */
+  if (mul0 == mul1
+      && operand_equal_p (TREE_OPERAND (arg0, 1),
+                      TREE_OPERAND (arg1, 1), 0))
+    return fold_build2 (mul0 ? MULT_EXPR : RDIV_EXPR, type,
+                       fold_build2 (code, type,
+                                    TREE_OPERAND (arg0, 0),
+                                    TREE_OPERAND (arg1, 0)),
+                       TREE_OPERAND (arg0, 1));
+
+  /* (A / C1) +- (A / C2) -> A * (1 / C1 +- 1 / C2).  */
+  if (operand_equal_p (TREE_OPERAND (arg0, 0),
+                      TREE_OPERAND (arg1, 0), 0)
+      && TREE_CODE (TREE_OPERAND (arg0, 1)) == REAL_CST
+      && TREE_CODE (TREE_OPERAND (arg1, 1)) == REAL_CST)
+    {
+      REAL_VALUE_TYPE r0, r1;
+      r0 = TREE_REAL_CST (TREE_OPERAND (arg0, 1));
+      r1 = TREE_REAL_CST (TREE_OPERAND (arg1, 1));
+      if (!mul0)
+       real_arithmetic (&r0, RDIV_EXPR, &dconst1, &r0);
+      if (!mul1)
+        real_arithmetic (&r1, RDIV_EXPR, &dconst1, &r1);
+      real_arithmetic (&r0, code, &r0, &r1);
+      return fold_build2 (MULT_EXPR, type,
+                         TREE_OPERAND (arg0, 0),
+                         build_real (type, r0));
+    }
+
+  return NULL_TREE;
+}
 \f
 /* Return a BIT_FIELD_REF of type TYPE to refer to BITSIZE bits of INNER
    starting at BITPOS.  The field is unsigned if UNSIGNEDP is nonzero.  */
@@ -7528,6 +7568,12 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1)
                                    fold_convert (type, tem));
            }
 
+          if (flag_unsafe_math_optimizations
+             && (TREE_CODE (arg0) == RDIV_EXPR || TREE_CODE (arg0) == MULT_EXPR)
+             && (TREE_CODE (arg1) == RDIV_EXPR || TREE_CODE (arg1) == MULT_EXPR)
+             && (tem = distribute_real_division (code, type, arg0, arg1)))
+           return tem;
+
          /* Convert x+x into x*2.0.  */
          if (operand_equal_p (arg0, arg1, 0)
              && SCALAR_FLOAT_TYPE_P (type))
@@ -7925,6 +7971,12 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1)
            return fold_convert (type, fold (tem));
        }
 
+      if (flag_unsafe_math_optimizations
+         && (TREE_CODE (arg0) == RDIV_EXPR || TREE_CODE (arg0) == MULT_EXPR)
+         && (TREE_CODE (arg1) == RDIV_EXPR || TREE_CODE (arg1) == MULT_EXPR)
+         && (tem = distribute_real_division (code, type, arg0, arg1)))
+       return tem;
+
       if (TREE_CODE (arg0) == MULT_EXPR
          && TREE_CODE (arg1) == MULT_EXPR
          && (!FLOAT_TYPE_P (type) || flag_unsafe_math_optimizations))
index b6ce026..6fcfe37 100644 (file)
@@ -1,3 +1,7 @@
+2005-05-17  Paolo Bonzini  <bonzini@gnu.org>
+
+       * gcc.dg/fold-div-1.c, gcc.dg/recip-1.c, gcc.dg/recip-2.c: New.
+
 2005-05-17  Richard Guenther  <rguenth@gcc.gnu.org>
 
        PR middle-end/21595
 
 2005-05-02  Paolo Bonzini  <bonzini@gnu.org>
 
-        * gcc.dg/altivec-3.c (vec_store): Do not use the old
-        __builtin_altivec_st_internal_4si built-in.
+       * gcc.dg/altivec-3.c (vec_store): Do not use the old
+       __builtin_altivec_st_internal_4si built-in.
 
 2005-05-02  Mark Mitchell  <mark@codesourcery.com>
 
diff --git a/gcc/testsuite/gcc.dg/fold-div-1.c b/gcc/testsuite/gcc.dg/fold-div-1.c
new file mode 100644 (file)
index 0000000..533908c
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-funsafe-math-optimizations -fdump-tree-gimple" } */
+
+float f(float x)
+{
+  return x/2 + x/3;
+}
+
+float g(float x)
+{
+  return 2/x + 3/x;
+}
+
+float h(float x)
+{
+  return x/2 - x/3;
+}
+
+float i(float x)
+{
+  return 2/x - 3/x;
+}
+
+/* f and h should be turned into multiplications,
+   the divisions in g and i should be grouped together.  */
+
+/* { dg-final { scan-tree-dump-times " \\* " 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times " / " 2 "gimple" } } */
+/* { dg-final { cleanup-tree-dump "gimple" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-1.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-1.c
new file mode 100644 (file)
index 0000000..36f2c23
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
+
+float e(float *x, float *y, float *z)
+{
+  float m = __builtin_sqrt (*x * *x + *y * *y + *z * *z);
+  *x /= m;
+  *y /= m;
+  *z /= m;
+}
+
+/* Look for only one division.  */
+/* { dg-final { scan-tree-dump-times "= .* /" 1 "recip" } } */
+/* { dg-final { cleanup-tree-dump "recip" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c
new file mode 100644 (file)
index 0000000..7d0e97a
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
+
+float e(float a, float b, float c, float d, float e, float f)
+{
+  if (a < b)
+    {
+      a = a + b;
+      c = c + d;
+    }
+
+  /* The PHI nodes for these divisions should be combined.  */
+  e = e / a;
+  f = f / a;
+  
+  a = a / c;
+  b = b / c;
+
+  return a + b + e + f;
+}
+
+/* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
+/* { dg-final { cleanup-tree-dump "recip" } } */
index 66c25de..ec873fb 100644 (file)
@@ -383,6 +383,7 @@ init_tree_optimization_passes (void)
      we add may_alias right after fold builtins
      which can create arbitrary GIMPLE.  */
   NEXT_PASS (pass_may_alias);
+  NEXT_PASS (pass_cse_reciprocals);
   NEXT_PASS (pass_split_crit_edges);
   NEXT_PASS (pass_pre);
   NEXT_PASS (pass_sink_code);
index 83fa184..0806822 100644 (file)
@@ -196,6 +196,7 @@ extern struct tree_opt_pass pass_fold_builtins;
 extern struct tree_opt_pass pass_stdarg;
 extern struct tree_opt_pass pass_early_warn_uninitialized;
 extern struct tree_opt_pass pass_late_warn_uninitialized;
+extern struct tree_opt_pass pass_cse_reciprocals;
 extern struct tree_opt_pass pass_warn_function_return;
 extern struct tree_opt_pass pass_warn_function_noreturn;
 extern struct tree_opt_pass pass_phiopt;