Disable threading through latches until after loop optimizations.

author Aldy Hernandez <aldyh@redhat.com>

Thu, 9 Sep 2021 18:30:28 +0000 (20:30 +0200)

committer Aldy Hernandez <aldyh@redhat.com>

Fri, 10 Sep 2021 16:41:51 +0000 (18:41 +0200)
author Aldy Hernandez <aldyh@redhat.com>
Thu, 9 Sep 2021 18:30:28 +0000 (20:30 +0200)
committer Aldy Hernandez <aldyh@redhat.com>
Fri, 10 Sep 2021 16:41:51 +0000 (18:41 +0200)
diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc

index a4fa3b2..c616b65 100644 (file)
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -127,6 +127,9 @@ path_range_query::internal_range_of_expr (irange &r, tree name, gimple *stmt)
    basic_block bb = stmt ? gimple_bb (stmt) : exit_bb ();
    if (stmt && range_defined_in_block (r, name, bb))
      {
+      if (TREE_CODE (name) == SSA_NAME)
+       r.intersect (gimple_range_global (name));
+
        set_cache (r, name);
        return true;
      }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c

index e1c33e8..823ada9 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */ 
-/* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-dom2-stats -fdisable-tree-ethread" } */
+/* { dg-options "-O2 -fdump-tree-thread3-stats -fdump-tree-dom2-stats -fdisable-tree-ethread" } */
  
  void foo();
  void bla();
@@ -26,4 +26,4 @@ void thread_latch_through_header (void)
     case.  And we want to thread through the header as well.  These
     are both caught by threading in DOM.  */
  /* { dg-final { scan-tree-dump-not "Jumps threaded" "dom2"} } */
-/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 1 "thread1"} } */
+/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 1 "thread3"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c

index c7bf867..ee46759 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
@@ -1,41 +1,8 @@
  /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-thread1-details -fdump-tree-thread2-details" } */
+/* { dg-options "-O2 -fdump-tree-thread1-details -fdump-tree-thread3-details" } */
  
-/* All the threads in the thread1 dump start on a X->BB12 edge, as can
-   be seen in the dump:
-
-     Registering FSM jump thread: (x, 12) incoming edge; ...
-     etc
-     etc
-
-   Before the new evrp, we were threading paths that started at the
-   following edges:
-
-      Registering FSM jump thread: (10, 12) incoming edge
-      Registering FSM jump thread:  (6, 12) incoming edge
-      Registering FSM jump thread:  (9, 12) incoming edge
-
-   This was because the PHI at BB12 had constant values coming in from
-   BB10, BB6, and BB9:
-
-   # state_10 = PHI <state_11(7), 0(10), state_11(5), 1(6), state_11(8), 2(9), state_11(11)>
-
-   Now with the new evrp, we get:
-
-   # state_10 = PHI <0(7), 0(10), state_11(5), 1(6), 0(8), 2(9), 1(11)>
-
-   Thus, we have 3 more paths that are known to be constant and can be
-   threaded.  Which means that by the second threading pass, we can
-   only find one profitable path.
-
-   For the record, all these extra constants are better paths coming
-   out of switches.  For example:
-
-     SWITCH_BB -> BBx -> BBy -> BBz -> PHI
-
-   We now know the value of the switch index at PHI.  */
  /* { dg-final { scan-tree-dump-times "Registering FSM jump" 6 "thread1" } } */
-/* { dg-final { scan-tree-dump-times "Registering FSM jump" 1 "thread2" } } */
+/* { dg-final { scan-tree-dump-times "Registering FSM jump" 1 "thread3" } } */
  
  int sum0, sum1, sum2, sum3;
  int foo (char *s, char **ret)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c

index 5fc2145..ba07942 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
@@ -1,23 +1,8 @@
  /* { dg-do compile } */
  /* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-thread2-stats -fdump-tree-dom2-stats -fdump-tree-thread3-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats -fno-guess-branch-probability" } */
  
-/* Here we have the same issue as was commented in ssa-dom-thread-6.c.
-   The PHI coming into the threader has a lot more constants, so the
-   threader can thread more paths.
-
-$ diff clean/a.c.105t.mergephi2 a.c.105t.mergephi2 
-252c252
-<   # s_50 = PHI <s_49(10), 5(14), s_51(18), s_51(22), 1(26), 1(29), 1(31), s_51(5), 4(12), 1(15), 5(17), 1(19), 3(21), 1(23), 6(25), 7(28), s_51(30)>
----
->   # s_50 = PHI <s_49(10), 5(14), 4(18), 5(22), 1(26), 1(29), 1(31), s_51(5), 4(12), 1(15), 5(17), 1(19), 3(21), 1(23), 6(25), 7(28), 7(30)>
-272a273
-
-  I spot checked a few and they all have the same pattern.  We are
-  basically tracking the switch index better through multiple
-  paths.  */
-
  /* { dg-final { scan-tree-dump "Jumps threaded: 18"  "thread1" } } */
-/* { dg-final { scan-tree-dump "Jumps threaded: 8" "thread2" } } */
+/* { dg-final { scan-tree-dump "Jumps threaded: 8" "thread3" } } */
  /* { dg-final { scan-tree-dump-not "Jumps threaded"  "dom2" } } */
  
  /* aarch64 has the highest CASE_VALUES_THRESHOLD in GCC.  It's high enough
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h

index 83941bc..eb75eb1 100644 (file)
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -225,6 +225,8 @@ protected:
                                                    been optimized.  */
  #define PROP_gimple_lomp_dev   (1 << 16)       /* done omp_device_lower */
  #define PROP_rtl_split_insns   (1 << 17)       /* RTL has insns split.  */
+#define PROP_loop_opts_done    (1 << 18)       /* SSA loop optimizations
+                                                  have completed.  */
  
  #define PROP_gimple \
    (PROP_gimple_any | PROP_gimple_lcf | PROP_gimple_leh | PROP_gimple_lomp)
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c

index 0cc4b3b..1bbf2f1 100644 (file)
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -540,7 +540,7 @@ const pass_data pass_data_tree_loop_done =
    OPTGROUP_LOOP, /* optinfo_flags */
    TV_NONE, /* tv_id */
    PROP_cfg, /* properties_required */
-  0, /* properties_provided */
+  PROP_loop_opts_done, /* properties_provided */
    0, /* properties_destroyed */
    0, /* todo_flags_start */
    TODO_cleanup_cfg, /* todo_flags_finish */
diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c

index 449232c..e729923 100644 (file)
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "ssa.h"
  #include "tree-cfgcleanup.h"
  #include "tree-pretty-print.h"
+#include "cfghooks.h"
  
  // Path registry for the backwards threader.  After all paths have been
  // registered with register_path(), thread_through_all_blocks() is called
@@ -564,7 +565,10 @@ back_threader_registry::thread_through_all_blocks (bool may_peel_loop_headers)
     TAKEN_EDGE, otherwise it is NULL.
  
     CREATES_IRREDUCIBLE_LOOP, if non-null is set to TRUE if threading this path
-   would create an irreducible loop.  */
+   would create an irreducible loop.
+
+   ?? It seems we should be able to loosen some of the restrictions in
+   this function after loop optimizations have run.  */
  
  bool
  back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
@@ -725,7 +729,11 @@ back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
          the last entry in the array when determining if we thread
          through the loop latch.  */
        if (loop->latch == bb)
-       threaded_through_latch = true;
+       {
+         threaded_through_latch = true;
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           fprintf (dump_file, " (latch)");
+       }
      }
  
    gimple *stmt = get_gimple_control_stmt (m_path[0]);
@@ -845,6 +853,22 @@ back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
                  "a multiway branch.\n");
        return false;
      }
+
+  /* Threading through an empty latch would cause code to be added to
+     the latch.  This could alter the loop form sufficiently to cause
+     loop optimizations to fail.  Disable these threads until after
+     loop optimizations have run.  */
+  if ((threaded_through_latch
+       || (taken_edge && taken_edge->dest == loop->latch))
+      && !(cfun->curr_properties & PROP_loop_opts_done)
+      && empty_block_p (loop->latch))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file,
+                "  FAIL: FSM Thread through latch before loop opts would create non-empty latch\n");
+      return false;
+
+    }
    return true;
  }
author	Aldy Hernandez <aldyh@redhat.com>
	Thu, 9 Sep 2021 18:30:28 +0000 (20:30 +0200)
committer	Aldy Hernandez <aldyh@redhat.com>
	Fri, 10 Sep 2021 16:41:51 +0000 (18:41 +0200)
gcc/gimple-range-path.cc		patch \| blob \| history
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c		patch \| blob \| history
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c		patch \| blob \| history
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c		patch \| blob \| history
gcc/tree-pass.h		patch \| blob \| history
gcc/tree-ssa-loop.c		patch \| blob \| history
gcc/tree-ssa-threadbackward.c		patch \| blob \| history