/* Value histograms attached to particular statements. */
htab_t GTY((skip)) value_histograms;
+ /* Unlike normal TODO_flags, which are handled right at the
+ beginning or the end of a single pass execution, pending_TODOs
+ are passed down the pipeline until one of their consumers can
+ perform the requested action. Consumers should then clear the
+ flags for the actions that they have taken. */
+ unsigned int pending_TODOs;
+
/* For function.c. */
/* Points to the FUNCTION_DECL of this function. */
return new pass_late_compilation (ctxt);
}
+/* Pre-SLP scalar cleanup. This pass hosts several cleanup sub-passes, such as FRE and DSE. */
+namespace {
+
+/* Static description of the pre-SLP scalar cleanup pass. */
+const pass_data pass_data_pre_slp_scalar_cleanup =
+{
+  GIMPLE_PASS,               /* type */
+  "*pre_slp_scalar_cleanup", /* name */
+  OPTGROUP_LOOP,             /* optinfo_flags */
+  TV_SCALAR_CLEANUP,         /* tv_id */
+  PROP_cfg | PROP_ssa,       /* properties_required */
+  0,                         /* properties_provided */
+  0,                         /* properties_destroyed */
+  0,                         /* todo_flags_start */
+  0,                         /* todo_flags_finish */
+};
+
+/* Pass gated on a pending scalar-cleanup request.  Its own execute
+   hook performs no transformation; it only acknowledges the request
+   by clearing the corresponding pending_TODOs bit. */
+class pass_pre_slp_scalar_cleanup : public gimple_opt_pass
+{
+public:
+  pass_pre_slp_scalar_cleanup (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_pre_slp_scalar_cleanup, ctxt)
+  {}
+
+  /* Run only when flag_tree_slp_vectorize is set and FUN has the
+     PENDING_TODO_force_next_scalar_cleanup bit pending. */
+  virtual bool
+  gate (function *fun)
+  {
+    if (!flag_tree_slp_vectorize)
+      return false;
+
+    return (fun->pending_TODOs
+            & PENDING_TODO_force_next_scalar_cleanup) != 0;
+  }
+
+  /* Clear the request bit in FUN's pending_TODOs so that it is not
+     seen again further down the pipeline.  Returns no TODO flags. */
+  virtual unsigned int
+  execute (function *fun)
+  {
+    const unsigned int handled = PENDING_TODO_force_next_scalar_cleanup;
+    fun->pending_TODOs &= ~handled;
+    return 0;
+  }
+
+}; // class pass_pre_slp_scalar_cleanup
+
+} // anon namespace
+
+/* Allocate a new instance of the pre-SLP scalar cleanup pass for CTXT. */
+gimple_opt_pass *
+make_pass_pre_slp_scalar_cleanup (gcc::context *ctxt)
+{
+  gimple_opt_pass *const cleanup_pass = new pass_pre_slp_scalar_cleanup (ctxt);
+  return cleanup_pass;
+}
/* Set the static pass number of pass PASS to ID and record that
in the mapping from static pass number to pass. */
/* pass_vectorize must immediately follow pass_if_conversion.
Please do not add any other passes in between. */
NEXT_PASS (pass_vectorize);
- PUSH_INSERT_PASSES_WITHIN (pass_vectorize)
+ PUSH_INSERT_PASSES_WITHIN (pass_vectorize)
NEXT_PASS (pass_dce);
- POP_INSERT_PASSES ()
- NEXT_PASS (pass_predcom);
+ POP_INSERT_PASSES ()
+ NEXT_PASS (pass_predcom);
NEXT_PASS (pass_complete_unroll);
+ NEXT_PASS (pass_pre_slp_scalar_cleanup);
+ PUSH_INSERT_PASSES_WITHIN (pass_pre_slp_scalar_cleanup)
+ NEXT_PASS (pass_fre, false /* may_iterate */);
+ NEXT_PASS (pass_dse);
+ POP_INSERT_PASSES ()
NEXT_PASS (pass_slp_vectorize);
NEXT_PASS (pass_loop_prefetch);
/* Run IVOPTs after the last pass that uses data-reference analysis
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-loops -ftree-vectorize -fdump-tree-dse-details" } */
+
+/* Test that the scalar cleanup pass takes effect; mainly check
+ that its sub-pass DSE can remove the dead stores to array
+ tmp. */
+
+#include "stdint.h"
+/* Fill DIFF, a dense row-major I_SIZE x I_SIZE block, with the
+   element-wise differences VAL1[x] - VAL2[x].  After each row VAL1
+   and VAL2 are advanced by their own strides I_VAL1 and I_VAL2. */
+static inline void
+foo (int16_t *diff, int i_size, uint8_t *val1, int i_val1, uint8_t *val2,
+ int i_val2)
+{
+ for (int y = 0; y < i_size; y++)
+ {
+ for (int x = 0; x < i_size; x++)
+ diff[x + y * i_size] = val1[x] - val2[x];
+ val1 += i_val1;
+ val2 += i_val2;
+ }
+}
+/* Compute a 4x4 block of differences of VAL1 and VAL2 via foo, then
+   run two four-iteration loops that combine sums and differences of
+   its elements: the first loop stores its results into TMP, the
+   second reads TMP back and writes the final values to RES.  The
+   dg-final directive of this test scans the dse3 dump for a deleted
+   dead store mentioning tmp. */
+void
+bar (int16_t res[16], uint8_t *val1, uint8_t *val2)
+{
+ int16_t d[16]; /* 4x4 block of differences, filled in by foo. */
+ int16_t tmp[16]; /* Output of the first loop, input of the second. */
+
+ foo (d, 4, val1, 16, val2, 32);
+
+ for (int i = 0; i < 4; i++) /* Reads d[i * 4 + 0..3], writes tmp[0..3 * 4 + i]. */
+ {
+ int s03 = d[i * 4 + 0] + d[i * 4 + 3];
+ int s12 = d[i * 4 + 1] + d[i * 4 + 2];
+ int d03 = d[i * 4 + 0] - d[i * 4 + 3];
+ int d12 = d[i * 4 + 1] - d[i * 4 + 2];
+
+ tmp[0 * 4 + i] = s03 + s12;
+ tmp[1 * 4 + i] = 2 * d03 + d12;
+ tmp[2 * 4 + i] = s03 - s12;
+ tmp[3 * 4 + i] = d03 - 2 * d12;
+ }
+
+ for (int i = 0; i < 4; i++) /* Reads tmp[i * 4 + 0..3], writes res[i * 4 + 0..3]. */
+ {
+ int s03 = tmp[i * 4 + 0] + tmp[i * 4 + 3];
+ int s12 = tmp[i * 4 + 1] + tmp[i * 4 + 2];
+ int d03 = tmp[i * 4 + 0] - tmp[i * 4 + 3];
+ int d12 = tmp[i * 4 + 1] - tmp[i * 4 + 2];
+
+ res[i * 4 + 0] = s03 + s12;
+ res[i * 4 + 1] = 2 * d03 + d12;
+ res[i * 4 + 2] = s03 - s12;
+ res[i * 4 + 3] = d03 - 2 * d12;
+ }
+}
+
+/* { dg-final { scan-tree-dump {Deleted dead store:.*tmp} "dse3" } } */
/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse1"} } */
/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse2"} } */
-/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse3"} } */
+/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse4"} } */
/* { dg-final { scan-tree-dump-times "Deleted dead store" 3 "dse1"} } */
/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse2"} } */
-/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse3"} } */
+/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse4"} } */
a[i] = b[0] + b[1] + b[i+1] + b[i+2];
}
-void bar (int *a, int *b)
+/* Disable pre-slp FRE to avoid unexpected SLP on the epilogue
+ of the 1st loop. */
+void __attribute__((optimize("-fno-tree-fre")))
+bar (int *a, int *b)
{
int i;
for (i = 0; i < (ARR_SIZE - 2); ++i)
DEFTIMEVAR (TV_LOOP_SPLIT , "loop splitting")
DEFTIMEVAR (TV_LOOP_JAM , "unroll and jam")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
+DEFTIMEVAR (TV_SCALAR_CLEANUP , "scalar cleanup")
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization")
#define TODO_verify_all TODO_verify_il
+/* To-do flags for pending_TODOs. */
+
+/* Tell the next scalar cleanup pass that there is
+ work for it to do. */
+#define PENDING_TODO_force_next_scalar_cleanup (1 << 1)
/* Register pass info. */
extern gimple_opt_pass *make_pass_slp_vectorize (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_complete_unroll (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_complete_unrolli (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_pre_slp_scalar_cleanup (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_parallelize_loops (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_loop_prefetch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt);
bitmap_clear (father_bbs);
bitmap_set_bit (father_bbs, loop_father->header->index);
}
+ else if (unroll_outer)
+ /* Trigger scalar cleanup once any outermost loop gets unrolled. */
+ cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
return true;
}