scheduling: optionally create schedules with outermost parallelism
authorSven Verdoolaege <skimo@kotnet.org>
Wed, 25 May 2011 14:57:25 +0000 (16:57 +0200)
committerSven Verdoolaege <skimo@kotnet.org>
Sun, 5 Jun 2011 08:28:44 +0000 (10:28 +0200)
Some applications require not only tilable bands, but also parallel
loops inside those bands.  The default algorithm favors larger bands
over having parallel loops inside the band.  It is always possible
to sacrifice one of the loops in the band to create parallelism by
applying a wavefront transformation.  This typically leads to more
complicated schedules, however.
Instead, when enabled, we now force there to be at least one parallel
loop inside each tilable band.

Signed-off-by: Sven Verdoolaege <skimo@kotnet.org>
include/isl/options.h
isl_options.c
isl_schedule.c

index a60d2c8..bb0344a 100644 (file)
@@ -60,6 +60,7 @@ struct isl_options {
        int                     convex;
 
        int                     schedule_parametric;
+       int                     schedule_outer_parallelism;
 };
 
 ISL_ARG_DECL(isl_options, struct isl_options, isl_options_arg)
index d382af7..72fec13 100644 (file)
@@ -117,6 +117,9 @@ ISL_ARG_CHOICE(struct isl_options, convex, 0, "convex-hull", \
        convex, ISL_CONVEX_HULL_WRAP, "convex hull algorithm to use")
 ISL_ARG_BOOL(struct isl_options, schedule_parametric, 0,
        "schedule-parametric", 1, "construct possibly parametric schedules")
+ISL_ARG_BOOL(struct isl_options, schedule_outer_parallelism, 0,
+       "schedule-outer-parallelism", 0,
+       "try to construct schedules with outer parallelism")
 ISL_ARG_VERSION(print_version)
 ISL_ARG_END
 };
index bb747de..3e8d3a3 100644 (file)
@@ -1100,8 +1100,12 @@ static int count_constraints(struct isl_sched_graph *graph,
  *
  * The constraints are those from the edges plus two or three equalities
  * to express the sums.
+ *
+ * If force_parallel is set, then we add equalities to ensure that
+ * the sum of the m_n coefficients and m_0 are both zero.
  */
-static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
+static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
+       int force_parallel)
 {
        int i, j;
        int k;
@@ -1129,17 +1133,26 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
 
        dim = isl_dim_set_alloc(ctx, 0, total);
        isl_basic_set_free(graph->lp);
-       n_eq += 2 + parametric;
+       n_eq += 2 + parametric + force_parallel;
        graph->lp = isl_basic_set_alloc_dim(dim, 0, n_eq, n_ineq);
 
        k = isl_basic_set_alloc_equality(graph->lp);
        if (k < 0)
                return -1;
        isl_seq_clr(graph->lp->eq[k], 1 +  total);
-       isl_int_set_si(graph->lp->eq[k][1], -1);
+       if (!force_parallel)
+               isl_int_set_si(graph->lp->eq[k][1], -1);
        for (i = 0; i < 2 * nparam; ++i)
                isl_int_set_si(graph->lp->eq[k][1 + param_pos + i], 1);
 
+       if (force_parallel) {
+               k = isl_basic_set_alloc_equality(graph->lp);
+               if (k < 0)
+                       return -1;
+               isl_seq_clr(graph->lp->eq[k], 1 +  total);
+               isl_int_set_si(graph->lp->eq[k][2], -1);
+       }
+
        if (parametric) {
                k = isl_basic_set_alloc_equality(graph->lp);
                if (k < 0)
@@ -2163,9 +2176,16 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
  *
  * If we manage to complete the schedule, we finish off by topologically
  * sorting the statements based on the remaining dependences.
+ *
+ * If ctx->opt->schedule_outer_parallelism is set, then we force the
+ * outermost dimension in the current band to be parallel.  If this
+ * turns out to be impossible, we fall back on the general scheme above
+ * and try to carry as many dependences as possible.
  */
 static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
+       int force_parallel = 0;
+
        if (detect_sccs(graph) < 0)
                return -1;
        sort_sccs(graph);
@@ -2173,13 +2193,16 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (compute_maxvar(graph) < 0)
                return -1;
 
+       if (ctx->opt->schedule_outer_parallelism)
+               force_parallel = 1;
+
        while (graph->n_row < graph->maxvar) {
                isl_vec *sol;
 
                graph->src_scc = -1;
                graph->dst_scc = -1;
 
-               if (setup_lp(ctx, graph) < 0)
+               if (setup_lp(ctx, graph, force_parallel) < 0)
                        return -1;
                sol = solve_lp(graph);
                if (!sol)
@@ -2194,6 +2217,7 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
                }
                if (update_schedule(graph, sol, 1, 1) < 0)
                        return -1;
+               force_parallel = 0;
        }
 
        if (graph->n_total_row > graph->band_start)