isl_union_set_compute_schedule: only carry validity dependences
[platform/upstream/isl.git] / isl_schedule.c
index cc8da96..0807fce 100644 (file)
@@ -1042,7 +1042,8 @@ static int node_update_cmap(struct isl_sched_node *node)
 
 /* Count the number of equality and inequality constraints
  * that will be added for the given map.
- * If once is set, then we count
+ * If carry is set, then we are counting the number of (validity)
+ * constraints that will be added in setup_carry_lp and we count
  * each edge exactly once.  Otherwise, we count as follows
  * validity            -> 1 (>= 0)
  * validity+proximity  -> 2 (>= 0 and upper bound)
@@ -1050,10 +1051,15 @@ static int node_update_cmap(struct isl_sched_node *node)
  */
 static int count_map_constraints(struct isl_sched_graph *graph,
        struct isl_sched_edge *edge, __isl_take isl_map *map,
-       int *n_eq, int *n_ineq, int once)
+       int *n_eq, int *n_ineq, int carry)
 {
        isl_basic_set *coef;
-       int f = once ? 1 : edge->proximity ? 2 : 1;
+       int f = carry ? 1 : edge->proximity ? 2 : 1;
+
+       if (carry && !edge->validity) {
+               isl_map_free(map);
+               return 0;
+       }
 
        if (edge->src == edge->dst)
                coef = intra_coefficients(graph, map);
@@ -1070,14 +1076,13 @@ static int count_map_constraints(struct isl_sched_graph *graph,
 
 /* Count the number of equality and inequality constraints
  * that will be added to the main lp problem.
- * If once is set, then we count
- * each edge exactly once.  Otherwise, we count as follows
+ * We count as follows
  * validity            -> 1 (>= 0)
  * validity+proximity  -> 2 (>= 0 and upper bound)
  * proximity           -> 2 (lower and upper bound)
  */
 static int count_constraints(struct isl_sched_graph *graph,
-       int *n_eq, int *n_ineq, int once)
+       int *n_eq, int *n_ineq)
 {
        int i;
 
@@ -1087,13 +1092,50 @@ static int count_constraints(struct isl_sched_graph *graph,
                isl_map *map = isl_map_copy(edge->map);
 
                if (count_map_constraints(graph, edge, map,
-                                         n_eq, n_ineq, once) < 0)
+                                         n_eq, n_ineq, 0) < 0)
                        return -1;
        }
 
        return 0;
 }
 
+/* Add constraints that bound the values of the variable and parameter
+ * coefficients of the schedule.
+ *
+ * The maximal value of the coefficients is defined by the option
+ * 'schedule_max_coefficient'.
+ */
+static int add_bound_coefficient_constraints(isl_ctx *ctx,
+       struct isl_sched_graph *graph)
+{
+       int i, j, k;
+       int max_coefficient;
+       int total;
+
+       max_coefficient = ctx->opt->schedule_max_coefficient;
+
+       if (max_coefficient == -1)
+               return 0;
+
+       total = isl_basic_set_total_dim(graph->lp);
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               for (j = 0; j < 2 * node->nparam + 2 * node->nvar; ++j) {
+                       int dim;
+                       k = isl_basic_set_alloc_inequality(graph->lp);
+                       if (k < 0)
+                               return -1;
+                       dim = 1 + node->start + 1 + j;
+                       isl_seq_clr(graph->lp->ineq[k], 1 +  total);
+                       isl_int_set_si(graph->lp->ineq[k][dim], -1);
+                       isl_int_set_si(graph->lp->ineq[k][0], max_coefficient);
+               }
+       }
+
+       return 0;
+}
+
 /* Construct an ILP problem for finding schedule coefficients
  * that result in non-negative, but small dependence distances
  * over all dependences.
@@ -1139,6 +1181,11 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
        int parametric;
        int param_pos;
        int n_eq, n_ineq;
+       int max_constant_term;
+       int max_coefficient;
+
+       max_constant_term = ctx->opt->schedule_max_constant_term;
+       max_coefficient = ctx->opt->schedule_max_coefficient;
 
        parametric = ctx->opt->schedule_parametric;
        nparam = isl_space_dim(graph->node[0].dim, isl_dim_param);
@@ -1152,12 +1199,19 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
                total += 1 + 2 * (node->nparam + node->nvar);
        }
 
-       if (count_constraints(graph, &n_eq, &n_ineq, 0) < 0)
+       if (count_constraints(graph, &n_eq, &n_ineq) < 0)
                return -1;
 
        dim = isl_space_set_alloc(ctx, 0, total);
        isl_basic_set_free(graph->lp);
        n_eq += 2 + parametric + force_zero;
+       if (max_constant_term != -1)
+               n_ineq += graph->n;
+       if (max_coefficient != -1)
+               for (i = 0; i < graph->n; ++i)
+                       n_ineq += 2 * graph->node[i].nparam +
+                                 2 * graph->node[i].nvar;
+
        graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
 
        k = isl_basic_set_alloc_equality(graph->lp);
@@ -1204,6 +1258,19 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
                        isl_int_set_si(graph->lp->eq[k][pos + j], 1);
        }
 
+       if (max_constant_term != -1)
+               for (i = 0; i < graph->n; ++i) {
+                       struct isl_sched_node *node = &graph->node[i];
+                       k = isl_basic_set_alloc_inequality(graph->lp);
+                       if (k < 0)
+                               return -1;
+                       isl_seq_clr(graph->lp->ineq[k], 1 +  total);
+                       isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1);
+                       isl_int_set_si(graph->lp->ineq[k][0], max_constant_term);
+               }
+
+       if (add_bound_coefficient_constraints(ctx, graph) < 0)
+               return -1;
        if (add_all_validity_constraints(graph) < 0)
                return -1;
        if (add_all_proximity_constraints(graph) < 0)
@@ -1489,7 +1556,7 @@ static void next_band(struct isl_sched_graph *graph)
        graph->n_band++;
 }
 
-/* Topologically sort statements mapped to same schedule iteration
+/* Topologically sort statements mapped to the same schedule iteration
  * and add a row to the schedule corresponding to this order.
  */
 static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph)
@@ -1625,6 +1692,9 @@ static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
 
 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
  * to the dst dependence graph.
+ * If the source or destination node of the edge is not in the destination
+ * graph, then it must be a backward proximity edge and it should simply
+ * be ignored.
  */
 static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
        struct isl_sched_graph *src,
@@ -1636,6 +1706,7 @@ static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
        for (i = 0; i < src->n_edge; ++i) {
                struct isl_sched_edge *edge = &src->edge[i];
                isl_map *map;
+               struct isl_sched_node *dst_src, *dst_dst;
 
                if (!edge_pred(edge, data))
                        continue;
@@ -1643,12 +1714,19 @@ static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
                if (isl_map_plain_is_empty(edge->map))
                        continue;
 
+               dst_src = graph_find_node(ctx, dst, edge->src->dim);
+               dst_dst = graph_find_node(ctx, dst, edge->dst->dim);
+               if (!dst_src || !dst_dst) {
+                       if (edge->validity)
+                               isl_die(ctx, isl_error_internal,
+                                       "backward validity edge", return -1);
+                       continue;
+               }
+
                map = isl_map_copy(edge->map);
 
-               dst->edge[dst->n_edge].src =
-                       graph_find_node(ctx, dst, edge->src->dim);
-               dst->edge[dst->n_edge].dst =
-                       graph_find_node(ctx, dst, edge->dst->dim);
+               dst->edge[dst->n_edge].src = dst_src;
+               dst->edge[dst->n_edge].dst = dst_dst;
                dst->edge[dst->n_edge].map = map;
                dst->edge[dst->n_edge].validity = edge->validity;
                dst->edge[dst->n_edge].proximity = edge->proximity;
@@ -1777,9 +1855,9 @@ static int node_scc_at_least(struct isl_sched_node *node, int scc)
        return node->scc >= scc;
 }
 
-static int edge_src_scc_exactly(struct isl_sched_edge *edge, int scc)
+static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
 {
-       return edge->src->scc == scc;
+       return edge->src->scc == scc && edge->dst->scc == scc;
 }
 
 static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
@@ -1829,6 +1907,9 @@ static int pad_schedule(struct isl_sched_graph *graph)
  * It would be possible to reuse them as the first rows in the next
  * band, but recomputing them may result in better rows as we are looking
  * at a smaller part of the dependence graph.
+ * compute_split_schedule is only called when no zero-distance schedule row
+ * could be found on the entire graph, so we wark the splitting row as
+ * non zero-distance.
  *
  * The band_id of the second group is set to n, where n is the number
  * of nodes in the first group.  This ensures that the band_ids over
@@ -1869,6 +1950,7 @@ static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
                        node->sched = isl_mat_set_element_si(node->sched,
                                                             row, j, 0);
                node->band[graph->n_total_row] = graph->n_band;
+               node->zero[graph->n_total_row] = 0;
        }
 
        e1 = e2 = 0;
@@ -2033,8 +2115,8 @@ static int add_inter_constraints(struct isl_sched_graph *graph,
        return 0;
 }
 
-/* Add constraints to graph->lp that force all dependence
- * to be respected and attempt to carry it.
+/* Add constraints to graph->lp that force all validity dependences
+ * to be respected and attempt to carry them.
  */
 static int add_all_constraints(struct isl_sched_graph *graph)
 {
@@ -2044,6 +2126,10 @@ static int add_all_constraints(struct isl_sched_graph *graph)
        pos = 0;
        for (i = 0; i < graph->n_edge; ++i) {
                struct isl_sched_edge *edge= &graph->edge[i];
+
+               if (!edge->validity)
+                       continue;
+
                for (j = 0; j < edge->map->n; ++j) {
                        isl_basic_map *bmap;
                        isl_map *map;
@@ -2066,14 +2152,10 @@ static int add_all_constraints(struct isl_sched_graph *graph)
 
 /* Count the number of equality and inequality constraints
  * that will be added to the carry_lp problem.
- * If once is set, then we count
- * each edge exactly once.  Otherwise, we count as follows
- * validity            -> 1 (>= 0)
- * validity+proximity  -> 2 (>= 0 and upper bound)
- * proximity           -> 2 (lower and upper bound)
+ * We count each edge exactly once.
  */
 static int count_all_constraints(struct isl_sched_graph *graph,
-       int *n_eq, int *n_ineq, int once)
+       int *n_eq, int *n_ineq)
 {
        int i, j;
 
@@ -2088,7 +2170,7 @@ static int count_all_constraints(struct isl_sched_graph *graph,
                        map = isl_map_from_basic_map(bmap);
 
                        if (count_map_constraints(graph, edge, map,
-                                                 n_eq, n_ineq, once) < 0)
+                                                 n_eq, n_ineq, 1) < 0)
                                    return -1;
                }
        }
@@ -2125,7 +2207,7 @@ static int count_all_constraints(struct isl_sched_graph *graph,
  *             - positive and negative parts of c_i_n (if parametric)
  *             - positive and negative parts of c_i_x
  *
- * The constraints are those from the edges plus three equalities
+ * The constraints are those from the (validity) edges plus three equalities
  * to express the sums and n_edge inequalities to express e_i <= 1.
  */
 static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
@@ -2148,7 +2230,7 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
                total += 1 + 2 * (node->nparam + node->nvar);
        }
 
-       if (count_all_constraints(graph, &n_eq, &n_ineq, 1) < 0)
+       if (count_all_constraints(graph, &n_eq, &n_ineq) < 0)
                return -1;
 
        dim = isl_space_set_alloc(ctx, 0, total);
@@ -2207,40 +2289,45 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
        return 0;
 }
 
-/* If the schedule_split_parallel option is set and if the linear
- * parts of the scheduling rows for all nodes in the graphs are the same,
- * then split off the constant term from the linear part.
+/* If the schedule_split_scaled option is set and if the linear
+ * parts of the scheduling rows for all nodes in the graphs have
+ * non-trivial common divisor, then split off the constant term
+ * from the linear part.
  * The constant term is then placed in a separate band and
- * the linear part is simplified.
+ * the linear part is reduced.
  */
-static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
+static int split_scaled(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
        int i;
-       int equal = 1;
-       int row, cols;
-       struct isl_sched_node *node0;
+       int row;
+       isl_int gcd, gcd_i;
 
-       if (!ctx->opt->schedule_split_parallel)
+       if (!ctx->opt->schedule_split_scaled)
                return 0;
        if (graph->n <= 1)
                return 0;
 
-       node0 = &graph->node[0];
-       row = isl_mat_rows(node0->sched) - 1;
-       cols = isl_mat_cols(node0->sched);
-       for (i = 1; i < graph->n; ++i) {
+       isl_int_init(gcd);
+       isl_int_init(gcd_i);
+
+       isl_int_set_si(gcd, 0);
+
+       row = isl_mat_rows(graph->node[0].sched) - 1;
+
+       for (i = 0; i < graph->n; ++i) {
                struct isl_sched_node *node = &graph->node[i];
+               int cols = isl_mat_cols(node->sched);
 
-               if (!isl_seq_eq(node0->sched->row[row] + 1,
-                               node->sched->row[row] + 1, cols - 1))
-                       return 0;
-               if (equal &&
-                   isl_int_ne(node0->sched->row[row][0],
-                              node->sched->row[row][0]))
-                       equal = 0;
+               isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
+               isl_int_gcd(gcd, gcd, gcd_i);
        }
-       if (equal)
+
+       isl_int_clear(gcd_i);
+
+       if (isl_int_cmp_si(gcd, 1) <= 0) {
+               isl_int_clear(gcd);
                return 0;
+       }
 
        next_band(graph);
 
@@ -2251,19 +2338,26 @@ static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
                node->sched_map = NULL;
                node->sched = isl_mat_add_zero_rows(node->sched, 1);
                if (!node->sched)
-                       return -1;
-               isl_int_set(node->sched->row[row + 1][0],
-                           node->sched->row[row][0]);
-               isl_int_set_si(node->sched->row[row][0], 0);
-               node->sched = isl_mat_normalize_row(node->sched, row);
+                       goto error;
+               isl_int_fdiv_r(node->sched->row[row + 1][0],
+                              node->sched->row[row][0], gcd);
+               isl_int_fdiv_q(node->sched->row[row][0],
+                              node->sched->row[row][0], gcd);
+               isl_int_mul(node->sched->row[row][0],
+                           node->sched->row[row][0], gcd);
+               node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
                if (!node->sched)
-                       return -1;
+                       goto error;
                node->band[graph->n_total_row] = graph->n_band;
        }
 
        graph->n_total_row++;
 
+       isl_int_clear(gcd);
        return 0;
+error:
+       isl_int_clear(gcd);
+       return -1;
 }
 
 /* Construct a schedule row for each node such that as many dependences
@@ -2303,12 +2397,53 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (update_schedule(graph, sol, 0, 0) < 0)
                return -1;
 
-       if (split_parallel(ctx, graph) < 0)
+       if (split_scaled(ctx, graph) < 0)
                return -1;
 
        return compute_next_band(ctx, graph);
 }
 
+/* Are there any validity edges in the graph?
+ */
+static int has_validity_edges(struct isl_sched_graph *graph)
+{
+       int i;
+
+       for (i = 0; i < graph->n_edge; ++i)
+               if (graph->edge[i].validity)
+                       return 1;
+
+       return 0;
+}
+
+/* Should we apply a Feautrier step?
+ * That is, did the user request the Feautrier algorithm and are
+ * there any validity dependences (left)?
+ */
+static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
+               return 0;
+
+       return has_validity_edges(graph);
+}
+
+/* Compute a schedule for a connected dependence graph using Feautrier's
+ * multi-dimensional scheduling algorithm.
+ * The original algorithm is described in [1].
+ * The main idea is to minimize the number of scheduling dimensions, by
+ * trying to satisfy as many dependences as possible per scheduling dimension.
+ *
+ * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
+ *     Problem, Part II: Multi-Dimensional Time.
+ *     In Intl. Journal of Parallel Programming, 1992.
+ */
+static int compute_schedule_wcc_feautrier(isl_ctx *ctx,
+       struct isl_sched_graph *graph)
+{
+       return carry_dependences(ctx, graph);
+}
+
 /* Compute a schedule for a connected dependence graph.
  * We try to find a sequence of as many schedule rows as possible that result
  * in non-negative dependence distances (independent of the previous rows
@@ -2321,6 +2456,10 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
  * - try to carry as many dependences as possible and continue with the next
  *     band
  *
+ * If Feautrier's algorithm is selected, we first recursively try to satisfy
+ * as many validity dependences as possible. When all validity dependences
+ * are satisfied we extend the schedule to a full-dimensional schedule.
+ *
  * If we manage to complete the schedule, we finish off by topologically
  * sorting the statements based on the remaining dependences.
  *
@@ -2340,6 +2479,9 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (compute_maxvar(graph) < 0)
                return -1;
 
+       if (need_feautrier_step(ctx, graph))
+               return compute_schedule_wcc_feautrier(ctx, graph);
+
        if (ctx->opt->schedule_outer_zero_distance)
                force_zero = 1;
 
@@ -2375,8 +2517,37 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
        return sort_statements(ctx, graph);
 }
 
+/* Add a row to the schedules that separates the SCCs and move
+ * to the next band.
+ */
+static int split_on_scc(struct isl_sched_graph *graph)
+{
+       int i;
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int row = isl_mat_rows(node->sched);
+
+               isl_map_free(node->sched_map);
+               node->sched_map = NULL;
+               node->sched = isl_mat_add_zero_rows(node->sched, 1);
+               node->sched = isl_mat_set_element_si(node->sched, row, 0,
+                                                    node->scc);
+               if (!node->sched)
+                       return -1;
+               node->band[graph->n_total_row] = graph->n_band;
+       }
+
+       graph->n_total_row++;
+       next_band(graph);
+
+       return 0;
+}
+
 /* Compute a schedule for each component (identified by node->scc)
  * of the dependence graph separately and then combine the results.
+ * Depending on the setting of schedule_fuse, a component may be
+ * either weakly or strongly connected.
  *
  * The band_id is adjusted such that each component has a separate id.
  * Note that the band_id may have already been set to a value different
@@ -2390,6 +2561,9 @@ static int compute_component_schedule(isl_ctx *ctx,
        int n_total_row, orig_total_row;
        int n_band, orig_band;
 
+       if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN)
+               split_on_scc(graph);
+
        n_total_row = 0;
        orig_total_row = graph->n_total_row;
        n_band = 0;
@@ -2403,12 +2577,13 @@ static int compute_component_schedule(isl_ctx *ctx,
                                n++;
                n_edge = 0;
                for (i = 0; i < graph->n_edge; ++i)
-                       if (graph->edge[i].src->scc == wcc)
+                       if (graph->edge[i].src->scc == wcc &&
+                           graph->edge[i].dst->scc == wcc)
                                n_edge++;
 
                if (compute_sub_schedule(ctx, graph, n, n_edge,
                                    &node_scc_exactly,
-                                   &edge_src_scc_exactly, wcc, 1) < 0)
+                                   &edge_scc_exactly, wcc, 1) < 0)
                        return -1;
                if (graph->n_total_row > n_total_row)
                        n_total_row = graph->n_total_row;
@@ -2426,12 +2601,20 @@ static int compute_component_schedule(isl_ctx *ctx,
 
 /* Compute a schedule for the given dependence graph.
  * We first check if the graph is connected (through validity dependences)
- * and if so compute a schedule for each component separately.
+ * and, if not, compute a schedule for each component separately.
+ * If schedule_fuse is set to minimal fusion, then we check for strongly
+ * connected components instead and compute a separate schedule for
+ * each such strongly connected component.
  */
 static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
-       if (detect_wccs(graph) < 0)
-               return -1;
+       if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN) {
+               if (detect_sccs(graph) < 0)
+                       return -1;
+       } else {
+               if (detect_wccs(graph) < 0)
+                       return -1;
+       }
 
        if (graph->scc > 1)
                return compute_component_schedule(ctx, graph);
@@ -2440,8 +2623,12 @@ static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
 }
 
 /* Compute a schedule for the given union of domains that respects
- * all the validity dependences and tries to minimize the dependence
- * distances over the proximity dependences.
+ * all the validity dependences.
+ * If the default isl scheduling algorithm is used, it tries to minimize
+ * the dependence distances over the proximity dependences.
+ * If Feautrier's scheduling algorithm is used, the proximity dependence
+ * distances are only minimized during the extension to a full-dimensional
+ * schedule.
  */
 __isl_give isl_schedule *isl_union_set_compute_schedule(
        __isl_take isl_union_set *domain,