isl_union_set_compute_schedule: fix computation of maximal number of rows

[platform/upstream/isl.git] / isl_schedule.c
diff --git a/isl_schedule.c b/isl_schedule.c

index b279281..3c08e5e 100644 (file)
--- a/isl_schedule.c
+++ b/isl_schedule.c
@@ -24,6 +24,7 @@
  #include <isl_schedule_private.h>
  #include <isl_band_private.h>
  #include <isl_list_private.h>
+#include <isl_options_private.h>
  
  /*
   * The scheduling algorithm implemented in this file was inspired by
@@ -134,6 +135,7 @@ struct isl_sched_edge {
   * n is the number of nodes
   * node is the list of nodes
   * maxvar is the maximal number of variables over all nodes
+ * max_row is the allocated number of rows in the schedule
   * n_row is the current (maximal) number of linearly independent
   *     rows in the node schedules
   * n_total_row is the current number of rows in the node schedules
@@ -174,6 +176,7 @@ struct isl_sched_graph {
         struct isl_sched_node *node;
         int n;
         int maxvar;
+       int max_row;
         int n_row;
  
         int *sorted;
@@ -371,6 +374,43 @@ static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
         isl_basic_set_free(graph->lp);
  }
  
+/* For each "set" on which this function is called, increment
+ * graph->n by one and update graph->maxvar.
+ */
+static int init_n_maxvar(__isl_take isl_set *set, void *user)
+{
+       struct isl_sched_graph *graph = user;
+       int nvar = isl_set_dim(set, isl_dim_set);
+
+       graph->n++;
+       if (nvar > graph->maxvar)
+               graph->maxvar = nvar;
+
+       isl_set_free(set);
+
+       return 0;
+}
+
+/* Compute the number of rows that should be allocated for the schedule.
+ * The graph can be split at most "n - 1" times, there can be at most
+ * two rows for each dimension in the iteration domains (in particular,
+ * we usually have one row, but it may be split by split_parallel),
+ * and there can be one extra row for ordering the statements.
+ * Note that if we have actually split "n - 1" times, then no ordering
+ * is needed, so in principle we could use "graph->n + 2 * graph->maxvar - 1".
+ */
+static int compute_max_row(struct isl_sched_graph *graph,
+       __isl_keep isl_union_set *domain)
+{
+       graph->n = 0;
+       graph->maxvar = 0;
+       if (isl_union_set_foreach_set(domain, &init_n_maxvar, graph) < 0)
+               return -1;
+       graph->max_row = graph->n + 2 * graph->maxvar;
+
+       return 0;
+}
+
  /* Add a new node to the graph representing the given set.
   */
  static int extract_node(__isl_take isl_set *set, void *user)
@@ -395,11 +435,11 @@ static int extract_node(__isl_take isl_set *set, void *user)
         graph->node[graph->n].nparam = nparam;
         graph->node[graph->n].sched = sched;
         graph->node[graph->n].sched_map = NULL;
-       band = isl_alloc_array(ctx, int, graph->n_edge + nvar);
+       band = isl_alloc_array(ctx, int, graph->max_row);
         graph->node[graph->n].band = band;
-       band_id = isl_calloc_array(ctx, int, graph->n_edge + nvar);
+       band_id = isl_calloc_array(ctx, int, graph->max_row);
         graph->node[graph->n].band_id = band_id;
-       zero = isl_calloc_array(ctx, int, graph->n_edge + nvar);
+       zero = isl_calloc_array(ctx, int, graph->max_row);
         graph->node[graph->n].zero = zero;
         graph->n++;
  
@@ -1138,6 +1178,9 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
         int parametric;
         int param_pos;
         int n_eq, n_ineq;
+       int max_constant_term;
+
+       max_constant_term = ctx->opt->schedule_max_constant_term;
  
         parametric = ctx->opt->schedule_parametric;
         nparam = isl_space_dim(graph->node[0].dim, isl_dim_param);
@@ -1157,6 +1200,9 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
         dim = isl_space_set_alloc(ctx, 0, total);
         isl_basic_set_free(graph->lp);
         n_eq += 2 + parametric + force_zero;
+       if (max_constant_term != -1)
+               n_ineq += graph->n;
+
         graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
  
         k = isl_basic_set_alloc_equality(graph->lp);
@@ -1203,6 +1249,17 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
                         isl_int_set_si(graph->lp->eq[k][pos + j], 1);
         }
  
+       if (max_constant_term != -1)
+               for (i = 0; i < graph->n; ++i) {
+                       struct isl_sched_node *node = &graph->node[i];
+                       k = isl_basic_set_alloc_inequality(graph->lp);
+                       if (k < 0)
+                               return -1;
+                       isl_seq_clr(graph->lp->ineq[k], 1 +  total);
+                       isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1);
+                       isl_int_set_si(graph->lp->ineq[k][0], max_constant_term);
+               }
+
         if (add_all_validity_constraints(graph) < 0)
                 return -1;
         if (add_all_proximity_constraints(graph) < 0)
@@ -1624,6 +1681,9 @@ static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
  
  /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
   * to the dst dependence graph.
+ * If the source or destination node of the edge is not in the destination
+ * graph, then it must be a backward proximity edge and it should simply
+ * be ignored.
   */
  static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
         struct isl_sched_graph *src,
@@ -1635,6 +1695,7 @@ static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
         for (i = 0; i < src->n_edge; ++i) {
                 struct isl_sched_edge *edge = &src->edge[i];
                 isl_map *map;
+               struct isl_sched_node *dst_src, *dst_dst;
  
                 if (!edge_pred(edge, data))
                         continue;
@@ -1642,12 +1703,19 @@ static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
                 if (isl_map_plain_is_empty(edge->map))
                         continue;
  
+               dst_src = graph_find_node(ctx, dst, edge->src->dim);
+               dst_dst = graph_find_node(ctx, dst, edge->dst->dim);
+               if (!dst_src || !dst_dst) {
+                       if (edge->validity)
+                               isl_die(ctx, isl_error_internal,
+                                       "backward validity edge", return -1);
+                       continue;
+               }
+
                 map = isl_map_copy(edge->map);
  
-               dst->edge[dst->n_edge].src =
-                       graph_find_node(ctx, dst, edge->src->dim);
-               dst->edge[dst->n_edge].dst =
-                       graph_find_node(ctx, dst, edge->dst->dim);
+               dst->edge[dst->n_edge].src = dst_src;
+               dst->edge[dst->n_edge].dst = dst_dst;
                 dst->edge[dst->n_edge].map = map;
                 dst->edge[dst->n_edge].validity = edge->validity;
                 dst->edge[dst->n_edge].proximity = edge->proximity;
@@ -1828,6 +1896,9 @@ static int pad_schedule(struct isl_sched_graph *graph)
   * It would be possible to reuse them as the first rows in the next
   * band, but recomputing them may result in better rows as we are looking
   * at a smaller part of the dependence graph.
+ * compute_split_schedule is only called when no zero-distance schedule row
+ * could be found on the entire graph, so we wark the splitting row as
+ * non zero-distance.
   *
   * The band_id of the second group is set to n, where n is the number
   * of nodes in the first group.  This ensures that the band_ids over
@@ -1868,6 +1939,7 @@ static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
                         node->sched = isl_mat_set_element_si(node->sched,
                                                              row, j, 0);
                 node->band[graph->n_total_row] = graph->n_band;
+               node->zero[graph->n_total_row] = 0;
         }
  
         e1 = e2 = 0;
@@ -2230,6 +2302,8 @@ static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
         for (i = 1; i < graph->n; ++i) {
                 struct isl_sched_node *node = &graph->node[i];
  
+               if (isl_mat_cols(node->sched) != cols)
+                       return 0;
                 if (!isl_seq_eq(node0->sched->row[row] + 1,
                                 node->sched->row[row] + 1, cols - 1))
                         return 0;
@@ -2308,6 +2382,47 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
         return compute_next_band(ctx, graph);
  }
  
+/* Are there any validity edges in the graph?
+ */
+static int has_validity_edges(struct isl_sched_graph *graph)
+{
+       int i;
+
+       for (i = 0; i < graph->n_edge; ++i)
+               if (graph->edge[i].validity)
+                       return 1;
+
+       return 0;
+}
+
+/* Should we apply a Feautrier step?
+ * That is, did the user request the Feautrier algorithm and are
+ * there any validity dependences (left)?
+ */
+static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
+               return 0;
+
+       return has_validity_edges(graph);
+}
+
+/* Compute a schedule for a connected dependence graph using Feautrier's
+ * multi-dimensional scheduling algorithm.
+ * The original algorithm is described in [1].
+ * The main idea is to minimize the number of scheduling dimensions, by
+ * trying to satisfy as many dependences as possible per scheduling dimension.
+ *
+ * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
+ *     Problem, Part II: Multi-Dimensional Time.
+ *     In Intl. Journal of Parallel Programming, 1992.
+ */
+static int compute_schedule_wcc_feautrier(isl_ctx *ctx,
+       struct isl_sched_graph *graph)
+{
+       return carry_dependences(ctx, graph);
+}
+
  /* Compute a schedule for a connected dependence graph.
   * We try to find a sequence of as many schedule rows as possible that result
   * in non-negative dependence distances (independent of the previous rows
@@ -2320,6 +2435,10 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
   * - try to carry as many dependences as possible and continue with the next
   *     band
   *
+ * If Feautrier's algorithm is selected, we first recursively try to satisfy
+ * as many validity dependences as possible. When all validity dependences
+ * are satisfied we extend the schedule to a full-dimensional schedule.
+ *
   * If we manage to complete the schedule, we finish off by topologically
   * sorting the statements based on the remaining dependences.
   *
@@ -2339,6 +2458,9 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
         if (compute_maxvar(graph) < 0)
                 return -1;
  
+       if (need_feautrier_step(ctx, graph))
+               return compute_schedule_wcc_feautrier(ctx, graph);
+
         if (ctx->opt->schedule_outer_zero_distance)
                 force_zero = 1;
  
@@ -2425,7 +2547,7 @@ static int compute_component_schedule(isl_ctx *ctx,
  
  /* Compute a schedule for the given dependence graph.
   * We first check if the graph is connected (through validity dependences)
- * and if so compute a schedule for each component separately.
+ * and, if not, compute a schedule for each component separately.
   */
  static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
  {
@@ -2439,8 +2561,12 @@ static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
  }
  
  /* Compute a schedule for the given union of domains that respects
- * all the validity dependences and tries to minimize the dependence
- * distances over the proximity dependences.
+ * all the validity dependences.
+ * If the default isl scheduling algorithm is used, it tries to minimize
+ * the dependence distances over the proximity dependences.
+ * If Feautrier's scheduling algorithm is used, the proximity dependence
+ * distances are only minimized during the extension to a full-dimensional
+ * schedule.
   */
  __isl_give isl_schedule *isl_union_set_compute_schedule(
         __isl_take isl_union_set *domain,
@@ -2469,6 +2595,8 @@ __isl_give isl_schedule *isl_union_set_compute_schedule(
         if (graph_alloc(ctx, &graph, graph.n,
             isl_union_map_n_map(validity) + isl_union_map_n_map(proximity)) < 0)
                 goto error;
+       if (compute_max_row(&graph, domain) < 0)
+               goto error;
         graph.root = 1;
         graph.n = 0;
         if (isl_union_set_foreach_set(domain, &extract_node, &graph) < 0)