bernstein_coefficients_cell: handle NULL poly

[platform/upstream/isl.git] / isl_schedule.c
diff --git a/isl_schedule.c b/isl_schedule.c

index 0b1ff56..320db6c 100644 (file)
--- a/isl_schedule.c
+++ b/isl_schedule.c
@@ -23,6 +23,7 @@
  #include <isl_qsort.h>
  #include <isl_schedule_private.h>
  #include <isl_band_private.h>
+#include <isl_list_private.h>
  
  /*
   * The scheduling algorithm implemented in this file was inspired by
@@ -55,9 +56,10 @@
   * band_id is used to differentiate between separate bands at the same
   * level within the same parent band, i.e., bands that are separated
   * by the parent band or bands that are independent of each other.
- * parallel contains a boolean for each of the rows of the schedule,
- * indicating whether the corresponding scheduling dimension is parallel
- * within its band and with respect to the proximity edges.
+ * zero contains a boolean for each of the rows of the schedule,
+ * indicating whether the corresponding scheduling dimension results
+ * in zero dependence distances within its band and with respect
+ * to the proximity edges.
   *
   * index, min_index and on_stack are used during the SCC detection
   * index represents the order in which nodes are visited.
@@ -78,7 +80,7 @@ struct isl_sched_node {
  
         int     *band;
         int     *band_id;
-       int     *parallel;
+       int     *zero;
  
         /* scc detection */
         int      index;
@@ -354,7 +356,7 @@ static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
                 if (graph->root) {
                         free(graph->node[i].band);
                         free(graph->node[i].band_id);
-                       free(graph->node[i].parallel);
+                       free(graph->node[i].zero);
                 }
         }
         free(graph->node);
@@ -378,7 +380,7 @@ static int extract_node(__isl_take isl_set *set, void *user)
         isl_dim *dim;
         isl_mat *sched;
         struct isl_sched_graph *graph = user;
-       int *band, *band_id, *parallel;
+       int *band, *band_id, *zero;
  
         ctx = isl_set_get_ctx(set);
         dim = isl_set_get_dim(set);
@@ -397,11 +399,11 @@ static int extract_node(__isl_take isl_set *set, void *user)
         graph->node[graph->n].band = band;
         band_id = isl_calloc_array(ctx, int, graph->n_edge + nvar);
         graph->node[graph->n].band_id = band_id;
-       parallel = isl_calloc_array(ctx, int, graph->n_edge + nvar);
-       graph->node[graph->n].parallel = parallel;
+       zero = isl_calloc_array(ctx, int, graph->n_edge + nvar);
+       graph->node[graph->n].zero = zero;
         graph->n++;
  
-       if (!sched || !band || !band_id || !parallel)
+       if (!sched || !band || !band_id || !zero)
                 return -1;
  
         return 0;
@@ -1100,8 +1102,12 @@ static int count_constraints(struct isl_sched_graph *graph,
   *
   * The constraints are those from the edges plus two or three equalities
   * to express the sums.
+ *
+ * If force_zero is set, then we add equalities to ensure that
+ * the sum of the m_n coefficients and m_0 are both zero.
   */
-static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
+static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
+       int force_zero)
  {
         int i, j;
         int k;
@@ -1129,17 +1135,26 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
  
         dim = isl_dim_set_alloc(ctx, 0, total);
         isl_basic_set_free(graph->lp);
-       n_eq += 2 + parametric;
+       n_eq += 2 + parametric + force_zero;
         graph->lp = isl_basic_set_alloc_dim(dim, 0, n_eq, n_ineq);
  
         k = isl_basic_set_alloc_equality(graph->lp);
         if (k < 0)
                 return -1;
         isl_seq_clr(graph->lp->eq[k], 1 +  total);
-       isl_int_set_si(graph->lp->eq[k][1], -1);
+       if (!force_zero)
+               isl_int_set_si(graph->lp->eq[k][1], -1);
         for (i = 0; i < 2 * nparam; ++i)
                 isl_int_set_si(graph->lp->eq[k][1 + param_pos + i], 1);
  
+       if (force_zero) {
+               k = isl_basic_set_alloc_equality(graph->lp);
+               if (k < 0)
+                       return -1;
+               isl_seq_clr(graph->lp->eq[k], 1 +  total);
+               isl_int_set_si(graph->lp->eq[k][2], -1);
+       }
+
         if (parametric) {
                 k = isl_basic_set_alloc_equality(graph->lp);
                 if (k < 0)
@@ -1267,16 +1282,16 @@ static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
   * In this case, we then also need to perform this multiplication
   * to obtain the values of c_i_x.
   *
- * If check_parallel is set, then the first two coordinates of sol are
+ * If check_zero is set, then the first two coordinates of sol are
   * assumed to correspond to the dependence distance.  If these two
   * coordinates are zero, then the corresponding scheduling dimension
- * is marked as being parallel.
+ * is marked as being zero distance.
   */
  static int update_schedule(struct isl_sched_graph *graph,
-       __isl_take isl_vec *sol, int use_cmap, int check_parallel)
+       __isl_take isl_vec *sol, int use_cmap, int check_zero)
  {
         int i, j;
-       int parallel = 0;
+       int zero = 0;
         isl_vec *csol = NULL;
  
         if (!sol)
@@ -1285,8 +1300,8 @@ static int update_schedule(struct isl_sched_graph *graph,
                 isl_die(sol->ctx, isl_error_internal,
                         "no solution found", goto error);
  
-       if (check_parallel)
-               parallel = isl_int_is_zero(sol->el[1]) &&
+       if (check_zero)
+               zero = isl_int_is_zero(sol->el[1]) &&
                            isl_int_is_zero(sol->el[2]);
  
         for (i = 0; i < graph->n; ++i) {
@@ -1325,7 +1340,7 @@ static int update_schedule(struct isl_sched_graph *graph,
                         node->sched = isl_mat_set_element(node->sched,
                                         row, 1 + node->nparam + j, csol->el[j]);
                 node->band[graph->n_total_row] = graph->n_band;
-               node->parallel[graph->n_total_row] = parallel;
+               node->zero[graph->n_total_row] = zero;
         }
         isl_vec_free(sol);
         isl_vec_free(csol);
@@ -1520,20 +1535,20 @@ static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
  
         for (i = 0; i < sched->n; ++i) {
                 int r, b;
-               int *band_end, *band_id, *parallel;
+               int *band_end, *band_id, *zero;
  
                 band_end = isl_alloc_array(ctx, int, graph->n_band);
                 band_id = isl_alloc_array(ctx, int, graph->n_band);
-               parallel = isl_alloc_array(ctx, int, graph->n_total_row);
+               zero = isl_alloc_array(ctx, int, graph->n_total_row);
                 sched->node[i].sched = node_extract_schedule(&graph->node[i]);
                 sched->node[i].band_end = band_end;
                 sched->node[i].band_id = band_id;
-               sched->node[i].parallel = parallel;
-               if (!band_end || !band_id || !parallel)
+               sched->node[i].zero = zero;
+               if (!band_end || !band_id || !zero)
                         goto error;
  
                 for (r = 0; r < graph->n_total_row; ++r)
-                       parallel[r] = graph->node[i].parallel[r];
+                       zero[r] = graph->node[i].zero[r];
                 for (r = b = 0; r < graph->n_total_row; ++r) {
                         if (graph->node[i].band[r] == b)
                                 continue;
@@ -1577,7 +1592,7 @@ static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
                         isl_map_copy(src->node[i].sched_map);
                 dst->node[dst->n].band = src->node[i].band;
                 dst->node[dst->n].band_id = src->node[i].band_id;
-               dst->node[dst->n].parallel = src->node[i].parallel;
+               dst->node[dst->n].zero = src->node[i].zero;
                 dst->n++;
         }
  
@@ -2115,6 +2130,65 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
         return 0;
  }
  
+/* If the schedule_split_parallel option is set and if the linear
+ * parts of the scheduling rows for all nodes in the graphs are the same,
+ * then split off the constant term from the linear part.
+ * The constant term is then placed in a separate band and
+ * the linear part is simplified.
+ */
+static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i;
+       int equal = 1;
+       int row, cols;
+       struct isl_sched_node *node0;
+
+       if (!ctx->opt->schedule_split_parallel)
+               return 0;
+       if (graph->n <= 1)
+               return 0;
+
+       node0 = &graph->node[0];
+       row = isl_mat_rows(node0->sched) - 1;
+       cols = isl_mat_cols(node0->sched);
+       for (i = 1; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+
+               if (!isl_seq_eq(node0->sched->row[row] + 1,
+                               node->sched->row[row] + 1, cols - 1))
+                       return 0;
+               if (equal &&
+                   isl_int_ne(node0->sched->row[row][0],
+                              node->sched->row[row][0]))
+                       equal = 0;
+       }
+       if (equal)
+               return 0;
+
+       next_band(graph);
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+
+               isl_map_free(node->sched_map);
+               node->sched_map = NULL;
+               node->sched = isl_mat_add_zero_rows(node->sched, 1);
+               if (!node->sched)
+                       return -1;
+               isl_int_set(node->sched->row[row + 1][0],
+                           node->sched->row[row][0]);
+               isl_int_set_si(node->sched->row[row][0], 0);
+               node->sched = isl_mat_normalize_row(node->sched, row);
+               if (!node->sched)
+                       return -1;
+               node->band[graph->n_total_row] = graph->n_band;
+       }
+
+       graph->n_total_row++;
+
+       return 0;
+}
+
  /* Construct a schedule row for each node such that as many dependences
   * as possible are carried and then continue with the next band.
   */
@@ -2146,6 +2220,9 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
         if (update_schedule(graph, sol, 0, 0) < 0)
                 return -1;
  
+       if (split_parallel(ctx, graph) < 0)
+               return -1;
+
         return compute_next_band(ctx, graph);
  }
  
@@ -2163,9 +2240,16 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
   *
   * If we manage to complete the schedule, we finish off by topologically
   * sorting the statements based on the remaining dependences.
+ *
+ * If ctx->opt->schedule_outer_zero_distance is set, then we force the
+ * outermost dimension in the current band to be zero distance.  If this
+ * turns out to be impossible, we fall back on the general scheme above
+ * and try to carry as many dependences as possible.
   */
  static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
  {
+       int force_zero = 0;
+
         if (detect_sccs(graph) < 0)
                 return -1;
         sort_sccs(graph);
@@ -2173,19 +2257,25 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
         if (compute_maxvar(graph) < 0)
                 return -1;
  
+       if (ctx->opt->schedule_outer_zero_distance)
+               force_zero = 1;
+
         while (graph->n_row < graph->maxvar) {
                 isl_vec *sol;
  
                 graph->src_scc = -1;
                 graph->dst_scc = -1;
  
-               if (setup_lp(ctx, graph) < 0)
+               if (setup_lp(ctx, graph, force_zero) < 0)
                         return -1;
                 sol = solve_lp(graph);
                 if (!sol)
                         return -1;
                 if (sol->size == 0) {
                         isl_vec_free(sol);
+                       if (!ctx->opt->schedule_maximize_band_depth &&
+                           graph->n_total_row > graph->band_start)
+                               return compute_next_band(ctx, graph);
                         if (graph->src_scc >= 0)
                                 return compute_split_schedule(ctx, graph);
                         if (graph->n_total_row > graph->band_start)
@@ -2194,6 +2284,7 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
                 }
                 if (update_schedule(graph, sol, 1, 1) < 0)
                         return -1;
+               force_zero = 0;
         }
  
         if (graph->n_total_row > graph->band_start)
@@ -2343,7 +2434,7 @@ void *isl_schedule_free(__isl_take isl_schedule *sched)
                 isl_map_free(sched->node[i].sched);
                 free(sched->node[i].band_end);
                 free(sched->node[i].band_id);
-               free(sched->node[i].parallel);
+               free(sched->node[i].zero);
         }
         isl_dim_free(sched->dim);
         isl_band_list_free(sched->band_forest);
@@ -2372,47 +2463,6 @@ __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
         return umap;
  }
  
-int isl_schedule_n_band(__isl_keep isl_schedule *sched)
-{
-       return sched ? sched->n_band : 0;
-}
-
-/* Construct a mapping that maps each domain to the band in its schedule
- * with the specified band index.  Note that bands with the same index
- * but for different domains do not need to be related.
- */
-__isl_give isl_union_map *isl_schedule_get_band(__isl_keep isl_schedule *sched,
-       unsigned band)
-{
-       int i;
-       isl_union_map *umap;
-
-       if (!sched)
-               return NULL;
-
-       umap = isl_union_map_empty(isl_dim_copy(sched->dim));
-       for (i = 0; i < sched->n; ++i) {
-               int start, end;
-               isl_map *map;
-
-               if (band >= sched->node[i].n_band)
-                       continue;
-
-               start = band > 0 ? sched->node[i].band_end[band - 1] : 0;
-               end = sched->node[i].band_end[band];
-
-               map = isl_map_copy(sched->node[i].sched);
-
-               map = isl_map_project_out(map, isl_dim_out, end,
-                                         sched->n_total_row - end);
-               map = isl_map_project_out(map, isl_dim_out, 0, start);
-
-               umap = isl_union_map_add_map(umap, map);
-       }
-
-       return umap;
-}
-
  static __isl_give isl_band_list *construct_band_list(
         __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent,
         int band_nr, int *parent_active, int n_active);
@@ -2467,12 +2517,12 @@ static __isl_give isl_band *construct_band(__isl_keep isl_schedule *schedule,
                 schedule->node[i].band_end[band_nr] : start;
         band->n = end - start;
  
-       band->parallel = isl_alloc_array(ctx, int, band->n);
-       if (!band->parallel)
+       band->zero = isl_alloc_array(ctx, int, band->n);
+       if (!band->zero)
                 goto error;
  
         for (j = 0; j < band->n; ++j)
-               band->parallel[j] = schedule->node[i].parallel[start + j];
+               band->zero[j] = schedule->node[i].zero[start + j];
  
         band->map = isl_union_map_empty(isl_dim_copy(schedule->dim));
         for (i = 0; i < schedule->n; ++i) {
@@ -2619,5 +2669,74 @@ __isl_give isl_band_list *isl_schedule_get_band_forest(
                 return NULL;
         if (!schedule->band_forest)
                 schedule->band_forest = construct_forest(schedule);
-       return isl_band_list_copy(schedule->band_forest);
+       return isl_band_list_dup(schedule->band_forest);
+}
+
+static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
+       __isl_keep isl_band_list *list);
+
+static __isl_give isl_printer *print_band(__isl_take isl_printer *p,
+       __isl_keep isl_band *band)
+{
+       isl_band_list *children;
+
+       p = isl_printer_start_line(p);
+       p = isl_printer_print_union_map(p, band->map);
+       p = isl_printer_end_line(p);
+
+       if (!isl_band_has_children(band))
+               return p;
+
+       children = isl_band_get_children(band);
+
+       p = isl_printer_indent(p, 4);
+       p = print_band_list(p, children);
+       p = isl_printer_indent(p, -4);
+
+       isl_band_list_free(children);
+
+       return p;
+}
+
+static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
+       __isl_keep isl_band_list *list)
+{
+       int i, n;
+
+       n = isl_band_list_n_band(list);
+       for (i = 0; i < n; ++i) {
+               isl_band *band;
+               band = isl_band_list_get_band(list, i);
+               p = print_band(p, band);
+               isl_band_free(band);
+       }
+
+       return p;
+}
+
+__isl_give isl_printer *isl_printer_print_schedule(__isl_take isl_printer *p,
+       __isl_keep isl_schedule *schedule)
+{
+       isl_band_list *forest;
+
+       forest = isl_schedule_get_band_forest(schedule);
+
+       p = print_band_list(p, forest);
+
+       isl_band_list_free(forest);
+
+       return p;
+}
+
+void isl_schedule_dump(__isl_keep isl_schedule *schedule)
+{
+       isl_printer *printer;
+
+       if (!schedule)
+               return;
+
+       printer = isl_printer_to_file(isl_schedule_get_ctx(schedule), stderr);
+       printer = isl_printer_print_schedule(printer, schedule);
+
+       isl_printer_free(printer);
  }