bernstein_coefficients_cell: handle NULL poly
[platform/upstream/isl.git] / isl_schedule.c
index 3e8d3a3..320db6c 100644 (file)
@@ -23,6 +23,7 @@
 #include <isl_qsort.h>
 #include <isl_schedule_private.h>
 #include <isl_band_private.h>
+#include <isl_list_private.h>
 
 /*
  * The scheduling algorithm implemented in this file was inspired by
  * band_id is used to differentiate between separate bands at the same
  * level within the same parent band, i.e., bands that are separated
  * by the parent band or bands that are independent of each other.
- * parallel contains a boolean for each of the rows of the schedule,
- * indicating whether the corresponding scheduling dimension is parallel
- * within its band and with respect to the proximity edges.
+ * zero contains a boolean for each of the rows of the schedule,
+ * indicating whether the corresponding scheduling dimension results
+ * in zero dependence distances within its band and with respect
+ * to the proximity edges.
  *
  * index, min_index and on_stack are used during the SCC detection
  * index represents the order in which nodes are visited.
@@ -78,7 +80,7 @@ struct isl_sched_node {
 
        int     *band;
        int     *band_id;
-       int     *parallel;
+       int     *zero;
 
        /* scc detection */
        int      index;
@@ -354,7 +356,7 @@ static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
                if (graph->root) {
                        free(graph->node[i].band);
                        free(graph->node[i].band_id);
-                       free(graph->node[i].parallel);
+                       free(graph->node[i].zero);
                }
        }
        free(graph->node);
@@ -378,7 +380,7 @@ static int extract_node(__isl_take isl_set *set, void *user)
        isl_dim *dim;
        isl_mat *sched;
        struct isl_sched_graph *graph = user;
-       int *band, *band_id, *parallel;
+       int *band, *band_id, *zero;
 
        ctx = isl_set_get_ctx(set);
        dim = isl_set_get_dim(set);
@@ -397,11 +399,11 @@ static int extract_node(__isl_take isl_set *set, void *user)
        graph->node[graph->n].band = band;
        band_id = isl_calloc_array(ctx, int, graph->n_edge + nvar);
        graph->node[graph->n].band_id = band_id;
-       parallel = isl_calloc_array(ctx, int, graph->n_edge + nvar);
-       graph->node[graph->n].parallel = parallel;
+       zero = isl_calloc_array(ctx, int, graph->n_edge + nvar);
+       graph->node[graph->n].zero = zero;
        graph->n++;
 
-       if (!sched || !band || !band_id || !parallel)
+       if (!sched || !band || !band_id || !zero)
                return -1;
 
        return 0;
@@ -1101,11 +1103,11 @@ static int count_constraints(struct isl_sched_graph *graph,
  * The constraints are those from the edges plus two or three equalities
  * to express the sums.
  *
- * If force_parallel is set, then we add equalities to ensure that
+ * If force_zero is set, then we add equalities to ensure that
  * the sum of the m_n coefficients and m_0 are both zero.
  */
 static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
-       int force_parallel)
+       int force_zero)
 {
        int i, j;
        int k;
@@ -1133,19 +1135,19 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
 
        dim = isl_dim_set_alloc(ctx, 0, total);
        isl_basic_set_free(graph->lp);
-       n_eq += 2 + parametric + force_parallel;
+       n_eq += 2 + parametric + force_zero;
        graph->lp = isl_basic_set_alloc_dim(dim, 0, n_eq, n_ineq);
 
        k = isl_basic_set_alloc_equality(graph->lp);
        if (k < 0)
                return -1;
        isl_seq_clr(graph->lp->eq[k], 1 +  total);
-       if (!force_parallel)
+       if (!force_zero)
                isl_int_set_si(graph->lp->eq[k][1], -1);
        for (i = 0; i < 2 * nparam; ++i)
                isl_int_set_si(graph->lp->eq[k][1 + param_pos + i], 1);
 
-       if (force_parallel) {
+       if (force_zero) {
                k = isl_basic_set_alloc_equality(graph->lp);
                if (k < 0)
                        return -1;
@@ -1280,16 +1282,16 @@ static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
  * In this case, we then also need to perform this multiplication
  * to obtain the values of c_i_x.
  *
- * If check_parallel is set, then the first two coordinates of sol are
+ * If check_zero is set, then the first two coordinates of sol are
  * assumed to correspond to the dependence distance.  If these two
  * coordinates are zero, then the corresponding scheduling dimension
- * is marked as being parallel.
+ * is marked as being zero distance.
  */
 static int update_schedule(struct isl_sched_graph *graph,
-       __isl_take isl_vec *sol, int use_cmap, int check_parallel)
+       __isl_take isl_vec *sol, int use_cmap, int check_zero)
 {
        int i, j;
-       int parallel = 0;
+       int zero = 0;
        isl_vec *csol = NULL;
 
        if (!sol)
@@ -1298,8 +1300,8 @@ static int update_schedule(struct isl_sched_graph *graph,
                isl_die(sol->ctx, isl_error_internal,
                        "no solution found", goto error);
 
-       if (check_parallel)
-               parallel = isl_int_is_zero(sol->el[1]) &&
+       if (check_zero)
+               zero = isl_int_is_zero(sol->el[1]) &&
                           isl_int_is_zero(sol->el[2]);
 
        for (i = 0; i < graph->n; ++i) {
@@ -1338,7 +1340,7 @@ static int update_schedule(struct isl_sched_graph *graph,
                        node->sched = isl_mat_set_element(node->sched,
                                        row, 1 + node->nparam + j, csol->el[j]);
                node->band[graph->n_total_row] = graph->n_band;
-               node->parallel[graph->n_total_row] = parallel;
+               node->zero[graph->n_total_row] = zero;
        }
        isl_vec_free(sol);
        isl_vec_free(csol);
@@ -1533,20 +1535,20 @@ static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
 
        for (i = 0; i < sched->n; ++i) {
                int r, b;
-               int *band_end, *band_id, *parallel;
+               int *band_end, *band_id, *zero;
 
                band_end = isl_alloc_array(ctx, int, graph->n_band);
                band_id = isl_alloc_array(ctx, int, graph->n_band);
-               parallel = isl_alloc_array(ctx, int, graph->n_total_row);
+               zero = isl_alloc_array(ctx, int, graph->n_total_row);
                sched->node[i].sched = node_extract_schedule(&graph->node[i]);
                sched->node[i].band_end = band_end;
                sched->node[i].band_id = band_id;
-               sched->node[i].parallel = parallel;
-               if (!band_end || !band_id || !parallel)
+               sched->node[i].zero = zero;
+               if (!band_end || !band_id || !zero)
                        goto error;
 
                for (r = 0; r < graph->n_total_row; ++r)
-                       parallel[r] = graph->node[i].parallel[r];
+                       zero[r] = graph->node[i].zero[r];
                for (r = b = 0; r < graph->n_total_row; ++r) {
                        if (graph->node[i].band[r] == b)
                                continue;
@@ -1590,7 +1592,7 @@ static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
                        isl_map_copy(src->node[i].sched_map);
                dst->node[dst->n].band = src->node[i].band;
                dst->node[dst->n].band_id = src->node[i].band_id;
-               dst->node[dst->n].parallel = src->node[i].parallel;
+               dst->node[dst->n].zero = src->node[i].zero;
                dst->n++;
        }
 
@@ -2128,6 +2130,65 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
        return 0;
 }
 
+/* If the schedule_split_parallel option is set and if the linear
+ * parts of the scheduling rows for all nodes in the graphs are the same,
+ * then split off the constant term from the linear part.
+ * The constant term is then placed in a separate band and
+ * the linear part is simplified.
+ */
+static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i;
+       int equal = 1;
+       int row, cols;
+       struct isl_sched_node *node0;
+
+       if (!ctx->opt->schedule_split_parallel)
+               return 0;
+       if (graph->n <= 1)
+               return 0;
+
+       node0 = &graph->node[0];
+       row = isl_mat_rows(node0->sched) - 1;
+       cols = isl_mat_cols(node0->sched);
+       for (i = 1; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+
+               if (!isl_seq_eq(node0->sched->row[row] + 1,
+                               node->sched->row[row] + 1, cols - 1))
+                       return 0;
+               if (equal &&
+                   isl_int_ne(node0->sched->row[row][0],
+                              node->sched->row[row][0]))
+                       equal = 0;
+       }
+       if (equal)
+               return 0;
+
+       next_band(graph);
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+
+               isl_map_free(node->sched_map);
+               node->sched_map = NULL;
+               node->sched = isl_mat_add_zero_rows(node->sched, 1);
+               if (!node->sched)
+                       return -1;
+               isl_int_set(node->sched->row[row + 1][0],
+                           node->sched->row[row][0]);
+               isl_int_set_si(node->sched->row[row][0], 0);
+               node->sched = isl_mat_normalize_row(node->sched, row);
+               if (!node->sched)
+                       return -1;
+               node->band[graph->n_total_row] = graph->n_band;
+       }
+
+       graph->n_total_row++;
+
+       return 0;
+}
+
 /* Construct a schedule row for each node such that as many dependences
  * as possible are carried and then continue with the next band.
  */
@@ -2159,6 +2220,9 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (update_schedule(graph, sol, 0, 0) < 0)
                return -1;
 
+       if (split_parallel(ctx, graph) < 0)
+               return -1;
+
        return compute_next_band(ctx, graph);
 }
 
@@ -2177,14 +2241,14 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
  * If we manage to complete the schedule, we finish off by topologically
  * sorting the statements based on the remaining dependences.
  *
- * If ctx->opt->schedule_outer_parallelism is set, then we force the
- * outermost dimension in the current band to be parallel.  If this
+ * If ctx->opt->schedule_outer_zero_distance is set, then we force the
+ * outermost dimension in the current band to be zero distance.  If this
  * turns out to be impossible, we fall back on the general scheme above
  * and try to carry as many dependences as possible.
  */
 static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
-       int force_parallel = 0;
+       int force_zero = 0;
 
        if (detect_sccs(graph) < 0)
                return -1;
@@ -2193,8 +2257,8 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (compute_maxvar(graph) < 0)
                return -1;
 
-       if (ctx->opt->schedule_outer_parallelism)
-               force_parallel = 1;
+       if (ctx->opt->schedule_outer_zero_distance)
+               force_zero = 1;
 
        while (graph->n_row < graph->maxvar) {
                isl_vec *sol;
@@ -2202,13 +2266,16 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
                graph->src_scc = -1;
                graph->dst_scc = -1;
 
-               if (setup_lp(ctx, graph, force_parallel) < 0)
+               if (setup_lp(ctx, graph, force_zero) < 0)
                        return -1;
                sol = solve_lp(graph);
                if (!sol)
                        return -1;
                if (sol->size == 0) {
                        isl_vec_free(sol);
+                       if (!ctx->opt->schedule_maximize_band_depth &&
+                           graph->n_total_row > graph->band_start)
+                               return compute_next_band(ctx, graph);
                        if (graph->src_scc >= 0)
                                return compute_split_schedule(ctx, graph);
                        if (graph->n_total_row > graph->band_start)
@@ -2217,7 +2284,7 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
                }
                if (update_schedule(graph, sol, 1, 1) < 0)
                        return -1;
-               force_parallel = 0;
+               force_zero = 0;
        }
 
        if (graph->n_total_row > graph->band_start)
@@ -2367,7 +2434,7 @@ void *isl_schedule_free(__isl_take isl_schedule *sched)
                isl_map_free(sched->node[i].sched);
                free(sched->node[i].band_end);
                free(sched->node[i].band_id);
-               free(sched->node[i].parallel);
+               free(sched->node[i].zero);
        }
        isl_dim_free(sched->dim);
        isl_band_list_free(sched->band_forest);
@@ -2396,47 +2463,6 @@ __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
        return umap;
 }
 
-int isl_schedule_n_band(__isl_keep isl_schedule *sched)
-{
-       return sched ? sched->n_band : 0;
-}
-
-/* Construct a mapping that maps each domain to the band in its schedule
- * with the specified band index.  Note that bands with the same index
- * but for different domains do not need to be related.
- */
-__isl_give isl_union_map *isl_schedule_get_band(__isl_keep isl_schedule *sched,
-       unsigned band)
-{
-       int i;
-       isl_union_map *umap;
-
-       if (!sched)
-               return NULL;
-
-       umap = isl_union_map_empty(isl_dim_copy(sched->dim));
-       for (i = 0; i < sched->n; ++i) {
-               int start, end;
-               isl_map *map;
-
-               if (band >= sched->node[i].n_band)
-                       continue;
-
-               start = band > 0 ? sched->node[i].band_end[band - 1] : 0;
-               end = sched->node[i].band_end[band];
-
-               map = isl_map_copy(sched->node[i].sched);
-
-               map = isl_map_project_out(map, isl_dim_out, end,
-                                         sched->n_total_row - end);
-               map = isl_map_project_out(map, isl_dim_out, 0, start);
-
-               umap = isl_union_map_add_map(umap, map);
-       }
-
-       return umap;
-}
-
 static __isl_give isl_band_list *construct_band_list(
        __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent,
        int band_nr, int *parent_active, int n_active);
@@ -2491,12 +2517,12 @@ static __isl_give isl_band *construct_band(__isl_keep isl_schedule *schedule,
                schedule->node[i].band_end[band_nr] : start;
        band->n = end - start;
 
-       band->parallel = isl_alloc_array(ctx, int, band->n);
-       if (!band->parallel)
+       band->zero = isl_alloc_array(ctx, int, band->n);
+       if (!band->zero)
                goto error;
 
        for (j = 0; j < band->n; ++j)
-               band->parallel[j] = schedule->node[i].parallel[start + j];
+               band->zero[j] = schedule->node[i].zero[start + j];
 
        band->map = isl_union_map_empty(isl_dim_copy(schedule->dim));
        for (i = 0; i < schedule->n; ++i) {
@@ -2643,7 +2669,7 @@ __isl_give isl_band_list *isl_schedule_get_band_forest(
                return NULL;
        if (!schedule->band_forest)
                schedule->band_forest = construct_forest(schedule);
-       return isl_band_list_copy(schedule->band_forest);
+       return isl_band_list_dup(schedule->band_forest);
 }
 
 static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,