Merge branch 'maint'
[platform/upstream/isl.git] / isl_schedule.c
index 925ffe3..9f261e0 100644 (file)
@@ -10,7 +10,7 @@
 
 #include <isl_ctx_private.h>
 #include <isl_map_private.h>
-#include <isl_dim_private.h>
+#include <isl_space_private.h>
 #include <isl/hash.h>
 #include <isl/constraint.h>
 #include <isl/schedule.h>
@@ -22,6 +22,9 @@
 #include <isl_hmap_map_basic_set.h>
 #include <isl_qsort.h>
 #include <isl_schedule_private.h>
+#include <isl_band_private.h>
+#include <isl_list_private.h>
+#include <isl_options_private.h>
 
 /*
  * The scheduling algorithm implemented in this file was inspired by
  *
  * scc is the index of SCC (or WCC) this node belongs to
  *
- * band contains the band index for each of the rows of the schedule
+ * band contains the band index for each of the rows of the schedule.
+ * band_id is used to differentiate between separate bands at the same
+ * level within the same parent band, i.e., bands that are separated
+ * by the parent band or bands that are independent of each other.
+ * zero contains a boolean for each of the rows of the schedule,
+ * indicating whether the corresponding scheduling dimension results
+ * in zero dependence distances within its band and with respect
+ * to the proximity edges.
  *
  * index, min_index and on_stack are used during the SCC detection
  * index represents the order in which nodes are visited.
@@ -58,7 +68,7 @@
  * on_stack indicates whether the node is currently on the stack.
  */
 struct isl_sched_node {
-       isl_dim *dim;
+       isl_space *dim;
        isl_mat *sched;
        isl_map *sched_map;
        int      rank;
@@ -70,6 +80,8 @@ struct isl_sched_node {
        int      scc;
 
        int     *band;
+       int     *band_id;
+       int     *zero;
 
        /* scc detection */
        int      index;
@@ -80,9 +92,9 @@ struct isl_sched_node {
 static int node_has_dim(const void *entry, const void *val)
 {
        struct isl_sched_node *node = (struct isl_sched_node *)entry;
-       isl_dim *dim = (isl_dim *)val;
+       isl_space *dim = (isl_space *)val;  /* val is the space being looked up */
 
-       return isl_dim_equal(node->dim, dim);
+       return isl_space_is_equal(node->dim, dim);  /* hash-table match test: compare the node's space with it */
 }
 
 /* An edge in the dependence graph.  An edge may be used to
@@ -206,7 +218,7 @@ static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
                struct isl_hash_table_entry *entry;
                uint32_t hash;
 
-               hash = isl_dim_get_hash(graph->node[i].dim);
+               hash = isl_space_get_hash(graph->node[i].dim);
                entry = isl_hash_table_find(ctx, graph->node_table, hash,
                                            &node_has_dim,
                                            graph->node[i].dim, 1);
@@ -222,12 +234,12 @@ static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
  * or NULL if there is no such node.
  */
 static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
-       struct isl_sched_graph *graph, __isl_keep isl_dim *dim)
+       struct isl_sched_graph *graph, __isl_keep isl_space *dim)
 {
        struct isl_hash_table_entry *entry;
        uint32_t hash;
 
-       hash = isl_dim_get_hash(dim);
+       hash = isl_space_get_hash(dim);
        entry = isl_hash_table_find(ctx, graph->node_table, hash,
                                    &node_has_dim, dim, 0);
 
@@ -280,7 +292,7 @@ static int graph_init_edge_table(isl_ctx *ctx, struct isl_sched_graph *graph)
 static int graph_has_edge(struct isl_sched_graph *graph,
        struct isl_sched_node *src, struct isl_sched_node *dst)
 {
-       isl_ctx *ctx = isl_dim_get_ctx(src->dim);
+       isl_ctx *ctx = isl_space_get_ctx(src->dim);
        struct isl_hash_table_entry *entry;
        uint32_t hash;
        struct isl_sched_edge temp = { .src = src, .dst = dst };
@@ -338,12 +350,15 @@ static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
        isl_hmap_map_basic_set_free(ctx, graph->inter_hmap);
 
        for (i = 0; i < graph->n; ++i) {
-               isl_dim_free(graph->node[i].dim);
+               isl_space_free(graph->node[i].dim);
                isl_mat_free(graph->node[i].sched);
                isl_map_free(graph->node[i].sched_map);
                isl_mat_free(graph->node[i].cmap);
-               if (graph->root)
+               if (graph->root) {
                        free(graph->node[i].band);
+                       free(graph->node[i].band_id);
+                       free(graph->node[i].zero);
+               }
        }
        free(graph->node);
        free(graph->sorted);
@@ -363,16 +378,16 @@ static int extract_node(__isl_take isl_set *set, void *user)
 {
        int nvar, nparam;
        isl_ctx *ctx;
-       isl_dim *dim;
+       isl_space *dim;
        isl_mat *sched;
        struct isl_sched_graph *graph = user;
-       int *band;
+       int *band, *band_id, *zero;
 
        ctx = isl_set_get_ctx(set);
-       dim = isl_set_get_dim(set);
+       dim = isl_set_get_space(set);
        isl_set_free(set);
-       nvar = isl_dim_size(dim, isl_dim_set);
-       nparam = isl_dim_size(dim, isl_dim_param);
+       nvar = isl_space_dim(dim, isl_dim_set);
+       nparam = isl_space_dim(dim, isl_dim_param);
        if (!ctx->opt->schedule_parametric)
                nparam = 0;
        sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
@@ -383,9 +398,13 @@ static int extract_node(__isl_take isl_set *set, void *user)
        graph->node[graph->n].sched_map = NULL;
        band = isl_alloc_array(ctx, int, graph->n_edge + nvar);
        graph->node[graph->n].band = band;
+       band_id = isl_calloc_array(ctx, int, graph->n_edge + nvar);
+       graph->node[graph->n].band_id = band_id;
+       zero = isl_calloc_array(ctx, int, graph->n_edge + nvar);
+       graph->node[graph->n].zero = zero;
        graph->n++;
 
-       if (!sched || !band)
+       if (!sched || !band || !band_id || !zero)
                return -1;
 
        return 0;
@@ -405,14 +424,14 @@ static int extract_edge(__isl_take isl_map *map, void *user)
        isl_ctx *ctx = isl_map_get_ctx(map);
        struct isl_sched_graph *graph = user;
        struct isl_sched_node *src, *dst;
-       isl_dim *dim;
+       isl_space *dim;
 
-       dim = isl_dim_domain(isl_map_get_dim(map));
+       dim = isl_space_domain(isl_map_get_space(map));
        src = graph_find_node(ctx, graph, dim);
-       isl_dim_free(dim);
-       dim = isl_dim_range(isl_map_get_dim(map));
+       isl_space_free(dim);
+       dim = isl_space_range(isl_map_get_space(map));
        dst = graph_find_node(ctx, graph, dim);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        if (!src || !dst) {
                isl_map_free(map);
@@ -656,31 +675,31 @@ static int add_intra_validity_constraints(struct isl_sched_graph *graph,
        unsigned total;
        isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *node = edge->src;
 
        coef = intra_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(node->cmap));
+                   isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap));
 
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, -1);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, 1);
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        return 0;
 }
@@ -709,7 +728,7 @@ static int add_inter_validity_constraints(struct isl_sched_graph *graph,
        unsigned total;
        isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *src = edge->src;
@@ -717,12 +736,12 @@ static int add_inter_validity_constraints(struct isl_sched_graph *graph,
 
        coef = inter_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(src->cmap));
+                   isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap));
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set) + src->nvar,
+                   isl_space_dim(dim, isl_dim_set) + src->nvar,
                    isl_mat_copy(dst->cmap));
 
        total = isl_basic_set_total_dim(graph->lp);
@@ -732,20 +751,20 @@ static int add_inter_validity_constraints(struct isl_sched_graph *graph,
        isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
        isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, -1);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, 1);
 
        isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
        isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
        isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, 1);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, -1);
 
        edge->start = graph->lp->n_ineq;
@@ -753,7 +772,7 @@ static int add_inter_validity_constraints(struct isl_sched_graph *graph,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
        edge->end = graph->lp->n_ineq;
 
        return 0;
@@ -795,35 +814,35 @@ static int add_intra_proximity_constraints(struct isl_sched_graph *graph,
        unsigned nparam;
        isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *node = edge->src;
 
        coef = intra_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(node->cmap));
+                   isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap));
 
-       nparam = isl_dim_size(node->dim, isl_dim_param);
+       nparam = isl_space_dim(node->dim, isl_dim_param);
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
        isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
        isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
        isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, s);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, -s);
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        return 0;
 }
@@ -870,7 +889,7 @@ static int add_inter_proximity_constraints(struct isl_sched_graph *graph,
        unsigned nparam;
        isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *src = edge->src;
@@ -878,15 +897,15 @@ static int add_inter_proximity_constraints(struct isl_sched_graph *graph,
 
        coef = inter_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(src->cmap));
+                   isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap));
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set) + src->nvar,
+                   isl_space_dim(dim, isl_dim_set) + src->nvar,
                    isl_mat_copy(dst->cmap));
 
-       nparam = isl_dim_size(src->dim, isl_dim_param);
+       nparam = isl_space_dim(src->dim, isl_dim_param);
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
 
@@ -898,27 +917,27 @@ static int add_inter_proximity_constraints(struct isl_sched_graph *graph,
        isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, s);
        isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, -s);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, s);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, -s);
 
        isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, s);
        isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, -s);
        isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, s);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, -s);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, s);
 
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        return 0;
 }
@@ -1022,7 +1041,36 @@ static int node_update_cmap(struct isl_sched_node *node)
 }
 
 /* Count the number of equality and inequality constraints
- * that will be added.  If once is set, then we count
+ * that will be added for the given map.
+ * If once is set, then we count
+ * each edge exactly once.  Otherwise, we count as follows
+ * validity            -> 1 (>= 0)
+ * validity+proximity  -> 2 (>= 0 and upper bound)
+ * proximity           -> 2 (lower and upper bound)
+ */
+static int count_map_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge, __isl_take isl_map *map,
+       int *n_eq, int *n_ineq, int once)
+{
+       isl_basic_set *coef;
+       int f = once ? 1 : edge->proximity ? 2 : 1;  /* 2 only for proximity edges when !once */
+
+       if (edge->src == edge->dst)  /* edge from a node to itself */
+               coef = intra_coefficients(graph, map);  /* map is handed on here (presumably consumed -- confirm) */
+       else
+               coef = inter_coefficients(graph, map);
+       if (!coef)
+               return -1;  /* failure: counters are left untouched */
+       *n_eq += f * coef->n_eq;  /* accumulates; the caller is expected to initialize the counters */
+       *n_ineq += f * coef->n_ineq;
+       isl_basic_set_free(coef);  /* coef was only needed for its constraint counts */
+
+       return 0;
+}
+
+/* Count the number of equality and inequality constraints
+ * that will be added to the main lp problem.
+ * If once is set, then we count
  * each edge exactly once.  Otherwise, we count as follows
  * validity            -> 1 (>= 0)
  * validity+proximity  -> 2 (>= 0 and upper bound)
@@ -1032,23 +1080,15 @@ static int count_constraints(struct isl_sched_graph *graph,
        int *n_eq, int *n_ineq, int once)
 {
        int i;
-       isl_basic_set *coef;
 
        *n_eq = *n_ineq = 0;
        for (i = 0; i < graph->n_edge; ++i) {
                struct isl_sched_edge *edge= &graph->edge[i];
                isl_map *map = isl_map_copy(edge->map);
-               int f = once ? 1 : edge->proximity ? 2 : 1;
 
-               if (edge->src == edge->dst)
-                       coef = intra_coefficients(graph, map);
-               else
-                       coef = inter_coefficients(graph, map);
-               if (!coef)
+               if (count_map_constraints(graph, edge, map,
+                                         n_eq, n_ineq, once) < 0)
                        return -1;
-               *n_eq += f * coef->n_eq;
-               *n_ineq += f * coef->n_ineq;
-               isl_basic_set_free(coef);
        }
 
        return 0;
@@ -1084,20 +1124,24 @@ static int count_constraints(struct isl_sched_graph *graph,
  *
  * The constraints are those from the edges plus two or three equalities
  * to express the sums.
+ *
+ * If force_zero is set, then we add equalities to ensure that
+ * the sum of the m_n coefficients and m_0 are both zero.
  */
-static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
+static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
+       int force_zero)
 {
        int i, j;
        int k;
        unsigned nparam;
        unsigned total;
-       isl_dim *dim;
+       isl_space *dim;
        int parametric;
        int param_pos;
        int n_eq, n_ineq;
 
        parametric = ctx->opt->schedule_parametric;
-       nparam = isl_dim_size(graph->node[0].dim, isl_dim_param);
+       nparam = isl_space_dim(graph->node[0].dim, isl_dim_param);
        param_pos = 4;
        total = param_pos + 2 * nparam;
        for (i = 0; i < graph->n; ++i) {
@@ -1111,19 +1155,28 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (count_constraints(graph, &n_eq, &n_ineq, 0) < 0)
                return -1;
 
-       dim = isl_dim_set_alloc(ctx, 0, total);
+       dim = isl_space_set_alloc(ctx, 0, total);
        isl_basic_set_free(graph->lp);
-       n_eq += 2 + parametric;
-       graph->lp = isl_basic_set_alloc_dim(dim, 0, n_eq, n_ineq);
+       n_eq += 2 + parametric + force_zero;
+       graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
 
        k = isl_basic_set_alloc_equality(graph->lp);
        if (k < 0)
                return -1;
        isl_seq_clr(graph->lp->eq[k], 1 +  total);
-       isl_int_set_si(graph->lp->eq[k][1], -1);
+       if (!force_zero)
+               isl_int_set_si(graph->lp->eq[k][1], -1);
        for (i = 0; i < 2 * nparam; ++i)
                isl_int_set_si(graph->lp->eq[k][1 + param_pos + i], 1);
 
+       if (force_zero) {
+               k = isl_basic_set_alloc_equality(graph->lp);
+               if (k < 0)
+                       return -1;
+               isl_seq_clr(graph->lp->eq[k], 1 +  total);
+               isl_int_set_si(graph->lp->eq[k][2], -1);
+       }
+
        if (parametric) {
                k = isl_basic_set_alloc_equality(graph->lp);
                if (k < 0)
@@ -1250,11 +1303,17 @@ static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
  * t_i_x such that c_i_x = Q t_i_x and Q is equal to node->cmap.
  * In this case, we then also need to perform this multiplication
  * to obtain the values of c_i_x.
+ *
+ * If check_zero is set, then the first two coordinates of sol are
+ * assumed to correspond to the dependence distance.  If these two
+ * coordinates are zero, then the corresponding scheduling dimension
+ * is marked as being zero distance.
  */
 static int update_schedule(struct isl_sched_graph *graph,
-       __isl_take isl_vec *sol, int use_cmap)
+       __isl_take isl_vec *sol, int use_cmap, int check_zero)
 {
        int i, j;
+       int zero = 0;
        isl_vec *csol = NULL;
 
        if (!sol)
@@ -1263,6 +1322,10 @@ static int update_schedule(struct isl_sched_graph *graph,
                isl_die(sol->ctx, isl_error_internal,
                        "no solution found", goto error);
 
+       if (check_zero)
+               zero = isl_int_is_zero(sol->el[1]) &&
+                          isl_int_is_zero(sol->el[2]);
+
        for (i = 0; i < graph->n; ++i) {
                struct isl_sched_node *node = &graph->node[i];
                int pos = node->start;
@@ -1299,6 +1362,7 @@ static int update_schedule(struct isl_sched_graph *graph,
                        node->sched = isl_mat_set_element(node->sched,
                                        row, 1 + node->nparam + j, csol->el[j]);
                node->band[graph->n_total_row] = graph->n_band;
+               node->zero[graph->n_total_row] = zero;
        }
        isl_vec_free(sol);
        isl_vec_free(csol);
@@ -1324,7 +1388,8 @@ error:
 static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
 {
        int i, j;
-       isl_dim *dim;
+       isl_space *dim;
+       isl_local_space *ls;
        isl_basic_map *bmap;
        isl_constraint *c;
        int nrow, ncol;
@@ -1335,14 +1400,15 @@ static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
 
        nrow = isl_mat_rows(node->sched);
        ncol = isl_mat_cols(node->sched) - 1;
-       dim = isl_dim_from_domain(isl_dim_copy(node->dim));
-       dim = isl_dim_add(dim, isl_dim_out, nrow);
-       bmap = isl_basic_map_universe(isl_dim_copy(dim));
+       dim = isl_space_from_domain(isl_space_copy(node->dim));
+       dim = isl_space_add_dims(dim, isl_dim_out, nrow);
+       bmap = isl_basic_map_universe(isl_space_copy(dim));
+       ls = isl_local_space_from_space(dim);
 
        isl_int_init(v);
 
        for (i = 0; i < nrow; ++i) {
-               c = isl_equality_alloc(isl_dim_copy(dim));
+               c = isl_equality_alloc(isl_local_space_copy(ls));
                isl_constraint_set_coefficient_si(c, isl_dim_out, i, -1);
                isl_mat_get_element(node->sched, i, 0, &v);
                isl_constraint_set_constant(c, v);
@@ -1360,7 +1426,7 @@ static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
 
        isl_int_clear(v);
 
-       isl_dim_free(dim);
+       isl_local_space_free(ls);
 
        node->sched_map = isl_map_from_basic_map(bmap);
        return isl_map_copy(node->sched_map);
@@ -1470,7 +1536,7 @@ static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph)
  * in graph and with parameters specified by dim.
  */
 static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
-       __isl_take isl_dim *dim)
+       __isl_take isl_space *dim)
 {
        int i;
        isl_ctx *ctx;
@@ -1479,27 +1545,34 @@ static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
        if (!dim)
                return NULL;
 
-       ctx = isl_dim_get_ctx(dim);
+       ctx = isl_space_get_ctx(dim);
        sched = isl_calloc(ctx, struct isl_schedule,
                           sizeof(struct isl_schedule) +
                           (graph->n - 1) * sizeof(struct isl_schedule_node));
        if (!sched)
                goto error;
 
+       sched->ref = 1;
        sched->n = graph->n;
        sched->n_band = graph->n_band;
        sched->n_total_row = graph->n_total_row;
 
        for (i = 0; i < sched->n; ++i) {
                int r, b;
-               int *band_end;
+               int *band_end, *band_id, *zero;
 
                band_end = isl_alloc_array(ctx, int, graph->n_band);
-               if (!band_end)
-                       goto error;
+               band_id = isl_alloc_array(ctx, int, graph->n_band);
+               zero = isl_alloc_array(ctx, int, graph->n_total_row);
                sched->node[i].sched = node_extract_schedule(&graph->node[i]);
                sched->node[i].band_end = band_end;
+               sched->node[i].band_id = band_id;
+               sched->node[i].zero = zero;
+               if (!band_end || !band_id || !zero)
+                       goto error;
 
+               for (r = 0; r < graph->n_total_row; ++r)
+                       zero[r] = graph->node[i].zero[r];
                for (r = b = 0; r < graph->n_total_row; ++r) {
                        if (graph->node[i].band[r] == b)
                                continue;
@@ -1510,13 +1583,15 @@ static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
                if (r == graph->n_total_row)
                        band_end[b++] = r;
                sched->node[i].n_band = b;
+               for (--b; b >= 0; --b)
+                       band_id[b] = graph->node[i].band_id[b];
        }
 
        sched->dim = dim;
 
        return sched;
 error:
-       isl_dim_free(dim);
+       isl_space_free(dim);
        isl_schedule_free(sched);
        return NULL;
 }
@@ -1533,13 +1608,15 @@ static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
        for (i = 0; i < src->n; ++i) {
                if (!node_pred(&src->node[i], data))
                        continue;
-               dst->node[dst->n].dim = isl_dim_copy(src->node[i].dim);
+               dst->node[dst->n].dim = isl_space_copy(src->node[i].dim);
                dst->node[dst->n].nvar = src->node[i].nvar;
                dst->node[dst->n].nparam = src->node[i].nparam;
                dst->node[dst->n].sched = isl_mat_copy(src->node[i].sched);
                dst->node[dst->n].sched_map =
                        isl_map_copy(src->node[i].sched_map);
                dst->node[dst->n].band = src->node[i].band;
+               dst->node[dst->n].band_id = src->node[i].band_id;
+               dst->node[dst->n].zero = src->node[i].zero;
                dst->n++;
        }
 
@@ -1752,6 +1829,11 @@ static int pad_schedule(struct isl_sched_graph *graph)
  * It would be possible to reuse them as the first rows in the next
  * band, but recomputing them may result in better rows as we are looking
  * at a smaller part of the dependence graph.
+ *
+ * The band_id of the second group is set to n, where n is the number
+ * of nodes in the first group.  This ensures that the band_ids over
+ * the two groups remain disjoint, even if either or both of the two
+ * groups contain independent components.
  */
 static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
@@ -1800,6 +1882,12 @@ static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
        graph->n_total_row++;
        next_band(graph);
 
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               if (node->scc > graph->src_scc)
+                       node->band_id[graph->n_band] = n;
+       }
+
        orig_total_row = graph->n_total_row;
        orig_band = graph->n_band;
        if (compute_sub_schedule(ctx, graph, n, e1,
@@ -1834,9 +1922,10 @@ static int compute_next_band(isl_ctx *ctx, struct isl_sched_graph *graph)
        return compute_schedule(ctx, graph);
 }
 
-/* Add constraints to graph->lp that force the dependence of edge i
- * to be respected and attempt to carry it, where edge i is one from
- * a node j to itself.
+/* Add constraints to graph->lp that force the dependence "map" (which
+ * is part of the dependence relation of "edge")
+ * to be respected and attempt to carry it, where the edge is one from
+ * a node j to itself.  "pos" is the sequence number of the given map.
  * That is, add constraints that enforce
  *
  *     (c_j_0 + c_j_n n + c_j_x y) - (c_j_0 + c_j_n n + c_j_x x)
@@ -1848,42 +1937,42 @@ static int compute_next_band(isl_ctx *ctx, struct isl_sched_graph *graph)
  * with each coefficient in c_j_x represented as a pair of non-negative
  * coefficients.
  */
-static int add_intra_constraints(struct isl_sched_graph *graph, int i)
+static int add_intra_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
 {
        unsigned total;
-       struct isl_sched_edge *edge= &graph->edge[i];
-       isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;  /* domain of the unwrapped space of coef; used only for offsets */
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *node = edge->src;
 
        coef = intra_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
-       isl_dim_map_range(dim_map, 3 + i, 0, 0, 0, 1, -1);
+       isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);  /* LP position 3 + pos is selected per map via pos */
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, -1);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, 1);
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);  /* NOTE(review): coef is dereferenced above with no NULL check */
 
        return 0;
 }
 
-/* Add constraints to graph->lp that force the dependence of edge i
- * to be respected and attempt to carry it, where edge i is one from
- * node j to node k.
+/* Add constraints to graph->lp that force the dependence "map" (which
+ * is part of the dependence relation of "edge")
+ * to be respected and attempt to carry it, where the edge is one from
+ * node j to node k.  "pos" is the sequence number of the given map.
  * That is, add constraints that enforce
  *
  *     (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
@@ -1895,13 +1984,12 @@ static int add_intra_constraints(struct isl_sched_graph *graph, int i)
  * with each coefficient (except e_i, c_k_0 and c_j_0)
  * represented as a pair of non-negative coefficients.
  */
-static int add_inter_constraints(struct isl_sched_graph *graph, int i)
+static int add_inter_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
 {
        unsigned total;
-       struct isl_sched_edge *edge= &graph->edge[i];
-       isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *src = edge->src;
@@ -1909,38 +1997,38 @@ static int add_inter_constraints(struct isl_sched_graph *graph, int i)
 
        coef = inter_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
 
-       isl_dim_map_range(dim_map, 3 + i, 0, 0, 0, 1, -1);
+       isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
 
        isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1);
        isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
        isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, -1);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, 1);
 
        isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
        isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
        isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, 1);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, -1);
 
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        return 0;
 }
@@ -1950,16 +2038,59 @@ static int add_inter_constraints(struct isl_sched_graph *graph, int i)
  */
 static int add_all_constraints(struct isl_sched_graph *graph)
 {
-       int i;
+       int i, j;
+       int pos;
 
+       pos = 0;
        for (i = 0; i < graph->n_edge; ++i) {
                struct isl_sched_edge *edge= &graph->edge[i];
-               if (edge->src == edge->dst &&
-                   add_intra_constraints(graph, i) < 0)
-                       return -1;
-               if (edge->src != edge->dst &&
-                   add_inter_constraints(graph, i) < 0)
-                       return -1;
+               for (j = 0; j < edge->map->n; ++j) {
+                       isl_basic_map *bmap;
+                       isl_map *map;
+
+                       bmap = isl_basic_map_copy(edge->map->p[j]);
+                       map = isl_map_from_basic_map(bmap);
+
+                       if (edge->src == edge->dst &&
+                           add_intra_constraints(graph, edge, map, pos) < 0)
+                               return -1;
+                       if (edge->src != edge->dst &&
+                           add_inter_constraints(graph, edge, map, pos) < 0)
+                               return -1;
+                       ++pos;
+               }
+       }
+
+       return 0;
+}
+
+/* Count the number of equality and inequality constraints
+ * that will be added to the carry_lp problem.
+ * If once is set, then we count
+ * each edge exactly once.  Otherwise, we count as follows
+ * validity            -> 1 (>= 0)
+ * validity+proximity  -> 2 (>= 0 and upper bound)
+ * proximity           -> 2 (lower and upper bound)
+ */
+static int count_all_constraints(struct isl_sched_graph *graph,
+       int *n_eq, int *n_ineq, int once)
+{
+       int i, j;
+
+       *n_eq = *n_ineq = 0;
+       for (i = 0; i < graph->n_edge; ++i) {
+               struct isl_sched_edge *edge= &graph->edge[i];
+               for (j = 0; j < edge->map->n; ++j) {
+                       isl_basic_map *bmap;
+                       isl_map *map;
+
+                       bmap = isl_basic_map_copy(edge->map->p[j]);
+                       map = isl_map_from_basic_map(bmap);
+
+                       if (count_map_constraints(graph, edge, map,
+                                                 n_eq, n_ineq, once) < 0)
+                                   return -1;
+               }
        }
 
        return 0;
@@ -1971,6 +2102,11 @@ static int add_all_constraints(struct isl_sched_graph *graph)
  * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
  * of all e_i's.  Dependence with e_i = 0 in the solution are simply
  * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
+ * Note that if the dependence relation is a union of basic maps,
+ * then we have to consider each basic map individually as it may only
+ * be possible to carry the dependences expressed by some of those
+ * basic maps and not all of them.
+ * Below, we consider each of those basic maps as a separate "edge".
  *
  * All variables of the LP are non-negative.  The actual coefficients
  * may be negative, so each coefficient is represented as the difference
@@ -1996,34 +2132,39 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
        int i, j;
        int k;
-       isl_dim *dim;
+       isl_space *dim;
        unsigned total;
        int n_eq, n_ineq;
+       int n_edge;
 
-       total = 3 + graph->n_edge;
+       n_edge = 0;
+       for (i = 0; i < graph->n_edge; ++i)
+               n_edge += graph->edge[i].map->n;
+
+       total = 3 + n_edge;
        for (i = 0; i < graph->n; ++i) {
                struct isl_sched_node *node = &graph->node[graph->sorted[i]];
                node->start = total;
                total += 1 + 2 * (node->nparam + node->nvar);
        }
 
-       if (count_constraints(graph, &n_eq, &n_ineq, 1) < 0)
+       if (count_all_constraints(graph, &n_eq, &n_ineq, 1) < 0)
                return -1;
 
-       dim = isl_dim_set_alloc(ctx, 0, total);
+       dim = isl_space_set_alloc(ctx, 0, total);
        isl_basic_set_free(graph->lp);
        n_eq += 3;
-       n_ineq += graph->n_edge;
-       graph->lp = isl_basic_set_alloc_dim(dim, 0, n_eq, n_ineq);
+       n_ineq += n_edge;
+       graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
        graph->lp = isl_basic_set_set_rational(graph->lp);
 
        k = isl_basic_set_alloc_equality(graph->lp);
        if (k < 0)
                return -1;
        isl_seq_clr(graph->lp->eq[k], 1 +  total);
-       isl_int_set_si(graph->lp->eq[k][0], -graph->n_edge);
+       isl_int_set_si(graph->lp->eq[k][0], -n_edge);
        isl_int_set_si(graph->lp->eq[k][1], 1);
-       for (i = 0; i < graph->n_edge; ++i)
+       for (i = 0; i < n_edge; ++i)
                isl_int_set_si(graph->lp->eq[k][4 + i], 1);
 
        k = isl_basic_set_alloc_equality(graph->lp);
@@ -2051,7 +2192,7 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
                        isl_int_set_si(graph->lp->eq[k][pos + j], 1);
        }
 
-       for (i = 0; i < graph->n_edge; ++i) {
+       for (i = 0; i < n_edge; ++i) {
                k = isl_basic_set_alloc_inequality(graph->lp);
                if (k < 0)
                        return -1;
@@ -2066,14 +2207,79 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
        return 0;
 }
 
+/* If the schedule_split_parallel option is set and if the linear
+ * parts of the scheduling rows for all nodes in the graph are the same,
+ * then split off the constant term from the linear part.
+ * The constant term is then placed in a separate band and
+ * the linear part is simplified.
+ */
+static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i;
+       int equal = 1;
+       int row, cols;
+       struct isl_sched_node *node0;
+
+       if (!ctx->opt->schedule_split_parallel)
+               return 0;
+       if (graph->n <= 1)
+               return 0;
+
+       node0 = &graph->node[0];
+       row = isl_mat_rows(node0->sched) - 1;
+       cols = isl_mat_cols(node0->sched);
+       for (i = 1; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+
+               if (!isl_seq_eq(node0->sched->row[row] + 1,
+                               node->sched->row[row] + 1, cols - 1))
+                       return 0;
+               if (equal &&
+                   isl_int_ne(node0->sched->row[row][0],
+                              node->sched->row[row][0]))
+                       equal = 0;
+       }
+       if (equal)
+               return 0;
+
+       next_band(graph);
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+
+               isl_map_free(node->sched_map);
+               node->sched_map = NULL;
+               node->sched = isl_mat_add_zero_rows(node->sched, 1);
+               if (!node->sched)
+                       return -1;
+               isl_int_set(node->sched->row[row + 1][0],
+                           node->sched->row[row][0]);
+               isl_int_set_si(node->sched->row[row][0], 0);
+               node->sched = isl_mat_normalize_row(node->sched, row);
+               if (!node->sched)
+                       return -1;
+               node->band[graph->n_total_row] = graph->n_band;
+       }
+
+       graph->n_total_row++;
+
+       return 0;
+}
+
 /* Construct a schedule row for each node such that as many dependences
  * as possible are carried and then continue with the next band.
  */
 static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
+       int i;
+       int n_edge;
        isl_vec *sol;
        isl_basic_set *lp;
 
+       n_edge = 0;
+       for (i = 0; i < graph->n_edge; ++i)
+               n_edge += graph->edge[i].map->n;
+
        if (setup_carry_lp(ctx, graph) < 0)
                return -1;
 
@@ -2088,13 +2294,16 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
                        "error in schedule construction", return -1);
        }
 
-       if (isl_int_cmp_si(sol->el[1], graph->n_edge) >= 0) {
+       if (isl_int_cmp_si(sol->el[1], n_edge) >= 0) {
                isl_vec_free(sol);
                isl_die(ctx, isl_error_unknown,
                        "unable to carry dependences", return -1);
        }
 
-       if (update_schedule(graph, sol, 0) < 0)
+       if (update_schedule(graph, sol, 0, 0) < 0)
+               return -1;
+
+       if (split_parallel(ctx, graph) < 0)
                return -1;
 
        return compute_next_band(ctx, graph);
@@ -2114,9 +2323,16 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
  *
  * If we manage to complete the schedule, we finish off by topologically
  * sorting the statements based on the remaining dependences.
+ *
+ * If ctx->opt->schedule_outer_zero_distance is set, then we force the
+ * outermost dimension in the current band to be zero distance.  If this
+ * turns out to be impossible, we fall back on the general scheme above
+ * and try to carry as many dependences as possible.
  */
 static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
+       int force_zero = 0;
+
        if (detect_sccs(graph) < 0)
                return -1;
        sort_sccs(graph);
@@ -2124,27 +2340,34 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (compute_maxvar(graph) < 0)
                return -1;
 
+       if (ctx->opt->schedule_outer_zero_distance)
+               force_zero = 1;
+
        while (graph->n_row < graph->maxvar) {
                isl_vec *sol;
 
                graph->src_scc = -1;
                graph->dst_scc = -1;
 
-               if (setup_lp(ctx, graph) < 0)
+               if (setup_lp(ctx, graph, force_zero) < 0)
                        return -1;
                sol = solve_lp(graph);
                if (!sol)
                        return -1;
                if (sol->size == 0) {
                        isl_vec_free(sol);
+                       if (!ctx->opt->schedule_maximize_band_depth &&
+                           graph->n_total_row > graph->band_start)
+                               return compute_next_band(ctx, graph);
                        if (graph->src_scc >= 0)
                                return compute_split_schedule(ctx, graph);
                        if (graph->n_total_row > graph->band_start)
                                return compute_next_band(ctx, graph);
                        return carry_dependences(ctx, graph);
                }
-               if (update_schedule(graph, sol, 1) < 0)
+               if (update_schedule(graph, sol, 1, 1) < 0)
                        return -1;
+               force_zero = 0;
        }
 
        if (graph->n_total_row > graph->band_start)
@@ -2154,6 +2377,10 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
 
 /* Compute a schedule for each component (identified by node->scc)
  * of the dependence graph separately and then combine the results.
+ *
+ * The band_id is adjusted such that each component has a separate id.
+ * Note that the band_id may have already been set to a value different
+ * from zero by compute_split_schedule.
  */
 static int compute_component_schedule(isl_ctx *ctx,
        struct isl_sched_graph *graph)
@@ -2167,6 +2394,8 @@ static int compute_component_schedule(isl_ctx *ctx,
        orig_total_row = graph->n_total_row;
        n_band = 0;
        orig_band = graph->n_band;
+       for (i = 0; i < graph->n; ++i)
+               graph->node[i].band_id[graph->n_band] += graph->node[i].scc;
        for (wcc = 0; wcc < graph->scc; ++wcc) {
                n = 0;
                for (i = 0; i < graph->n; ++i)
@@ -2197,7 +2426,7 @@ static int compute_component_schedule(isl_ctx *ctx,
 
 /* Compute a schedule for the given dependence graph.
  * We first check if the graph is connected (through validity dependences)
- * and if so compute a schedule for each component separately.
+ * and, if not, compute a schedule for each component separately.
  */
 static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
@@ -2220,16 +2449,16 @@ __isl_give isl_schedule *isl_union_set_compute_schedule(
        __isl_take isl_union_map *proximity)
 {
        isl_ctx *ctx = isl_union_set_get_ctx(domain);
-       isl_dim *dim;
+       isl_space *dim;
        struct isl_sched_graph graph = { 0 };
        isl_schedule *sched;
 
        domain = isl_union_set_align_params(domain,
-                                           isl_union_map_get_dim(validity));
+                                           isl_union_map_get_space(validity));
        domain = isl_union_set_align_params(domain,
-                                           isl_union_map_get_dim(proximity));
-       dim = isl_union_set_get_dim(domain);
-       validity = isl_union_map_align_params(validity, isl_dim_copy(dim));
+                                           isl_union_map_get_space(proximity));
+       dim = isl_union_set_get_space(domain);
+       validity = isl_union_map_align_params(validity, isl_space_copy(dim));
        proximity = isl_union_map_align_params(proximity, dim);
 
        if (!domain)
@@ -2259,7 +2488,7 @@ __isl_give isl_schedule *isl_union_set_compute_schedule(
                goto error;
 
 empty:
-       sched = extract_schedule(&graph, isl_union_set_get_dim(domain));
+       sched = extract_schedule(&graph, isl_union_set_get_space(domain));
 
        graph_free(ctx, &graph);
        isl_union_set_free(domain);
@@ -2280,15 +2509,27 @@ void *isl_schedule_free(__isl_take isl_schedule *sched)
        int i;
        if (!sched)
                return NULL;
+
+       if (--sched->ref > 0)
+               return NULL;
+
        for (i = 0; i < sched->n; ++i) {
                isl_map_free(sched->node[i].sched);
                free(sched->node[i].band_end);
+               free(sched->node[i].band_id);
+               free(sched->node[i].zero);
        }
-       isl_dim_free(sched->dim);
+       isl_space_free(sched->dim);
+       isl_band_list_free(sched->band_forest);
        free(sched);
        return NULL;
 }
 
+isl_ctx *isl_schedule_get_ctx(__isl_keep isl_schedule *schedule)
+{
+       return schedule ? isl_space_get_ctx(schedule->dim) : NULL;
+}
+
 __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
 {
        int i;
@@ -2297,7 +2538,7 @@ __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
        if (!sched)
                return NULL;
 
-       umap = isl_union_map_empty(isl_dim_copy(sched->dim));
+       umap = isl_union_map_empty(isl_space_copy(sched->dim));
        for (i = 0; i < sched->n; ++i)
                umap = isl_union_map_add_map(umap,
                                            isl_map_copy(sched->node[i].sched));
@@ -2305,43 +2546,280 @@ __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
        return umap;
 }
 
-int isl_schedule_n_band(__isl_keep isl_schedule *sched)
+static __isl_give isl_band_list *construct_band_list(
+       __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent,
+       int band_nr, int *parent_active, int n_active);
+
+/* Construct an isl_band structure for the band in the given schedule
+ * with sequence number band_nr for the n_active nodes marked by active.
+ * If the nodes don't have a band with the given sequence number,
+ * then a band without members is created.
+ *
+ * Because of the way the schedule is constructed, we know that
+ * the position of the band inside the schedule of a node is the same
+ * for all active nodes.
+ */
+static __isl_give isl_band *construct_band(__isl_keep isl_schedule *schedule,
+       __isl_keep isl_band *parent,
+       int band_nr, int *active, int n_active)
 {
-       return sched ? sched->n_band : 0;
+       int i, j;
+       isl_ctx *ctx = isl_schedule_get_ctx(schedule);
+       isl_band *band;
+       unsigned start, end;
+
+       band = isl_calloc_type(ctx, isl_band);
+       if (!band)
+               return NULL;
+
+       band->ref = 1;
+       band->schedule = schedule;
+       band->parent = parent;
+
+       for (i = 0; i < schedule->n; ++i)
+               if (active[i] && schedule->node[i].n_band > band_nr + 1)
+                       break;
+
+       if (i < schedule->n) {
+               band->children = construct_band_list(schedule, band,
+                                               band_nr + 1, active, n_active);
+               if (!band->children)
+                       goto error;
+       }
+
+       for (i = 0; i < schedule->n; ++i)
+               if (active[i])
+                       break;
+
+       if (i >= schedule->n)
+               isl_die(ctx, isl_error_internal,
+                       "band without active statements", goto error);
+
+       start = band_nr ? schedule->node[i].band_end[band_nr - 1] : 0;
+       end = band_nr < schedule->node[i].n_band ?
+               schedule->node[i].band_end[band_nr] : start;
+       band->n = end - start;
+
+       band->zero = isl_alloc_array(ctx, int, band->n);
+       if (!band->zero)
+               goto error;
+
+       for (j = 0; j < band->n; ++j)
+               band->zero[j] = schedule->node[i].zero[start + j];
+
+       band->map = isl_union_map_empty(isl_space_copy(schedule->dim));
+       for (i = 0; i < schedule->n; ++i) {
+               isl_map *map;
+               unsigned n_out;
+
+               if (!active[i])
+                       continue;
+
+               map = isl_map_copy(schedule->node[i].sched);
+               n_out = isl_map_dim(map, isl_dim_out);
+               map = isl_map_project_out(map, isl_dim_out, end, n_out - end);
+               map = isl_map_project_out(map, isl_dim_out, 0, start);
+               band->map = isl_union_map_union(band->map,
+                                               isl_union_map_from_map(map));
+       }
+       if (!band->map)
+               goto error;
+
+       return band;
+error:
+       isl_band_free(band);
+       return NULL;
 }
 
-/* Construct a mapping that maps each domain to the band in its schedule
- * with the specified band index.  Note that bands with the same index
- * but for different domains do not need to be related.
+/* Construct a list of bands that start at the same position (with
+ * sequence number band_nr) in the schedules of the nodes that
+ * were active in the parent band.
+ *
+ * A separate isl_band structure is created for each band_id
+ * and for each node that does not have a band with sequence
+ * number band_nr.  In the latter case, a band without members
+ * is created.
+ * This ensures that if a band has any children, then each node
+ * that was active in the band is active in exactly one of the children.
  */
-__isl_give isl_union_map *isl_schedule_get_band(__isl_keep isl_schedule *sched,
-       unsigned band)
+static __isl_give isl_band_list *construct_band_list(
+       __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent,
+       int band_nr, int *parent_active, int n_active)
 {
-       int i;
-       isl_union_map *umap;
+       int i, j;
+       isl_ctx *ctx = isl_schedule_get_ctx(schedule);
+       int *active;
+       int n_band;
+       isl_band_list *list;
 
-       if (!sched)
+       n_band = 0;
+       for (i = 0; i < n_active; ++i) {
+               for (j = 0; j < schedule->n; ++j) {
+                       if (!parent_active[j])
+                               continue;
+                       if (schedule->node[j].n_band <= band_nr)
+                               continue;
+                       if (schedule->node[j].band_id[band_nr] == i) {
+                               n_band++;
+                               break;
+                       }
+               }
+       }
+       for (j = 0; j < schedule->n; ++j)
+               if (schedule->node[j].n_band <= band_nr)
+                       n_band++;
+
+       if (n_band == 1) {
+               isl_band *band;
+               list = isl_band_list_alloc(ctx, n_band);
+               band = construct_band(schedule, parent, band_nr,
+                                       parent_active, n_active);
+               return isl_band_list_add(list, band);
+       }
+
+       active = isl_alloc_array(ctx, int, schedule->n);
+       if (!active)
                return NULL;
 
-       umap = isl_union_map_empty(isl_dim_copy(sched->dim));
-       for (i = 0; i < sched->n; ++i) {
-               int start, end;
-               isl_map *map;
+       list = isl_band_list_alloc(ctx, n_band);
+
+       for (i = 0; i < n_active; ++i) {
+               int n = 0;
+               isl_band *band;
+
+               for (j = 0; j < schedule->n; ++j) {
+                       active[j] = parent_active[j] &&
+                                       schedule->node[j].n_band > band_nr &&
+                                       schedule->node[j].band_id[band_nr] == i;
+                       if (active[j])
+                               n++;
+               }
+               if (n == 0)
+                       continue;
+
+               band = construct_band(schedule, parent, band_nr, active, n);
 
-               if (band >= sched->node[i].n_band)
+               list = isl_band_list_add(list, band);
+       }
+       for (i = 0; i < schedule->n; ++i) {
+               isl_band *band;
+               if (!parent_active[i])
+                       continue;
+               if (schedule->node[i].n_band > band_nr)
                        continue;
+               for (j = 0; j < schedule->n; ++j)
+                       active[j] = j == i;
+               band = construct_band(schedule, parent, band_nr, active, 1);
+               list = isl_band_list_add(list, band);
+       }
 
-               start = band > 0 ? sched->node[i].band_end[band - 1] : 0;
-               end = sched->node[i].band_end[band];
+       free(active);
 
-               map = isl_map_copy(sched->node[i].sched);
+       return list;
+}
 
-               map = isl_map_project_out(map, isl_dim_out, end,
-                                         sched->n_total_row - end);
-               map = isl_map_project_out(map, isl_dim_out, 0, start);
+/* Construct a band forest representation of the schedule and
+ * return the list of roots.
+ */
+static __isl_give isl_band_list *construct_forest(
+       __isl_keep isl_schedule *schedule)
+{
+       int i;
+       isl_ctx *ctx = isl_schedule_get_ctx(schedule);
+       isl_band_list *forest;
+       int *active;
+
+       active = isl_alloc_array(ctx, int, schedule->n);
+       if (!active)
+               return NULL;
+
+       for (i = 0; i < schedule->n; ++i)
+               active[i] = 1;
+
+       forest = construct_band_list(schedule, NULL, 0, active, schedule->n);
 
-               umap = isl_union_map_add_map(umap, map);
+       free(active);
+
+       return forest;
+}
+
+/* Return the roots of a band forest representation of the schedule.
+ */
+__isl_give isl_band_list *isl_schedule_get_band_forest(
+       __isl_keep isl_schedule *schedule)
+{
+       if (!schedule)
+               return NULL;
+       if (!schedule->band_forest)
+               schedule->band_forest = construct_forest(schedule);
+       return isl_band_list_dup(schedule->band_forest);
+}
+
+static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
+       __isl_keep isl_band_list *list);
+
+static __isl_give isl_printer *print_band(__isl_take isl_printer *p,
+       __isl_keep isl_band *band)
+{
+       isl_band_list *children;
+
+       p = isl_printer_start_line(p);
+       p = isl_printer_print_union_map(p, band->map);
+       p = isl_printer_end_line(p);
+
+       if (!isl_band_has_children(band))
+               return p;
+
+       children = isl_band_get_children(band);
+
+       p = isl_printer_indent(p, 4);
+       p = print_band_list(p, children);
+       p = isl_printer_indent(p, -4);
+
+       isl_band_list_free(children);
+
+       return p;
+}
+
+static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
+       __isl_keep isl_band_list *list)
+{
+       int i, n;
+
+       n = isl_band_list_n_band(list);
+       for (i = 0; i < n; ++i) {
+               isl_band *band;
+               band = isl_band_list_get_band(list, i);
+               p = print_band(p, band);
+               isl_band_free(band);
        }
 
-       return umap;
+       return p;
+}
+
+__isl_give isl_printer *isl_printer_print_schedule(__isl_take isl_printer *p,
+       __isl_keep isl_schedule *schedule)
+{
+       isl_band_list *forest;
+
+       forest = isl_schedule_get_band_forest(schedule);
+
+       p = print_band_list(p, forest);
+
+       isl_band_list_free(forest);
+
+       return p;
+}
+
+void isl_schedule_dump(__isl_keep isl_schedule *schedule)
+{
+       isl_printer *printer;
+
+       if (!schedule)
+               return;
+
+       printer = isl_printer_to_file(isl_schedule_get_ctx(schedule), stderr);
+       printer = isl_printer_print_schedule(printer, schedule);
+
+       isl_printer_free(printer);
 }