isl_schedule.c: extract out graph_edge_table_add
[platform/upstream/isl.git] / isl_schedule.c
index 0b1ff56..76a4ce3 100644 (file)
@@ -10,7 +10,7 @@
 
 #include <isl_ctx_private.h>
 #include <isl_map_private.h>
-#include <isl_dim_private.h>
+#include <isl_space_private.h>
 #include <isl/hash.h>
 #include <isl/constraint.h>
 #include <isl/schedule.h>
@@ -23,6 +23,8 @@
 #include <isl_qsort.h>
 #include <isl_schedule_private.h>
 #include <isl_band_private.h>
+#include <isl_list_private.h>
+#include <isl_options_private.h>
 
 /*
  * The scheduling algorithm implemented in this file was inspired by
  * band_id is used to differentiate between separate bands at the same
  * level within the same parent band, i.e., bands that are separated
  * by the parent band or bands that are independent of each other.
- * parallel contains a boolean for each of the rows of the schedule,
- * indicating whether the corresponding scheduling dimension is parallel
- * within its band and with respect to the proximity edges.
+ * zero contains a boolean for each of the rows of the schedule,
+ * indicating whether the corresponding scheduling dimension results
+ * in zero dependence distances within its band and with respect
+ * to the proximity edges.
  *
  * index, min_index and on_stack are used during the SCC detection
  * index represents the order in which nodes are visited.
@@ -65,7 +68,7 @@
  * on_stack indicates whether the node is currently on the stack.
  */
 struct isl_sched_node {
-       isl_dim *dim;
+       isl_space *dim;
        isl_mat *sched;
        isl_map *sched_map;
        int      rank;
@@ -78,7 +81,7 @@ struct isl_sched_node {
 
        int     *band;
        int     *band_id;
-       int     *parallel;
+       int     *zero;
 
        /* scc detection */
        int      index;
@@ -89,9 +92,9 @@ struct isl_sched_node {
 static int node_has_dim(const void *entry, const void *val)
 {
        struct isl_sched_node *node = (struct isl_sched_node *)entry;
-       isl_dim *dim = (isl_dim *)val;
+       isl_space *dim = (isl_space *)val;
 
-       return isl_dim_equal(node->dim, dim);
+       return isl_space_is_equal(node->dim, dim);
 }
 
 /* An edge in the dependence graph.  An edge may be used to
@@ -215,7 +218,7 @@ static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
                struct isl_hash_table_entry *entry;
                uint32_t hash;
 
-               hash = isl_dim_get_hash(graph->node[i].dim);
+               hash = isl_space_get_hash(graph->node[i].dim);
                entry = isl_hash_table_find(ctx, graph->node_table, hash,
                                            &node_has_dim,
                                            graph->node[i].dim, 1);
@@ -231,12 +234,12 @@ static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
  * or NULL if there is no such node.
  */
 static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
-       struct isl_sched_graph *graph, __isl_keep isl_dim *dim)
+       struct isl_sched_graph *graph, __isl_keep isl_space *dim)
 {
        struct isl_hash_table_entry *entry;
        uint32_t hash;
 
-       hash = isl_dim_get_hash(dim);
+       hash = isl_space_get_hash(dim);
        entry = isl_hash_table_find(ctx, graph->node_table, hash,
                                    &node_has_dim, dim, 0);
 
@@ -251,6 +254,29 @@ static int edge_has_src_and_dst(const void *entry, const void *val)
        return edge->src == temp->src && edge->dst == temp->dst;
 }
 
+/* Add the given edge to graph->edge_table if it is a validity edge.
+ */
+static int graph_edge_table_add(isl_ctx *ctx, struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge)
+{
+       struct isl_hash_table_entry *entry;
+       uint32_t hash;
+
+       if (!edge->validity)
+               return 0;
+
+       hash = isl_hash_init();
+       hash = isl_hash_builtin(hash, edge->src);
+       hash = isl_hash_builtin(hash, edge->dst);
+       entry = isl_hash_table_find(ctx, graph->edge_table, hash,
+                                   &edge_has_src_and_dst, edge, 1);
+       if (!entry)
+               return -1;
+       entry->data = edge;
+
+       return 0;
+}
+
 /* Initialize edge_table based on the list of edges.
  * Only edges with validity set are added to the table.
  */
@@ -262,23 +288,9 @@ static int graph_init_edge_table(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (!graph->edge_table)
                return -1;
 
-       for (i = 0; i < graph->n_edge; ++i) {
-               struct isl_hash_table_entry *entry;
-               uint32_t hash;
-
-               if (!graph->edge[i].validity)
-                       continue;
-
-               hash = isl_hash_init();
-               hash = isl_hash_builtin(hash, graph->edge[i].src);
-               hash = isl_hash_builtin(hash, graph->edge[i].dst);
-               entry = isl_hash_table_find(ctx, graph->edge_table, hash,
-                                           &edge_has_src_and_dst,
-                                           &graph->edge[i], 1);
-               if (!entry)
+       for (i = 0; i < graph->n_edge; ++i)
+               if (graph_edge_table_add(ctx, graph, &graph->edge[i]) < 0)
                        return -1;
-               entry->data = &graph->edge[i];
-       }
 
        return 0;
 }
@@ -289,7 +301,7 @@ static int graph_init_edge_table(isl_ctx *ctx, struct isl_sched_graph *graph)
 static int graph_has_edge(struct isl_sched_graph *graph,
        struct isl_sched_node *src, struct isl_sched_node *dst)
 {
-       isl_ctx *ctx = isl_dim_get_ctx(src->dim);
+       isl_ctx *ctx = isl_space_get_ctx(src->dim);
        struct isl_hash_table_entry *entry;
        uint32_t hash;
        struct isl_sched_edge temp = { .src = src, .dst = dst };
@@ -347,14 +359,14 @@ static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
        isl_hmap_map_basic_set_free(ctx, graph->inter_hmap);
 
        for (i = 0; i < graph->n; ++i) {
-               isl_dim_free(graph->node[i].dim);
+               isl_space_free(graph->node[i].dim);
                isl_mat_free(graph->node[i].sched);
                isl_map_free(graph->node[i].sched_map);
                isl_mat_free(graph->node[i].cmap);
                if (graph->root) {
                        free(graph->node[i].band);
                        free(graph->node[i].band_id);
-                       free(graph->node[i].parallel);
+                       free(graph->node[i].zero);
                }
        }
        free(graph->node);
@@ -375,16 +387,16 @@ static int extract_node(__isl_take isl_set *set, void *user)
 {
        int nvar, nparam;
        isl_ctx *ctx;
-       isl_dim *dim;
+       isl_space *dim;
        isl_mat *sched;
        struct isl_sched_graph *graph = user;
-       int *band, *band_id, *parallel;
+       int *band, *band_id, *zero;
 
        ctx = isl_set_get_ctx(set);
-       dim = isl_set_get_dim(set);
+       dim = isl_set_get_space(set);
        isl_set_free(set);
-       nvar = isl_dim_size(dim, isl_dim_set);
-       nparam = isl_dim_size(dim, isl_dim_param);
+       nvar = isl_space_dim(dim, isl_dim_set);
+       nparam = isl_space_dim(dim, isl_dim_param);
        if (!ctx->opt->schedule_parametric)
                nparam = 0;
        sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
@@ -397,11 +409,11 @@ static int extract_node(__isl_take isl_set *set, void *user)
        graph->node[graph->n].band = band;
        band_id = isl_calloc_array(ctx, int, graph->n_edge + nvar);
        graph->node[graph->n].band_id = band_id;
-       parallel = isl_calloc_array(ctx, int, graph->n_edge + nvar);
-       graph->node[graph->n].parallel = parallel;
+       zero = isl_calloc_array(ctx, int, graph->n_edge + nvar);
+       graph->node[graph->n].zero = zero;
        graph->n++;
 
-       if (!sched || !band || !band_id || !parallel)
+       if (!sched || !band || !band_id || !zero)
                return -1;
 
        return 0;
@@ -421,14 +433,14 @@ static int extract_edge(__isl_take isl_map *map, void *user)
        isl_ctx *ctx = isl_map_get_ctx(map);
        struct isl_sched_graph *graph = user;
        struct isl_sched_node *src, *dst;
-       isl_dim *dim;
+       isl_space *dim;
 
-       dim = isl_dim_domain(isl_map_get_dim(map));
+       dim = isl_space_domain(isl_map_get_space(map));
        src = graph_find_node(ctx, graph, dim);
-       isl_dim_free(dim);
-       dim = isl_dim_range(isl_map_get_dim(map));
+       isl_space_free(dim);
+       dim = isl_space_range(isl_map_get_space(map));
        dst = graph_find_node(ctx, graph, dim);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        if (!src || !dst) {
                isl_map_free(map);
@@ -672,31 +684,31 @@ static int add_intra_validity_constraints(struct isl_sched_graph *graph,
        unsigned total;
        isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *node = edge->src;
 
        coef = intra_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(node->cmap));
+                   isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap));
 
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, -1);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, 1);
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        return 0;
 }
@@ -725,7 +737,7 @@ static int add_inter_validity_constraints(struct isl_sched_graph *graph,
        unsigned total;
        isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *src = edge->src;
@@ -733,12 +745,12 @@ static int add_inter_validity_constraints(struct isl_sched_graph *graph,
 
        coef = inter_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(src->cmap));
+                   isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap));
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set) + src->nvar,
+                   isl_space_dim(dim, isl_dim_set) + src->nvar,
                    isl_mat_copy(dst->cmap));
 
        total = isl_basic_set_total_dim(graph->lp);
@@ -748,20 +760,20 @@ static int add_inter_validity_constraints(struct isl_sched_graph *graph,
        isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
        isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, -1);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, 1);
 
        isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
        isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
        isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, 1);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, -1);
 
        edge->start = graph->lp->n_ineq;
@@ -769,7 +781,7 @@ static int add_inter_validity_constraints(struct isl_sched_graph *graph,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
        edge->end = graph->lp->n_ineq;
 
        return 0;
@@ -811,35 +823,35 @@ static int add_intra_proximity_constraints(struct isl_sched_graph *graph,
        unsigned nparam;
        isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *node = edge->src;
 
        coef = intra_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(node->cmap));
+                   isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap));
 
-       nparam = isl_dim_size(node->dim, isl_dim_param);
+       nparam = isl_space_dim(node->dim, isl_dim_param);
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
        isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
        isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
        isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, s);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, -s);
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        return 0;
 }
@@ -886,7 +898,7 @@ static int add_inter_proximity_constraints(struct isl_sched_graph *graph,
        unsigned nparam;
        isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *src = edge->src;
@@ -894,15 +906,15 @@ static int add_inter_proximity_constraints(struct isl_sched_graph *graph,
 
        coef = inter_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(src->cmap));
+                   isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap));
        coef = isl_basic_set_transform_dims(coef, isl_dim_set,
-                   isl_dim_size(dim, isl_dim_set) + src->nvar,
+                   isl_space_dim(dim, isl_dim_set) + src->nvar,
                    isl_mat_copy(dst->cmap));
 
-       nparam = isl_dim_size(src->dim, isl_dim_param);
+       nparam = isl_space_dim(src->dim, isl_dim_param);
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
 
@@ -914,27 +926,27 @@ static int add_inter_proximity_constraints(struct isl_sched_graph *graph,
        isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, s);
        isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, -s);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, s);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, -s);
 
        isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, s);
        isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, -s);
        isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, s);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, -s);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, s);
 
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        return 0;
 }
@@ -1038,33 +1050,96 @@ static int node_update_cmap(struct isl_sched_node *node)
 }
 
 /* Count the number of equality and inequality constraints
- * that will be added.  If once is set, then we count
+ * that will be added for the given map.
+ * If carry is set, then we are counting the number of (validity)
+ * constraints that will be added in setup_carry_lp and we count
  * each edge exactly once.  Otherwise, we count as follows
  * validity            -> 1 (>= 0)
  * validity+proximity  -> 2 (>= 0 and upper bound)
  * proximity           -> 2 (lower and upper bound)
  */
+static int count_map_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge, __isl_take isl_map *map,
+       int *n_eq, int *n_ineq, int carry)
+{
+       isl_basic_set *coef;
+       int f = carry ? 1 : edge->proximity ? 2 : 1;
+
+       if (carry && !edge->validity) {
+               isl_map_free(map);
+               return 0;
+       }
+
+       if (edge->src == edge->dst)
+               coef = intra_coefficients(graph, map);
+       else
+               coef = inter_coefficients(graph, map);
+       if (!coef)
+               return -1;
+       *n_eq += f * coef->n_eq;
+       *n_ineq += f * coef->n_ineq;
+       isl_basic_set_free(coef);
+
+       return 0;
+}
+
+/* Count the number of equality and inequality constraints
+ * that will be added to the main lp problem.
+ * We count as follows
+ * validity            -> 1 (>= 0)
+ * validity+proximity  -> 2 (>= 0 and upper bound)
+ * proximity           -> 2 (lower and upper bound)
+ */
 static int count_constraints(struct isl_sched_graph *graph,
-       int *n_eq, int *n_ineq, int once)
+       int *n_eq, int *n_ineq)
 {
        int i;
-       isl_basic_set *coef;
 
        *n_eq = *n_ineq = 0;
        for (i = 0; i < graph->n_edge; ++i) {
                struct isl_sched_edge *edge= &graph->edge[i];
                isl_map *map = isl_map_copy(edge->map);
-               int f = once ? 1 : edge->proximity ? 2 : 1;
 
-               if (edge->src == edge->dst)
-                       coef = intra_coefficients(graph, map);
-               else
-                       coef = inter_coefficients(graph, map);
-               if (!coef)
+               if (count_map_constraints(graph, edge, map,
+                                         n_eq, n_ineq, 0) < 0)
                        return -1;
-               *n_eq += f * coef->n_eq;
-               *n_ineq += f * coef->n_ineq;
-               isl_basic_set_free(coef);
+       }
+
+       return 0;
+}
+
+/* Add constraints that bound the values of the variable and parameter
+ * coefficients of the schedule.
+ *
+ * The maximal value of the coefficients is defined by the option
+ * 'schedule_max_coefficient'.
+ */
+static int add_bound_coefficient_constraints(isl_ctx *ctx,
+       struct isl_sched_graph *graph)
+{
+       int i, j, k;
+       int max_coefficient;
+       int total;
+
+       max_coefficient = ctx->opt->schedule_max_coefficient;
+
+       if (max_coefficient == -1)
+               return 0;
+
+       total = isl_basic_set_total_dim(graph->lp);
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               for (j = 0; j < 2 * node->nparam + 2 * node->nvar; ++j) {
+                       int dim;
+                       k = isl_basic_set_alloc_inequality(graph->lp);
+                       if (k < 0)
+                               return -1;
+                       dim = 1 + node->start + 1 + j;
+                       isl_seq_clr(graph->lp->ineq[k], 1 +  total);
+                       isl_int_set_si(graph->lp->ineq[k][dim], -1);
+                       isl_int_set_si(graph->lp->ineq[k][0], max_coefficient);
+               }
        }
 
        return 0;
@@ -1100,20 +1175,29 @@ static int count_constraints(struct isl_sched_graph *graph,
  *
  * The constraints are those from the edges plus two or three equalities
  * to express the sums.
+ *
+ * If force_zero is set, then we add equalities to ensure that
+ * the sum of the m_n coefficients and m_0 are both zero.
  */
-static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
+static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
+       int force_zero)
 {
        int i, j;
        int k;
        unsigned nparam;
        unsigned total;
-       isl_dim *dim;
+       isl_space *dim;
        int parametric;
        int param_pos;
        int n_eq, n_ineq;
+       int max_constant_term;
+       int max_coefficient;
+
+       max_constant_term = ctx->opt->schedule_max_constant_term;
+       max_coefficient = ctx->opt->schedule_max_coefficient;
 
        parametric = ctx->opt->schedule_parametric;
-       nparam = isl_dim_size(graph->node[0].dim, isl_dim_param);
+       nparam = isl_space_dim(graph->node[0].dim, isl_dim_param);
        param_pos = 4;
        total = param_pos + 2 * nparam;
        for (i = 0; i < graph->n; ++i) {
@@ -1124,22 +1208,38 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
                total += 1 + 2 * (node->nparam + node->nvar);
        }
 
-       if (count_constraints(graph, &n_eq, &n_ineq, 0) < 0)
+       if (count_constraints(graph, &n_eq, &n_ineq) < 0)
                return -1;
 
-       dim = isl_dim_set_alloc(ctx, 0, total);
+       dim = isl_space_set_alloc(ctx, 0, total);
        isl_basic_set_free(graph->lp);
-       n_eq += 2 + parametric;
-       graph->lp = isl_basic_set_alloc_dim(dim, 0, n_eq, n_ineq);
+       n_eq += 2 + parametric + force_zero;
+       if (max_constant_term != -1)
+               n_ineq += graph->n;
+       if (max_coefficient != -1)
+               for (i = 0; i < graph->n; ++i)
+                       n_ineq += 2 * graph->node[i].nparam +
+                                 2 * graph->node[i].nvar;
+
+       graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
 
        k = isl_basic_set_alloc_equality(graph->lp);
        if (k < 0)
                return -1;
        isl_seq_clr(graph->lp->eq[k], 1 +  total);
-       isl_int_set_si(graph->lp->eq[k][1], -1);
+       if (!force_zero)
+               isl_int_set_si(graph->lp->eq[k][1], -1);
        for (i = 0; i < 2 * nparam; ++i)
                isl_int_set_si(graph->lp->eq[k][1 + param_pos + i], 1);
 
+       if (force_zero) {
+               k = isl_basic_set_alloc_equality(graph->lp);
+               if (k < 0)
+                       return -1;
+               isl_seq_clr(graph->lp->eq[k], 1 +  total);
+               isl_int_set_si(graph->lp->eq[k][2], -1);
+       }
+
        if (parametric) {
                k = isl_basic_set_alloc_equality(graph->lp);
                if (k < 0)
@@ -1167,6 +1267,19 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
                        isl_int_set_si(graph->lp->eq[k][pos + j], 1);
        }
 
+       if (max_constant_term != -1)
+               for (i = 0; i < graph->n; ++i) {
+                       struct isl_sched_node *node = &graph->node[i];
+                       k = isl_basic_set_alloc_inequality(graph->lp);
+                       if (k < 0)
+                               return -1;
+                       isl_seq_clr(graph->lp->ineq[k], 1 +  total);
+                       isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1);
+                       isl_int_set_si(graph->lp->ineq[k][0], max_constant_term);
+               }
+
+       if (add_bound_coefficient_constraints(ctx, graph) < 0)
+               return -1;
        if (add_all_validity_constraints(graph) < 0)
                return -1;
        if (add_all_proximity_constraints(graph) < 0)
@@ -1267,16 +1380,16 @@ static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
  * In this case, we then also need to perform this multiplication
  * to obtain the values of c_i_x.
  *
- * If check_parallel is set, then the first two coordinates of sol are
+ * If check_zero is set, then the first two coordinates of sol are
  * assumed to correspond to the dependence distance.  If these two
  * coordinates are zero, then the corresponding scheduling dimension
- * is marked as being parallel.
+ * is marked as being zero distance.
  */
 static int update_schedule(struct isl_sched_graph *graph,
-       __isl_take isl_vec *sol, int use_cmap, int check_parallel)
+       __isl_take isl_vec *sol, int use_cmap, int check_zero)
 {
        int i, j;
-       int parallel = 0;
+       int zero = 0;
        isl_vec *csol = NULL;
 
        if (!sol)
@@ -1285,8 +1398,8 @@ static int update_schedule(struct isl_sched_graph *graph,
                isl_die(sol->ctx, isl_error_internal,
                        "no solution found", goto error);
 
-       if (check_parallel)
-               parallel = isl_int_is_zero(sol->el[1]) &&
+       if (check_zero)
+               zero = isl_int_is_zero(sol->el[1]) &&
                           isl_int_is_zero(sol->el[2]);
 
        for (i = 0; i < graph->n; ++i) {
@@ -1325,7 +1438,7 @@ static int update_schedule(struct isl_sched_graph *graph,
                        node->sched = isl_mat_set_element(node->sched,
                                        row, 1 + node->nparam + j, csol->el[j]);
                node->band[graph->n_total_row] = graph->n_band;
-               node->parallel[graph->n_total_row] = parallel;
+               node->zero[graph->n_total_row] = zero;
        }
        isl_vec_free(sol);
        isl_vec_free(csol);
@@ -1351,7 +1464,8 @@ error:
 static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
 {
        int i, j;
-       isl_dim *dim;
+       isl_space *dim;
+       isl_local_space *ls;
        isl_basic_map *bmap;
        isl_constraint *c;
        int nrow, ncol;
@@ -1362,14 +1476,15 @@ static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
 
        nrow = isl_mat_rows(node->sched);
        ncol = isl_mat_cols(node->sched) - 1;
-       dim = isl_dim_from_domain(isl_dim_copy(node->dim));
-       dim = isl_dim_add(dim, isl_dim_out, nrow);
-       bmap = isl_basic_map_universe(isl_dim_copy(dim));
+       dim = isl_space_from_domain(isl_space_copy(node->dim));
+       dim = isl_space_add_dims(dim, isl_dim_out, nrow);
+       bmap = isl_basic_map_universe(isl_space_copy(dim));
+       ls = isl_local_space_from_space(dim);
 
        isl_int_init(v);
 
        for (i = 0; i < nrow; ++i) {
-               c = isl_equality_alloc(isl_dim_copy(dim));
+               c = isl_equality_alloc(isl_local_space_copy(ls));
                isl_constraint_set_coefficient_si(c, isl_dim_out, i, -1);
                isl_mat_get_element(node->sched, i, 0, &v);
                isl_constraint_set_constant(c, v);
@@ -1387,7 +1502,7 @@ static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
 
        isl_int_clear(v);
 
-       isl_dim_free(dim);
+       isl_local_space_free(ls);
 
        node->sched_map = isl_map_from_basic_map(bmap);
        return isl_map_copy(node->sched_map);
@@ -1450,7 +1565,7 @@ static void next_band(struct isl_sched_graph *graph)
        graph->n_band++;
 }
 
-/* Topologically sort statements mapped to same schedule iteration
+/* Topologically sort statements mapped to the same schedule iteration
  * and add a row to the schedule corresponding to this order.
  */
 static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph)
@@ -1497,7 +1612,7 @@ static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph)
  * in graph and with parameters specified by dim.
  */
 static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
-       __isl_take isl_dim *dim)
+       __isl_take isl_space *dim)
 {
        int i;
        isl_ctx *ctx;
@@ -1506,7 +1621,7 @@ static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
        if (!dim)
                return NULL;
 
-       ctx = isl_dim_get_ctx(dim);
+       ctx = isl_space_get_ctx(dim);
        sched = isl_calloc(ctx, struct isl_schedule,
                           sizeof(struct isl_schedule) +
                           (graph->n - 1) * sizeof(struct isl_schedule_node));
@@ -1520,20 +1635,20 @@ static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
 
        for (i = 0; i < sched->n; ++i) {
                int r, b;
-               int *band_end, *band_id, *parallel;
+               int *band_end, *band_id, *zero;
 
                band_end = isl_alloc_array(ctx, int, graph->n_band);
                band_id = isl_alloc_array(ctx, int, graph->n_band);
-               parallel = isl_alloc_array(ctx, int, graph->n_total_row);
+               zero = isl_alloc_array(ctx, int, graph->n_total_row);
                sched->node[i].sched = node_extract_schedule(&graph->node[i]);
                sched->node[i].band_end = band_end;
                sched->node[i].band_id = band_id;
-               sched->node[i].parallel = parallel;
-               if (!band_end || !band_id || !parallel)
+               sched->node[i].zero = zero;
+               if (!band_end || !band_id || !zero)
                        goto error;
 
                for (r = 0; r < graph->n_total_row; ++r)
-                       parallel[r] = graph->node[i].parallel[r];
+                       zero[r] = graph->node[i].zero[r];
                for (r = b = 0; r < graph->n_total_row; ++r) {
                        if (graph->node[i].band[r] == b)
                                continue;
@@ -1552,7 +1667,7 @@ static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
 
        return sched;
 error:
-       isl_dim_free(dim);
+       isl_space_free(dim);
        isl_schedule_free(sched);
        return NULL;
 }
@@ -1569,7 +1684,7 @@ static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
        for (i = 0; i < src->n; ++i) {
                if (!node_pred(&src->node[i], data))
                        continue;
-               dst->node[dst->n].dim = isl_dim_copy(src->node[i].dim);
+               dst->node[dst->n].dim = isl_space_copy(src->node[i].dim);
                dst->node[dst->n].nvar = src->node[i].nvar;
                dst->node[dst->n].nparam = src->node[i].nparam;
                dst->node[dst->n].sched = isl_mat_copy(src->node[i].sched);
@@ -1577,7 +1692,7 @@ static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
                        isl_map_copy(src->node[i].sched_map);
                dst->node[dst->n].band = src->node[i].band;
                dst->node[dst->n].band_id = src->node[i].band_id;
-               dst->node[dst->n].parallel = src->node[i].parallel;
+               dst->node[dst->n].zero = src->node[i].zero;
                dst->n++;
        }
 
@@ -1586,6 +1701,9 @@ static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
 
 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
  * to the dst dependence graph.
+ * If the source or destination node of the edge is not in the destination
+ * graph, then it must be a backward proximity edge and it should simply
+ * be ignored.
  */
 static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
        struct isl_sched_graph *src,
@@ -1597,6 +1715,7 @@ static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
        for (i = 0; i < src->n_edge; ++i) {
                struct isl_sched_edge *edge = &src->edge[i];
                isl_map *map;
+               struct isl_sched_node *dst_src, *dst_dst;
 
                if (!edge_pred(edge, data))
                        continue;
@@ -1604,12 +1723,19 @@ static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
                if (isl_map_plain_is_empty(edge->map))
                        continue;
 
+               dst_src = graph_find_node(ctx, dst, edge->src->dim);
+               dst_dst = graph_find_node(ctx, dst, edge->dst->dim);
+               if (!dst_src || !dst_dst) {
+                       if (edge->validity)
+                               isl_die(ctx, isl_error_internal,
+                                       "backward validity edge", return -1);
+                       continue;
+               }
+
                map = isl_map_copy(edge->map);
 
-               dst->edge[dst->n_edge].src =
-                       graph_find_node(ctx, dst, edge->src->dim);
-               dst->edge[dst->n_edge].dst =
-                       graph_find_node(ctx, dst, edge->dst->dim);
+               dst->edge[dst->n_edge].src = dst_src;
+               dst->edge[dst->n_edge].dst = dst_dst;
                dst->edge[dst->n_edge].map = map;
                dst->edge[dst->n_edge].validity = edge->validity;
                dst->edge[dst->n_edge].proximity = edge->proximity;
@@ -1738,9 +1864,9 @@ static int node_scc_at_least(struct isl_sched_node *node, int scc)
        return node->scc >= scc;
 }
 
-static int edge_src_scc_exactly(struct isl_sched_edge *edge, int scc)
+static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
 {
-       return edge->src->scc == scc;
+       return edge->src->scc == scc && edge->dst->scc == scc;
 }
 
 static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
@@ -1790,6 +1916,9 @@ static int pad_schedule(struct isl_sched_graph *graph)
  * It would be possible to reuse them as the first rows in the next
  * band, but recomputing them may result in better rows as we are looking
  * at a smaller part of the dependence graph.
+ * compute_split_schedule is only called when no zero-distance schedule row
+ * could be found on the entire graph, so we wark the splitting row as
+ * non zero-distance.
  *
  * The band_id of the second group is set to n, where n is the number
  * of nodes in the first group.  This ensures that the band_ids over
@@ -1830,6 +1959,7 @@ static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
                        node->sched = isl_mat_set_element_si(node->sched,
                                                             row, j, 0);
                node->band[graph->n_total_row] = graph->n_band;
+               node->zero[graph->n_total_row] = 0;
        }
 
        e1 = e2 = 0;
@@ -1883,9 +2013,10 @@ static int compute_next_band(isl_ctx *ctx, struct isl_sched_graph *graph)
        return compute_schedule(ctx, graph);
 }
 
-/* Add constraints to graph->lp that force the dependence of edge i
- * to be respected and attempt to carry it, where edge i is one from
- * a node j to itself.
+/* Add constraints to graph->lp that force the dependence "map" (which
+ * is part of the dependence relation of "edge")
+ * to be respected and attempt to carry it, where the edge is one from
+ * a node j to itself.  "pos" is the sequence number of the given map.
  * That is, add constraints that enforce
  *
  *     (c_j_0 + c_j_n n + c_j_x y) - (c_j_0 + c_j_n n + c_j_x x)
@@ -1897,42 +2028,42 @@ static int compute_next_band(isl_ctx *ctx, struct isl_sched_graph *graph)
  * with each coefficient in c_j_x represented as a pair of non-negative
  * coefficients.
  */
-static int add_intra_constraints(struct isl_sched_graph *graph, int i)
+static int add_intra_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
 {
        unsigned total;
-       struct isl_sched_edge *edge= &graph->edge[i];
-       isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *node = edge->src;
 
        coef = intra_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
-       isl_dim_map_range(dim_map, 3 + i, 0, 0, 0, 1, -1);
+       isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, -1);
        isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          node->nvar, 1);
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        return 0;
 }
 
-/* Add constraints to graph->lp that force the dependence of edge i
- * to be respected and attempt to carry it, where edge i is one from
- * node j to node k.
+/* Add constraints to graph->lp that force the dependence "map" (which
+ * is part of the dependence relation of "edge")
+ * to be respected and attempt to carry it, where the edge is one from
+ * node j to node k.  "pos" is the sequence number of the given map.
  * That is, add constraints that enforce
  *
  *     (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
@@ -1944,13 +2075,12 @@ static int add_intra_constraints(struct isl_sched_graph *graph, int i)
  * with each coefficient (except e_i, c_k_0 and c_j_0)
  * represented as a pair of non-negative coefficients.
  */
-static int add_inter_constraints(struct isl_sched_graph *graph, int i)
+static int add_inter_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
 {
        unsigned total;
-       struct isl_sched_edge *edge= &graph->edge[i];
-       isl_map *map = isl_map_copy(edge->map);
        isl_ctx *ctx = isl_map_get_ctx(map);
-       isl_dim *dim;
+       isl_space *dim;
        isl_dim_map *dim_map;
        isl_basic_set *coef;
        struct isl_sched_node *src = edge->src;
@@ -1958,57 +2088,100 @@ static int add_inter_constraints(struct isl_sched_graph *graph, int i)
 
        coef = inter_coefficients(graph, map);
 
-       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+       dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
 
        total = isl_basic_set_total_dim(graph->lp);
        dim_map = isl_dim_map_alloc(ctx, total);
 
-       isl_dim_map_range(dim_map, 3 + i, 0, 0, 0, 1, -1);
+       isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
 
        isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1);
        isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
        isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, -1);
        isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
                          dst->nvar, 1);
 
        isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
        isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
        isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, 1);
        isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
-                         isl_dim_size(dim, isl_dim_set), 1,
+                         isl_space_dim(dim, isl_dim_set), 1,
                          src->nvar, -1);
 
        graph->lp = isl_basic_set_extend_constraints(graph->lp,
                        coef->n_eq, coef->n_ineq);
        graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
                                                           coef, dim_map);
-       isl_dim_free(dim);
+       isl_space_free(dim);
 
        return 0;
 }
 
-/* Add constraints to graph->lp that force all dependence
- * to be respected and attempt to carry it.
+/* Add constraints to graph->lp that force all validity dependences
+ * to be respected and attempt to carry them.
  */
 static int add_all_constraints(struct isl_sched_graph *graph)
 {
-       int i;
+       int i, j;
+       int pos;
 
+       pos = 0;
        for (i = 0; i < graph->n_edge; ++i) {
                struct isl_sched_edge *edge= &graph->edge[i];
-               if (edge->src == edge->dst &&
-                   add_intra_constraints(graph, i) < 0)
-                       return -1;
-               if (edge->src != edge->dst &&
-                   add_inter_constraints(graph, i) < 0)
-                       return -1;
+
+               if (!edge->validity)
+                       continue;
+
+               for (j = 0; j < edge->map->n; ++j) {
+                       isl_basic_map *bmap;
+                       isl_map *map;
+
+                       bmap = isl_basic_map_copy(edge->map->p[j]);
+                       map = isl_map_from_basic_map(bmap);
+
+                       if (edge->src == edge->dst &&
+                           add_intra_constraints(graph, edge, map, pos) < 0)
+                               return -1;
+                       if (edge->src != edge->dst &&
+                           add_inter_constraints(graph, edge, map, pos) < 0)
+                               return -1;
+                       ++pos;
+               }
+       }
+
+       return 0;
+}
+
+/* Count the number of equality and inequality constraints
+ * that will be added to the carry_lp problem.
+ * We count each edge exactly once.
+ */
+static int count_all_constraints(struct isl_sched_graph *graph,
+       int *n_eq, int *n_ineq)
+{
+       int i, j;
+
+       *n_eq = *n_ineq = 0;
+       for (i = 0; i < graph->n_edge; ++i) {
+               struct isl_sched_edge *edge= &graph->edge[i];
+               for (j = 0; j < edge->map->n; ++j) {
+                       isl_basic_map *bmap;
+                       isl_map *map;
+
+                       bmap = isl_basic_map_copy(edge->map->p[j]);
+                       map = isl_map_from_basic_map(bmap);
+
+                       if (count_map_constraints(graph, edge, map,
+                                                 n_eq, n_ineq, 1) < 0)
+                                   return -1;
+               }
        }
 
        return 0;
@@ -2020,6 +2193,11 @@ static int add_all_constraints(struct isl_sched_graph *graph)
  * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
  * of all e_i's.  Dependence with e_i = 0 in the solution are simply
  * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
+ * Note that if the dependence relation is a union of basic maps,
+ * then we have to consider each basic map individually as it may only
+ * be possible to carry the dependences expressed by some of those
+ * basic maps and not all off them.
+ * Below, we consider each of those basic maps as a separate "edge".
  *
  * All variables of the LP are non-negative.  The actual coefficients
  * may be negative, so each coefficient is represented as the difference
@@ -2038,41 +2216,46 @@ static int add_all_constraints(struct isl_sched_graph *graph)
  *             - positive and negative parts of c_i_n (if parametric)
  *             - positive and negative parts of c_i_x
  *
- * The constraints are those from the edges plus three equalities
+ * The constraints are those from the (validity) edges plus three equalities
  * to express the sums and n_edge inequalities to express e_i <= 1.
  */
 static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
        int i, j;
        int k;
-       isl_dim *dim;
+       isl_space *dim;
        unsigned total;
        int n_eq, n_ineq;
+       int n_edge;
+
+       n_edge = 0;
+       for (i = 0; i < graph->n_edge; ++i)
+               n_edge += graph->edge[i].map->n;
 
-       total = 3 + graph->n_edge;
+       total = 3 + n_edge;
        for (i = 0; i < graph->n; ++i) {
                struct isl_sched_node *node = &graph->node[graph->sorted[i]];
                node->start = total;
                total += 1 + 2 * (node->nparam + node->nvar);
        }
 
-       if (count_constraints(graph, &n_eq, &n_ineq, 1) < 0)
+       if (count_all_constraints(graph, &n_eq, &n_ineq) < 0)
                return -1;
 
-       dim = isl_dim_set_alloc(ctx, 0, total);
+       dim = isl_space_set_alloc(ctx, 0, total);
        isl_basic_set_free(graph->lp);
        n_eq += 3;
-       n_ineq += graph->n_edge;
-       graph->lp = isl_basic_set_alloc_dim(dim, 0, n_eq, n_ineq);
+       n_ineq += n_edge;
+       graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
        graph->lp = isl_basic_set_set_rational(graph->lp);
 
        k = isl_basic_set_alloc_equality(graph->lp);
        if (k < 0)
                return -1;
        isl_seq_clr(graph->lp->eq[k], 1 +  total);
-       isl_int_set_si(graph->lp->eq[k][0], -graph->n_edge);
+       isl_int_set_si(graph->lp->eq[k][0], -n_edge);
        isl_int_set_si(graph->lp->eq[k][1], 1);
-       for (i = 0; i < graph->n_edge; ++i)
+       for (i = 0; i < n_edge; ++i)
                isl_int_set_si(graph->lp->eq[k][4 + i], 1);
 
        k = isl_basic_set_alloc_equality(graph->lp);
@@ -2100,7 +2283,7 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
                        isl_int_set_si(graph->lp->eq[k][pos + j], 1);
        }
 
-       for (i = 0; i < graph->n_edge; ++i) {
+       for (i = 0; i < n_edge; ++i) {
                k = isl_basic_set_alloc_inequality(graph->lp);
                if (k < 0)
                        return -1;
@@ -2115,14 +2298,91 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
        return 0;
 }
 
+/* If the schedule_split_scaled option is set and if the linear
+ * parts of the scheduling rows for all nodes in the graphs have
+ * non-trivial common divisor, then split off the constant term
+ * from the linear part.
+ * The constant term is then placed in a separate band and
+ * the linear part is reduced.
+ */
+static int split_scaled(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i;
+       int row;
+       isl_int gcd, gcd_i;
+
+       if (!ctx->opt->schedule_split_scaled)
+               return 0;
+       if (graph->n <= 1)
+               return 0;
+
+       isl_int_init(gcd);
+       isl_int_init(gcd_i);
+
+       isl_int_set_si(gcd, 0);
+
+       row = isl_mat_rows(graph->node[0].sched) - 1;
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int cols = isl_mat_cols(node->sched);
+
+               isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
+               isl_int_gcd(gcd, gcd, gcd_i);
+       }
+
+       isl_int_clear(gcd_i);
+
+       if (isl_int_cmp_si(gcd, 1) <= 0) {
+               isl_int_clear(gcd);
+               return 0;
+       }
+
+       next_band(graph);
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+
+               isl_map_free(node->sched_map);
+               node->sched_map = NULL;
+               node->sched = isl_mat_add_zero_rows(node->sched, 1);
+               if (!node->sched)
+                       goto error;
+               isl_int_fdiv_r(node->sched->row[row + 1][0],
+                              node->sched->row[row][0], gcd);
+               isl_int_fdiv_q(node->sched->row[row][0],
+                              node->sched->row[row][0], gcd);
+               isl_int_mul(node->sched->row[row][0],
+                           node->sched->row[row][0], gcd);
+               node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
+               if (!node->sched)
+                       goto error;
+               node->band[graph->n_total_row] = graph->n_band;
+       }
+
+       graph->n_total_row++;
+
+       isl_int_clear(gcd);
+       return 0;
+error:
+       isl_int_clear(gcd);
+       return -1;
+}
+
 /* Construct a schedule row for each node such that as many dependences
  * as possible are carried and then continue with the next band.
  */
 static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
+       int i;
+       int n_edge;
        isl_vec *sol;
        isl_basic_set *lp;
 
+       n_edge = 0;
+       for (i = 0; i < graph->n_edge; ++i)
+               n_edge += graph->edge[i].map->n;
+
        if (setup_carry_lp(ctx, graph) < 0)
                return -1;
 
@@ -2137,7 +2397,7 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
                        "error in schedule construction", return -1);
        }
 
-       if (isl_int_cmp_si(sol->el[1], graph->n_edge) >= 0) {
+       if (isl_int_cmp_si(sol->el[1], n_edge) >= 0) {
                isl_vec_free(sol);
                isl_die(ctx, isl_error_unknown,
                        "unable to carry dependences", return -1);
@@ -2146,9 +2406,53 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (update_schedule(graph, sol, 0, 0) < 0)
                return -1;
 
+       if (split_scaled(ctx, graph) < 0)
+               return -1;
+
        return compute_next_band(ctx, graph);
 }
 
+/* Are there any validity edges in the graph?
+ */
+static int has_validity_edges(struct isl_sched_graph *graph)
+{
+       int i;
+
+       for (i = 0; i < graph->n_edge; ++i)
+               if (graph->edge[i].validity)
+                       return 1;
+
+       return 0;
+}
+
+/* Should we apply a Feautrier step?
+ * That is, did the user request the Feautrier algorithm and are
+ * there any validity dependences (left)?
+ */
+static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
+               return 0;
+
+       return has_validity_edges(graph);
+}
+
+/* Compute a schedule for a connected dependence graph using Feautrier's
+ * multi-dimensional scheduling algorithm.
+ * The original algorithm is described in [1].
+ * The main idea is to minimize the number of scheduling dimensions, by
+ * trying to satisfy as many dependences as possible per scheduling dimension.
+ *
+ * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
+ *     Problem, Part II: Multi-Dimensional Time.
+ *     In Intl. Journal of Parallel Programming, 1992.
+ */
+static int compute_schedule_wcc_feautrier(isl_ctx *ctx,
+       struct isl_sched_graph *graph)
+{
+       return carry_dependences(ctx, graph);
+}
+
 /* Compute a schedule for a connected dependence graph.
  * We try to find a sequence of as many schedule rows as possible that result
  * in non-negative dependence distances (independent of the previous rows
@@ -2161,11 +2465,22 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
  * - try to carry as many dependences as possible and continue with the next
  *     band
  *
+ * If Feautrier's algorithm is selected, we first recursively try to satisfy
+ * as many validity dependences as possible. When all validity dependences
+ * are satisfied we extend the schedule to a full-dimensional schedule.
+ *
  * If we manage to complete the schedule, we finish off by topologically
  * sorting the statements based on the remaining dependences.
+ *
+ * If ctx->opt->schedule_outer_zero_distance is set, then we force the
+ * outermost dimension in the current band to be zero distance.  If this
+ * turns out to be impossible, we fall back on the general scheme above
+ * and try to carry as many dependences as possible.
  */
 static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
+       int force_zero = 0;
+
        if (detect_sccs(graph) < 0)
                return -1;
        sort_sccs(graph);
@@ -2173,19 +2488,28 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
        if (compute_maxvar(graph) < 0)
                return -1;
 
+       if (need_feautrier_step(ctx, graph))
+               return compute_schedule_wcc_feautrier(ctx, graph);
+
+       if (ctx->opt->schedule_outer_zero_distance)
+               force_zero = 1;
+
        while (graph->n_row < graph->maxvar) {
                isl_vec *sol;
 
                graph->src_scc = -1;
                graph->dst_scc = -1;
 
-               if (setup_lp(ctx, graph) < 0)
+               if (setup_lp(ctx, graph, force_zero) < 0)
                        return -1;
                sol = solve_lp(graph);
                if (!sol)
                        return -1;
                if (sol->size == 0) {
                        isl_vec_free(sol);
+                       if (!ctx->opt->schedule_maximize_band_depth &&
+                           graph->n_total_row > graph->band_start)
+                               return compute_next_band(ctx, graph);
                        if (graph->src_scc >= 0)
                                return compute_split_schedule(ctx, graph);
                        if (graph->n_total_row > graph->band_start)
@@ -2194,6 +2518,7 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
                }
                if (update_schedule(graph, sol, 1, 1) < 0)
                        return -1;
+               force_zero = 0;
        }
 
        if (graph->n_total_row > graph->band_start)
@@ -2201,8 +2526,37 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
        return sort_statements(ctx, graph);
 }
 
+/* Add a row to the schedules that separates the SCCs and move
+ * to the next band.
+ */
+static int split_on_scc(struct isl_sched_graph *graph)
+{
+       int i;
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int row = isl_mat_rows(node->sched);
+
+               isl_map_free(node->sched_map);
+               node->sched_map = NULL;
+               node->sched = isl_mat_add_zero_rows(node->sched, 1);
+               node->sched = isl_mat_set_element_si(node->sched, row, 0,
+                                                    node->scc);
+               if (!node->sched)
+                       return -1;
+               node->band[graph->n_total_row] = graph->n_band;
+       }
+
+       graph->n_total_row++;
+       next_band(graph);
+
+       return 0;
+}
+
 /* Compute a schedule for each component (identified by node->scc)
  * of the dependence graph separately and then combine the results.
+ * Depending on the setting of schedule_fuse, a component may be
+ * either weakly or strongly connected.
  *
  * The band_id is adjusted such that each component has a separate id.
  * Note that the band_id may have already been set to a value different
@@ -2216,6 +2570,9 @@ static int compute_component_schedule(isl_ctx *ctx,
        int n_total_row, orig_total_row;
        int n_band, orig_band;
 
+       if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN)
+               split_on_scc(graph);
+
        n_total_row = 0;
        orig_total_row = graph->n_total_row;
        n_band = 0;
@@ -2229,12 +2586,13 @@ static int compute_component_schedule(isl_ctx *ctx,
                                n++;
                n_edge = 0;
                for (i = 0; i < graph->n_edge; ++i)
-                       if (graph->edge[i].src->scc == wcc)
+                       if (graph->edge[i].src->scc == wcc &&
+                           graph->edge[i].dst->scc == wcc)
                                n_edge++;
 
                if (compute_sub_schedule(ctx, graph, n, n_edge,
                                    &node_scc_exactly,
-                                   &edge_src_scc_exactly, wcc, 1) < 0)
+                                   &edge_scc_exactly, wcc, 1) < 0)
                        return -1;
                if (graph->n_total_row > n_total_row)
                        n_total_row = graph->n_total_row;
@@ -2252,12 +2610,20 @@ static int compute_component_schedule(isl_ctx *ctx,
 
 /* Compute a schedule for the given dependence graph.
  * We first check if the graph is connected (through validity dependences)
- * and if so compute a schedule for each component separately.
+ * and, if not, compute a schedule for each component separately.
+ * If schedule_fuse is set to minimal fusion, then we check for strongly
+ * connected components instead and compute a separate schedule for
+ * each such strongly connected component.
  */
 static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
 {
-       if (detect_wccs(graph) < 0)
-               return -1;
+       if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN) {
+               if (detect_sccs(graph) < 0)
+                       return -1;
+       } else {
+               if (detect_wccs(graph) < 0)
+                       return -1;
+       }
 
        if (graph->scc > 1)
                return compute_component_schedule(ctx, graph);
@@ -2266,8 +2632,12 @@ static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
 }
 
 /* Compute a schedule for the given union of domains that respects
- * all the validity dependences and tries to minimize the dependence
- * distances over the proximity dependences.
+ * all the validity dependences.
+ * If the default isl scheduling algorithm is used, it tries to minimize
+ * the dependence distances over the proximity dependences.
+ * If Feautrier's scheduling algorithm is used, the proximity dependence
+ * distances are only minimized during the extension to a full-dimensional
+ * schedule.
  */
 __isl_give isl_schedule *isl_union_set_compute_schedule(
        __isl_take isl_union_set *domain,
@@ -2275,16 +2645,16 @@ __isl_give isl_schedule *isl_union_set_compute_schedule(
        __isl_take isl_union_map *proximity)
 {
        isl_ctx *ctx = isl_union_set_get_ctx(domain);
-       isl_dim *dim;
+       isl_space *dim;
        struct isl_sched_graph graph = { 0 };
        isl_schedule *sched;
 
        domain = isl_union_set_align_params(domain,
-                                           isl_union_map_get_dim(validity));
+                                           isl_union_map_get_space(validity));
        domain = isl_union_set_align_params(domain,
-                                           isl_union_map_get_dim(proximity));
-       dim = isl_union_set_get_dim(domain);
-       validity = isl_union_map_align_params(validity, isl_dim_copy(dim));
+                                           isl_union_map_get_space(proximity));
+       dim = isl_union_set_get_space(domain);
+       validity = isl_union_map_align_params(validity, isl_space_copy(dim));
        proximity = isl_union_map_align_params(proximity, dim);
 
        if (!domain)
@@ -2314,7 +2684,7 @@ __isl_give isl_schedule *isl_union_set_compute_schedule(
                goto error;
 
 empty:
-       sched = extract_schedule(&graph, isl_union_set_get_dim(domain));
+       sched = extract_schedule(&graph, isl_union_set_get_space(domain));
 
        graph_free(ctx, &graph);
        isl_union_set_free(domain);
@@ -2343,9 +2713,9 @@ void *isl_schedule_free(__isl_take isl_schedule *sched)
                isl_map_free(sched->node[i].sched);
                free(sched->node[i].band_end);
                free(sched->node[i].band_id);
-               free(sched->node[i].parallel);
+               free(sched->node[i].zero);
        }
-       isl_dim_free(sched->dim);
+       isl_space_free(sched->dim);
        isl_band_list_free(sched->band_forest);
        free(sched);
        return NULL;
@@ -2353,7 +2723,7 @@ void *isl_schedule_free(__isl_take isl_schedule *sched)
 
 isl_ctx *isl_schedule_get_ctx(__isl_keep isl_schedule *schedule)
 {
-       return schedule ? isl_dim_get_ctx(schedule->dim) : NULL;
+       return schedule ? isl_space_get_ctx(schedule->dim) : NULL;
 }
 
 __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
@@ -2364,7 +2734,7 @@ __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
        if (!sched)
                return NULL;
 
-       umap = isl_union_map_empty(isl_dim_copy(sched->dim));
+       umap = isl_union_map_empty(isl_space_copy(sched->dim));
        for (i = 0; i < sched->n; ++i)
                umap = isl_union_map_add_map(umap,
                                            isl_map_copy(sched->node[i].sched));
@@ -2372,47 +2742,6 @@ __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
        return umap;
 }
 
-int isl_schedule_n_band(__isl_keep isl_schedule *sched)
-{
-       return sched ? sched->n_band : 0;
-}
-
-/* Construct a mapping that maps each domain to the band in its schedule
- * with the specified band index.  Note that bands with the same index
- * but for different domains do not need to be related.
- */
-__isl_give isl_union_map *isl_schedule_get_band(__isl_keep isl_schedule *sched,
-       unsigned band)
-{
-       int i;
-       isl_union_map *umap;
-
-       if (!sched)
-               return NULL;
-
-       umap = isl_union_map_empty(isl_dim_copy(sched->dim));
-       for (i = 0; i < sched->n; ++i) {
-               int start, end;
-               isl_map *map;
-
-               if (band >= sched->node[i].n_band)
-                       continue;
-
-               start = band > 0 ? sched->node[i].band_end[band - 1] : 0;
-               end = sched->node[i].band_end[band];
-
-               map = isl_map_copy(sched->node[i].sched);
-
-               map = isl_map_project_out(map, isl_dim_out, end,
-                                         sched->n_total_row - end);
-               map = isl_map_project_out(map, isl_dim_out, 0, start);
-
-               umap = isl_union_map_add_map(umap, map);
-       }
-
-       return umap;
-}
-
 static __isl_give isl_band_list *construct_band_list(
        __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent,
        int band_nr, int *parent_active, int n_active);
@@ -2467,14 +2796,14 @@ static __isl_give isl_band *construct_band(__isl_keep isl_schedule *schedule,
                schedule->node[i].band_end[band_nr] : start;
        band->n = end - start;
 
-       band->parallel = isl_alloc_array(ctx, int, band->n);
-       if (!band->parallel)
+       band->zero = isl_alloc_array(ctx, int, band->n);
+       if (!band->zero)
                goto error;
 
        for (j = 0; j < band->n; ++j)
-               band->parallel[j] = schedule->node[i].parallel[start + j];
+               band->zero[j] = schedule->node[i].zero[start + j];
 
-       band->map = isl_union_map_empty(isl_dim_copy(schedule->dim));
+       band->map = isl_union_map_empty(isl_space_copy(schedule->dim));
        for (i = 0; i < schedule->n; ++i) {
                isl_map *map;
                unsigned n_out;
@@ -2619,5 +2948,74 @@ __isl_give isl_band_list *isl_schedule_get_band_forest(
                return NULL;
        if (!schedule->band_forest)
                schedule->band_forest = construct_forest(schedule);
-       return isl_band_list_copy(schedule->band_forest);
+       return isl_band_list_dup(schedule->band_forest);
+}
+
+static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
+       __isl_keep isl_band_list *list);
+
+static __isl_give isl_printer *print_band(__isl_take isl_printer *p,
+       __isl_keep isl_band *band)
+{
+       isl_band_list *children;
+
+       p = isl_printer_start_line(p);
+       p = isl_printer_print_union_map(p, band->map);
+       p = isl_printer_end_line(p);
+
+       if (!isl_band_has_children(band))
+               return p;
+
+       children = isl_band_get_children(band);
+
+       p = isl_printer_indent(p, 4);
+       p = print_band_list(p, children);
+       p = isl_printer_indent(p, -4);
+
+       isl_band_list_free(children);
+
+       return p;
+}
+
+static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
+       __isl_keep isl_band_list *list)
+{
+       int i, n;
+
+       n = isl_band_list_n_band(list);
+       for (i = 0; i < n; ++i) {
+               isl_band *band;
+               band = isl_band_list_get_band(list, i);
+               p = print_band(p, band);
+               isl_band_free(band);
+       }
+
+       return p;
+}
+
+__isl_give isl_printer *isl_printer_print_schedule(__isl_take isl_printer *p,
+       __isl_keep isl_schedule *schedule)
+{
+       isl_band_list *forest;
+
+       forest = isl_schedule_get_band_forest(schedule);
+
+       p = print_band_list(p, forest);
+
+       isl_band_list_free(forest);
+
+       return p;
+}
+
+void isl_schedule_dump(__isl_keep isl_schedule *schedule)
+{
+       isl_printer *printer;
+
+       if (!schedule)
+               return;
+
+       printer = isl_printer_to_file(isl_schedule_get_ctx(schedule), stderr);
+       printer = isl_printer_print_schedule(printer, schedule);
+
+       isl_printer_free(printer);
 }