add isl_union_set_compute_schedule
authorSven Verdoolaege <skimo@kotnet.org>
Sat, 19 Mar 2011 13:47:47 +0000 (14:47 +0100)
committerSven Verdoolaege <skimo@kotnet.org>
Thu, 21 Apr 2011 12:02:32 +0000 (14:02 +0200)
This is still a very preliminary implementation.
The algorithm used is similar to that of Pluto, except that
it may compute parametric schedules and schedules with negative
coefficients.  The current implementation probably tries to fuse too much.
This should be made tunable at some point.

Signed-off-by: Sven Verdoolaege <skimo@kotnet.org>
Makefile.am
doc/user.pod
include/isl/options.h
include/isl/schedule.h [new file with mode: 0644]
isl_options.c
isl_schedule.c [new file with mode: 0644]
isl_test.c

index debc89d..475a4f7 100644 (file)
@@ -100,6 +100,7 @@ libisl_la_SOURCES = \
        isl_sample.c \
        isl_scan.c \
        isl_scan.h \
+       isl_schedule.c \
        isl_stream.c \
        isl_seq.c \
        isl_tab.c \
@@ -192,6 +193,7 @@ pkginclude_HEADERS = \
        include/isl/point.h \
        include/isl/polynomial.h \
        include/isl/printer.h \
+       include/isl/schedule.h \
        include/isl/seq.h \
        include/isl/set.h \
        include/isl/stream.h \
index 50fdaad..89ddf4c 100644 (file)
@@ -2606,6 +2606,51 @@ Any of C<must_dep>, C<may_dep>, C<must_no_source>
 or C<may_no_source> may be C<NULL>, but a C<NULL> value for
 any of the other arguments is treated as an error.
 
+=head2 Scheduling
+
+B<The functionality described in this section is fairly new
+and may be subject to change.>
+
+The following function can be used to compute a schedule
+for a union of domains.  The generated schedule respects
+all C<validity> dependences.  That is, all dependence distances
+over these dependences in the scheduled space are lexicographically
+positive.  The generated schedule schedule also tries to minimize
+the dependence distances over C<proximity> dependences.
+Moreover, it tries to obtain sequences (bands) of schedule dimensions
+for groups of domains where the dependence distances have only
+non-negative values.
+The algorithm used to construct the schedule is similar to that
+of C<Pluto>.
+
+       #include <isl/schedule.h>
+       __isl_give isl_schedule *isl_union_set_compute_schedule(
+               __isl_take isl_union_set *domain,
+               __isl_take isl_union_map *validity,
+               __isl_take isl_union_map *proximity);
+       void *isl_schedule_free(__isl_take isl_schedule *sched);
+
+A mapping from the domains to the scheduled space can be obtained
+from an C<isl_schedule> using the following function.
+
+       __isl_give isl_union_map *isl_schedule_get_map(
+               __isl_keep isl_schedule *sched);
+
+This mapping can also be obtained in pieces using the following functions.
+
+       int isl_schedule_n_band(__isl_keep isl_schedule *sched);
+       __isl_give isl_union_map *isl_schedule_get_band(
+               __isl_keep isl_schedule *sched, unsigned band);
+
+C<isl_schedule_n_band> returns the maximal number of bands.
+C<isl_schedule_get_band> returns a union of mappings from a domain to
+the band of consecutive schedule dimensions with the given sequence
+number for that domain.  Bands with the same sequence number but for
+different domains may be completely unrelated.
+Within a band, the corresponding coordinates of the distance vectors
+are all non-negative, assuming that the coordinates for all previous
+bands are all zero.
+
 =head2 Parametric Vertex Enumeration
 
 The parametric vertex enumeration described in this section
index 30628a1..a60d2c8 100644 (file)
@@ -58,6 +58,8 @@ struct isl_options {
        #define                 ISL_CONVEX_HULL_WRAP    0
        #define                 ISL_CONVEX_HULL_FM      1
        int                     convex;
+
+       int                     schedule_parametric;
 };
 
 ISL_ARG_DECL(isl_options, struct isl_options, isl_options_arg)
diff --git a/include/isl/schedule.h b/include/isl/schedule.h
new file mode 100644 (file)
index 0000000..fae51d7
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef ISL_SCHEDULE_H
+#define ISL_SCHEDULE_H
+
+#include <isl/union_set.h>
+#include <isl/union_map.h>
+
+struct isl_schedule;
+typedef struct isl_schedule isl_schedule;
+
+__isl_give isl_schedule *isl_union_set_compute_schedule(
+       __isl_take isl_union_set *domain,
+       __isl_take isl_union_map *validity,
+       __isl_take isl_union_map *proximity);
+void *isl_schedule_free(__isl_take isl_schedule *sched);
+__isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched);
+
+int isl_schedule_n_band(__isl_keep isl_schedule *sched);
+__isl_give isl_union_map *isl_schedule_get_band(__isl_keep isl_schedule *sched,
+       unsigned band);
+
+#endif
index 70ebd23..d382af7 100644 (file)
@@ -115,6 +115,8 @@ ISL_ARG_BOOL(struct isl_options, pip_symmetry, 0, "pip-symmetry", 1,
        "detect simple symmetries in PIP input")
 ISL_ARG_CHOICE(struct isl_options, convex, 0, "convex-hull", \
        convex, ISL_CONVEX_HULL_WRAP, "convex hull algorithm to use")
+ISL_ARG_BOOL(struct isl_options, schedule_parametric, 0,
+       "schedule-parametric", 1, "construct possibly parametric schedules")
 ISL_ARG_VERSION(print_version)
 ISL_ARG_END
 };
diff --git a/isl_schedule.c b/isl_schedule.c
new file mode 100644 (file)
index 0000000..7f7a8dc
--- /dev/null
@@ -0,0 +1,2374 @@
+/*
+ * Copyright 2011      INRIA Saclay
+ *
+ * Use of this software is governed by the GNU LGPLv2.1 license
+ *
+ * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
+ * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
+ * 91893 Orsay, France
+ */
+
+#include <isl_ctx_private.h>
+#include <isl_map_private.h>
+#include <isl_dim_private.h>
+#include <isl/hash.h>
+#include <isl/constraint.h>
+#include <isl/schedule.h>
+#include <isl_mat_private.h>
+#include <isl/set.h>
+#include <isl/seq.h>
+#include <isl_tab.h>
+#include <isl_dim_map.h>
+#include <isl_hmap_map_basic_set.h>
+#include <isl_qsort.h>
+
+/*
+ * The scheduling algorithm implemented in this file was inspired by
+ * Bondhugula et al., "Automatic Transformations for Communication-Minimized
+ * Parallelization and Locality Optimization in the Polyhedral Model".
+ */
+
+
+/* The schedule for an individual domain, plus information about the bands.
+ * In particular, we keep track of the number of bands and for each
+ * band, the starting position of the next band.  The first band starts at
+ * position 0.
+ */
+struct isl_schedule_node {
+       isl_map *sched;
+       int      n_band;
+       int     *band_end;
+};
+
+/* Information about the computed schedule.
+ * n is the number of nodes/domains/statements.
+ * n_band is the maximal number of bands.
+ * n_total_row is the number of coordinates of the schedule.
+ * dim contains a description of the parameters.
+ */
+struct isl_schedule {
+       int n;
+       int n_band;
+       int n_total_row;
+       isl_dim *dim;
+
+       struct isl_schedule_node node[1];
+};
+
+/* Internal information about a node that is used during the construction
+ * of a schedule.
+ * dim represents the space in which the domain lives
+ * sched is a matrix representation of the schedule being constructed
+ *     for this node
+ * sched_map is an isl_map representation of the same (partial) schedule
+ *     sched_map may be NULL
+ * rank is the number of linearly independent rows in the linear part
+ *     of sched
+ * the columns of cmap represent a change of basis for the schedule
+ *     coefficients; the first rank columns span the linear part of
+ *     the schedule rows
+ * start is the first variable in the LP problem in the sequences that
+ *     represents the schedule coefficients of this node
+ * nvar is the dimension of the domain
+ * nparam is the number of parameters or 0 if we are not constructing
+ *     a parametric schedule
+ *
+ * scc is the index of SCC (or WCC) this node belongs to
+ *
+ * band contains the band index for each of the rows of the schedule
+ *
+ * index, min_index and on_stack are used during the SCC detection
+ * index represents the order in which nodes are visited.
+ * min_index is the index of the root of a (sub)component.
+ * on_stack indicates whether the node is currently on the stack.
+ */
+struct isl_sched_node {
+       isl_dim *dim;
+       isl_mat *sched;
+       isl_map *sched_map;
+       int      rank;
+       isl_mat *cmap;
+       int      start;
+       int      nvar;
+       int      nparam;
+
+       int      scc;
+
+       int     *band;
+
+       /* scc detection */
+       int      index;
+       int      min_index;
+       int      on_stack;
+};
+
+static int node_has_dim(const void *entry, const void *val)
+{
+       struct isl_sched_node *node = (struct isl_sched_node *)entry;
+       isl_dim *dim = (isl_dim *)val;
+
+       return isl_dim_equal(node->dim, dim);
+}
+
+/* An edge in the dependence graph.  An edge may be used to
+ * ensure validity of the generated schedule, to minimize the dependence
+ * distance or both
+ *
+ * map is the dependence relation
+ * src is the source node
+ * dst is the sink node
+ * validity is set if the edge is used to ensure correctness
+ * proximity is set if the edge is used to minimize dependence distances
+ *
+ * For validity edges, start and end mark the sequence of inequality
+ * constraints in the LP problem that encode the validity constraint
+ * corresponding to this edge.
+ */
+struct isl_sched_edge {
+       isl_map *map;
+
+       struct isl_sched_node *src;
+       struct isl_sched_node *dst;
+
+       int validity;
+       int proximity;
+
+       int start;
+       int end;
+};
+
+/* Internal information about the dependence graph used during
+ * the construction of the schedule.
+ *
+ * intra_hmap is a cache, mapping dependence relations to their dual,
+ *     for dependences from a node to itself
+ * inter_hmap is a cache, mapping dependence relations to their dual,
+ *     for dependences between distinct nodes
+ *
+ * n is the number of nodes
+ * node is the list of nodes
+ * maxvar is the maximal number of variables over all nodes
+ * n_row is the current (maximal) number of linearly independent
+ *     rows in the node schedules
+ * n_total_row is the current number of rows in the node schedules
+ * n_band is the current number of completed bands
+ * band_start is the starting row in the node schedules of the current band
+ * root is set if this graph is the original dependence graph,
+ *     without any splitting
+ *
+ * sorted contains a list of node indices sorted according to the
+ *     SCC to which a node belongs
+ *
+ * n_edge is the number of edges
+ * edge is the list of edges
+ * edge_table contains pointers into the edge array, hashed on the source
+ *     and sink spaces; the table only contains edges that represent
+ *     validity constraints (and that may or may not also represent proximity
+ *     constraints)
+ *
+ * node_table contains pointers into the node array, hashed on the space
+ *
+ * region contains a list of variable sequences that should be non-trivial
+ *
+ * lp contains the (I)LP problem used to obtain new schedule rows
+ *
+ * src_scc and dst_scc are the source and sink SCCs of an edge with
+ *     conflicting constraints
+ *
+ * scc, sp, index and stack are used during the detection of SCCs
+ * scc is the number of the next SCC
+ * stack contains the nodes on the path from the root to the current node
+ * sp is the stack pointer
+ * index is the index of the last node visited
+ */
+struct isl_sched_graph {
+       isl_hmap_map_basic_set *intra_hmap;
+       isl_hmap_map_basic_set *inter_hmap;
+
+       struct isl_sched_node *node;
+       int n;
+       int maxvar;
+       int n_row;
+
+       int *sorted;
+
+       int n_band;
+       int n_total_row;
+       int band_start;
+
+       int root;
+
+       struct isl_sched_edge *edge;
+       int n_edge;
+       struct isl_hash_table *edge_table;
+
+       struct isl_hash_table *node_table;
+       struct isl_region *region;
+
+       isl_basic_set *lp;
+
+       int src_scc;
+       int dst_scc;
+
+       /* scc detection */
+       int scc;
+       int sp;
+       int index;
+       int *stack;
+};
+
+/* Initialize node_table based on the list of nodes.
+ */
+static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i;
+
+       graph->node_table = isl_hash_table_alloc(ctx, graph->n);
+       if (!graph->node_table)
+               return -1;
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_hash_table_entry *entry;
+               uint32_t hash;
+
+               hash = isl_dim_get_hash(graph->node[i].dim);
+               entry = isl_hash_table_find(ctx, graph->node_table, hash,
+                                           &node_has_dim,
+                                           graph->node[i].dim, 1);
+               if (!entry)
+                       return -1;
+               entry->data = &graph->node[i];
+       }
+
+       return 0;
+}
+
+/* Return a pointer to the node that lives within the given space,
+ * or NULL if there is no such node.
+ */
+static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
+       struct isl_sched_graph *graph, __isl_keep isl_dim *dim)
+{
+       struct isl_hash_table_entry *entry;
+       uint32_t hash;
+
+       hash = isl_dim_get_hash(dim);
+       entry = isl_hash_table_find(ctx, graph->node_table, hash,
+                                   &node_has_dim, dim, 0);
+
+       return entry ? entry->data : NULL;
+}
+
+static int edge_has_src_and_dst(const void *entry, const void *val)
+{
+       const struct isl_sched_edge *edge = entry;
+       const struct isl_sched_edge *temp = val;
+
+       return edge->src == temp->src && edge->dst == temp->dst;
+}
+
+/* Initialize edge_table based on the list of edges.
+ * Only edges with validity set are added to the table.
+ */
+static int graph_init_edge_table(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i;
+
+       graph->edge_table = isl_hash_table_alloc(ctx, graph->n_edge);
+       if (!graph->edge_table)
+               return -1;
+
+       for (i = 0; i < graph->n_edge; ++i) {
+               struct isl_hash_table_entry *entry;
+               uint32_t hash;
+
+               if (!graph->edge[i].validity)
+                       continue;
+
+               hash = isl_hash_init();
+               hash = isl_hash_builtin(hash, graph->edge[i].src);
+               hash = isl_hash_builtin(hash, graph->edge[i].dst);
+               entry = isl_hash_table_find(ctx, graph->edge_table, hash,
+                                           &edge_has_src_and_dst,
+                                           &graph->edge[i], 1);
+               if (!entry)
+                       return -1;
+               entry->data = &graph->edge[i];
+       }
+
+       return 0;
+}
+
+/* Check whether the dependence graph has a (validity) edge
+ * between the given two nodes.
+ */
+static int graph_has_edge(struct isl_sched_graph *graph,
+       struct isl_sched_node *src, struct isl_sched_node *dst)
+{
+       isl_ctx *ctx = isl_dim_get_ctx(src->dim);
+       struct isl_hash_table_entry *entry;
+       uint32_t hash;
+       struct isl_sched_edge temp = { .src = src, .dst = dst };
+       struct isl_sched_edge *edge;
+       int empty;
+
+       hash = isl_hash_init();
+       hash = isl_hash_builtin(hash, temp.src);
+       hash = isl_hash_builtin(hash, temp.dst);
+       entry = isl_hash_table_find(ctx, graph->edge_table, hash,
+                                   &edge_has_src_and_dst, &temp, 0);
+       if (!entry)
+               return 0;
+
+       edge = entry->data;
+       empty = isl_map_fast_is_empty(edge->map);
+       if (empty < 0)
+               return -1;
+
+       return !empty;
+}
+
+static int graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
+       int n_node, int n_edge)
+{
+       int i;
+
+       graph->n = n_node;
+       graph->n_edge = n_edge;
+       graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
+       graph->sorted = isl_calloc_array(ctx, int, graph->n);
+       graph->region = isl_alloc_array(ctx, struct isl_region, graph->n);
+       graph->stack = isl_alloc_array(ctx, int, graph->n);
+       graph->edge = isl_calloc_array(ctx,
+                                       struct isl_sched_edge, graph->n_edge);
+
+       graph->intra_hmap = isl_hmap_map_basic_set_alloc(ctx, 2 * n_edge);
+       graph->inter_hmap = isl_hmap_map_basic_set_alloc(ctx, 2 * n_edge);
+
+       if (!graph->node || !graph->region || !graph->stack || !graph->edge ||
+           !graph->sorted)
+               return -1;
+
+       for(i = 0; i < graph->n; ++i)
+               graph->sorted[i] = i;
+
+       return 0;
+}
+
+static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i;
+
+       isl_hmap_map_basic_set_free(ctx, graph->intra_hmap);
+       isl_hmap_map_basic_set_free(ctx, graph->inter_hmap);
+
+       for (i = 0; i < graph->n; ++i) {
+               isl_dim_free(graph->node[i].dim);
+               isl_mat_free(graph->node[i].sched);
+               isl_map_free(graph->node[i].sched_map);
+               isl_mat_free(graph->node[i].cmap);
+               if (graph->root)
+                       free(graph->node[i].band);
+       }
+       free(graph->node);
+       free(graph->sorted);
+       for (i = 0; i < graph->n_edge; ++i)
+               isl_map_free(graph->edge[i].map);
+       free(graph->edge);
+       free(graph->region);
+       free(graph->stack);
+       isl_hash_table_free(ctx, graph->edge_table);
+       isl_hash_table_free(ctx, graph->node_table);
+       isl_basic_set_free(graph->lp);
+}
+
+/* Add a new node to the graph representing the given set.
+ */
+static int extract_node(__isl_take isl_set *set, void *user)
+{
+       int i;
+       int nvar, nparam;
+       isl_ctx *ctx;
+       isl_dim *dim;
+       isl_mat *sched;
+       struct isl_sched_graph *graph = user;
+       int *band;
+
+       ctx = isl_set_get_ctx(set);
+       dim = isl_set_get_dim(set);
+       isl_set_free(set);
+       nvar = isl_dim_size(dim, isl_dim_set);
+       nparam = isl_dim_size(dim, isl_dim_param);
+       if (!ctx->opt->schedule_parametric)
+               nparam = 0;
+       sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
+       graph->node[graph->n].dim = dim;
+       graph->node[graph->n].nvar = nvar;
+       graph->node[graph->n].nparam = nparam;
+       graph->node[graph->n].sched = sched;
+       graph->node[graph->n].sched_map = NULL;
+       band = isl_alloc_array(ctx, int, graph->n_edge + nvar);
+       graph->node[graph->n].band = band;
+       graph->n++;
+
+       if (!sched || !band)
+               return -1;
+
+       return 0;
+}
+
+/* Add a new edge to the graph based on the given map.
+ * Edges are first extracted from the validity dependences,
+ * from which the edge_table is constructed.
+ * Afterwards, the proximity dependences are added.  If a proximity
+ * dependence relation happens to be identical to one of the
+ * validity dependence relations added before, then we don't create
+ * a new edge, but instead mark the original edge as also representing
+ * a proximity dependence.
+ */
+static int extract_edge(__isl_take isl_map *map, void *user)
+{
+       isl_ctx *ctx = isl_map_get_ctx(map);
+       struct isl_sched_graph *graph = user;
+       struct isl_sched_node *src, *dst;
+       isl_dim *dim;
+
+       dim = isl_dim_domain(isl_map_get_dim(map));
+       src = graph_find_node(ctx, graph, dim);
+       isl_dim_free(dim);
+       dim = isl_dim_range(isl_map_get_dim(map));
+       dst = graph_find_node(ctx, graph, dim);
+       isl_dim_free(dim);
+
+       if (!src || !dst) {
+               isl_map_free(map);
+               return 0;
+       }
+
+       graph->edge[graph->n_edge].src = src;
+       graph->edge[graph->n_edge].dst = dst;
+       graph->edge[graph->n_edge].map = map;
+       graph->edge[graph->n_edge].validity = !graph->edge_table;
+       graph->edge[graph->n_edge].proximity = !!graph->edge_table;
+       graph->n_edge++;
+
+       if (graph->edge_table) {
+               uint32_t hash;
+               struct isl_hash_table_entry *entry;
+               struct isl_sched_edge *edge;
+               int is_equal;
+
+               hash = isl_hash_init();
+               hash = isl_hash_builtin(hash, src);
+               hash = isl_hash_builtin(hash, dst);
+               entry = isl_hash_table_find(ctx, graph->edge_table, hash,
+                                           &edge_has_src_and_dst,
+                                           &graph->edge[graph->n_edge - 1], 0);
+               if (!entry)
+                       return 0;
+               edge = entry->data;
+               is_equal = isl_map_fast_is_equal(map, edge->map);
+               if (is_equal < 0)
+                       return -1;
+               if (!is_equal)
+                       return 0;
+
+               graph->n_edge--;
+               edge->proximity = 1;
+               isl_map_free(map);
+       }
+
+       return 0;
+}
+
+/* Check whether there is a validity dependence from src to dst,
+ * forcing dst to follow src.
+ */
+static int node_follows(struct isl_sched_graph *graph, 
+       struct isl_sched_node *dst, struct isl_sched_node *src)
+{
+       return graph_has_edge(graph, src, dst);
+}
+
+/* Perform Tarjan's algorithm for computing the strongly connected components
+ * in the dependence graph (only validity edges).
+ * If directed is not set, we consider the graph to be undirected and
+ * we effectively compute the (weakly) connected components.
+ */
+static int detect_sccs_tarjan(struct isl_sched_graph *g, int i, int directed)
+{
+       int j;
+
+       g->node[i].index = g->index;
+       g->node[i].min_index = g->index;
+       g->node[i].on_stack = 1;
+       g->index++;
+       g->stack[g->sp++] = i;
+
+       for (j = g->n - 1; j >= 0; --j) {
+               int f;
+
+               if (j == i)
+                       continue;
+               if (g->node[j].index >= 0 &&
+                       (!g->node[j].on_stack ||
+                        g->node[j].index > g->node[i].min_index))
+                       continue;
+               
+               f = node_follows(g, &g->node[i], &g->node[j]);
+               if (f < 0)
+                       return -1;
+               if (!f && !directed) {
+                       f = node_follows(g, &g->node[j], &g->node[i]);
+                       if (f < 0)
+                               return -1;
+               }
+               if (!f)
+                       continue;
+               if (g->node[j].index < 0) {
+                       detect_sccs_tarjan(g, j, directed);
+                       if (g->node[j].min_index < g->node[i].min_index)
+                               g->node[i].min_index = g->node[j].min_index;
+               } else if (g->node[j].index < g->node[i].min_index)
+                       g->node[i].min_index = g->node[j].index;
+       }
+
+       if (g->node[i].index != g->node[i].min_index)
+               return 0;
+
+       do {
+               j = g->stack[--g->sp];
+               g->node[j].on_stack = 0;
+               g->node[j].scc = g->scc;
+       } while (j != i);
+       g->scc++;
+
+       return 0;
+}
+
+static int detect_ccs(struct isl_sched_graph *graph, int directed)
+{
+       int i;
+
+       graph->index = 0;
+       graph->sp = 0;
+       graph->scc = 0;
+       for (i = graph->n - 1; i >= 0; --i)
+               graph->node[i].index = -1;
+
+       for (i = graph->n - 1; i >= 0; --i) {
+               if (graph->node[i].index >= 0)
+                       continue;
+               if (detect_sccs_tarjan(graph, i, directed) < 0)
+                       return -1;
+       }
+
+       return 0;
+}
+
+/* Apply Tarjan's algorithm to detect the strongly connected components
+ * in the dependence graph.
+ */
+static int detect_sccs(struct isl_sched_graph *graph)
+{
+       return detect_ccs(graph, 1);
+}
+
+/* Apply Tarjan's algorithm to detect the (weakly) connected components
+ * in the dependence graph.
+ */
+static int detect_wccs(struct isl_sched_graph *graph)
+{
+       return detect_ccs(graph, 0);
+}
+
+static int cmp_scc(const void *a, const void *b, void *data)
+{
+       struct isl_sched_graph *graph = data;
+       const int *i1 = a;
+       const int *i2 = b;
+
+       return graph->node[*i1].scc - graph->node[*i2].scc;
+}
+
+/* Sort the elements of graph->sorted according to the corresponding SCCs.
+ */
+static void sort_sccs(struct isl_sched_graph *graph)
+{
+       isl_quicksort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
+}
+
+/* Given a dependence relation R from a node to itself,
+ * construct the set of coefficients of valid constraints for elements
+ * in that dependence relation.
+ * In particular, the result contains tuples of coefficients
+ * c_0, c_n, c_x such that
+ *
+ *     c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
+ *
+ * or, equivalently,
+ *
+ *     c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
+ *
+ * We choose here to compute the dual of delta R.
+ * Alternatively, we could have computed the dual of R, resulting
+ * in a set of tuples c_0, c_n, c_x, c_y, and then
+ * plugged in (c_0, c_n, c_x, -c_x).
+ */
+static __isl_give isl_basic_set *intra_coefficients(
+       struct isl_sched_graph *graph, __isl_take isl_map *map)
+{
+       isl_ctx *ctx = isl_map_get_ctx(map);
+       isl_set *delta;
+       isl_basic_set *coef;
+
+       if (isl_hmap_map_basic_set_has(ctx, graph->intra_hmap, map))
+               return isl_hmap_map_basic_set_get(ctx, graph->intra_hmap, map);
+
+       delta = isl_set_remove_divs(isl_map_deltas(isl_map_copy(map)));
+       coef = isl_set_coefficients(delta);
+       isl_hmap_map_basic_set_set(ctx, graph->intra_hmap, map,
+                                       isl_basic_set_copy(coef));
+
+       return coef;
+}
+
+/* Given a dependence relation R, * construct the set of coefficients
+ * of valid constraints for elements in that dependence relation.
+ * In particular, the result contains tuples of coefficients
+ * c_0, c_n, c_x, c_y such that
+ *
+ *     c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
+ *
+ */
+static __isl_give isl_basic_set *inter_coefficients(
+       struct isl_sched_graph *graph, __isl_take isl_map *map)
+{
+       isl_ctx *ctx = isl_map_get_ctx(map);
+       isl_set *set;
+       isl_basic_set *coef;
+
+       if (isl_hmap_map_basic_set_has(ctx, graph->inter_hmap, map))
+               return isl_hmap_map_basic_set_get(ctx, graph->inter_hmap, map);
+
+       set = isl_map_wrap(isl_map_remove_divs(isl_map_copy(map)));
+       coef = isl_set_coefficients(set);
+       isl_hmap_map_basic_set_set(ctx, graph->inter_hmap, map,
+                                       isl_basic_set_copy(coef));
+
+       return coef;
+}
+
+/* Add constraints to graph->lp that force validity for the given
+ * dependence from a node i to itself.
+ * That is, add constraints that enforce
+ *
+ *     (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
+ *     = c_i_x (y - x) >= 0
+ *
+ * for each (x,y) in R.
+ * We obtain general constraints on coefficients (c_0, c_n, c_x)
+ * of valid constraints for (y - x) and then plug in (0, 0, c_i_x^+ - c_i_x^-),
+ * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
+ * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
+ *
+ * Actually, we do not construct constraints for the c_i_x themselves,
+ * but for the coefficients of c_i_x written as a linear combination
+ * of the columns in node->cmap.
+ */
+static int add_intra_validity_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge)
+{
+       unsigned total;
+       isl_map *map = isl_map_copy(edge->map);
+       isl_ctx *ctx = isl_map_get_ctx(map);
+       isl_dim *dim;
+       isl_dim_map *dim_map;
+       isl_basic_set *coef;
+       struct isl_sched_node *node = edge->src;
+
+       coef = intra_coefficients(graph, map);
+
+       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+
+       coef = isl_basic_set_transform_dims(coef, isl_dim_set,
+                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(node->cmap));
+
+       total = isl_basic_set_total_dim(graph->lp);
+       dim_map = isl_dim_map_alloc(ctx, total);
+       isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         node->nvar, -1);
+       isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         node->nvar, 1);
+       graph->lp = isl_basic_set_extend_constraints(graph->lp,
+                       coef->n_eq, coef->n_ineq);
+       graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
+                                                          coef, dim_map);
+       isl_dim_free(dim);
+
+       return 0;
+}
+
+/* Add constraints to graph->lp that force validity for the given
+ * dependence from node i to node j.
+ * That is, add constraints that enforce
+ *
+ *     (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
+ *
+ * for each (x,y) in R.
+ * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
+ * of valid constraints for R and then plug in
+ * (c_j_0 - c_i_0, c_j_n^+ - c_j_n^- - (c_i_n^+ - c_i_n^-),
+ *  c_j_x^+ - c_j_x^- - (c_i_x^+ - c_i_x^-)),
+ * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
+ * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
+ *
+ * Actually, we do not construct constraints for the c_*_x themselves,
+ * but for the coefficients of c_*_x written as a linear combination
+ * of the columns in node->cmap.
+ */
+static int add_inter_validity_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge)
+{
+       unsigned total;
+       isl_map *map = isl_map_copy(edge->map);
+       isl_ctx *ctx = isl_map_get_ctx(map);
+       isl_dim *dim;
+       isl_dim_map *dim_map;
+       isl_basic_set *coef;
+       struct isl_sched_node *src = edge->src;
+       struct isl_sched_node *dst = edge->dst;
+
+       coef = inter_coefficients(graph, map);
+
+       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+
+       coef = isl_basic_set_transform_dims(coef, isl_dim_set,
+                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(src->cmap));
+       coef = isl_basic_set_transform_dims(coef, isl_dim_set,
+                   isl_dim_size(dim, isl_dim_set) + src->nvar,
+                   isl_mat_copy(dst->cmap));
+
+       total = isl_basic_set_total_dim(graph->lp);
+       dim_map = isl_dim_map_alloc(ctx, total);
+
+       isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1);
+       isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
+       isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
+       isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
+                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         dst->nvar, -1);
+       isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
+                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         dst->nvar, 1);
+
+       isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
+       isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
+       isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
+       isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         src->nvar, 1);
+       isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         src->nvar, -1);
+
+       edge->start = graph->lp->n_ineq;
+       graph->lp = isl_basic_set_extend_constraints(graph->lp,
+                       coef->n_eq, coef->n_ineq);
+       graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
+                                                          coef, dim_map);
+       isl_dim_free(dim);
+       edge->end = graph->lp->n_ineq;
+
+       return 0;
+}
+
+/* Add constraints to graph->lp that bound the dependence distance for the given
+ * dependence from a node i to itself.
+ * If s = 1, we add the constraint
+ *
+ *     c_i_x (y - x) <= m_0 + m_n n
+ *
+ * or
+ *
+ *     -c_i_x (y - x) + m_0 + m_n n >= 0
+ *
+ * for each (x,y) in R.
+ * If s = -1, we add the constraint
+ *
+ *     -c_i_x (y - x) <= m_0 + m_n n
+ *
+ * or
+ *
+ *     c_i_x (y - x) + m_0 + m_n n >= 0
+ *
+ * for each (x,y) in R.
+ * We obtain general constraints on coefficients (c_0, c_n, c_x)
+ * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
+ * with each coefficient (except m_0) represented as a pair of non-negative
+ * coefficients.
+ *
+ * Actually, we do not construct constraints for the c_i_x themselves,
+ * but for the coefficients of c_i_x written as a linear combination
+ * of the columns in node->cmap.
+ */
+static int add_intra_proximity_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge, int s)
+{
+       unsigned total;
+       unsigned nparam;
+       isl_map *map = isl_map_copy(edge->map);
+       isl_ctx *ctx = isl_map_get_ctx(map);
+       isl_dim *dim;
+       isl_dim_map *dim_map;
+       isl_set *delta;
+       isl_basic_set *coef;
+       struct isl_sched_node *node = edge->src;
+
+       coef = intra_coefficients(graph, map);
+
+       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+
+       coef = isl_basic_set_transform_dims(coef, isl_dim_set,
+                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(node->cmap));
+
+       nparam = isl_dim_size(node->dim, isl_dim_param);
+       total = isl_basic_set_total_dim(graph->lp);
+       dim_map = isl_dim_map_alloc(ctx, total);
+       isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
+       isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
+       isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
+       isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         node->nvar, s);
+       isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         node->nvar, -s);
+       graph->lp = isl_basic_set_extend_constraints(graph->lp,
+                       coef->n_eq, coef->n_ineq);
+       graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
+                                                          coef, dim_map);
+       isl_dim_free(dim);
+
+       return 0;
+}
+
+/* Add constraints to graph->lp that bound the dependence distance for the given
+ * dependence from node i to node j.
+ * If s = 1, we add the constraint
+ *
+ *     (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
+ *             <= m_0 + m_n n
+ *
+ * or
+ *
+ *     -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
+ *             m_0 + m_n n >= 0
+ *
+ * for each (x,y) in R.
+ * If s = -1, we add the constraint
+ *
+ *     -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
+ *             <= m_0 + m_n n
+ *
+ * or
+ *
+ *     (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
+ *             m_0 + m_n n >= 0
+ *
+ * for each (x,y) in R.
+ * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
+ * of valid constraints for R and then plug in
+ * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
+ *  -s*c_j_x+s*c_i_x)
+ * with each coefficient (except m_0, c_j_0 and c_i_0)
+ * represented as a pair of non-negative coefficients.
+ *
+ * Actually, we do not construct constraints for the c_*_x themselves,
+ * but for the coefficients of c_*_x written as a linear combination
+ * of the columns in node->cmap.
+ */
+static int add_inter_proximity_constraints(struct isl_sched_graph *graph,
+       struct isl_sched_edge *edge, int s)
+{
+       unsigned total;
+       unsigned nparam;
+       isl_map *map = isl_map_copy(edge->map);
+       isl_ctx *ctx = isl_map_get_ctx(map);
+       isl_dim *dim;
+       isl_dim_map *dim_map;
+       isl_basic_set *coef;
+       struct isl_sched_node *src = edge->src;
+       struct isl_sched_node *dst = edge->dst;
+
+       coef = inter_coefficients(graph, map);
+
+       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+
+       coef = isl_basic_set_transform_dims(coef, isl_dim_set,
+                   isl_dim_size(dim, isl_dim_set), isl_mat_copy(src->cmap));
+       coef = isl_basic_set_transform_dims(coef, isl_dim_set,
+                   isl_dim_size(dim, isl_dim_set) + src->nvar,
+                   isl_mat_copy(dst->cmap));
+
+       nparam = isl_dim_size(src->dim, isl_dim_param);
+       total = isl_basic_set_total_dim(graph->lp);
+       dim_map = isl_dim_map_alloc(ctx, total);
+
+       isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
+       isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
+       isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
+
+       isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, -s);
+       isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, s);
+       isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, -s);
+       isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
+                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         dst->nvar, s);
+       isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
+                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         dst->nvar, -s);
+
+       isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, s);
+       isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, -s);
+       isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, s);
+       isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         src->nvar, -s);
+       isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         src->nvar, s);
+
+       graph->lp = isl_basic_set_extend_constraints(graph->lp,
+                       coef->n_eq, coef->n_ineq);
+       graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
+                                                          coef, dim_map);
+       isl_dim_free(dim);
+
+       return 0;
+}
+
+static int add_all_validity_constraints(struct isl_sched_graph *graph)
+{
+       int i;
+
+       for (i = 0; i < graph->n_edge; ++i) {
+               struct isl_sched_edge *edge= &graph->edge[i];
+               if (!edge->validity)
+                       continue;
+               if (edge->src != edge->dst)
+                       continue;
+               if (add_intra_validity_constraints(graph, edge) < 0)
+                       return -1;
+       }
+
+       for (i = 0; i < graph->n_edge; ++i) {
+               struct isl_sched_edge *edge = &graph->edge[i];
+               if (!edge->validity)
+                       continue;
+               if (edge->src == edge->dst)
+                       continue;
+               if (add_inter_validity_constraints(graph, edge) < 0)
+                       return -1;
+       }
+
+       return 0;
+}
+
+/* Add constraints to graph->lp that bound the dependence distance
+ * for all dependence relations.
+ * If a given proximity dependence is identical to a validity
+ * dependence, then the dependence distance is already bounded
+ * from below (by zero), so we only need to bound the distance
+ * from above.
+ * Otherwise, we need to bound the distance both from above and from below.
+ */
+static int add_all_proximity_constraints(struct isl_sched_graph *graph)
+{
+       int i;
+
+       for (i = 0; i < graph->n_edge; ++i) {
+               struct isl_sched_edge *edge= &graph->edge[i];
+               if (!edge->proximity)
+                       continue;
+               if (edge->src == edge->dst &&
+                   add_intra_proximity_constraints(graph, edge, 1) < 0)
+                       return -1;
+               if (edge->src != edge->dst &&
+                   add_inter_proximity_constraints(graph, edge, 1) < 0)
+                       return -1;
+               if (edge->validity)
+                       continue;
+               if (edge->src == edge->dst &&
+                   add_intra_proximity_constraints(graph, edge, -1) < 0)
+                       return -1;
+               if (edge->src != edge->dst &&
+                   add_inter_proximity_constraints(graph, edge, -1) < 0)
+                       return -1;
+       }
+
+       return 0;
+}
+
+/* Compute a basis for the rows in the linear part of the schedule
+ * and extend this basis to a full basis.  The remaining rows
+ * can then be used to force linear independence from the rows
+ * in the schedule.
+ *
+ * In particular, given the schedule rows S, we compute
+ *
+ *     S = H Q
+ *
+ * with H the Hermite normal form of S.  That is, all but the
+ * first rank columns of Q are zero and so each row in S is
+ * a linear combination of the first rank rows of Q.
+ * The matrix Q is then transposed because we will write the
+ * coefficients of the next schedule row as a column vector s
+ * and express this s as a linear combination s = Q c of the
+ * computed basis.
+ */
+static int node_update_cmap(struct isl_sched_node *node)
+{
+       isl_mat *H, *Q;
+       int n_row = isl_mat_rows(node->sched);
+
+       H = isl_mat_sub_alloc(node->sched, 0, n_row,
+                             1 + node->nparam, node->nvar);
+
+       H = isl_mat_left_hermite(H, 0, NULL, &Q);
+       isl_mat_free(node->cmap);
+       node->cmap = isl_mat_transpose(Q);
+       node->rank = isl_mat_initial_non_zero_cols(H);
+       isl_mat_free(H);
+
+       if (!node->cmap || node->rank < 0)
+               return -1;
+       return 0;
+}
+
+/* Count the number of equality and inequality constraints
+ * that will be added.  If once is set, then we count
+ * each edge exactly once.  Otherwise, we count as follows
+ * validity            -> 1 (>= 0)
+ * validity+proximity  -> 2 (>= 0 and upper bound)
+ * proximity           -> 2 (lower and upper bound)
+ */
+static int count_constraints(struct isl_sched_graph *graph,
+       int *n_eq, int *n_ineq, int once)
+{
+       int i;
+       isl_basic_set *coef;
+
+       *n_eq = *n_ineq = 0;
+       for (i = 0; i < graph->n_edge; ++i) {
+               struct isl_sched_edge *edge= &graph->edge[i];
+               isl_map *map = isl_map_copy(edge->map);
+               int f = once ? 1 : edge->proximity ? 2 : 1;
+
+               if (edge->src == edge->dst)
+                       coef = intra_coefficients(graph, map);
+               else
+                       coef = inter_coefficients(graph, map);
+               if (!coef)
+                       return -1;
+               *n_eq += f * coef->n_eq;
+               *n_ineq += f * coef->n_ineq;
+               isl_basic_set_free(coef);
+       }
+
+       return 0;
+}
+
+/* Construct an ILP problem for finding schedule coefficients
+ * that result in non-negative, but small dependence distances
+ * over all dependences.
+ * In particular, the dependence distances over proximity edges
+ * are bounded by m_0 + m_n n and we compute schedule coefficients
+ * with small values (preferably zero) of m_n and m_0.
+ *
+ * All variables of the ILP are non-negative.  The actual coefficients
+ * may be negative, so each coefficient is represented as the difference
+ * of two non-negative variables.  The negative part always appears
+ * immediately before the positive part.
+ * Other than that, the variables have the following order
+ *
+ *     - sum of positive and negative parts of m_n coefficients
+ *     - m_0
+ *     - sum of positive and negative parts of all c_n coefficients
+ *             (unconstrained when computing non-parametric schedules)
+ *     - sum of positive and negative parts of all c_x coefficients
+ *     - positive and negative parts of m_n coefficients
+ *     - for each node
+ *             - c_i_0
+ *             - positive and negative parts of c_i_n (if parametric)
+ *             - positive and negative parts of c_i_x
+ *
+ * The c_i_x are not represented directly, but through the columns of
+ * node->cmap.  That is, the computed values are for variable t_i_x
+ * such that c_i_x = Q t_i_x with Q equal to node->cmap.
+ *
+ * The constraints are those from the edges plus two or three equalities
+ * to express the sums.
+ */
+static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i, j;
+       int k;
+       unsigned nparam;
+       unsigned total;
+       isl_dim *dim;
+       int parametric;
+       int param_pos;
+       int n_eq, n_ineq;
+
+       parametric = ctx->opt->schedule_parametric;
+       nparam = isl_dim_size(graph->node[0].dim, isl_dim_param);
+       param_pos = 4;
+       total = param_pos + 2 * nparam;
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[graph->sorted[i]];
+               if (node_update_cmap(node) < 0)
+                       return -1;
+               node->start = total;
+               total += 1 + 2 * (node->nparam + node->nvar);
+       }
+
+       if (count_constraints(graph, &n_eq, &n_ineq, 0) < 0)
+               return -1;
+
+       dim = isl_dim_set_alloc(ctx, 0, total);
+       isl_basic_set_free(graph->lp);
+       n_eq += 2 + parametric;
+       graph->lp = isl_basic_set_alloc_dim(dim, 0, n_eq, n_ineq);
+
+       k = isl_basic_set_alloc_equality(graph->lp);
+       if (k < 0)
+               return -1;
+       isl_seq_clr(graph->lp->eq[k], 1 +  total);
+       isl_int_set_si(graph->lp->eq[k][1], -1);
+       for (i = 0; i < 2 * nparam; ++i)
+               isl_int_set_si(graph->lp->eq[k][1 + param_pos + i], 1);
+
+       if (parametric) {
+               k = isl_basic_set_alloc_equality(graph->lp);
+               if (k < 0)
+                       return -1;
+               isl_seq_clr(graph->lp->eq[k], 1 +  total);
+               isl_int_set_si(graph->lp->eq[k][3], -1);
+               for (i = 0; i < graph->n; ++i) {
+                       int pos = 1 + graph->node[i].start + 1;
+
+                       for (j = 0; j < 2 * graph->node[i].nparam; ++j)
+                               isl_int_set_si(graph->lp->eq[k][pos + j], 1);
+               }
+       }
+
+       k = isl_basic_set_alloc_equality(graph->lp);
+       if (k < 0)
+               return -1;
+       isl_seq_clr(graph->lp->eq[k], 1 +  total);
+       isl_int_set_si(graph->lp->eq[k][4], -1);
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int pos = 1 + node->start + 1 + 2 * node->nparam;
+
+               for (j = 0; j < 2 * node->nvar; ++j)
+                       isl_int_set_si(graph->lp->eq[k][pos + j], 1);
+       }
+
+       if (add_all_validity_constraints(graph) < 0)
+               return -1;
+       if (add_all_proximity_constraints(graph) < 0)
+               return -1;
+
+       return 0;
+}
+
+/* Analyze the conflicting constraint found by
+ * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
+ * constraint of one of the edges between distinct nodes, living, moreover
+ * in distinct SCCs, then record the source and sink SCC as this may
+ * be a good place to cut between SCCs.
+ */
+static int check_conflict(int con, void *user)
+{
+       int i;
+       struct isl_sched_graph *graph = user;
+
+       if (graph->src_scc >= 0)
+               return 0;
+
+       con -= graph->lp->n_eq;
+
+       if (con >= graph->lp->n_ineq)
+               return 0;
+
+       for (i = 0; i < graph->n_edge; ++i) {
+               if (!graph->edge[i].validity)
+                       continue;
+               if (graph->edge[i].src == graph->edge[i].dst)
+                       continue;
+               if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
+                       continue;
+               if (graph->edge[i].start > con)
+                       continue;
+               if (graph->edge[i].end <= con)
+                       continue;
+               graph->src_scc = graph->edge[i].src->scc;
+               graph->dst_scc = graph->edge[i].dst->scc;
+       }
+
+       return 0;
+}
+
+/* Check whether the next schedule row of the given node needs to be
+ * non-trivial.  Lower-dimensional domains may have some trivial rows,
+ * but as soon as the number of remaining required non-trivial rows
+ * is as large as the number or remaining rows to be computed,
+ * all remaining rows need to be non-trivial.
+ */
+static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
+{
+       return node->nvar - node->rank >= graph->maxvar - graph->n_row;
+}
+
+/* Solve the ILP problem constructed in setup_lp.
+ * For each node such that all the remaining rows of its schedule
+ * need to be non-trivial, we construct a non-triviality region.
+ * This region imposes that the next row is independent of previous rows.
+ * In particular the coefficients c_i_x are represented by t_i_x
+ * variables with c_i_x = Q t_i_x and Q a unimodular matrix such that
+ * its first columns span the rows of the previously computed part
+ * of the schedule.  The non-triviality region enforces that at least
+ * one of the remaining components of t_i_x is non-zero, i.e.,
+ * that the new schedule row depends on at least one of the remaining
+ * columns of Q.
+ */
+static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
+{
+       int i;
+       isl_vec *sol;
+       isl_basic_set *lp;
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int skip = node->rank;
+               graph->region[i].pos = node->start + 1 + 2*(node->nparam+skip);
+               if (needs_row(graph, node))
+                       graph->region[i].len = 2 * (node->nvar - skip);
+               else
+                       graph->region[i].len = 0;
+       }
+       lp = isl_basic_set_copy(graph->lp);
+       sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
+                                      graph->region, &check_conflict, graph);
+       return sol;
+}
+
+/* Update the schedules of all nodes based on the given solution
+ * of the LP problem.
+ * The new row is added to the current band.
+ * All possibly negative coefficients are encoded as a difference
+ * of two non-negative variables, so we need to perform the subtraction
+ * here.  Moreover, if use_cmap is set, then the solution does
+ * not refer to the actual coefficients c_i_x, but instead to variables
+ * t_i_x such that c_i_x = Q t_i_x and Q is equal to node->cmap.
+ * In this case, we then also need to perform this multiplication
+ * to obtain the values of c_i_x.
+ */
+static int update_schedule(struct isl_sched_graph *graph,
+       __isl_take isl_vec *sol, int use_cmap)
+{
+       int i, j;
+       isl_vec *csol = NULL;
+
+       if (!sol)
+               goto error;
+       if (sol->size == 0)
+               isl_die(sol->ctx, isl_error_internal,
+                       "no solution found", goto error);
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int pos = node->start;
+               int row = isl_mat_rows(node->sched);
+
+               isl_vec_free(csol);
+               csol = isl_vec_alloc(sol->ctx, node->nvar);
+               if (!csol)
+                       goto error;
+
+               isl_map_free(node->sched_map);
+               node->sched_map = NULL;
+               node->sched = isl_mat_add_rows(node->sched, 1);
+               if (!node->sched)
+                       goto error;
+               node->sched = isl_mat_set_element(node->sched, row, 0,
+                                                 sol->el[1 + pos]);
+               for (j = 0; j < node->nparam + node->nvar; ++j)
+                       isl_int_sub(sol->el[1 + pos + 1 + 2 * j + 1],
+                                   sol->el[1 + pos + 1 + 2 * j + 1],
+                                   sol->el[1 + pos + 1 + 2 * j]);
+               for (j = 0; j < node->nparam; ++j)
+                       node->sched = isl_mat_set_element(node->sched,
+                                       row, 1 + j, sol->el[1+pos+1+2*j+1]);
+               for (j = 0; j < node->nvar; ++j)
+                       isl_int_set(csol->el[j],
+                                   sol->el[1+pos+1+2*(node->nparam+j)+1]);
+               if (use_cmap)
+                       csol = isl_mat_vec_product(isl_mat_copy(node->cmap),
+                                                  csol);
+               if (!csol)
+                       goto error;
+               for (j = 0; j < node->nvar; ++j)
+                       node->sched = isl_mat_set_element(node->sched,
+                                       row, 1 + node->nparam + j, csol->el[j]);
+               node->band[graph->n_total_row] = graph->n_band;
+       }
+       isl_vec_free(sol);
+       isl_vec_free(csol);
+
+       graph->n_row++;
+       graph->n_total_row++;
+
+       return 0;
+error:
+       isl_vec_free(sol);
+       isl_vec_free(csol);
+       return -1;
+}
+
+/* Convert node->sched into a map and return this map.
+ * We simply add equality constraints that express each output variable
+ * as the affine combination of parameters and input variables specified
+ * by the schedule matrix.
+ *
+ * The result is cached in node->sched_map, which needs to be released
+ * whenever node->sched is updated.
+ */
+static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
+{
+       int i, j;
+       isl_dim *dim;
+       isl_basic_map *bmap;
+       isl_constraint *c;
+       int nrow, ncol;
+       isl_int v;
+
+       if (node->sched_map)
+               return isl_map_copy(node->sched_map);
+
+       nrow = isl_mat_rows(node->sched);
+       ncol = isl_mat_cols(node->sched) - 1;
+       dim = isl_dim_from_domain(isl_dim_copy(node->dim));
+       dim = isl_dim_add(dim, isl_dim_out, nrow);
+       bmap = isl_basic_map_universe(isl_dim_copy(dim));
+
+       isl_int_init(v);
+
+       for (i = 0; i < nrow; ++i) {
+               c = isl_equality_alloc(isl_dim_copy(dim));
+               isl_constraint_set_coefficient_si(c, isl_dim_out, i, -1);
+               isl_mat_get_element(node->sched, i, 0, &v);
+               isl_constraint_set_constant(c, v);
+               for (j = 0; j < node->nparam; ++j) {
+                       isl_mat_get_element(node->sched, i, 1 + j, &v);
+                       isl_constraint_set_coefficient(c, isl_dim_param, j, v);
+               }
+               for (j = 0; j < node->nvar; ++j) {
+                       isl_mat_get_element(node->sched,
+                                           i, 1 + node->nparam + j, &v);
+                       isl_constraint_set_coefficient(c, isl_dim_in, j, v);
+               }
+               bmap = isl_basic_map_add_constraint(bmap, c);
+       }
+
+       isl_int_clear(v);
+
+       isl_dim_free(dim);
+
+       node->sched_map = isl_map_from_basic_map(bmap);
+       return isl_map_copy(node->sched_map);
+}
+
+/* Update the given dependence relation based on the current schedule.
+ * That is, intersect the dependence relation with a map expressing
+ * that source and sink are executed within the same iteration of
+ * the current schedule.
+ * This is not the most efficient way, but this shouldn't be a critical
+ * operation.
+ */
+static __isl_give isl_map *specialize(__isl_take isl_map *map,
+       struct isl_sched_node *src, struct isl_sched_node *dst)
+{
+       isl_map *src_sched, *dst_sched, *id;
+
+       src_sched = node_extract_schedule(src);
+       dst_sched = node_extract_schedule(dst);
+       id = isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
+       return isl_map_intersect(map, id);
+}
+
+/* Update the dependence relations of all edges based on the current schedule.
+ * If a dependence is carried completely by the current schedule, then
+ * it is removed and edge_table is updated accordingly.
+ */
+static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i;
+       int reset_table = 0;
+
+       for (i = graph->n_edge - 1; i >= 0; --i) {
+               struct isl_sched_edge *edge = &graph->edge[i];
+               edge->map = specialize(edge->map, edge->src, edge->dst);
+               if (!edge->map)
+                       return -1;
+
+               if (isl_map_fast_is_empty(edge->map)) {
+                       reset_table = 1;
+                       isl_map_free(edge->map);
+                       if (i != graph->n_edge - 1)
+                               graph->edge[i] = graph->edge[graph->n_edge - 1];
+                       graph->n_edge--;
+               }
+       }
+
+       if (reset_table) {
+               isl_hash_table_free(ctx, graph->edge_table);
+               graph->edge_table = NULL;
+               return graph_init_edge_table(ctx, graph);
+       }
+
+       return 0;
+}
+
+static void next_band(struct isl_sched_graph *graph)
+{
+       graph->band_start = graph->n_total_row;
+       graph->n_band++;
+}
+
+/* Topologically sort statements mapped to same schedule iteration
+ * and add a row to the schedule corresponding to this order.
+ */
+static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i, j;
+
+       if (graph->n <= 1)
+               return 0;
+
+       if (update_edges(ctx, graph) < 0)
+               return -1;
+
+       if (graph->n_edge == 0)
+               return 0;
+
+       if (detect_sccs(graph) < 0)
+               return -1;
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int row = isl_mat_rows(node->sched);
+               int cols = isl_mat_cols(node->sched);
+
+               isl_map_free(node->sched_map);
+               node->sched_map = NULL;
+               node->sched = isl_mat_add_rows(node->sched, 1);
+               if (!node->sched)
+                       return -1;
+               node->sched = isl_mat_set_element_si(node->sched, row, 0,
+                                                    node->scc);
+               for (j = 1; j < cols; ++j)
+                       node->sched = isl_mat_set_element_si(node->sched,
+                                                            row, j, 0);
+               node->band[graph->n_total_row] = graph->n_band;
+       }
+
+       graph->n_total_row++;
+       next_band(graph);
+
+       return 0;
+}
+
+/* Construct an isl_schedule based on the computed schedule stored
+ * in graph and with parameters specified by dim.
+ */
+static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
+       __isl_take isl_dim *dim)
+{
+       int i;
+       isl_ctx *ctx;
+       isl_schedule *sched = NULL;
+               
+       if (!dim)
+               return NULL;
+
+       ctx = isl_dim_get_ctx(dim);
+       sched = isl_calloc(ctx, struct isl_schedule,
+                          sizeof(struct isl_schedule) +
+                          (graph->n - 1) * sizeof(struct isl_schedule_node));
+       if (!sched)
+               goto error;
+
+       sched->n = graph->n;
+       sched->n_band = graph->n_band;
+       sched->n_total_row = graph->n_total_row;
+
+       for (i = 0; i < sched->n; ++i) {
+               int r, b;
+               int *band_end;
+
+               band_end = isl_alloc_array(ctx, int, graph->n_band);
+               if (!band_end)
+                       goto error;
+               sched->node[i].sched = node_extract_schedule(&graph->node[i]);
+               sched->node[i].band_end = band_end;
+
+               for (r = b = 0; r < graph->n_total_row; ++r) {
+                       if (graph->node[i].band[r] == b)
+                               continue;
+                       band_end[b++] = r;
+                       if (graph->node[i].band[r] == -1)
+                               break;
+               }
+               if (r == graph->n_total_row)
+                       band_end[b++] = r;
+               sched->node[i].n_band = b;
+       }
+
+       sched->dim = dim;
+
+       return sched;
+error:
+       isl_dim_free(dim);
+       isl_schedule_free(sched);
+       return NULL;
+}
+
+/* Copy nodes that satisfy node_pred from the src dependence graph
+ * to the dst dependence graph.
+ */
+static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
+       int (*node_pred)(struct isl_sched_node *node, int data), int data)
+{
+       int i;
+
+       dst->n = 0;
+       for (i = 0; i < src->n; ++i) {
+               if (!node_pred(&src->node[i], data))
+                       continue;
+               dst->node[dst->n].dim = isl_dim_copy(src->node[i].dim);
+               dst->node[dst->n].nvar = src->node[i].nvar;
+               dst->node[dst->n].nparam = src->node[i].nparam;
+               dst->node[dst->n].sched = isl_mat_copy(src->node[i].sched);
+               dst->node[dst->n].sched_map =
+                       isl_map_copy(src->node[i].sched_map);
+               dst->node[dst->n].band = src->node[i].band;
+               dst->n++;
+       }
+
+       return 0;
+}
+
+/* Copy non-empty edges that satisfy edge_pred from the src dependence graph
+ * to the dst dependence graph.
+ */
+static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
+       struct isl_sched_graph *src,
+       int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
+{
+       int i;
+
+       dst->n_edge = 0;
+       for (i = 0; i < src->n_edge; ++i) {
+               struct isl_sched_edge *edge = &src->edge[i];
+               isl_map *map;
+
+               if (!edge_pred(edge, data))
+                       continue;
+
+               if (isl_map_fast_is_empty(edge->map))
+                       continue;
+
+               map = isl_map_copy(edge->map);
+
+               dst->edge[dst->n_edge].src =
+                       graph_find_node(ctx, dst, edge->src->dim);
+               dst->edge[dst->n_edge].dst =
+                       graph_find_node(ctx, dst, edge->dst->dim);
+               dst->edge[dst->n_edge].map = map;
+               dst->edge[dst->n_edge].validity = edge->validity;
+               dst->edge[dst->n_edge].proximity = edge->proximity;
+               dst->n_edge++;
+       }
+
+       return 0;
+}
+
+/* Given a "src" dependence graph that contains the nodes from "dst"
+ * that satisfy node_pred, copy the schedule computed in "src"
+ * for those nodes back to "dst".
+ */
+static int copy_schedule(struct isl_sched_graph *dst,
+       struct isl_sched_graph *src,
+       int (*node_pred)(struct isl_sched_node *node, int data), int data)
+{
+       int i;
+
+       src->n = 0;
+       for (i = 0; i < dst->n; ++i) {
+               if (!node_pred(&dst->node[i], data))
+                       continue;
+               isl_mat_free(dst->node[i].sched);
+               isl_map_free(dst->node[i].sched_map);
+               dst->node[i].sched = isl_mat_copy(src->node[src->n].sched);
+               dst->node[i].sched_map =
+                       isl_map_copy(src->node[src->n].sched_map);
+               src->n++;
+       }
+
+       dst->n_total_row = src->n_total_row;
+       dst->n_band = src->n_band;
+
+       return 0;
+}
+
+/* Compute the maximal number of variables over all nodes.
+ * This is the maximal number of linearly independent schedule
+ * rows that we need to compute.
+ * Just in case we end up in a part of the dependence graph
+ * with only lower-dimensional domains, we make sure we will
+ * compute the required amount of extra linearly independent rows.
+ */
+static int compute_maxvar(struct isl_sched_graph *graph)
+{
+       int i;
+
+       graph->maxvar = 0;
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int nvar;
+
+               if (node_update_cmap(node) < 0)
+                       return -1;
+               nvar = node->nvar + graph->n_row - node->rank;
+               if (nvar > graph->maxvar)
+                       graph->maxvar = nvar;
+       }
+
+       return 0;
+}
+
+static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph);
+static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph);
+
+/* Compute a schedule for a subgraph of "graph".  In particular, for
+ * the graph composed of nodes that satisfy node_pred and edges that
+ * that satisfy edge_pred.  The caller should precompute the number
+ * of nodes and edges that satisfy these predicates and pass them along
+ * as "n" and "n_edge".
+ * If the subgraph is known to consist of a single component, then wcc should
+ * be set and then we call compute_schedule_wcc on the constructed subgraph.
+ * Otherwise, we call compute_schedule, which will check whether the subgraph
+ * is connected.
+ */
+static int compute_sub_schedule(isl_ctx *ctx,
+       struct isl_sched_graph *graph, int n, int n_edge,
+       int (*node_pred)(struct isl_sched_node *node, int data),
+       int (*edge_pred)(struct isl_sched_edge *edge, int data),
+       int data, int wcc)
+{
+       struct isl_sched_graph split = { 0 };
+
+       if (graph_alloc(ctx, &split, n, n_edge) < 0)
+               goto error;
+       if (copy_nodes(&split, graph, node_pred, data) < 0)
+               goto error;
+       if (graph_init_table(ctx, &split) < 0)
+               goto error;
+       if (copy_edges(ctx, &split, graph, edge_pred, data) < 0)
+               goto error;
+       if (graph_init_edge_table(ctx, &split) < 0)
+               goto error;
+       split.n_row = graph->n_row;
+       split.n_total_row = graph->n_total_row;
+       split.n_band = graph->n_band;
+       split.band_start = graph->band_start;
+
+       if (wcc && compute_schedule_wcc(ctx, &split) < 0)
+               goto error;
+       if (!wcc && compute_schedule(ctx, &split) < 0)
+               goto error;
+
+       copy_schedule(graph, &split, node_pred, data);
+
+       graph_free(ctx, &split);
+       return 0;
+error:
+       graph_free(ctx, &split);
+       return -1;
+}
+
+static int node_scc_exactly(struct isl_sched_node *node, int scc)
+{
+       return node->scc == scc;
+}
+
+static int node_scc_at_most(struct isl_sched_node *node, int scc)
+{
+       return node->scc <= scc;
+}
+
+static int node_scc_at_least(struct isl_sched_node *node, int scc)
+{
+       return node->scc >= scc;
+}
+
+static int edge_src_scc_exactly(struct isl_sched_edge *edge, int scc)
+{
+       return edge->src->scc == scc;
+}
+
+static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
+{
+       return edge->dst->scc <= scc;
+}
+
+static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
+{
+       return edge->src->scc >= scc;
+}
+
+/* Pad the schedules of all nodes with zero rows such that in the end
+ * they all have graph->n_total_row rows.
+ * The extra rows don't belong to any band, so they get assigned band number -1.
+ */
+static int pad_schedule(struct isl_sched_graph *graph)
+{
+       int i, j;
+
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int row = isl_mat_rows(node->sched);
+               if (graph->n_total_row > row) {
+                       isl_map_free(node->sched_map);
+                       node->sched_map = NULL;
+               }
+               node->sched = isl_mat_add_zero_rows(node->sched,
+                                                   graph->n_total_row - row);
+               if (!node->sched)
+                       return -1;
+               for (j = row; j < graph->n_total_row; ++j)
+                       node->band[j] = -1;
+       }
+
+       return 0;
+}
+
+/* Split the current graph into two parts and compute a schedule for each
+ * part individually.  In particular, one part consists of all SCCs up
+ * to and including graph->src_scc, while the other part contains the other
+ * SCCS.
+ *
+ * The split is enforced in the schedule by constant rows with two different
+ * values (0 and 1).  These constant rows replace the previously computed rows
+ * in the current band.
+ * It would be possible to reuse them as the first rows in the next
+ * band, but recomputing them may result in better rows as we are looking
+ * at a smaller part of the dependence graph.
+ */
+static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i, j, n, e1, e2;
+       int n_total_row, orig_total_row;
+       int n_band, orig_band;
+       int drop;
+
+       drop = graph->n_total_row - graph->band_start;
+       graph->n_total_row -= drop;
+       graph->n_row -= drop;
+
+       n = 0;
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int row = isl_mat_rows(node->sched) - drop;
+               int cols = isl_mat_cols(node->sched);
+               int before = node->scc <= graph->src_scc;
+
+               if (before)
+                       n++;
+
+               isl_map_free(node->sched_map);
+               node->sched_map = NULL;
+               node->sched = isl_mat_drop_rows(node->sched,
+                                               graph->band_start, drop);
+               node->sched = isl_mat_add_rows(node->sched, 1);
+               if (!node->sched)
+                       return -1;
+               node->sched = isl_mat_set_element_si(node->sched, row, 0,
+                                                    !before);
+               for (j = 1; j < cols; ++j)
+                       node->sched = isl_mat_set_element_si(node->sched,
+                                                            row, j, 0);
+               node->band[graph->n_total_row] = graph->n_band;
+       }
+
+       e1 = e2 = 0;
+       for (i = 0; i < graph->n_edge; ++i) {
+               if (graph->edge[i].dst->scc <= graph->src_scc)
+                       e1++;
+               if (graph->edge[i].src->scc > graph->src_scc)
+                       e2++;
+       }
+
+       graph->n_total_row++;
+       next_band(graph);
+
+       orig_total_row = graph->n_total_row;
+       orig_band = graph->n_band;
+       if (compute_sub_schedule(ctx, graph, n, e1,
+                               &node_scc_at_most, &edge_dst_scc_at_most,
+                               graph->src_scc, 0) < 0)
+               return -1;
+       n_total_row = graph->n_total_row;
+       graph->n_total_row = orig_total_row;
+       n_band = graph->n_band;
+       graph->n_band = orig_band;
+       if (compute_sub_schedule(ctx, graph, graph->n - n, e2,
+                               &node_scc_at_least, &edge_src_scc_at_least,
+                               graph->src_scc + 1, 0) < 0)
+               return -1;
+       if (n_total_row > graph->n_total_row)
+               graph->n_total_row = n_total_row;
+       if (n_band > graph->n_band)
+               graph->n_band = n_band;
+
+       return pad_schedule(graph);
+}
+
+/* Compute the next band of the schedule after updating the dependence
+ * relations based on the the current schedule.
+ */
+static int compute_next_band(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       if (update_edges(ctx, graph) < 0)
+               return -1;
+       next_band(graph);
+               
+       return compute_schedule(ctx, graph);
+}
+
+/* Add constraints to graph->lp that force the dependence of edge i
+ * to be respected and attempt to carry it, where edge i is one from
+ * a node j to itself.
+ * That is, add constraints that enforce
+ *
+ *     (c_j_0 + c_j_n n + c_j_x y) - (c_j_0 + c_j_n n + c_j_x x)
+ *     = c_j_x (y - x) >= e_i
+ *
+ * for each (x,y) in R.
+ * We obtain general constraints on coefficients (c_0, c_n, c_x)
+ * of valid constraints for (y - x) and then plug in (-e_i, 0, c_j_x),
+ * with each coefficient in c_j_x represented as a pair of non-negative
+ * coefficients.
+ */
+static int add_intra_constraints(struct isl_sched_graph *graph, int i)
+{
+       unsigned total;
+       struct isl_sched_edge *edge= &graph->edge[i];
+       isl_map *map = isl_map_copy(edge->map);
+       isl_ctx *ctx = isl_map_get_ctx(map);
+       isl_dim *dim;
+       isl_dim_map *dim_map;
+       isl_basic_set *coef;
+       struct isl_sched_node *node = edge->src;
+
+       coef = intra_coefficients(graph, map);
+
+       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+
+       total = isl_basic_set_total_dim(graph->lp);
+       dim_map = isl_dim_map_alloc(ctx, total);
+       isl_dim_map_range(dim_map, 3 + i, 0, 0, 0, 1, -1);
+       isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         node->nvar, -1);
+       isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         node->nvar, 1);
+       graph->lp = isl_basic_set_extend_constraints(graph->lp,
+                       coef->n_eq, coef->n_ineq);
+       graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
+                                                          coef, dim_map);
+       isl_dim_free(dim);
+
+       return 0;
+}
+
+/* Add constraints to graph->lp that force the dependence of edge i
+ * to be respected and attempt to carry it, where edge i is one from
+ * node j to node k.
+ * That is, add constraints that enforce
+ *
+ *     (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
+ *
+ * for each (x,y) in R.
+ * We obtain general constraints on coefficients (c_0, c_n, c_x)
+ * of valid constraints for R and then plug in
+ * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, c_k_x - c_j_x)
+ * with each coefficient (except e_i, c_k_0 and c_j_0)
+ * represented as a pair of non-negative coefficients.
+ */
+static int add_inter_constraints(struct isl_sched_graph *graph, int i)
+{
+       unsigned total;
+       struct isl_sched_edge *edge= &graph->edge[i];
+       isl_map *map = isl_map_copy(edge->map);
+       isl_ctx *ctx = isl_map_get_ctx(map);
+       isl_dim *dim;
+       isl_dim_map *dim_map;
+       isl_basic_set *coef;
+       struct isl_sched_node *src = edge->src;
+       struct isl_sched_node *dst = edge->dst;
+
+       coef = inter_coefficients(graph, map);
+
+       dim = isl_dim_domain(isl_dim_unwrap(isl_basic_set_get_dim(coef)));
+
+       total = isl_basic_set_total_dim(graph->lp);
+       dim_map = isl_dim_map_alloc(ctx, total);
+
+       isl_dim_map_range(dim_map, 3 + i, 0, 0, 0, 1, -1);
+
+       isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1);
+       isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
+       isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
+       isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
+                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         dst->nvar, -1);
+       isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
+                         isl_dim_size(dim, isl_dim_set) + src->nvar, 1,
+                         dst->nvar, 1);
+
+       isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
+       isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
+       isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
+       isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         src->nvar, 1);
+       isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
+                         isl_dim_size(dim, isl_dim_set), 1,
+                         src->nvar, -1);
+
+       graph->lp = isl_basic_set_extend_constraints(graph->lp,
+                       coef->n_eq, coef->n_ineq);
+       graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
+                                                          coef, dim_map);
+       isl_dim_free(dim);
+
+       return 0;
+}
+
+/* Add constraints to graph->lp that force all dependence
+ * to be respected and attempt to carry it.
+ */
+static int add_all_constraints(struct isl_sched_graph *graph)
+{
+       int i;
+
+       for (i = 0; i < graph->n_edge; ++i) {
+               struct isl_sched_edge *edge= &graph->edge[i];
+               if (edge->src == edge->dst &&
+                   add_intra_constraints(graph, i) < 0)
+                       return -1;
+               if (edge->src != edge->dst &&
+                   add_inter_constraints(graph, i) < 0)
+                       return -1;
+       }
+
+       return 0;
+}
+
+/* Construct an LP problem for finding schedule coefficients
+ * such that the schedule carries as many dependences as possible.
+ * In particular, for each dependence i, we bound the dependence distance
+ * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
+ * of all e_i's.  Dependence with e_i = 0 in the solution are simply
+ * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
+ *
+ * All variables of the LP are non-negative.  The actual coefficients
+ * may be negative, so each coefficient is represented as the difference
+ * of two non-negative variables.  The negative part always appears
+ * immediately before the positive part.
+ * Other than that, the variables have the following order
+ *
+ *     - sum of (1 - e_i) over all edges
+ *     - sum of positive and negative parts of all c_n coefficients
+ *             (unconstrained when computing non-parametric schedules)
+ *     - sum of positive and negative parts of all c_x coefficients
+ *     - for each edge
+ *             - e_i
+ *     - for each node
+ *             - c_i_0
+ *             - positive and negative parts of c_i_n (if parametric)
+ *             - positive and negative parts of c_i_x
+ *
+ * The constraints are those from the edges plus three equalities
+ * to express the sums and n_edge inequalities to express e_i <= 1.
+ */
+static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       int i, j;
+       int k;
+       isl_dim *dim;
+       unsigned total;
+       int n_eq, n_ineq;
+
+       total = 3 + graph->n_edge;
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[graph->sorted[i]];
+               node->start = total;
+               total += 1 + 2 * (node->nparam + node->nvar);
+       }
+
+       if (count_constraints(graph, &n_eq, &n_ineq, 1) < 0)
+               return -1;
+
+       dim = isl_dim_set_alloc(ctx, 0, total);
+       isl_basic_set_free(graph->lp);
+       n_eq += 3;
+       n_ineq += graph->n_edge;
+       graph->lp = isl_basic_set_alloc_dim(dim, 0, n_eq, n_ineq);
+       graph->lp = isl_basic_set_set_rational(graph->lp);
+
+       k = isl_basic_set_alloc_equality(graph->lp);
+       if (k < 0)
+               return -1;
+       isl_seq_clr(graph->lp->eq[k], 1 +  total);
+       isl_int_set_si(graph->lp->eq[k][0], -graph->n_edge);
+       isl_int_set_si(graph->lp->eq[k][1], 1);
+       for (i = 0; i < graph->n_edge; ++i)
+               isl_int_set_si(graph->lp->eq[k][4 + i], 1);
+
+       k = isl_basic_set_alloc_equality(graph->lp);
+       if (k < 0)
+               return -1;
+       isl_seq_clr(graph->lp->eq[k], 1 +  total);
+       isl_int_set_si(graph->lp->eq[k][2], -1);
+       for (i = 0; i < graph->n; ++i) {
+               int pos = 1 + graph->node[i].start + 1;
+
+               for (j = 0; j < 2 * graph->node[i].nparam; ++j)
+                       isl_int_set_si(graph->lp->eq[k][pos + j], 1);
+       }
+
+       k = isl_basic_set_alloc_equality(graph->lp);
+       if (k < 0)
+               return -1;
+       isl_seq_clr(graph->lp->eq[k], 1 +  total);
+       isl_int_set_si(graph->lp->eq[k][3], -1);
+       for (i = 0; i < graph->n; ++i) {
+               struct isl_sched_node *node = &graph->node[i];
+               int pos = 1 + node->start + 1 + 2 * node->nparam;
+
+               for (j = 0; j < 2 * node->nvar; ++j)
+                       isl_int_set_si(graph->lp->eq[k][pos + j], 1);
+       }
+
+       for (i = 0; i < graph->n_edge; ++i) {
+               k = isl_basic_set_alloc_inequality(graph->lp);
+               if (k < 0)
+                       return -1;
+               isl_seq_clr(graph->lp->ineq[k], 1 +  total);
+               isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
+               isl_int_set_si(graph->lp->ineq[k][0], 1);
+       }
+
+       if (add_all_constraints(graph) < 0)
+               return -1;
+
+       return 0;
+}
+
+/* Construct a schedule row for each node such that as many dependences
+ * as possible are carried and then continue with the next band.
+ */
+static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       isl_vec *sol;
+       isl_basic_set *lp;
+
+       if (setup_carry_lp(ctx, graph) < 0)
+               return -1;
+
+       lp = isl_basic_set_copy(graph->lp);
+       sol = isl_tab_basic_set_non_neg_lexmin(lp);
+       if (!sol)
+               return -1;
+
+       if (sol->size == 0) {
+               isl_vec_free(sol);
+               isl_die(ctx, isl_error_internal,
+                       "error in schedule construction", return -1);
+       }
+
+       if (isl_int_cmp_si(sol->el[1], graph->n_edge) >= 0) {
+               isl_vec_free(sol);
+               isl_die(ctx, isl_error_unknown,
+                       "unable to carry dependences", return -1);
+       }
+
+       if (update_schedule(graph, sol, 0) < 0)
+               return -1;
+
+       return compute_next_band(ctx, graph);
+}
+
+/* Compute a schedule for a connected dependence graph.
+ * We try to find a sequence of as many schedule rows as possible that result
+ * in non-negative dependence distances (independent of the previous rows
+ * in the sequence, i.e., such that the sequence is tilable).
+ * If we can't find any more rows we either
+ * - split between SCCs and start over (assuming we found an interesting
+ *     pair of SCCs between which to split)
+ * - continue with the next band (assuming the current band has at least
+ *     one row)
+ * - try to carry as many dependences as possible and continue with the next
+ *     band
+ *
+ * If we manage to complete the schedule, we finish off by topologically
+ * sorting the statements based on the remaining dependences.
+ */
+static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       if (detect_sccs(graph) < 0)
+               return -1;
+       sort_sccs(graph);
+
+       if (compute_maxvar(graph) < 0)
+               return -1;
+
+       while (graph->n_row < graph->maxvar) {
+               isl_vec *sol;
+
+               graph->src_scc = -1;
+               graph->dst_scc = -1;
+
+               if (setup_lp(ctx, graph) < 0)
+                       return -1;
+               sol = solve_lp(graph);
+               if (!sol)
+                       return -1;
+               if (sol->size == 0) {
+                       isl_vec_free(sol);
+                       if (graph->src_scc >= 0)
+                               return compute_split_schedule(ctx, graph);
+                       if (graph->n_total_row > graph->band_start)
+                               return compute_next_band(ctx, graph);
+                       return carry_dependences(ctx, graph);
+               }
+               if (update_schedule(graph, sol, 1) < 0)
+                       return -1;
+       }
+
+       if (graph->n_total_row > graph->band_start)
+               next_band(graph);
+       return sort_statements(ctx, graph);
+}
+
+/* Compute a schedule for each component (identified by node->scc)
+ * of the dependence graph separately and then combine the results.
+ */
+static int compute_component_schedule(isl_ctx *ctx,
+       struct isl_sched_graph *graph)
+{
+       int wcc, i;
+       int n, n_edge;
+       int n_total_row, orig_total_row;
+       int n_band, orig_band;
+
+       n_total_row = 0;
+       orig_total_row = graph->n_total_row;
+       n_band = 0;
+       orig_band = graph->n_band;
+       for (wcc = 0; wcc < graph->scc; ++wcc) {
+               n = 0;
+               for (i = 0; i < graph->n; ++i)
+                       if (graph->node[i].scc == wcc)
+                               n++;
+               n_edge = 0;
+               for (i = 0; i < graph->n_edge; ++i)
+                       if (graph->edge[i].src->scc == wcc)
+                               n_edge++;
+
+               if (compute_sub_schedule(ctx, graph, n, n_edge,
+                                   &node_scc_exactly,
+                                   &edge_src_scc_exactly, wcc, 1) < 0)
+                       return -1;
+               if (graph->n_total_row > n_total_row)
+                       n_total_row = graph->n_total_row;
+               graph->n_total_row = orig_total_row;
+               if (graph->n_band > n_band)
+                       n_band = graph->n_band;
+               graph->n_band = orig_band;
+       }
+
+       graph->n_total_row = n_total_row;
+       graph->n_band = n_band;
+
+       return pad_schedule(graph);
+}
+
+/* Compute a schedule for the given dependence graph.
+ * We first check if the graph is connected (through validity dependences)
+ * and if so compute a schedule for each component separately.
+ */
+static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
+{
+       if (detect_wccs(graph) < 0)
+               return -1;
+
+       if (graph->scc > 1)
+               return compute_component_schedule(ctx, graph);
+
+       return compute_schedule_wcc(ctx, graph);
+}
+
+/* Compute a schedule for the given union of domains that respects
+ * all the validity dependences and tries to minimize the dependence
+ * distances over the proximity dependences.
+ */
+__isl_give isl_schedule *isl_union_set_compute_schedule(
+       __isl_take isl_union_set *domain,
+       __isl_take isl_union_map *validity,
+       __isl_take isl_union_map *proximity)
+{
+       isl_ctx *ctx = isl_union_set_get_ctx(domain);
+       isl_dim *dim;
+       struct isl_sched_graph graph = { 0 };
+       isl_schedule *sched;
+
+       domain = isl_union_set_align_params(domain,
+                                           isl_union_map_get_dim(validity));
+       domain = isl_union_set_align_params(domain,
+                                           isl_union_map_get_dim(proximity));
+       dim = isl_union_set_get_dim(domain);
+       validity = isl_union_map_align_params(validity, isl_dim_copy(dim));
+       proximity = isl_union_map_align_params(proximity, dim);
+
+       if (!domain)
+               goto error;
+
+       graph.n = isl_union_set_n_set(domain);
+       if (graph.n == 0)
+               goto empty;
+       if (graph_alloc(ctx, &graph, graph.n,
+           isl_union_map_n_map(validity) + isl_union_map_n_map(proximity)) < 0)
+               goto error;
+       graph.root = 1;
+       graph.n = 0;
+       if (isl_union_set_foreach_set(domain, &extract_node, &graph) < 0)
+               goto error;
+       if (graph_init_table(ctx, &graph) < 0)
+               goto error;
+       graph.n_edge = 0;
+       if (isl_union_map_foreach_map(validity, &extract_edge, &graph) < 0)
+               goto error;
+       if (graph_init_edge_table(ctx, &graph) < 0)
+               goto error;
+       if (isl_union_map_foreach_map(proximity, &extract_edge, &graph) < 0)
+               goto error;
+
+       if (compute_schedule(ctx, &graph) < 0)
+               goto error;
+
+empty:
+       sched = extract_schedule(&graph, isl_union_set_get_dim(domain));
+
+       graph_free(ctx, &graph);
+       isl_union_set_free(domain);
+       isl_union_map_free(validity);
+       isl_union_map_free(proximity);
+
+       return sched;
+error:
+       graph_free(ctx, &graph);
+       isl_union_set_free(domain);
+       isl_union_map_free(validity);
+       isl_union_map_free(proximity);
+       return NULL;
+}
+
+void *isl_schedule_free(__isl_take isl_schedule *sched)
+{
+       int i;
+       if (!sched)
+               return NULL;
+       for (i = 0; i < sched->n; ++i) {
+               isl_map_free(sched->node[i].sched);
+               free(sched->node[i].band_end);
+       }
+       isl_dim_free(sched->dim);
+       free(sched);
+       return NULL;
+}
+
+__isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
+{
+       int i;
+       isl_union_map *umap;
+
+       if (!sched)
+               return NULL;
+
+       umap = isl_union_map_empty(isl_dim_copy(sched->dim));
+       for (i = 0; i < sched->n; ++i)
+               umap = isl_union_map_add_map(umap,
+                                           isl_map_copy(sched->node[i].sched));
+
+       return umap;
+}
+
+int isl_schedule_n_band(__isl_keep isl_schedule *sched)
+{
+       return sched ? sched->n_band : 0;
+}
+
+/* Construct a mapping that maps each domain to the band in its schedule
+ * with the specified band index.  Note that bands with the same index
+ * but for different domains do not need to be related.
+ */
+__isl_give isl_union_map *isl_schedule_get_band(__isl_keep isl_schedule *sched,
+       unsigned band)
+{
+       int i;
+       isl_union_map *umap;
+
+       if (!sched)
+               return NULL;
+
+       umap = isl_union_map_empty(isl_dim_copy(sched->dim));
+       for (i = 0; i < sched->n; ++i) {
+               int start, end;
+               isl_map *map;
+
+               if (band >= sched->node[i].n_band)
+                       continue;
+
+               start = band > 0 ? sched->node[i].band_end[band - 1] : 0;
+               end = sched->node[i].band_end[band];
+
+               map = isl_map_copy(sched->node[i].sched);
+
+               map = isl_map_project_out(map, isl_dim_out, end,
+                                         sched->n_total_row - end);
+               map = isl_map_project_out(map, isl_dim_out, 0, start);
+
+               umap = isl_union_map_add_map(umap, map);
+       }
+
+       return umap;
+}
index d65ae0e..0558d08 100644 (file)
@@ -18,6 +18,7 @@
 #include <isl/polynomial.h>
 #include <isl/union_map.h>
 #include <isl_factorization.h>
+#include <isl/schedule.h>
 
 static char *srcdir;
 
@@ -1802,6 +1803,334 @@ void test_factorize(isl_ctx *ctx)
        isl_factorizer_free(f);
 }
 
+static int check_injective(__isl_take isl_map *map, void *user)
+{
+       int *injective = user;
+
+       *injective = isl_map_is_injective(map);
+       isl_map_free(map);
+
+       if (*injective < 0 || !*injective)
+               return -1;
+
+       return 0;
+}
+
+int test_one_schedule(isl_ctx *ctx, const char *d, const char *w,
+       const char *r, const char *s, int tilable, int parallel)
+{
+       int i;
+       isl_union_set *D;
+       isl_union_map *W, *R, *S;
+       isl_union_map *empty;
+       isl_union_map *dep_raw, *dep_war, *dep_waw, *dep;
+       isl_union_map *validity, *proximity;
+       isl_union_map *schedule;
+       isl_union_map *test;
+       isl_union_set *delta;
+       isl_union_set *domain;
+       isl_set *delta_set;
+       isl_set *slice;
+       isl_set *origin;
+       isl_schedule *sched;
+       int is_nonneg, is_parallel, is_tilable, is_injection, is_complete;
+
+       D = isl_union_set_read_from_str(ctx, d);
+       W = isl_union_map_read_from_str(ctx, w);
+       R = isl_union_map_read_from_str(ctx, r);
+       S = isl_union_map_read_from_str(ctx, s);
+
+       W = isl_union_map_intersect_domain(W, isl_union_set_copy(D));
+       R = isl_union_map_intersect_domain(R, isl_union_set_copy(D));
+
+       empty = isl_union_map_empty(isl_union_map_get_dim(S));
+        isl_union_map_compute_flow(isl_union_map_copy(R),
+                                  isl_union_map_copy(W), empty,
+                                  isl_union_map_copy(S),
+                                  &dep_raw, NULL, NULL, NULL);
+        isl_union_map_compute_flow(isl_union_map_copy(W),
+                                  isl_union_map_copy(W),
+                                  isl_union_map_copy(R),
+                                  isl_union_map_copy(S),
+                                  &dep_waw, &dep_war, NULL, NULL);
+
+       dep = isl_union_map_union(dep_waw, dep_war);
+       dep = isl_union_map_union(dep, dep_raw);
+       validity = isl_union_map_copy(dep);
+       proximity = isl_union_map_copy(dep);
+
+       sched = isl_union_set_compute_schedule(isl_union_set_copy(D),
+                                              validity, proximity);
+       schedule = isl_schedule_get_map(sched);
+       isl_schedule_free(sched);
+       isl_union_map_free(W);
+       isl_union_map_free(R);
+       isl_union_map_free(S);
+
+       is_injection = 1;
+       isl_union_map_foreach_map(schedule, &check_injective, &is_injection);
+
+       domain = isl_union_map_domain(isl_union_map_copy(schedule));
+       is_complete = isl_union_set_is_subset(D, domain);
+       isl_union_set_free(D);
+       isl_union_set_free(domain);
+
+       test = isl_union_map_reverse(isl_union_map_copy(schedule));
+       test = isl_union_map_apply_range(test, dep);
+       test = isl_union_map_apply_range(test, schedule);
+
+       delta = isl_union_map_deltas(test);
+       if (isl_union_set_n_set(delta) == 0) {
+               is_tilable = 1;
+               is_parallel = 1;
+               is_nonneg = 1;
+       } else {
+               delta_set = isl_union_set_copy_set(delta);
+
+               slice = isl_set_universe(isl_set_get_dim(delta_set));
+               for (i = 0; i < tilable; ++i)
+                       slice = isl_set_lower_bound_si(slice, isl_dim_set, i, 0);
+               is_tilable = isl_set_is_subset(delta_set, slice);
+               isl_set_free(slice);
+
+               slice = isl_set_universe(isl_set_get_dim(delta_set));
+               for (i = 0; i < parallel; ++i)
+                       slice = isl_set_fix_si(slice, isl_dim_set, i, 0);
+               is_parallel = isl_set_is_subset(delta_set, slice);
+               isl_set_free(slice);
+
+               origin = isl_set_universe(isl_set_get_dim(delta_set));
+               for (i = 0; i < isl_set_dim(origin, isl_dim_set); ++i)
+                       origin = isl_set_fix_si(origin, isl_dim_set, i, 0);
+
+               delta_set = isl_set_union(delta_set, isl_set_copy(origin));
+               delta_set = isl_set_lexmin(delta_set);
+
+               is_nonneg = isl_set_is_equal(delta_set, origin);
+
+               isl_set_free(origin);
+               isl_set_free(delta_set);
+       }
+       isl_union_set_free(delta);
+
+       if (is_nonneg < 0 || is_parallel < 0 || is_tilable < 0 ||
+           is_injection < 0 || is_complete < 0)
+               return -1;
+       if (!is_complete)
+               isl_die(ctx, isl_error_unknown,
+                       "generated schedule incomplete", return -1);
+       if (!is_injection)
+               isl_die(ctx, isl_error_unknown,
+                       "generated schedule not injective on each statement",
+                       return -1);
+       if (!is_nonneg)
+               isl_die(ctx, isl_error_unknown,
+                       "negative dependences in generated schedule",
+                       return -1);
+       if (!is_tilable)
+               isl_die(ctx, isl_error_unknown,
+                       "generated schedule not as tilable as expected",
+                       return -1);
+       if (!is_parallel)
+               isl_die(ctx, isl_error_unknown,
+                       "generated schedule not as parallel as expected",
+                       return -1);
+
+       return 0;
+}
+
+int test_special_schedule(isl_ctx *ctx)
+{
+       const char *str;
+       isl_union_set *dom;
+       isl_union_map *empty;
+       isl_union_map *dep;
+       isl_union_map *sched1, *sched2;
+       isl_schedule *schedule;
+       int equal;
+
+       str = "{ S[i,j] : 0 <= i <= 10 }";
+       dom = isl_union_set_read_from_str(ctx, str);
+       str = "{ S[i,j] -> S[i+1,j] : 0 <= i,j <= 10 }";
+       dep = isl_union_map_read_from_str(ctx, str);
+       empty = isl_union_map_read_from_str(ctx, "{}");
+       schedule = isl_union_set_compute_schedule(dom, empty, dep);
+       sched1 = isl_schedule_get_map(schedule);
+       isl_schedule_free(schedule);
+
+       str = "{ S[i, j] -> [j, i] }";
+       sched2 = isl_union_map_read_from_str(ctx, str);
+
+       equal = isl_union_map_is_equal(sched1, sched2);
+       isl_union_map_free(sched1);
+       isl_union_map_free(sched2);
+
+       if (equal < 0)
+               return -1;
+       if (!equal)
+               isl_die(ctx, isl_error_unknown, "unexpected schedule",
+                       return -1);
+
+       return 0;
+}
+
+int test_schedule(isl_ctx *ctx)
+{
+       const char *D, *W, *R, *S;
+
+       /* Jacobi */
+       D = "[T,N] -> { S1[t,i] : 1 <= t <= T and 2 <= i <= N - 1 }";
+       W = "{ S1[t,i] -> a[t,i] }";
+       R = "{ S1[t,i] -> a[t-1,i]; S1[t,i] -> a[t-1,i-1]; "
+               "S1[t,i] -> a[t-1,i+1] }";
+       S = "{ S1[t,i] -> [t,i] }";
+       if (test_one_schedule(ctx, D, W, R, S, 2, 0) < 0)
+               return -1;
+
+       /* Fig. 5 of CC2008 */
+       D = "[N] -> { S_0[i, j] : i >= 0 and i <= -1 + N and j >= 2 and "
+                               "j <= -1 + N }";
+       W = "[N] -> { S_0[i, j] -> a[i, j] : i >= 0 and i <= -1 + N and "
+                               "j >= 2 and j <= -1 + N }";
+       R = "[N] -> { S_0[i, j] -> a[j, i] : i >= 0 and i <= -1 + N and "
+                               "j >= 2 and j <= -1 + N; "
+                   "S_0[i, j] -> a[i, -1 + j] : i >= 0 and i <= -1 + N and "
+                               "j >= 2 and j <= -1 + N }";
+       S = "[N] -> { S_0[i, j] -> [0, i, 0, j, 0] }";
+       if (test_one_schedule(ctx, D, W, R, S, 2, 0) < 0)
+               return -1;
+
+       D = "{ S1[i] : 0 <= i <= 10; S2[i] : 0 <= i <= 9 }";
+       W = "{ S1[i] -> a[i] }";
+       R = "{ S2[i] -> a[i+1] }";
+       S = "{ S1[i] -> [0,i]; S2[i] -> [1,i] }";
+       if (test_one_schedule(ctx, D, W, R, S, 1, 1) < 0)
+               return -1;
+
+       D = "{ S1[i] : 0 <= i < 10; S2[i] : 0 <= i < 10 }";
+       W = "{ S1[i] -> a[i] }";
+       R = "{ S2[i] -> a[9-i] }";
+       S = "{ S1[i] -> [0,i]; S2[i] -> [1,i] }";
+       if (test_one_schedule(ctx, D, W, R, S, 1, 1) < 0)
+               return -1;
+
+       D = "[N] -> { S1[i] : 0 <= i < N; S2[i] : 0 <= i < N }";
+       W = "{ S1[i] -> a[i] }";
+       R = "[N] -> { S2[i] -> a[N-1-i] }";
+       S = "{ S1[i] -> [0,i]; S2[i] -> [1,i] }";
+       if (test_one_schedule(ctx, D, W, R, S, 1, 1) < 0)
+               return -1;
+       
+       D = "{ S1[i] : 0 < i < 10; S2[i] : 0 <= i < 10 }";
+       W = "{ S1[i] -> a[i]; S2[i] -> b[i] }";
+       R = "{ S2[i] -> a[i]; S1[i] -> b[i-1] }";
+       S = "{ S1[i] -> [i,0]; S2[i] -> [i,1] }";
+       if (test_one_schedule(ctx, D, W, R, S, 0, 0) < 0)
+               return -1;
+
+       D = "[N] -> { S1[i] : 1 <= i <= N; S2[i,j] : 1 <= i,j <= N }";
+       W = "{ S1[i] -> a[0,i]; S2[i,j] -> a[i,j] }";
+       R = "{ S2[i,j] -> a[i-1,j] }";
+       S = "{ S1[i] -> [0,i,0]; S2[i,j] -> [1,i,j] }";
+       if (test_one_schedule(ctx, D, W, R, S, 2, 1) < 0)
+               return -1;
+
+       D = "[N] -> { S1[i] : 1 <= i <= N; S2[i,j] : 1 <= i,j <= N }";
+       W = "{ S1[i] -> a[i,0]; S2[i,j] -> a[i,j] }";
+       R = "{ S2[i,j] -> a[i,j-1] }";
+       S = "{ S1[i] -> [0,i,0]; S2[i,j] -> [1,i,j] }";
+       if (test_one_schedule(ctx, D, W, R, S, 2, 1) < 0)
+               return -1;
+
+       D = "[N] -> { S_0[]; S_1[i] : i >= 0 and i <= -1 + N; S_2[] }";
+       W = "[N] -> { S_0[] -> a[0]; S_2[] -> b[0]; "
+                   "S_1[i] -> a[1 + i] : i >= 0 and i <= -1 + N }";
+       R = "[N] -> { S_2[] -> a[N]; S_1[i] -> a[i] : i >= 0 and i <= -1 + N }";
+       S = "[N] -> { S_1[i] -> [1, i, 0]; S_2[] -> [2, 0, 1]; "
+                   "S_0[] -> [0, 0, 0] }";
+       if (test_one_schedule(ctx, D, W, R, S, 1, 0) < 0)
+               return -1;
+       ctx->opt->schedule_parametric = 0;
+       if (test_one_schedule(ctx, D, W, R, S, 0, 0) < 0)
+               return -1;
+       ctx->opt->schedule_parametric = 1;
+
+       D = "[N] -> { S1[i] : 1 <= i <= N; S2[i] : 1 <= i <= N; "
+                   "S3[i,j] : 1 <= i,j <= N; S4[i] : 1 <= i <= N }";
+       W = "{ S1[i] -> a[i,0]; S2[i] -> a[0,i]; S3[i,j] -> a[i,j] }";
+       R = "[N] -> { S3[i,j] -> a[i-1,j]; S3[i,j] -> a[i,j-1]; "
+                   "S4[i] -> a[i,N] }";
+       S = "{ S1[i] -> [0,i,0]; S2[i] -> [1,i,0]; S3[i,j] -> [2,i,j]; "
+               "S4[i] -> [4,i,0] }";
+       if (test_one_schedule(ctx, D, W, R, S, 2, 0) < 0)
+               return -1;
+
+       D = "[N] -> { S_0[i, j] : i >= 1 and i <= N and j >= 1 and j <= N }";
+       W = "[N] -> { S_0[i, j] -> s[0] : i >= 1 and i <= N and j >= 1 and "
+                                       "j <= N }";
+       R = "[N] -> { S_0[i, j] -> s[0] : i >= 1 and i <= N and j >= 1 and "
+                                       "j <= N; "
+                   "S_0[i, j] -> a[i, j] : i >= 1 and i <= N and j >= 1 and "
+                                       "j <= N }";
+       S = "[N] -> { S_0[i, j] -> [0, i, 0, j, 0] }";
+       if (test_one_schedule(ctx, D, W, R, S, 0, 0) < 0)
+               return -1;
+
+       D = "[N] -> { S_0[t] : t >= 0 and t <= -1 + N; "
+                   " S_2[t] : t >= 0 and t <= -1 + N; "
+                   " S_1[t, i] : t >= 0 and t <= -1 + N and i >= 0 and "
+                               "i <= -1 + N }";
+       W = "[N] -> { S_0[t] -> a[t, 0] : t >= 0 and t <= -1 + N; "
+                   " S_2[t] -> b[t] : t >= 0 and t <= -1 + N; "
+                   " S_1[t, i] -> a[t, 1 + i] : t >= 0 and t <= -1 + N and "
+                                               "i >= 0 and i <= -1 + N }";
+       R = "[N] -> { S_1[t, i] -> a[t, i] : t >= 0 and t <= -1 + N and "
+                                           "i >= 0 and i <= -1 + N; "
+                   " S_2[t] -> a[t, N] : t >= 0 and t <= -1 + N }";
+       S = "[N] -> { S_2[t] -> [0, t, 2]; S_1[t, i] -> [0, t, 1, i, 0]; "
+                   " S_0[t] -> [0, t, 0] }";
+
+       if (test_one_schedule(ctx, D, W, R, S, 2, 1) < 0)
+               return -1;
+       ctx->opt->schedule_parametric = 0;
+       if (test_one_schedule(ctx, D, W, R, S, 0, 0) < 0)
+               return -1;
+       ctx->opt->schedule_parametric = 1;
+
+       D = "[N] -> { S1[i,j] : 0 <= i,j < N; S2[i,j] : 0 <= i,j < N }";
+       S = "{ S1[i,j] -> [0,i,j]; S2[i,j] -> [1,i,j] }";
+       if (test_one_schedule(ctx, D, "{}", "{}", S, 2, 2) < 0)
+               return -1;
+
+       D = "[M, N] -> { S_1[i] : i >= 0 and i <= -1 + M; "
+           "S_0[i, j] : i >= 0 and i <= -1 + M and j >= 0 and j <= -1 + N }";
+       W = "[M, N] -> { S_0[i, j] -> a[j] : i >= 0 and i <= -1 + M and "
+                                           "j >= 0 and j <= -1 + N; "
+                       "S_1[i] -> b[0] : i >= 0 and i <= -1 + M }";
+       R = "[M, N] -> { S_0[i, j] -> a[0] : i >= 0 and i <= -1 + M and "
+                                           "j >= 0 and j <= -1 + N; "
+                       "S_1[i] -> b[0] : i >= 0 and i <= -1 + M }";
+       S = "[M, N] -> { S_1[i] -> [1, i, 0]; S_0[i, j] -> [0, i, 0, j, 0] }";
+       if (test_one_schedule(ctx, D, W, R, S, 0, 0) < 0)
+               return -1;
+
+       D = "{ S_0[i] : i >= 0 }";
+       W = "{ S_0[i] -> a[i] : i >= 0 }";
+       R = "{ S_0[i] -> a[0] : i >= 0 }";
+       S = "{ S_0[i] -> [0, i, 0] }";
+       if (test_one_schedule(ctx, D, W, R, S, 0, 0) < 0)
+               return -1;
+
+       D = "{ S_0[i] : i >= 0; S_1[i] : i >= 0 }";
+       W = "{ S_0[i] -> a[i] : i >= 0; S_1[i] -> b[i] : i >= 0 }";
+       R = "{ S_0[i] -> b[0] : i >= 0; S_1[i] -> a[i] : i >= 0 }";
+       S = "{ S_1[i] -> [0, i, 1]; S_0[i] -> [0, i, 0] }";
+       if (test_one_schedule(ctx, D, W, R, S, 0, 0) < 0)
+               return -1;
+
+       return test_special_schedule(ctx);
+}
+
 int main()
 {
        struct isl_ctx *ctx;
@@ -1810,6 +2139,8 @@ int main()
        assert(srcdir);
 
        ctx = isl_ctx_alloc();
+       if (test_schedule(ctx) < 0)
+               goto error;
        test_factorize(ctx);
        test_subset(ctx);
        test_lift(ctx);