2 * Copyright 2011 INRIA Saclay
4 * Use of this software is governed by the GNU LGPLv2.1 license
6 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
7 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
11 #include <isl_ctx_private.h>
12 #include <isl_map_private.h>
13 #include <isl_space_private.h>
15 #include <isl/constraint.h>
16 #include <isl/schedule.h>
17 #include <isl_mat_private.h>
21 #include <isl_dim_map.h>
22 #include <isl_hmap_map_basic_set.h>
23 #include <isl_qsort.h>
24 #include <isl_schedule_private.h>
25 #include <isl_band_private.h>
26 #include <isl_list_private.h>
27 #include <isl_options_private.h>
30 * The scheduling algorithm implemented in this file was inspired by
31 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
32 * Parallelization and Locality Optimization in the Polyhedral Model".
36 /* Internal information about a node that is used during the construction
38 * dim represents the space in which the domain lives
39 * sched is a matrix representation of the schedule being constructed
41 * sched_map is an isl_map representation of the same (partial) schedule
42 * sched_map may be NULL
43 * rank is the number of linearly independent rows in the linear part
45 * the columns of cmap represent a change of basis for the schedule
46 * coefficients; the first rank columns span the linear part of
48 * start is the first variable in the LP problem in the sequences that
49 * represents the schedule coefficients of this node
50 * nvar is the dimension of the domain
51 * nparam is the number of parameters or 0 if we are not constructing
52 * a parametric schedule
54 * scc is the index of SCC (or WCC) this node belongs to
56 * band contains the band index for each of the rows of the schedule.
57 * band_id is used to differentiate between separate bands at the same
58 * level within the same parent band, i.e., bands that are separated
59 * by the parent band or bands that are independent of each other.
60 * zero contains a boolean for each of the rows of the schedule,
61 * indicating whether the corresponding scheduling dimension results
62 * in zero dependence distances within its band and with respect
63 * to the proximity edges.
65 * index, min_index and on_stack are used during the SCC detection
66 * index represents the order in which nodes are visited.
67 * min_index is the index of the root of a (sub)component.
68 * on_stack indicates whether the node is currently on the stack.
70 struct isl_sched_node {
92 static int node_has_dim(const void *entry, const void *val)
94 struct isl_sched_node *node = (struct isl_sched_node *)entry;
95 isl_space *dim = (isl_space *)val;
97 return isl_space_is_equal(node->dim, dim);
100 /* An edge in the dependence graph. An edge may be used to
101 * ensure validity of the generated schedule, to minimize the dependence
104 * map is the dependence relation
105 * src is the source node
106 * dst is the sink node
107 * validity is set if the edge is used to ensure correctness
108 * proximity is set if the edge is used to minimize dependence distances
110 * For validity edges, start and end mark the sequence of inequality
111 * constraints in the LP problem that encode the validity constraint
112 * corresponding to this edge.
114 struct isl_sched_edge {
117 struct isl_sched_node *src;
118 struct isl_sched_node *dst;
127 /* Internal information about the dependence graph used during
128 * the construction of the schedule.
130 * intra_hmap is a cache, mapping dependence relations to their dual,
131 * for dependences from a node to itself
132 * inter_hmap is a cache, mapping dependence relations to their dual,
133 * for dependences between distinct nodes
135 * n is the number of nodes
136 * node is the list of nodes
137 * maxvar is the maximal number of variables over all nodes
138 * max_row is the allocated number of rows in the schedule
139 * n_row is the current (maximal) number of linearly independent
140 * rows in the node schedules
141 * n_total_row is the current number of rows in the node schedules
142 * n_band is the current number of completed bands
143 * band_start is the starting row in the node schedules of the current band
144 * root is set if this graph is the original dependence graph,
145 * without any splitting
147 * sorted contains a list of node indices sorted according to the
148 * SCC to which a node belongs
150 * n_edge is the number of edges
151 * edge is the list of edges
152 * edge_table contains pointers into the edge array, hashed on the source
153 * and sink spaces; the table only contains edges that represent
154 * validity constraints (and that may or may not also represent proximity
157 * node_table contains pointers into the node array, hashed on the space
159 * region contains a list of variable sequences that should be non-trivial
161 * lp contains the (I)LP problem used to obtain new schedule rows
163 * src_scc and dst_scc are the source and sink SCCs of an edge with
164 * conflicting constraints
166 * scc, sp, index and stack are used during the detection of SCCs
167 * scc is the number of the next SCC
168 * stack contains the nodes on the path from the root to the current node
169 * sp is the stack pointer
170 * index is the index of the last node visited
172 struct isl_sched_graph {
173 isl_hmap_map_basic_set *intra_hmap;
174 isl_hmap_map_basic_set *inter_hmap;
176 struct isl_sched_node *node;
190 struct isl_sched_edge *edge;
192 struct isl_hash_table *edge_table;
194 struct isl_hash_table *node_table;
195 struct isl_region *region;
209 /* Initialize node_table based on the list of nodes.
211 static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
215 graph->node_table = isl_hash_table_alloc(ctx, graph->n);
216 if (!graph->node_table)
219 for (i = 0; i < graph->n; ++i) {
220 struct isl_hash_table_entry *entry;
223 hash = isl_space_get_hash(graph->node[i].dim);
224 entry = isl_hash_table_find(ctx, graph->node_table, hash,
226 graph->node[i].dim, 1);
229 entry->data = &graph->node[i];
235 /* Return a pointer to the node that lives within the given space,
236 * or NULL if there is no such node.
238 static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
239 struct isl_sched_graph *graph, __isl_keep isl_space *dim)
241 struct isl_hash_table_entry *entry;
244 hash = isl_space_get_hash(dim);
245 entry = isl_hash_table_find(ctx, graph->node_table, hash,
246 &node_has_dim, dim, 0);
248 return entry ? entry->data : NULL;
251 static int edge_has_src_and_dst(const void *entry, const void *val)
253 const struct isl_sched_edge *edge = entry;
254 const struct isl_sched_edge *temp = val;
256 return edge->src == temp->src && edge->dst == temp->dst;
259 /* Initialize edge_table based on the list of edges.
260 * Only edges with validity set are added to the table.
262 static int graph_init_edge_table(isl_ctx *ctx, struct isl_sched_graph *graph)
266 graph->edge_table = isl_hash_table_alloc(ctx, graph->n_edge);
267 if (!graph->edge_table)
270 for (i = 0; i < graph->n_edge; ++i) {
271 struct isl_hash_table_entry *entry;
274 if (!graph->edge[i].validity)
277 hash = isl_hash_init();
278 hash = isl_hash_builtin(hash, graph->edge[i].src);
279 hash = isl_hash_builtin(hash, graph->edge[i].dst);
280 entry = isl_hash_table_find(ctx, graph->edge_table, hash,
281 &edge_has_src_and_dst,
285 entry->data = &graph->edge[i];
291 /* Check whether the dependence graph has a (validity) edge
292 * between the given two nodes.
294 static int graph_has_edge(struct isl_sched_graph *graph,
295 struct isl_sched_node *src, struct isl_sched_node *dst)
297 isl_ctx *ctx = isl_space_get_ctx(src->dim);
298 struct isl_hash_table_entry *entry;
300 struct isl_sched_edge temp = { .src = src, .dst = dst };
301 struct isl_sched_edge *edge;
304 hash = isl_hash_init();
305 hash = isl_hash_builtin(hash, temp.src);
306 hash = isl_hash_builtin(hash, temp.dst);
307 entry = isl_hash_table_find(ctx, graph->edge_table, hash,
308 &edge_has_src_and_dst, &temp, 0);
313 empty = isl_map_plain_is_empty(edge->map);
320 static int graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
321 int n_node, int n_edge)
326 graph->n_edge = n_edge;
327 graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
328 graph->sorted = isl_calloc_array(ctx, int, graph->n);
329 graph->region = isl_alloc_array(ctx, struct isl_region, graph->n);
330 graph->stack = isl_alloc_array(ctx, int, graph->n);
331 graph->edge = isl_calloc_array(ctx,
332 struct isl_sched_edge, graph->n_edge);
334 graph->intra_hmap = isl_hmap_map_basic_set_alloc(ctx, 2 * n_edge);
335 graph->inter_hmap = isl_hmap_map_basic_set_alloc(ctx, 2 * n_edge);
337 if (!graph->node || !graph->region || !graph->stack || !graph->edge ||
341 for(i = 0; i < graph->n; ++i)
342 graph->sorted[i] = i;
347 static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
351 isl_hmap_map_basic_set_free(ctx, graph->intra_hmap);
352 isl_hmap_map_basic_set_free(ctx, graph->inter_hmap);
354 for (i = 0; i < graph->n; ++i) {
355 isl_space_free(graph->node[i].dim);
356 isl_mat_free(graph->node[i].sched);
357 isl_map_free(graph->node[i].sched_map);
358 isl_mat_free(graph->node[i].cmap);
360 free(graph->node[i].band);
361 free(graph->node[i].band_id);
362 free(graph->node[i].zero);
367 for (i = 0; i < graph->n_edge; ++i)
368 isl_map_free(graph->edge[i].map);
372 isl_hash_table_free(ctx, graph->edge_table);
373 isl_hash_table_free(ctx, graph->node_table);
374 isl_basic_set_free(graph->lp);
377 /* For each "set" on which this function is called, increment
378 * graph->n by one and update graph->maxvar.
380 static int init_n_maxvar(__isl_take isl_set *set, void *user)
382 struct isl_sched_graph *graph = user;
383 int nvar = isl_set_dim(set, isl_dim_set);
386 if (nvar > graph->maxvar)
387 graph->maxvar = nvar;
394 /* Compute the number of rows that should be allocated for the schedule.
395 * The graph can be split at most "n - 1" times, there can be at most
396 * two rows for each dimension in the iteration domains (in particular,
397 * we usually have one row, but it may be split by split_parallel),
398 * and there can be one extra row for ordering the statements.
399 * Note that if we have actually split "n - 1" times, then no ordering
400 * is needed, so in principle we could use "graph->n + 2 * graph->maxvar - 1".
402 static int compute_max_row(struct isl_sched_graph *graph,
403 __isl_keep isl_union_set *domain)
407 if (isl_union_set_foreach_set(domain, &init_n_maxvar, graph) < 0)
409 graph->max_row = graph->n + 2 * graph->maxvar;
414 /* Add a new node to the graph representing the given set.
416 static int extract_node(__isl_take isl_set *set, void *user)
422 struct isl_sched_graph *graph = user;
423 int *band, *band_id, *zero;
425 ctx = isl_set_get_ctx(set);
426 dim = isl_set_get_space(set);
428 nvar = isl_space_dim(dim, isl_dim_set);
429 nparam = isl_space_dim(dim, isl_dim_param);
430 if (!ctx->opt->schedule_parametric)
432 sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
433 graph->node[graph->n].dim = dim;
434 graph->node[graph->n].nvar = nvar;
435 graph->node[graph->n].nparam = nparam;
436 graph->node[graph->n].sched = sched;
437 graph->node[graph->n].sched_map = NULL;
438 band = isl_alloc_array(ctx, int, graph->max_row);
439 graph->node[graph->n].band = band;
440 band_id = isl_calloc_array(ctx, int, graph->max_row);
441 graph->node[graph->n].band_id = band_id;
442 zero = isl_calloc_array(ctx, int, graph->max_row);
443 graph->node[graph->n].zero = zero;
446 if (!sched || !band || !band_id || !zero)
452 /* Add a new edge to the graph based on the given map.
453 * Edges are first extracted from the validity dependences,
454 * from which the edge_table is constructed.
455 * Afterwards, the proximity dependences are added. If a proximity
456 * dependence relation happens to be identical to one of the
457 * validity dependence relations added before, then we don't create
458 * a new edge, but instead mark the original edge as also representing
459 * a proximity dependence.
461 static int extract_edge(__isl_take isl_map *map, void *user)
463 isl_ctx *ctx = isl_map_get_ctx(map);
464 struct isl_sched_graph *graph = user;
465 struct isl_sched_node *src, *dst;
468 dim = isl_space_domain(isl_map_get_space(map));
469 src = graph_find_node(ctx, graph, dim);
471 dim = isl_space_range(isl_map_get_space(map));
472 dst = graph_find_node(ctx, graph, dim);
480 graph->edge[graph->n_edge].src = src;
481 graph->edge[graph->n_edge].dst = dst;
482 graph->edge[graph->n_edge].map = map;
483 graph->edge[graph->n_edge].validity = !graph->edge_table;
484 graph->edge[graph->n_edge].proximity = !!graph->edge_table;
487 if (graph->edge_table) {
489 struct isl_hash_table_entry *entry;
490 struct isl_sched_edge *edge;
493 hash = isl_hash_init();
494 hash = isl_hash_builtin(hash, src);
495 hash = isl_hash_builtin(hash, dst);
496 entry = isl_hash_table_find(ctx, graph->edge_table, hash,
497 &edge_has_src_and_dst,
498 &graph->edge[graph->n_edge - 1], 0);
502 is_equal = isl_map_plain_is_equal(map, edge->map);
516 /* Check whether there is a validity dependence from src to dst,
517 * forcing dst to follow src.
519 static int node_follows(struct isl_sched_graph *graph,
520 struct isl_sched_node *dst, struct isl_sched_node *src)
522 return graph_has_edge(graph, src, dst);
525 /* Perform Tarjan's algorithm for computing the strongly connected components
526 * in the dependence graph (only validity edges).
527 * If directed is not set, we consider the graph to be undirected and
528 * we effectively compute the (weakly) connected components.
530 static int detect_sccs_tarjan(struct isl_sched_graph *g, int i, int directed)
534 g->node[i].index = g->index;
535 g->node[i].min_index = g->index;
536 g->node[i].on_stack = 1;
538 g->stack[g->sp++] = i;
540 for (j = g->n - 1; j >= 0; --j) {
545 if (g->node[j].index >= 0 &&
546 (!g->node[j].on_stack ||
547 g->node[j].index > g->node[i].min_index))
550 f = node_follows(g, &g->node[i], &g->node[j]);
553 if (!f && !directed) {
554 f = node_follows(g, &g->node[j], &g->node[i]);
560 if (g->node[j].index < 0) {
561 detect_sccs_tarjan(g, j, directed);
562 if (g->node[j].min_index < g->node[i].min_index)
563 g->node[i].min_index = g->node[j].min_index;
564 } else if (g->node[j].index < g->node[i].min_index)
565 g->node[i].min_index = g->node[j].index;
568 if (g->node[i].index != g->node[i].min_index)
572 j = g->stack[--g->sp];
573 g->node[j].on_stack = 0;
574 g->node[j].scc = g->scc;
581 static int detect_ccs(struct isl_sched_graph *graph, int directed)
588 for (i = graph->n - 1; i >= 0; --i)
589 graph->node[i].index = -1;
591 for (i = graph->n - 1; i >= 0; --i) {
592 if (graph->node[i].index >= 0)
594 if (detect_sccs_tarjan(graph, i, directed) < 0)
601 /* Apply Tarjan's algorithm to detect the strongly connected components
602 * in the dependence graph.
604 static int detect_sccs(struct isl_sched_graph *graph)
606 return detect_ccs(graph, 1);
609 /* Apply Tarjan's algorithm to detect the (weakly) connected components
610 * in the dependence graph.
612 static int detect_wccs(struct isl_sched_graph *graph)
614 return detect_ccs(graph, 0);
617 static int cmp_scc(const void *a, const void *b, void *data)
619 struct isl_sched_graph *graph = data;
623 return graph->node[*i1].scc - graph->node[*i2].scc;
626 /* Sort the elements of graph->sorted according to the corresponding SCCs.
628 static void sort_sccs(struct isl_sched_graph *graph)
630 isl_quicksort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
633 /* Given a dependence relation R from a node to itself,
634 * construct the set of coefficients of valid constraints for elements
635 * in that dependence relation.
636 * In particular, the result contains tuples of coefficients
637 * c_0, c_n, c_x such that
639 * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
643 * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
645 * We choose here to compute the dual of delta R.
646 * Alternatively, we could have computed the dual of R, resulting
647 * in a set of tuples c_0, c_n, c_x, c_y, and then
648 * plugged in (c_0, c_n, c_x, -c_x).
650 static __isl_give isl_basic_set *intra_coefficients(
651 struct isl_sched_graph *graph, __isl_take isl_map *map)
653 isl_ctx *ctx = isl_map_get_ctx(map);
657 if (isl_hmap_map_basic_set_has(ctx, graph->intra_hmap, map))
658 return isl_hmap_map_basic_set_get(ctx, graph->intra_hmap, map);
660 delta = isl_set_remove_divs(isl_map_deltas(isl_map_copy(map)));
661 coef = isl_set_coefficients(delta);
662 isl_hmap_map_basic_set_set(ctx, graph->intra_hmap, map,
663 isl_basic_set_copy(coef));
668 /* Given a dependence relation R, * construct the set of coefficients
669 * of valid constraints for elements in that dependence relation.
670 * In particular, the result contains tuples of coefficients
671 * c_0, c_n, c_x, c_y such that
673 * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
676 static __isl_give isl_basic_set *inter_coefficients(
677 struct isl_sched_graph *graph, __isl_take isl_map *map)
679 isl_ctx *ctx = isl_map_get_ctx(map);
683 if (isl_hmap_map_basic_set_has(ctx, graph->inter_hmap, map))
684 return isl_hmap_map_basic_set_get(ctx, graph->inter_hmap, map);
686 set = isl_map_wrap(isl_map_remove_divs(isl_map_copy(map)));
687 coef = isl_set_coefficients(set);
688 isl_hmap_map_basic_set_set(ctx, graph->inter_hmap, map,
689 isl_basic_set_copy(coef));
694 /* Add constraints to graph->lp that force validity for the given
695 * dependence from a node i to itself.
696 * That is, add constraints that enforce
698 * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
699 * = c_i_x (y - x) >= 0
701 * for each (x,y) in R.
702 * We obtain general constraints on coefficients (c_0, c_n, c_x)
703 * of valid constraints for (y - x) and then plug in (0, 0, c_i_x^+ - c_i_x^-),
704 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
705 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
707 * Actually, we do not construct constraints for the c_i_x themselves,
708 * but for the coefficients of c_i_x written as a linear combination
709 * of the columns in node->cmap.
711 static int add_intra_validity_constraints(struct isl_sched_graph *graph,
712 struct isl_sched_edge *edge)
715 isl_map *map = isl_map_copy(edge->map);
716 isl_ctx *ctx = isl_map_get_ctx(map);
718 isl_dim_map *dim_map;
720 struct isl_sched_node *node = edge->src;
722 coef = intra_coefficients(graph, map);
724 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
726 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
727 isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap));
729 total = isl_basic_set_total_dim(graph->lp);
730 dim_map = isl_dim_map_alloc(ctx, total);
731 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
732 isl_space_dim(dim, isl_dim_set), 1,
734 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
735 isl_space_dim(dim, isl_dim_set), 1,
737 graph->lp = isl_basic_set_extend_constraints(graph->lp,
738 coef->n_eq, coef->n_ineq);
739 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
746 /* Add constraints to graph->lp that force validity for the given
747 * dependence from node i to node j.
748 * That is, add constraints that enforce
750 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
752 * for each (x,y) in R.
753 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
754 * of valid constraints for R and then plug in
755 * (c_j_0 - c_i_0, c_j_n^+ - c_j_n^- - (c_i_n^+ - c_i_n^-),
756 * c_j_x^+ - c_j_x^- - (c_i_x^+ - c_i_x^-)),
757 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
758 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
760 * Actually, we do not construct constraints for the c_*_x themselves,
761 * but for the coefficients of c_*_x written as a linear combination
762 * of the columns in node->cmap.
764 static int add_inter_validity_constraints(struct isl_sched_graph *graph,
765 struct isl_sched_edge *edge)
768 isl_map *map = isl_map_copy(edge->map);
769 isl_ctx *ctx = isl_map_get_ctx(map);
771 isl_dim_map *dim_map;
773 struct isl_sched_node *src = edge->src;
774 struct isl_sched_node *dst = edge->dst;
776 coef = inter_coefficients(graph, map);
778 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
780 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
781 isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap));
782 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
783 isl_space_dim(dim, isl_dim_set) + src->nvar,
784 isl_mat_copy(dst->cmap));
786 total = isl_basic_set_total_dim(graph->lp);
787 dim_map = isl_dim_map_alloc(ctx, total);
789 isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1);
790 isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
791 isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
792 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
793 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
795 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
796 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
799 isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
800 isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
801 isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
802 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
803 isl_space_dim(dim, isl_dim_set), 1,
805 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
806 isl_space_dim(dim, isl_dim_set), 1,
809 edge->start = graph->lp->n_ineq;
810 graph->lp = isl_basic_set_extend_constraints(graph->lp,
811 coef->n_eq, coef->n_ineq);
812 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
815 edge->end = graph->lp->n_ineq;
820 /* Add constraints to graph->lp that bound the dependence distance for the given
821 * dependence from a node i to itself.
822 * If s = 1, we add the constraint
824 * c_i_x (y - x) <= m_0 + m_n n
828 * -c_i_x (y - x) + m_0 + m_n n >= 0
830 * for each (x,y) in R.
831 * If s = -1, we add the constraint
833 * -c_i_x (y - x) <= m_0 + m_n n
837 * c_i_x (y - x) + m_0 + m_n n >= 0
839 * for each (x,y) in R.
840 * We obtain general constraints on coefficients (c_0, c_n, c_x)
841 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
842 * with each coefficient (except m_0) represented as a pair of non-negative
845 * Actually, we do not construct constraints for the c_i_x themselves,
846 * but for the coefficients of c_i_x written as a linear combination
847 * of the columns in node->cmap.
849 static int add_intra_proximity_constraints(struct isl_sched_graph *graph,
850 struct isl_sched_edge *edge, int s)
854 isl_map *map = isl_map_copy(edge->map);
855 isl_ctx *ctx = isl_map_get_ctx(map);
857 isl_dim_map *dim_map;
859 struct isl_sched_node *node = edge->src;
861 coef = intra_coefficients(graph, map);
863 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
865 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
866 isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap));
868 nparam = isl_space_dim(node->dim, isl_dim_param);
869 total = isl_basic_set_total_dim(graph->lp);
870 dim_map = isl_dim_map_alloc(ctx, total);
871 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
872 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
873 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
874 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
875 isl_space_dim(dim, isl_dim_set), 1,
877 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
878 isl_space_dim(dim, isl_dim_set), 1,
880 graph->lp = isl_basic_set_extend_constraints(graph->lp,
881 coef->n_eq, coef->n_ineq);
882 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
889 /* Add constraints to graph->lp that bound the dependence distance for the given
890 * dependence from node i to node j.
891 * If s = 1, we add the constraint
893 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
898 * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
901 * for each (x,y) in R.
902 * If s = -1, we add the constraint
904 * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
909 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
912 * for each (x,y) in R.
913 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
914 * of valid constraints for R and then plug in
915 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
917 * with each coefficient (except m_0, c_j_0 and c_i_0)
918 * represented as a pair of non-negative coefficients.
920 * Actually, we do not construct constraints for the c_*_x themselves,
921 * but for the coefficients of c_*_x written as a linear combination
922 * of the columns in node->cmap.
924 static int add_inter_proximity_constraints(struct isl_sched_graph *graph,
925 struct isl_sched_edge *edge, int s)
929 isl_map *map = isl_map_copy(edge->map);
930 isl_ctx *ctx = isl_map_get_ctx(map);
932 isl_dim_map *dim_map;
934 struct isl_sched_node *src = edge->src;
935 struct isl_sched_node *dst = edge->dst;
937 coef = inter_coefficients(graph, map);
939 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
941 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
942 isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap));
943 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
944 isl_space_dim(dim, isl_dim_set) + src->nvar,
945 isl_mat_copy(dst->cmap));
947 nparam = isl_space_dim(src->dim, isl_dim_param);
948 total = isl_basic_set_total_dim(graph->lp);
949 dim_map = isl_dim_map_alloc(ctx, total);
951 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
952 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
953 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
955 isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, -s);
956 isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, s);
957 isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, -s);
958 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
959 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
961 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
962 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
965 isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, s);
966 isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, -s);
967 isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, s);
968 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
969 isl_space_dim(dim, isl_dim_set), 1,
971 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
972 isl_space_dim(dim, isl_dim_set), 1,
975 graph->lp = isl_basic_set_extend_constraints(graph->lp,
976 coef->n_eq, coef->n_ineq);
977 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
984 static int add_all_validity_constraints(struct isl_sched_graph *graph)
988 for (i = 0; i < graph->n_edge; ++i) {
989 struct isl_sched_edge *edge= &graph->edge[i];
992 if (edge->src != edge->dst)
994 if (add_intra_validity_constraints(graph, edge) < 0)
998 for (i = 0; i < graph->n_edge; ++i) {
999 struct isl_sched_edge *edge = &graph->edge[i];
1000 if (!edge->validity)
1002 if (edge->src == edge->dst)
1004 if (add_inter_validity_constraints(graph, edge) < 0)
1011 /* Add constraints to graph->lp that bound the dependence distance
1012 * for all dependence relations.
1013 * If a given proximity dependence is identical to a validity
1014 * dependence, then the dependence distance is already bounded
1015 * from below (by zero), so we only need to bound the distance
1017 * Otherwise, we need to bound the distance both from above and from below.
1019 static int add_all_proximity_constraints(struct isl_sched_graph *graph)
1023 for (i = 0; i < graph->n_edge; ++i) {
1024 struct isl_sched_edge *edge= &graph->edge[i];
1025 if (!edge->proximity)
1027 if (edge->src == edge->dst &&
1028 add_intra_proximity_constraints(graph, edge, 1) < 0)
1030 if (edge->src != edge->dst &&
1031 add_inter_proximity_constraints(graph, edge, 1) < 0)
1035 if (edge->src == edge->dst &&
1036 add_intra_proximity_constraints(graph, edge, -1) < 0)
1038 if (edge->src != edge->dst &&
1039 add_inter_proximity_constraints(graph, edge, -1) < 0)
1046 /* Compute a basis for the rows in the linear part of the schedule
1047 * and extend this basis to a full basis. The remaining rows
1048 * can then be used to force linear independence from the rows
1051 * In particular, given the schedule rows S, we compute
1055 * with H the Hermite normal form of S. That is, all but the
1056 * first rank columns of Q are zero and so each row in S is
1057 * a linear combination of the first rank rows of Q.
1058 * The matrix Q is then transposed because we will write the
1059 * coefficients of the next schedule row as a column vector s
1060 * and express this s as a linear combination s = Q c of the
1063 static int node_update_cmap(struct isl_sched_node *node)
1066 int n_row = isl_mat_rows(node->sched);
1068 H = isl_mat_sub_alloc(node->sched, 0, n_row,
1069 1 + node->nparam, node->nvar);
1071 H = isl_mat_left_hermite(H, 0, NULL, &Q);
1072 isl_mat_free(node->cmap);
1073 node->cmap = isl_mat_transpose(Q);
1074 node->rank = isl_mat_initial_non_zero_cols(H);
1077 if (!node->cmap || node->rank < 0)
1082 /* Count the number of equality and inequality constraints
1083 * that will be added for the given map.
1084 * If once is set, then we count
1085 * each edge exactly once. Otherwise, we count as follows
1086 * validity -> 1 (>= 0)
1087 * validity+proximity -> 2 (>= 0 and upper bound)
1088 * proximity -> 2 (lower and upper bound)
1090 static int count_map_constraints(struct isl_sched_graph *graph,
1091 struct isl_sched_edge *edge, __isl_take isl_map *map,
1092 int *n_eq, int *n_ineq, int once)
1094 isl_basic_set *coef;
1095 int f = once ? 1 : edge->proximity ? 2 : 1;
1097 if (edge->src == edge->dst)
1098 coef = intra_coefficients(graph, map);
1100 coef = inter_coefficients(graph, map);
1103 *n_eq += f * coef->n_eq;
1104 *n_ineq += f * coef->n_ineq;
1105 isl_basic_set_free(coef);
1110 /* Count the number of equality and inequality constraints
1111 * that will be added to the main lp problem.
1112 * If once is set, then we count
1113 * each edge exactly once. Otherwise, we count as follows
1114 * validity -> 1 (>= 0)
1115 * validity+proximity -> 2 (>= 0 and upper bound)
1116 * proximity -> 2 (lower and upper bound)
1118 static int count_constraints(struct isl_sched_graph *graph,
1119 int *n_eq, int *n_ineq, int once)
1123 *n_eq = *n_ineq = 0;
1124 for (i = 0; i < graph->n_edge; ++i) {
1125 struct isl_sched_edge *edge= &graph->edge[i];
1126 isl_map *map = isl_map_copy(edge->map);
1128 if (count_map_constraints(graph, edge, map,
1129 n_eq, n_ineq, once) < 0)
1136 /* Construct an ILP problem for finding schedule coefficients
1137 * that result in non-negative, but small dependence distances
1138 * over all dependences.
1139 * In particular, the dependence distances over proximity edges
1140 * are bounded by m_0 + m_n n and we compute schedule coefficients
1141 * with small values (preferably zero) of m_n and m_0.
1143 * All variables of the ILP are non-negative. The actual coefficients
1144 * may be negative, so each coefficient is represented as the difference
1145 * of two non-negative variables. The negative part always appears
1146 * immediately before the positive part.
1147 * Other than that, the variables have the following order
1149 * - sum of positive and negative parts of m_n coefficients
1151 * - sum of positive and negative parts of all c_n coefficients
1152 * (unconstrained when computing non-parametric schedules)
1153 * - sum of positive and negative parts of all c_x coefficients
1154 * - positive and negative parts of m_n coefficients
1157 * - positive and negative parts of c_i_n (if parametric)
1158 * - positive and negative parts of c_i_x
1160 * The c_i_x are not represented directly, but through the columns of
1161 * node->cmap. That is, the computed values are for variable t_i_x
1162 * such that c_i_x = Q t_i_x with Q equal to node->cmap.
1164 * The constraints are those from the edges plus two or three equalities
1165 * to express the sums.
1167 * If force_zero is set, then we add equalities to ensure that
1168 * the sum of the m_n coefficients and m_0 are both zero.
1170 static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
1181 int max_constant_term;
1183 max_constant_term = ctx->opt->schedule_max_constant_term;
1185 parametric = ctx->opt->schedule_parametric;
1186 nparam = isl_space_dim(graph->node[0].dim, isl_dim_param);
1188 total = param_pos + 2 * nparam;
1189 for (i = 0; i < graph->n; ++i) {
1190 struct isl_sched_node *node = &graph->node[graph->sorted[i]];
1191 if (node_update_cmap(node) < 0)
1193 node->start = total;
1194 total += 1 + 2 * (node->nparam + node->nvar);
1197 if (count_constraints(graph, &n_eq, &n_ineq, 0) < 0)
1200 dim = isl_space_set_alloc(ctx, 0, total);
1201 isl_basic_set_free(graph->lp);
1202 n_eq += 2 + parametric + force_zero;
1203 if (max_constant_term != -1)
1206 graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
1208 k = isl_basic_set_alloc_equality(graph->lp);
1211 isl_seq_clr(graph->lp->eq[k], 1 + total);
1213 isl_int_set_si(graph->lp->eq[k][1], -1);
1214 for (i = 0; i < 2 * nparam; ++i)
1215 isl_int_set_si(graph->lp->eq[k][1 + param_pos + i], 1);
1218 k = isl_basic_set_alloc_equality(graph->lp);
1221 isl_seq_clr(graph->lp->eq[k], 1 + total);
1222 isl_int_set_si(graph->lp->eq[k][2], -1);
1226 k = isl_basic_set_alloc_equality(graph->lp);
1229 isl_seq_clr(graph->lp->eq[k], 1 + total);
1230 isl_int_set_si(graph->lp->eq[k][3], -1);
1231 for (i = 0; i < graph->n; ++i) {
1232 int pos = 1 + graph->node[i].start + 1;
1234 for (j = 0; j < 2 * graph->node[i].nparam; ++j)
1235 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
1239 k = isl_basic_set_alloc_equality(graph->lp);
1242 isl_seq_clr(graph->lp->eq[k], 1 + total);
1243 isl_int_set_si(graph->lp->eq[k][4], -1);
1244 for (i = 0; i < graph->n; ++i) {
1245 struct isl_sched_node *node = &graph->node[i];
1246 int pos = 1 + node->start + 1 + 2 * node->nparam;
1248 for (j = 0; j < 2 * node->nvar; ++j)
1249 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
1252 if (max_constant_term != -1)
1253 for (i = 0; i < graph->n; ++i) {
1254 struct isl_sched_node *node = &graph->node[i];
1255 k = isl_basic_set_alloc_inequality(graph->lp);
1258 isl_seq_clr(graph->lp->ineq[k], 1 + total);
1259 isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1);
1260 isl_int_set_si(graph->lp->ineq[k][0], max_constant_term);
1263 if (add_all_validity_constraints(graph) < 0)
1265 if (add_all_proximity_constraints(graph) < 0)
1271 /* Analyze the conflicting constraint found by
1272 * isl_tab_basic_set_non_trivial_lexmin. If it corresponds to the validity
1273 * constraint of one of the edges between distinct nodes, living, moreover
1274 * in distinct SCCs, then record the source and sink SCC as this may
1275 * be a good place to cut between SCCs.
1277 static int check_conflict(int con, void *user)
1280 struct isl_sched_graph *graph = user;
1282 if (graph->src_scc >= 0)
1285 con -= graph->lp->n_eq;
1287 if (con >= graph->lp->n_ineq)
1290 for (i = 0; i < graph->n_edge; ++i) {
1291 if (!graph->edge[i].validity)
1293 if (graph->edge[i].src == graph->edge[i].dst)
1295 if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
1297 if (graph->edge[i].start > con)
1299 if (graph->edge[i].end <= con)
1301 graph->src_scc = graph->edge[i].src->scc;
1302 graph->dst_scc = graph->edge[i].dst->scc;
1308 /* Check whether the next schedule row of the given node needs to be
1309 * non-trivial. Lower-dimensional domains may have some trivial rows,
1310 * but as soon as the number of remaining required non-trivial rows
1311 * is as large as the number or remaining rows to be computed,
1312 * all remaining rows need to be non-trivial.
1314 static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
1316 return node->nvar - node->rank >= graph->maxvar - graph->n_row;
1319 /* Solve the ILP problem constructed in setup_lp.
1320 * For each node such that all the remaining rows of its schedule
1321 * need to be non-trivial, we construct a non-triviality region.
1322 * This region imposes that the next row is independent of previous rows.
1323 * In particular the coefficients c_i_x are represented by t_i_x
1324 * variables with c_i_x = Q t_i_x and Q a unimodular matrix such that
1325 * its first columns span the rows of the previously computed part
1326 * of the schedule. The non-triviality region enforces that at least
1327 * one of the remaining components of t_i_x is non-zero, i.e.,
1328 * that the new schedule row depends on at least one of the remaining
1331 static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
1337 for (i = 0; i < graph->n; ++i) {
1338 struct isl_sched_node *node = &graph->node[i];
1339 int skip = node->rank;
1340 graph->region[i].pos = node->start + 1 + 2*(node->nparam+skip);
1341 if (needs_row(graph, node))
1342 graph->region[i].len = 2 * (node->nvar - skip);
1344 graph->region[i].len = 0;
1346 lp = isl_basic_set_copy(graph->lp);
1347 sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
1348 graph->region, &check_conflict, graph);
1352 /* Update the schedules of all nodes based on the given solution
1353 * of the LP problem.
1354 * The new row is added to the current band.
1355 * All possibly negative coefficients are encoded as a difference
1356 * of two non-negative variables, so we need to perform the subtraction
1357 * here. Moreover, if use_cmap is set, then the solution does
1358 * not refer to the actual coefficients c_i_x, but instead to variables
1359 * t_i_x such that c_i_x = Q t_i_x and Q is equal to node->cmap.
1360 * In this case, we then also need to perform this multiplication
1361 * to obtain the values of c_i_x.
1363 * If check_zero is set, then the first two coordinates of sol are
1364 * assumed to correspond to the dependence distance. If these two
1365 * coordinates are zero, then the corresponding scheduling dimension
1366 * is marked as being zero distance.
1368 static int update_schedule(struct isl_sched_graph *graph,
1369 __isl_take isl_vec *sol, int use_cmap, int check_zero)
1373 isl_vec *csol = NULL;
1378 isl_die(sol->ctx, isl_error_internal,
1379 "no solution found", goto error);
1382 zero = isl_int_is_zero(sol->el[1]) &&
1383 isl_int_is_zero(sol->el[2]);
1385 for (i = 0; i < graph->n; ++i) {
1386 struct isl_sched_node *node = &graph->node[i];
1387 int pos = node->start;
1388 int row = isl_mat_rows(node->sched);
1391 csol = isl_vec_alloc(sol->ctx, node->nvar);
1395 isl_map_free(node->sched_map);
1396 node->sched_map = NULL;
1397 node->sched = isl_mat_add_rows(node->sched, 1);
1400 node->sched = isl_mat_set_element(node->sched, row, 0,
1402 for (j = 0; j < node->nparam + node->nvar; ++j)
1403 isl_int_sub(sol->el[1 + pos + 1 + 2 * j + 1],
1404 sol->el[1 + pos + 1 + 2 * j + 1],
1405 sol->el[1 + pos + 1 + 2 * j]);
1406 for (j = 0; j < node->nparam; ++j)
1407 node->sched = isl_mat_set_element(node->sched,
1408 row, 1 + j, sol->el[1+pos+1+2*j+1]);
1409 for (j = 0; j < node->nvar; ++j)
1410 isl_int_set(csol->el[j],
1411 sol->el[1+pos+1+2*(node->nparam+j)+1]);
1413 csol = isl_mat_vec_product(isl_mat_copy(node->cmap),
1417 for (j = 0; j < node->nvar; ++j)
1418 node->sched = isl_mat_set_element(node->sched,
1419 row, 1 + node->nparam + j, csol->el[j]);
1420 node->band[graph->n_total_row] = graph->n_band;
1421 node->zero[graph->n_total_row] = zero;
1427 graph->n_total_row++;
1436 /* Convert node->sched into a map and return this map.
1437 * We simply add equality constraints that express each output variable
1438 * as the affine combination of parameters and input variables specified
1439 * by the schedule matrix.
1441 * The result is cached in node->sched_map, which needs to be released
1442 * whenever node->sched is updated.
1444 static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
1448 isl_local_space *ls;
1449 isl_basic_map *bmap;
1454 if (node->sched_map)
1455 return isl_map_copy(node->sched_map);
1457 nrow = isl_mat_rows(node->sched);
1458 ncol = isl_mat_cols(node->sched) - 1;
1459 dim = isl_space_from_domain(isl_space_copy(node->dim));
1460 dim = isl_space_add_dims(dim, isl_dim_out, nrow);
1461 bmap = isl_basic_map_universe(isl_space_copy(dim));
1462 ls = isl_local_space_from_space(dim);
1466 for (i = 0; i < nrow; ++i) {
1467 c = isl_equality_alloc(isl_local_space_copy(ls));
1468 isl_constraint_set_coefficient_si(c, isl_dim_out, i, -1);
1469 isl_mat_get_element(node->sched, i, 0, &v);
1470 isl_constraint_set_constant(c, v);
1471 for (j = 0; j < node->nparam; ++j) {
1472 isl_mat_get_element(node->sched, i, 1 + j, &v);
1473 isl_constraint_set_coefficient(c, isl_dim_param, j, v);
1475 for (j = 0; j < node->nvar; ++j) {
1476 isl_mat_get_element(node->sched,
1477 i, 1 + node->nparam + j, &v);
1478 isl_constraint_set_coefficient(c, isl_dim_in, j, v);
1480 bmap = isl_basic_map_add_constraint(bmap, c);
1485 isl_local_space_free(ls);
1487 node->sched_map = isl_map_from_basic_map(bmap);
1488 return isl_map_copy(node->sched_map);
1491 /* Update the given dependence relation based on the current schedule.
1492 * That is, intersect the dependence relation with a map expressing
1493 * that source and sink are executed within the same iteration of
1494 * the current schedule.
1495 * This is not the most efficient way, but this shouldn't be a critical
1498 static __isl_give isl_map *specialize(__isl_take isl_map *map,
1499 struct isl_sched_node *src, struct isl_sched_node *dst)
1501 isl_map *src_sched, *dst_sched, *id;
1503 src_sched = node_extract_schedule(src);
1504 dst_sched = node_extract_schedule(dst);
1505 id = isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
1506 return isl_map_intersect(map, id);
1509 /* Update the dependence relations of all edges based on the current schedule.
1510 * If a dependence is carried completely by the current schedule, then
1511 * it is removed and edge_table is updated accordingly.
1513 static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
1516 int reset_table = 0;
1518 for (i = graph->n_edge - 1; i >= 0; --i) {
1519 struct isl_sched_edge *edge = &graph->edge[i];
1520 edge->map = specialize(edge->map, edge->src, edge->dst);
1524 if (isl_map_plain_is_empty(edge->map)) {
1526 isl_map_free(edge->map);
1527 if (i != graph->n_edge - 1)
1528 graph->edge[i] = graph->edge[graph->n_edge - 1];
1534 isl_hash_table_free(ctx, graph->edge_table);
1535 graph->edge_table = NULL;
1536 return graph_init_edge_table(ctx, graph);
1542 static void next_band(struct isl_sched_graph *graph)
1544 graph->band_start = graph->n_total_row;
1548 /* Topologically sort statements mapped to same schedule iteration
1549 * and add a row to the schedule corresponding to this order.
1551 static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph)
1558 if (update_edges(ctx, graph) < 0)
1561 if (graph->n_edge == 0)
1564 if (detect_sccs(graph) < 0)
1567 for (i = 0; i < graph->n; ++i) {
1568 struct isl_sched_node *node = &graph->node[i];
1569 int row = isl_mat_rows(node->sched);
1570 int cols = isl_mat_cols(node->sched);
1572 isl_map_free(node->sched_map);
1573 node->sched_map = NULL;
1574 node->sched = isl_mat_add_rows(node->sched, 1);
1577 node->sched = isl_mat_set_element_si(node->sched, row, 0,
1579 for (j = 1; j < cols; ++j)
1580 node->sched = isl_mat_set_element_si(node->sched,
1582 node->band[graph->n_total_row] = graph->n_band;
1585 graph->n_total_row++;
1591 /* Construct an isl_schedule based on the computed schedule stored
1592 * in graph and with parameters specified by dim.
1594 static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
1595 __isl_take isl_space *dim)
1599 isl_schedule *sched = NULL;
1604 ctx = isl_space_get_ctx(dim);
1605 sched = isl_calloc(ctx, struct isl_schedule,
1606 sizeof(struct isl_schedule) +
1607 (graph->n - 1) * sizeof(struct isl_schedule_node));
1612 sched->n = graph->n;
1613 sched->n_band = graph->n_band;
1614 sched->n_total_row = graph->n_total_row;
1616 for (i = 0; i < sched->n; ++i) {
1618 int *band_end, *band_id, *zero;
1620 band_end = isl_alloc_array(ctx, int, graph->n_band);
1621 band_id = isl_alloc_array(ctx, int, graph->n_band);
1622 zero = isl_alloc_array(ctx, int, graph->n_total_row);
1623 sched->node[i].sched = node_extract_schedule(&graph->node[i]);
1624 sched->node[i].band_end = band_end;
1625 sched->node[i].band_id = band_id;
1626 sched->node[i].zero = zero;
1627 if (!band_end || !band_id || !zero)
1630 for (r = 0; r < graph->n_total_row; ++r)
1631 zero[r] = graph->node[i].zero[r];
1632 for (r = b = 0; r < graph->n_total_row; ++r) {
1633 if (graph->node[i].band[r] == b)
1636 if (graph->node[i].band[r] == -1)
1639 if (r == graph->n_total_row)
1641 sched->node[i].n_band = b;
1642 for (--b; b >= 0; --b)
1643 band_id[b] = graph->node[i].band_id[b];
1650 isl_space_free(dim);
1651 isl_schedule_free(sched);
1655 /* Copy nodes that satisfy node_pred from the src dependence graph
1656 * to the dst dependence graph.
1658 static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
1659 int (*node_pred)(struct isl_sched_node *node, int data), int data)
1664 for (i = 0; i < src->n; ++i) {
1665 if (!node_pred(&src->node[i], data))
1667 dst->node[dst->n].dim = isl_space_copy(src->node[i].dim);
1668 dst->node[dst->n].nvar = src->node[i].nvar;
1669 dst->node[dst->n].nparam = src->node[i].nparam;
1670 dst->node[dst->n].sched = isl_mat_copy(src->node[i].sched);
1671 dst->node[dst->n].sched_map =
1672 isl_map_copy(src->node[i].sched_map);
1673 dst->node[dst->n].band = src->node[i].band;
1674 dst->node[dst->n].band_id = src->node[i].band_id;
1675 dst->node[dst->n].zero = src->node[i].zero;
1682 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
1683 * to the dst dependence graph.
1684 * If the source or destination node of the edge is not in the destination
1685 * graph, then it must be a backward proximity edge and it should simply
1688 static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
1689 struct isl_sched_graph *src,
1690 int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
1695 for (i = 0; i < src->n_edge; ++i) {
1696 struct isl_sched_edge *edge = &src->edge[i];
1698 struct isl_sched_node *dst_src, *dst_dst;
1700 if (!edge_pred(edge, data))
1703 if (isl_map_plain_is_empty(edge->map))
1706 dst_src = graph_find_node(ctx, dst, edge->src->dim);
1707 dst_dst = graph_find_node(ctx, dst, edge->dst->dim);
1708 if (!dst_src || !dst_dst) {
1710 isl_die(ctx, isl_error_internal,
1711 "backward validity edge", return -1);
1715 map = isl_map_copy(edge->map);
1717 dst->edge[dst->n_edge].src = dst_src;
1718 dst->edge[dst->n_edge].dst = dst_dst;
1719 dst->edge[dst->n_edge].map = map;
1720 dst->edge[dst->n_edge].validity = edge->validity;
1721 dst->edge[dst->n_edge].proximity = edge->proximity;
1728 /* Given a "src" dependence graph that contains the nodes from "dst"
1729 * that satisfy node_pred, copy the schedule computed in "src"
1730 * for those nodes back to "dst".
1732 static int copy_schedule(struct isl_sched_graph *dst,
1733 struct isl_sched_graph *src,
1734 int (*node_pred)(struct isl_sched_node *node, int data), int data)
1739 for (i = 0; i < dst->n; ++i) {
1740 if (!node_pred(&dst->node[i], data))
1742 isl_mat_free(dst->node[i].sched);
1743 isl_map_free(dst->node[i].sched_map);
1744 dst->node[i].sched = isl_mat_copy(src->node[src->n].sched);
1745 dst->node[i].sched_map =
1746 isl_map_copy(src->node[src->n].sched_map);
1750 dst->n_total_row = src->n_total_row;
1751 dst->n_band = src->n_band;
1756 /* Compute the maximal number of variables over all nodes.
1757 * This is the maximal number of linearly independent schedule
1758 * rows that we need to compute.
1759 * Just in case we end up in a part of the dependence graph
1760 * with only lower-dimensional domains, we make sure we will
1761 * compute the required amount of extra linearly independent rows.
1763 static int compute_maxvar(struct isl_sched_graph *graph)
1768 for (i = 0; i < graph->n; ++i) {
1769 struct isl_sched_node *node = &graph->node[i];
1772 if (node_update_cmap(node) < 0)
1774 nvar = node->nvar + graph->n_row - node->rank;
1775 if (nvar > graph->maxvar)
1776 graph->maxvar = nvar;
1782 static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph);
1783 static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph);
1785 /* Compute a schedule for a subgraph of "graph". In particular, for
1786 * the graph composed of nodes that satisfy node_pred and edges that
1787 * that satisfy edge_pred. The caller should precompute the number
1788 * of nodes and edges that satisfy these predicates and pass them along
1789 * as "n" and "n_edge".
1790 * If the subgraph is known to consist of a single component, then wcc should
1791 * be set and then we call compute_schedule_wcc on the constructed subgraph.
1792 * Otherwise, we call compute_schedule, which will check whether the subgraph
1795 static int compute_sub_schedule(isl_ctx *ctx,
1796 struct isl_sched_graph *graph, int n, int n_edge,
1797 int (*node_pred)(struct isl_sched_node *node, int data),
1798 int (*edge_pred)(struct isl_sched_edge *edge, int data),
1801 struct isl_sched_graph split = { 0 };
1803 if (graph_alloc(ctx, &split, n, n_edge) < 0)
1805 if (copy_nodes(&split, graph, node_pred, data) < 0)
1807 if (graph_init_table(ctx, &split) < 0)
1809 if (copy_edges(ctx, &split, graph, edge_pred, data) < 0)
1811 if (graph_init_edge_table(ctx, &split) < 0)
1813 split.n_row = graph->n_row;
1814 split.n_total_row = graph->n_total_row;
1815 split.n_band = graph->n_band;
1816 split.band_start = graph->band_start;
1818 if (wcc && compute_schedule_wcc(ctx, &split) < 0)
1820 if (!wcc && compute_schedule(ctx, &split) < 0)
1823 copy_schedule(graph, &split, node_pred, data);
1825 graph_free(ctx, &split);
1828 graph_free(ctx, &split);
1832 static int node_scc_exactly(struct isl_sched_node *node, int scc)
1834 return node->scc == scc;
1837 static int node_scc_at_most(struct isl_sched_node *node, int scc)
1839 return node->scc <= scc;
1842 static int node_scc_at_least(struct isl_sched_node *node, int scc)
1844 return node->scc >= scc;
1847 static int edge_src_scc_exactly(struct isl_sched_edge *edge, int scc)
1849 return edge->src->scc == scc;
1852 static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
1854 return edge->dst->scc <= scc;
1857 static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
1859 return edge->src->scc >= scc;
1862 /* Pad the schedules of all nodes with zero rows such that in the end
1863 * they all have graph->n_total_row rows.
1864 * The extra rows don't belong to any band, so they get assigned band number -1.
1866 static int pad_schedule(struct isl_sched_graph *graph)
1870 for (i = 0; i < graph->n; ++i) {
1871 struct isl_sched_node *node = &graph->node[i];
1872 int row = isl_mat_rows(node->sched);
1873 if (graph->n_total_row > row) {
1874 isl_map_free(node->sched_map);
1875 node->sched_map = NULL;
1877 node->sched = isl_mat_add_zero_rows(node->sched,
1878 graph->n_total_row - row);
1881 for (j = row; j < graph->n_total_row; ++j)
1888 /* Split the current graph into two parts and compute a schedule for each
1889 * part individually. In particular, one part consists of all SCCs up
1890 * to and including graph->src_scc, while the other part contains the other
1893 * The split is enforced in the schedule by constant rows with two different
1894 * values (0 and 1). These constant rows replace the previously computed rows
1895 * in the current band.
1896 * It would be possible to reuse them as the first rows in the next
1897 * band, but recomputing them may result in better rows as we are looking
1898 * at a smaller part of the dependence graph.
1899 * compute_split_schedule is only called when no zero-distance schedule row
1900 * could be found on the entire graph, so we wark the splitting row as
1901 * non zero-distance.
1903 * The band_id of the second group is set to n, where n is the number
1904 * of nodes in the first group. This ensures that the band_ids over
1905 * the two groups remain disjoint, even if either or both of the two
1906 * groups contain independent components.
1908 static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
1910 int i, j, n, e1, e2;
1911 int n_total_row, orig_total_row;
1912 int n_band, orig_band;
1915 drop = graph->n_total_row - graph->band_start;
1916 graph->n_total_row -= drop;
1917 graph->n_row -= drop;
1920 for (i = 0; i < graph->n; ++i) {
1921 struct isl_sched_node *node = &graph->node[i];
1922 int row = isl_mat_rows(node->sched) - drop;
1923 int cols = isl_mat_cols(node->sched);
1924 int before = node->scc <= graph->src_scc;
1929 isl_map_free(node->sched_map);
1930 node->sched_map = NULL;
1931 node->sched = isl_mat_drop_rows(node->sched,
1932 graph->band_start, drop);
1933 node->sched = isl_mat_add_rows(node->sched, 1);
1936 node->sched = isl_mat_set_element_si(node->sched, row, 0,
1938 for (j = 1; j < cols; ++j)
1939 node->sched = isl_mat_set_element_si(node->sched,
1941 node->band[graph->n_total_row] = graph->n_band;
1942 node->zero[graph->n_total_row] = 0;
1946 for (i = 0; i < graph->n_edge; ++i) {
1947 if (graph->edge[i].dst->scc <= graph->src_scc)
1949 if (graph->edge[i].src->scc > graph->src_scc)
1953 graph->n_total_row++;
1956 for (i = 0; i < graph->n; ++i) {
1957 struct isl_sched_node *node = &graph->node[i];
1958 if (node->scc > graph->src_scc)
1959 node->band_id[graph->n_band] = n;
1962 orig_total_row = graph->n_total_row;
1963 orig_band = graph->n_band;
1964 if (compute_sub_schedule(ctx, graph, n, e1,
1965 &node_scc_at_most, &edge_dst_scc_at_most,
1966 graph->src_scc, 0) < 0)
1968 n_total_row = graph->n_total_row;
1969 graph->n_total_row = orig_total_row;
1970 n_band = graph->n_band;
1971 graph->n_band = orig_band;
1972 if (compute_sub_schedule(ctx, graph, graph->n - n, e2,
1973 &node_scc_at_least, &edge_src_scc_at_least,
1974 graph->src_scc + 1, 0) < 0)
1976 if (n_total_row > graph->n_total_row)
1977 graph->n_total_row = n_total_row;
1978 if (n_band > graph->n_band)
1979 graph->n_band = n_band;
1981 return pad_schedule(graph);
1984 /* Compute the next band of the schedule after updating the dependence
1985 * relations based on the the current schedule.
1987 static int compute_next_band(isl_ctx *ctx, struct isl_sched_graph *graph)
1989 if (update_edges(ctx, graph) < 0)
1993 return compute_schedule(ctx, graph);
1996 /* Add constraints to graph->lp that force the dependence "map" (which
1997 * is part of the dependence relation of "edge")
1998 * to be respected and attempt to carry it, where the edge is one from
1999 * a node j to itself. "pos" is the sequence number of the given map.
2000 * That is, add constraints that enforce
2002 * (c_j_0 + c_j_n n + c_j_x y) - (c_j_0 + c_j_n n + c_j_x x)
2003 * = c_j_x (y - x) >= e_i
2005 * for each (x,y) in R.
2006 * We obtain general constraints on coefficients (c_0, c_n, c_x)
2007 * of valid constraints for (y - x) and then plug in (-e_i, 0, c_j_x),
2008 * with each coefficient in c_j_x represented as a pair of non-negative
2011 static int add_intra_constraints(struct isl_sched_graph *graph,
2012 struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
2015 isl_ctx *ctx = isl_map_get_ctx(map);
2017 isl_dim_map *dim_map;
2018 isl_basic_set *coef;
2019 struct isl_sched_node *node = edge->src;
2021 coef = intra_coefficients(graph, map);
2023 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
2025 total = isl_basic_set_total_dim(graph->lp);
2026 dim_map = isl_dim_map_alloc(ctx, total);
2027 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
2028 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
2029 isl_space_dim(dim, isl_dim_set), 1,
2031 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
2032 isl_space_dim(dim, isl_dim_set), 1,
2034 graph->lp = isl_basic_set_extend_constraints(graph->lp,
2035 coef->n_eq, coef->n_ineq);
2036 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
2038 isl_space_free(dim);
2043 /* Add constraints to graph->lp that force the dependence "map" (which
2044 * is part of the dependence relation of "edge")
2045 * to be respected and attempt to carry it, where the edge is one from
2046 * node j to node k. "pos" is the sequence number of the given map.
2047 * That is, add constraints that enforce
2049 * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
2051 * for each (x,y) in R.
2052 * We obtain general constraints on coefficients (c_0, c_n, c_x)
2053 * of valid constraints for R and then plug in
2054 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, c_k_x - c_j_x)
2055 * with each coefficient (except e_i, c_k_0 and c_j_0)
2056 * represented as a pair of non-negative coefficients.
2058 static int add_inter_constraints(struct isl_sched_graph *graph,
2059 struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
2062 isl_ctx *ctx = isl_map_get_ctx(map);
2064 isl_dim_map *dim_map;
2065 isl_basic_set *coef;
2066 struct isl_sched_node *src = edge->src;
2067 struct isl_sched_node *dst = edge->dst;
2069 coef = inter_coefficients(graph, map);
2071 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
2073 total = isl_basic_set_total_dim(graph->lp);
2074 dim_map = isl_dim_map_alloc(ctx, total);
2076 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
2078 isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1);
2079 isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
2080 isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
2081 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
2082 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
2084 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
2085 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
2088 isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
2089 isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
2090 isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
2091 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
2092 isl_space_dim(dim, isl_dim_set), 1,
2094 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
2095 isl_space_dim(dim, isl_dim_set), 1,
2098 graph->lp = isl_basic_set_extend_constraints(graph->lp,
2099 coef->n_eq, coef->n_ineq);
2100 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
2102 isl_space_free(dim);
2107 /* Add constraints to graph->lp that force all dependence
2108 * to be respected and attempt to carry it.
2110 static int add_all_constraints(struct isl_sched_graph *graph)
2116 for (i = 0; i < graph->n_edge; ++i) {
2117 struct isl_sched_edge *edge= &graph->edge[i];
2118 for (j = 0; j < edge->map->n; ++j) {
2119 isl_basic_map *bmap;
2122 bmap = isl_basic_map_copy(edge->map->p[j]);
2123 map = isl_map_from_basic_map(bmap);
2125 if (edge->src == edge->dst &&
2126 add_intra_constraints(graph, edge, map, pos) < 0)
2128 if (edge->src != edge->dst &&
2129 add_inter_constraints(graph, edge, map, pos) < 0)
2138 /* Count the number of equality and inequality constraints
2139 * that will be added to the carry_lp problem.
2140 * If once is set, then we count
2141 * each edge exactly once. Otherwise, we count as follows
2142 * validity -> 1 (>= 0)
2143 * validity+proximity -> 2 (>= 0 and upper bound)
2144 * proximity -> 2 (lower and upper bound)
2146 static int count_all_constraints(struct isl_sched_graph *graph,
2147 int *n_eq, int *n_ineq, int once)
2151 *n_eq = *n_ineq = 0;
2152 for (i = 0; i < graph->n_edge; ++i) {
2153 struct isl_sched_edge *edge= &graph->edge[i];
2154 for (j = 0; j < edge->map->n; ++j) {
2155 isl_basic_map *bmap;
2158 bmap = isl_basic_map_copy(edge->map->p[j]);
2159 map = isl_map_from_basic_map(bmap);
2161 if (count_map_constraints(graph, edge, map,
2162 n_eq, n_ineq, once) < 0)
2170 /* Construct an LP problem for finding schedule coefficients
2171 * such that the schedule carries as many dependences as possible.
2172 * In particular, for each dependence i, we bound the dependence distance
2173 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
2174 * of all e_i's. Dependence with e_i = 0 in the solution are simply
2175 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
2176 * Note that if the dependence relation is a union of basic maps,
2177 * then we have to consider each basic map individually as it may only
2178 * be possible to carry the dependences expressed by some of those
2179 * basic maps and not all off them.
2180 * Below, we consider each of those basic maps as a separate "edge".
2182 * All variables of the LP are non-negative. The actual coefficients
2183 * may be negative, so each coefficient is represented as the difference
2184 * of two non-negative variables. The negative part always appears
2185 * immediately before the positive part.
2186 * Other than that, the variables have the following order
2188 * - sum of (1 - e_i) over all edges
2189 * - sum of positive and negative parts of all c_n coefficients
2190 * (unconstrained when computing non-parametric schedules)
2191 * - sum of positive and negative parts of all c_x coefficients
2196 * - positive and negative parts of c_i_n (if parametric)
2197 * - positive and negative parts of c_i_x
2199 * The constraints are those from the edges plus three equalities
2200 * to express the sums and n_edge inequalities to express e_i <= 1.
2202 static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
2212 for (i = 0; i < graph->n_edge; ++i)
2213 n_edge += graph->edge[i].map->n;
2216 for (i = 0; i < graph->n; ++i) {
2217 struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2218 node->start = total;
2219 total += 1 + 2 * (node->nparam + node->nvar);
2222 if (count_all_constraints(graph, &n_eq, &n_ineq, 1) < 0)
2225 dim = isl_space_set_alloc(ctx, 0, total);
2226 isl_basic_set_free(graph->lp);
2229 graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
2230 graph->lp = isl_basic_set_set_rational(graph->lp);
2232 k = isl_basic_set_alloc_equality(graph->lp);
2235 isl_seq_clr(graph->lp->eq[k], 1 + total);
2236 isl_int_set_si(graph->lp->eq[k][0], -n_edge);
2237 isl_int_set_si(graph->lp->eq[k][1], 1);
2238 for (i = 0; i < n_edge; ++i)
2239 isl_int_set_si(graph->lp->eq[k][4 + i], 1);
2241 k = isl_basic_set_alloc_equality(graph->lp);
2244 isl_seq_clr(graph->lp->eq[k], 1 + total);
2245 isl_int_set_si(graph->lp->eq[k][2], -1);
2246 for (i = 0; i < graph->n; ++i) {
2247 int pos = 1 + graph->node[i].start + 1;
2249 for (j = 0; j < 2 * graph->node[i].nparam; ++j)
2250 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2253 k = isl_basic_set_alloc_equality(graph->lp);
2256 isl_seq_clr(graph->lp->eq[k], 1 + total);
2257 isl_int_set_si(graph->lp->eq[k][3], -1);
2258 for (i = 0; i < graph->n; ++i) {
2259 struct isl_sched_node *node = &graph->node[i];
2260 int pos = 1 + node->start + 1 + 2 * node->nparam;
2262 for (j = 0; j < 2 * node->nvar; ++j)
2263 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2266 for (i = 0; i < n_edge; ++i) {
2267 k = isl_basic_set_alloc_inequality(graph->lp);
2270 isl_seq_clr(graph->lp->ineq[k], 1 + total);
2271 isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
2272 isl_int_set_si(graph->lp->ineq[k][0], 1);
2275 if (add_all_constraints(graph) < 0)
2281 /* If the schedule_split_parallel option is set and if the linear
2282 * parts of the scheduling rows for all nodes in the graphs are the same,
2283 * then split off the constant term from the linear part.
2284 * The constant term is then placed in a separate band and
2285 * the linear part is simplified.
2287 static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
2292 struct isl_sched_node *node0;
2294 if (!ctx->opt->schedule_split_parallel)
2299 node0 = &graph->node[0];
2300 row = isl_mat_rows(node0->sched) - 1;
2301 cols = isl_mat_cols(node0->sched);
2302 for (i = 1; i < graph->n; ++i) {
2303 struct isl_sched_node *node = &graph->node[i];
2305 if (isl_mat_cols(node->sched) != cols)
2307 if (!isl_seq_eq(node0->sched->row[row] + 1,
2308 node->sched->row[row] + 1, cols - 1))
2311 isl_int_ne(node0->sched->row[row][0],
2312 node->sched->row[row][0]))
2320 for (i = 0; i < graph->n; ++i) {
2321 struct isl_sched_node *node = &graph->node[i];
2323 isl_map_free(node->sched_map);
2324 node->sched_map = NULL;
2325 node->sched = isl_mat_add_zero_rows(node->sched, 1);
2328 isl_int_set(node->sched->row[row + 1][0],
2329 node->sched->row[row][0]);
2330 isl_int_set_si(node->sched->row[row][0], 0);
2331 node->sched = isl_mat_normalize_row(node->sched, row);
2334 node->band[graph->n_total_row] = graph->n_band;
2337 graph->n_total_row++;
2342 /* Construct a schedule row for each node such that as many dependences
2343 * as possible are carried and then continue with the next band.
2345 static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
2353 for (i = 0; i < graph->n_edge; ++i)
2354 n_edge += graph->edge[i].map->n;
2356 if (setup_carry_lp(ctx, graph) < 0)
2359 lp = isl_basic_set_copy(graph->lp);
2360 sol = isl_tab_basic_set_non_neg_lexmin(lp);
2364 if (sol->size == 0) {
2366 isl_die(ctx, isl_error_internal,
2367 "error in schedule construction", return -1);
2370 if (isl_int_cmp_si(sol->el[1], n_edge) >= 0) {
2372 isl_die(ctx, isl_error_unknown,
2373 "unable to carry dependences", return -1);
2376 if (update_schedule(graph, sol, 0, 0) < 0)
2379 if (split_parallel(ctx, graph) < 0)
2382 return compute_next_band(ctx, graph);
2385 /* Are there any validity edges in the graph?
2387 static int has_validity_edges(struct isl_sched_graph *graph)
2391 for (i = 0; i < graph->n_edge; ++i)
2392 if (graph->edge[i].validity)
2398 /* Should we apply a Feautrier step?
2399 * That is, did the user request the Feautrier algorithm and are
2400 * there any validity dependences (left)?
2402 static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
2404 if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
2407 return has_validity_edges(graph);
2410 /* Compute a schedule for a connected dependence graph using Feautrier's
2411 * multi-dimensional scheduling algorithm.
2412 * The original algorithm is described in [1].
2413 * The main idea is to minimize the number of scheduling dimensions, by
2414 * trying to satisfy as many dependences as possible per scheduling dimension.
2416 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
2417 * Problem, Part II: Multi-Dimensional Time.
2418 * In Intl. Journal of Parallel Programming, 1992.
2420 static int compute_schedule_wcc_feautrier(isl_ctx *ctx,
2421 struct isl_sched_graph *graph)
2423 return carry_dependences(ctx, graph);
2426 /* Compute a schedule for a connected dependence graph.
2427 * We try to find a sequence of as many schedule rows as possible that result
2428 * in non-negative dependence distances (independent of the previous rows
2429 * in the sequence, i.e., such that the sequence is tilable).
2430 * If we can't find any more rows we either
2431 * - split between SCCs and start over (assuming we found an interesting
2432 * pair of SCCs between which to split)
2433 * - continue with the next band (assuming the current band has at least
2435 * - try to carry as many dependences as possible and continue with the next
2438 * If Feautrier's algorithm is selected, we first recursively try to satisfy
2439 * as many validity dependences as possible. When all validity dependences
2440 * are satisfied we extend the schedule to a full-dimensional schedule.
2442 * If we manage to complete the schedule, we finish off by topologically
2443 * sorting the statements based on the remaining dependences.
2445 * If ctx->opt->schedule_outer_zero_distance is set, then we force the
2446 * outermost dimension in the current band to be zero distance. If this
2447 * turns out to be impossible, we fall back on the general scheme above
2448 * and try to carry as many dependences as possible.
2450 static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
2454 if (detect_sccs(graph) < 0)
2458 if (compute_maxvar(graph) < 0)
2461 if (need_feautrier_step(ctx, graph))
2462 return compute_schedule_wcc_feautrier(ctx, graph);
2464 if (ctx->opt->schedule_outer_zero_distance)
2467 while (graph->n_row < graph->maxvar) {
2470 graph->src_scc = -1;
2471 graph->dst_scc = -1;
2473 if (setup_lp(ctx, graph, force_zero) < 0)
2475 sol = solve_lp(graph);
2478 if (sol->size == 0) {
2480 if (!ctx->opt->schedule_maximize_band_depth &&
2481 graph->n_total_row > graph->band_start)
2482 return compute_next_band(ctx, graph);
2483 if (graph->src_scc >= 0)
2484 return compute_split_schedule(ctx, graph);
2485 if (graph->n_total_row > graph->band_start)
2486 return compute_next_band(ctx, graph);
2487 return carry_dependences(ctx, graph);
2489 if (update_schedule(graph, sol, 1, 1) < 0)
2494 if (graph->n_total_row > graph->band_start)
2496 return sort_statements(ctx, graph);
2499 /* Compute a schedule for each component (identified by node->scc)
2500 * of the dependence graph separately and then combine the results.
2502 * The band_id is adjusted such that each component has a separate id.
2503 * Note that the band_id may have already been set to a value different
2504 * from zero by compute_split_schedule.
2506 static int compute_component_schedule(isl_ctx *ctx,
2507 struct isl_sched_graph *graph)
2511 int n_total_row, orig_total_row;
2512 int n_band, orig_band;
2515 orig_total_row = graph->n_total_row;
2517 orig_band = graph->n_band;
2518 for (i = 0; i < graph->n; ++i)
2519 graph->node[i].band_id[graph->n_band] += graph->node[i].scc;
2520 for (wcc = 0; wcc < graph->scc; ++wcc) {
2522 for (i = 0; i < graph->n; ++i)
2523 if (graph->node[i].scc == wcc)
2526 for (i = 0; i < graph->n_edge; ++i)
2527 if (graph->edge[i].src->scc == wcc)
2530 if (compute_sub_schedule(ctx, graph, n, n_edge,
2532 &edge_src_scc_exactly, wcc, 1) < 0)
2534 if (graph->n_total_row > n_total_row)
2535 n_total_row = graph->n_total_row;
2536 graph->n_total_row = orig_total_row;
2537 if (graph->n_band > n_band)
2538 n_band = graph->n_band;
2539 graph->n_band = orig_band;
2542 graph->n_total_row = n_total_row;
2543 graph->n_band = n_band;
2545 return pad_schedule(graph);
2548 /* Compute a schedule for the given dependence graph.
2549 * We first check if the graph is connected (through validity dependences)
2550 * and, if not, compute a schedule for each component separately.
2552 static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
2554 if (detect_wccs(graph) < 0)
2558 return compute_component_schedule(ctx, graph);
2560 return compute_schedule_wcc(ctx, graph);
2563 /* Compute a schedule for the given union of domains that respects
2564 * all the validity dependences.
2565 * If the default isl scheduling algorithm is used, it tries to minimize
2566 * the dependence distances over the proximity dependences.
2567 * If Feautrier's scheduling algorithm is used, the proximity dependence
2568 * distances are only minimized during the extension to a full-dimensional
2571 __isl_give isl_schedule *isl_union_set_compute_schedule(
2572 __isl_take isl_union_set *domain,
2573 __isl_take isl_union_map *validity,
2574 __isl_take isl_union_map *proximity)
2576 isl_ctx *ctx = isl_union_set_get_ctx(domain);
2578 struct isl_sched_graph graph = { 0 };
2579 isl_schedule *sched;
2581 domain = isl_union_set_align_params(domain,
2582 isl_union_map_get_space(validity));
2583 domain = isl_union_set_align_params(domain,
2584 isl_union_map_get_space(proximity));
2585 dim = isl_union_set_get_space(domain);
2586 validity = isl_union_map_align_params(validity, isl_space_copy(dim));
2587 proximity = isl_union_map_align_params(proximity, dim);
2592 graph.n = isl_union_set_n_set(domain);
2595 if (graph_alloc(ctx, &graph, graph.n,
2596 isl_union_map_n_map(validity) + isl_union_map_n_map(proximity)) < 0)
2598 if (compute_max_row(&graph, domain) < 0)
2602 if (isl_union_set_foreach_set(domain, &extract_node, &graph) < 0)
2604 if (graph_init_table(ctx, &graph) < 0)
2607 if (isl_union_map_foreach_map(validity, &extract_edge, &graph) < 0)
2609 if (graph_init_edge_table(ctx, &graph) < 0)
2611 if (isl_union_map_foreach_map(proximity, &extract_edge, &graph) < 0)
2614 if (compute_schedule(ctx, &graph) < 0)
2618 sched = extract_schedule(&graph, isl_union_set_get_space(domain));
2620 graph_free(ctx, &graph);
2621 isl_union_set_free(domain);
2622 isl_union_map_free(validity);
2623 isl_union_map_free(proximity);
2627 graph_free(ctx, &graph);
2628 isl_union_set_free(domain);
2629 isl_union_map_free(validity);
2630 isl_union_map_free(proximity);
2634 void *isl_schedule_free(__isl_take isl_schedule *sched)
2640 if (--sched->ref > 0)
2643 for (i = 0; i < sched->n; ++i) {
2644 isl_map_free(sched->node[i].sched);
2645 free(sched->node[i].band_end);
2646 free(sched->node[i].band_id);
2647 free(sched->node[i].zero);
2649 isl_space_free(sched->dim);
2650 isl_band_list_free(sched->band_forest);
2655 isl_ctx *isl_schedule_get_ctx(__isl_keep isl_schedule *schedule)
2657 return schedule ? isl_space_get_ctx(schedule->dim) : NULL;
2660 __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
2663 isl_union_map *umap;
2668 umap = isl_union_map_empty(isl_space_copy(sched->dim));
2669 for (i = 0; i < sched->n; ++i)
2670 umap = isl_union_map_add_map(umap,
2671 isl_map_copy(sched->node[i].sched));
2676 static __isl_give isl_band_list *construct_band_list(
2677 __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent,
2678 int band_nr, int *parent_active, int n_active);
2680 /* Construct an isl_band structure for the band in the given schedule
2681 * with sequence number band_nr for the n_active nodes marked by active.
2682 * If the nodes don't have a band with the given sequence number,
2683 * then a band without members is created.
2685 * Because of the way the schedule is constructed, we know that
2686 * the position of the band inside the schedule of a node is the same
2687 * for all active nodes.
2689 static __isl_give isl_band *construct_band(__isl_keep isl_schedule *schedule,
2690 __isl_keep isl_band *parent,
2691 int band_nr, int *active, int n_active)
2694 isl_ctx *ctx = isl_schedule_get_ctx(schedule);
2696 unsigned start, end;
2698 band = isl_calloc_type(ctx, isl_band);
2703 band->schedule = schedule;
2704 band->parent = parent;
2706 for (i = 0; i < schedule->n; ++i)
2707 if (active[i] && schedule->node[i].n_band > band_nr + 1)
2710 if (i < schedule->n) {
2711 band->children = construct_band_list(schedule, band,
2712 band_nr + 1, active, n_active);
2713 if (!band->children)
2717 for (i = 0; i < schedule->n; ++i)
2721 if (i >= schedule->n)
2722 isl_die(ctx, isl_error_internal,
2723 "band without active statements", goto error);
2725 start = band_nr ? schedule->node[i].band_end[band_nr - 1] : 0;
2726 end = band_nr < schedule->node[i].n_band ?
2727 schedule->node[i].band_end[band_nr] : start;
2728 band->n = end - start;
2730 band->zero = isl_alloc_array(ctx, int, band->n);
2734 for (j = 0; j < band->n; ++j)
2735 band->zero[j] = schedule->node[i].zero[start + j];
2737 band->map = isl_union_map_empty(isl_space_copy(schedule->dim));
2738 for (i = 0; i < schedule->n; ++i) {
2745 map = isl_map_copy(schedule->node[i].sched);
2746 n_out = isl_map_dim(map, isl_dim_out);
2747 map = isl_map_project_out(map, isl_dim_out, end, n_out - end);
2748 map = isl_map_project_out(map, isl_dim_out, 0, start);
2749 band->map = isl_union_map_union(band->map,
2750 isl_union_map_from_map(map));
2757 isl_band_free(band);
2761 /* Construct a list of bands that start at the same position (with
2762 * sequence number band_nr) in the schedules of the nodes that
2763 * were active in the parent band.
2765 * A separate isl_band structure is created for each band_id
2766 * and for each node that does not have a band with sequence
2767 * number band_nr. In the latter case, a band without members
2769 * This ensures that if a band has any children, then each node
2770 * that was active in the band is active in exactly one of the children.
2772 static __isl_give isl_band_list *construct_band_list(
2773 __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent,
2774 int band_nr, int *parent_active, int n_active)
2777 isl_ctx *ctx = isl_schedule_get_ctx(schedule);
2780 isl_band_list *list;
2783 for (i = 0; i < n_active; ++i) {
2784 for (j = 0; j < schedule->n; ++j) {
2785 if (!parent_active[j])
2787 if (schedule->node[j].n_band <= band_nr)
2789 if (schedule->node[j].band_id[band_nr] == i) {
2795 for (j = 0; j < schedule->n; ++j)
2796 if (schedule->node[j].n_band <= band_nr)
2801 list = isl_band_list_alloc(ctx, n_band);
2802 band = construct_band(schedule, parent, band_nr,
2803 parent_active, n_active);
2804 return isl_band_list_add(list, band);
2807 active = isl_alloc_array(ctx, int, schedule->n);
2811 list = isl_band_list_alloc(ctx, n_band);
2813 for (i = 0; i < n_active; ++i) {
2817 for (j = 0; j < schedule->n; ++j) {
2818 active[j] = parent_active[j] &&
2819 schedule->node[j].n_band > band_nr &&
2820 schedule->node[j].band_id[band_nr] == i;
2827 band = construct_band(schedule, parent, band_nr, active, n);
2829 list = isl_band_list_add(list, band);
2831 for (i = 0; i < schedule->n; ++i) {
2833 if (!parent_active[i])
2835 if (schedule->node[i].n_band > band_nr)
2837 for (j = 0; j < schedule->n; ++j)
2839 band = construct_band(schedule, parent, band_nr, active, 1);
2840 list = isl_band_list_add(list, band);
2848 /* Construct a band forest representation of the schedule and
2849 * return the list of roots.
2851 static __isl_give isl_band_list *construct_forest(
2852 __isl_keep isl_schedule *schedule)
2855 isl_ctx *ctx = isl_schedule_get_ctx(schedule);
2856 isl_band_list *forest;
2859 active = isl_alloc_array(ctx, int, schedule->n);
2863 for (i = 0; i < schedule->n; ++i)
2866 forest = construct_band_list(schedule, NULL, 0, active, schedule->n);
2873 /* Return the roots of a band forest representation of the schedule.
2875 __isl_give isl_band_list *isl_schedule_get_band_forest(
2876 __isl_keep isl_schedule *schedule)
2880 if (!schedule->band_forest)
2881 schedule->band_forest = construct_forest(schedule);
2882 return isl_band_list_dup(schedule->band_forest);
2885 static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
2886 __isl_keep isl_band_list *list);
2888 static __isl_give isl_printer *print_band(__isl_take isl_printer *p,
2889 __isl_keep isl_band *band)
2891 isl_band_list *children;
2893 p = isl_printer_start_line(p);
2894 p = isl_printer_print_union_map(p, band->map);
2895 p = isl_printer_end_line(p);
2897 if (!isl_band_has_children(band))
2900 children = isl_band_get_children(band);
2902 p = isl_printer_indent(p, 4);
2903 p = print_band_list(p, children);
2904 p = isl_printer_indent(p, -4);
2906 isl_band_list_free(children);
2911 static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
2912 __isl_keep isl_band_list *list)
2916 n = isl_band_list_n_band(list);
2917 for (i = 0; i < n; ++i) {
2919 band = isl_band_list_get_band(list, i);
2920 p = print_band(p, band);
2921 isl_band_free(band);
2927 __isl_give isl_printer *isl_printer_print_schedule(__isl_take isl_printer *p,
2928 __isl_keep isl_schedule *schedule)
2930 isl_band_list *forest;
2932 forest = isl_schedule_get_band_forest(schedule);
2934 p = print_band_list(p, forest);
2936 isl_band_list_free(forest);
2941 void isl_schedule_dump(__isl_keep isl_schedule *schedule)
2943 isl_printer *printer;
2948 printer = isl_printer_to_file(isl_schedule_get_ctx(schedule), stderr);
2949 printer = isl_printer_print_schedule(printer, schedule);
2951 isl_printer_free(printer);