return edge->src == temp->src && edge->dst == temp->dst;
}
+/* Add the given edge to graph->edge_table if it is a validity edge.
+ */
+static int graph_edge_table_add(isl_ctx *ctx, struct isl_sched_graph *graph,
+ struct isl_sched_edge *edge)
+{
+ struct isl_hash_table_entry *entry;
+ uint32_t hash;
+
+ if (!edge->validity)
+ return 0;
+
+ hash = isl_hash_init();
+ hash = isl_hash_builtin(hash, edge->src);
+ hash = isl_hash_builtin(hash, edge->dst);
+ entry = isl_hash_table_find(ctx, graph->edge_table, hash,
+ &edge_has_src_and_dst, edge, 1);
+ if (!entry)
+ return -1;
+ entry->data = edge;
+
+ return 0;
+}
+
/* Initialize edge_table based on the list of edges.
* Only edges with validity set are added to the table.
*/
if (!graph->edge_table)
return -1;
- for (i = 0; i < graph->n_edge; ++i) {
- struct isl_hash_table_entry *entry;
- uint32_t hash;
-
- if (!graph->edge[i].validity)
- continue;
-
- hash = isl_hash_init();
- hash = isl_hash_builtin(hash, graph->edge[i].src);
- hash = isl_hash_builtin(hash, graph->edge[i].dst);
- entry = isl_hash_table_find(ctx, graph->edge_table, hash,
- &edge_has_src_and_dst,
- &graph->edge[i], 1);
- if (!entry)
+ for (i = 0; i < graph->n_edge; ++i)
+ if (graph_edge_table_add(ctx, graph, &graph->edge[i]) < 0)
return -1;
- entry->data = &graph->edge[i];
- }
return 0;
}
/* Count the number of equality and inequality constraints
* that will be added for the given map.
- * If once is set, then we count
+ * If carry is set, then we are counting the number of (validity)
+ * constraints that will be added in setup_carry_lp and we count
* each edge exactly once. Otherwise, we count as follows
* validity -> 1 (>= 0)
* validity+proximity -> 2 (>= 0 and upper bound)
*/
static int count_map_constraints(struct isl_sched_graph *graph,
struct isl_sched_edge *edge, __isl_take isl_map *map,
- int *n_eq, int *n_ineq, int once)
+ int *n_eq, int *n_ineq, int carry)
{
isl_basic_set *coef;
- int f = once ? 1 : edge->proximity ? 2 : 1;
+ int f = carry ? 1 : edge->proximity ? 2 : 1;
+
+ if (carry && !edge->validity) {
+ isl_map_free(map);
+ return 0;
+ }
if (edge->src == edge->dst)
coef = intra_coefficients(graph, map);
/* Count the number of equality and inequality constraints
* that will be added to the main lp problem.
- * If once is set, then we count
- * each edge exactly once. Otherwise, we count as follows
+ * We count as follows
* validity -> 1 (>= 0)
* validity+proximity -> 2 (>= 0 and upper bound)
* proximity -> 2 (lower and upper bound)
*/
static int count_constraints(struct isl_sched_graph *graph,
- int *n_eq, int *n_ineq, int once)
+ int *n_eq, int *n_ineq)
{
int i;
isl_map *map = isl_map_copy(edge->map);
if (count_map_constraints(graph, edge, map,
- n_eq, n_ineq, once) < 0)
+ n_eq, n_ineq, 0) < 0)
return -1;
}
return 0;
}
+/* Add constraints that bound the values of the variable and parameter
+ * coefficients of the schedule.
+ *
+ * The maximal value of the coefficients is defined by the option
+ * 'schedule_max_coefficient'.
+ */
+static int add_bound_coefficient_constraints(isl_ctx *ctx,
+ struct isl_sched_graph *graph)
+{
+ int i, j, k;
+ int max_coefficient;
+ int total;
+
+ max_coefficient = ctx->opt->schedule_max_coefficient;
+
+ if (max_coefficient == -1)
+ return 0;
+
+ total = isl_basic_set_total_dim(graph->lp);
+
+ for (i = 0; i < graph->n; ++i) {
+ struct isl_sched_node *node = &graph->node[i];
+ for (j = 0; j < 2 * node->nparam + 2 * node->nvar; ++j) {
+ int dim;
+ k = isl_basic_set_alloc_inequality(graph->lp);
+ if (k < 0)
+ return -1;
+ dim = 1 + node->start + 1 + j;
+ isl_seq_clr(graph->lp->ineq[k], 1 + total);
+ isl_int_set_si(graph->lp->ineq[k][dim], -1);
+ isl_int_set_si(graph->lp->ineq[k][0], max_coefficient);
+ }
+ }
+
+ return 0;
+}
+
/* Construct an ILP problem for finding schedule coefficients
* that result in non-negative, but small dependence distances
* over all dependences.
int param_pos;
int n_eq, n_ineq;
int max_constant_term;
+ int max_coefficient;
max_constant_term = ctx->opt->schedule_max_constant_term;
+ max_coefficient = ctx->opt->schedule_max_coefficient;
parametric = ctx->opt->schedule_parametric;
nparam = isl_space_dim(graph->node[0].dim, isl_dim_param);
total += 1 + 2 * (node->nparam + node->nvar);
}
- if (count_constraints(graph, &n_eq, &n_ineq, 0) < 0)
+ if (count_constraints(graph, &n_eq, &n_ineq) < 0)
return -1;
dim = isl_space_set_alloc(ctx, 0, total);
n_eq += 2 + parametric + force_zero;
if (max_constant_term != -1)
n_ineq += graph->n;
+ if (max_coefficient != -1)
+ for (i = 0; i < graph->n; ++i)
+ n_ineq += 2 * graph->node[i].nparam +
+ 2 * graph->node[i].nvar;
graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
isl_int_set_si(graph->lp->ineq[k][0], max_constant_term);
}
+ if (add_bound_coefficient_constraints(ctx, graph) < 0)
+ return -1;
if (add_all_validity_constraints(graph) < 0)
return -1;
if (add_all_proximity_constraints(graph) < 0)
graph->n_band++;
}
-/* Topologically sort statements mapped to same schedule iteration
+/* Topologically sort statements mapped to the same schedule iteration
* and add a row to the schedule corresponding to this order.
*/
static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph)
return node->scc >= scc;
}
-static int edge_src_scc_exactly(struct isl_sched_edge *edge, int scc)
+static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
{
- return edge->src->scc == scc;
+ return edge->src->scc == scc && edge->dst->scc == scc;
}
static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
return 0;
}
-/* Add constraints to graph->lp that force all dependence
- * to be respected and attempt to carry it.
+/* Add constraints to graph->lp that force all validity dependences
+ * to be respected and attempt to carry them.
*/
static int add_all_constraints(struct isl_sched_graph *graph)
{
pos = 0;
for (i = 0; i < graph->n_edge; ++i) {
struct isl_sched_edge *edge= &graph->edge[i];
+
+ if (!edge->validity)
+ continue;
+
for (j = 0; j < edge->map->n; ++j) {
isl_basic_map *bmap;
isl_map *map;
/* Count the number of equality and inequality constraints
* that will be added to the carry_lp problem.
- * If once is set, then we count
- * each edge exactly once. Otherwise, we count as follows
- * validity -> 1 (>= 0)
- * validity+proximity -> 2 (>= 0 and upper bound)
- * proximity -> 2 (lower and upper bound)
+ * We count each edge exactly once.
*/
static int count_all_constraints(struct isl_sched_graph *graph,
- int *n_eq, int *n_ineq, int once)
+ int *n_eq, int *n_ineq)
{
int i, j;
map = isl_map_from_basic_map(bmap);
if (count_map_constraints(graph, edge, map,
- n_eq, n_ineq, once) < 0)
+ n_eq, n_ineq, 1) < 0)
return -1;
}
}
* - positive and negative parts of c_i_n (if parametric)
* - positive and negative parts of c_i_x
*
- * The constraints are those from the edges plus three equalities
+ * The constraints are those from the (validity) edges plus three equalities
* to express the sums and n_edge inequalities to express e_i <= 1.
*/
static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
total += 1 + 2 * (node->nparam + node->nvar);
}
- if (count_all_constraints(graph, &n_eq, &n_ineq, 1) < 0)
+ if (count_all_constraints(graph, &n_eq, &n_ineq) < 0)
return -1;
dim = isl_space_set_alloc(ctx, 0, total);
return 0;
}
-/* If the schedule_split_parallel option is set and if the linear
- * parts of the scheduling rows for all nodes in the graphs are the same,
- * then split off the constant term from the linear part.
+/* If the schedule_split_scaled option is set and if the linear
+ * parts of the scheduling rows for all nodes in the graphs have
+ * non-trivial common divisor, then split off the constant term
+ * from the linear part.
* The constant term is then placed in a separate band and
- * the linear part is simplified.
+ * the linear part is reduced.
*/
-static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
+static int split_scaled(isl_ctx *ctx, struct isl_sched_graph *graph)
{
int i;
- int equal = 1;
- int row, cols;
- struct isl_sched_node *node0;
+ int row;
+ isl_int gcd, gcd_i;
- if (!ctx->opt->schedule_split_parallel)
+ if (!ctx->opt->schedule_split_scaled)
return 0;
if (graph->n <= 1)
return 0;
- node0 = &graph->node[0];
- row = isl_mat_rows(node0->sched) - 1;
- cols = isl_mat_cols(node0->sched);
- for (i = 1; i < graph->n; ++i) {
+ isl_int_init(gcd);
+ isl_int_init(gcd_i);
+
+ isl_int_set_si(gcd, 0);
+
+ row = isl_mat_rows(graph->node[0].sched) - 1;
+
+ for (i = 0; i < graph->n; ++i) {
struct isl_sched_node *node = &graph->node[i];
+ int cols = isl_mat_cols(node->sched);
- if (isl_mat_cols(node->sched) != cols)
- return 0;
- if (!isl_seq_eq(node0->sched->row[row] + 1,
- node->sched->row[row] + 1, cols - 1))
- return 0;
- if (equal &&
- isl_int_ne(node0->sched->row[row][0],
- node->sched->row[row][0]))
- equal = 0;
+ isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
+ isl_int_gcd(gcd, gcd, gcd_i);
}
- if (equal)
+
+ isl_int_clear(gcd_i);
+
+ if (isl_int_cmp_si(gcd, 1) <= 0) {
+ isl_int_clear(gcd);
return 0;
+ }
next_band(graph);
node->sched_map = NULL;
node->sched = isl_mat_add_zero_rows(node->sched, 1);
if (!node->sched)
- return -1;
- isl_int_set(node->sched->row[row + 1][0],
- node->sched->row[row][0]);
- isl_int_set_si(node->sched->row[row][0], 0);
- node->sched = isl_mat_normalize_row(node->sched, row);
+ goto error;
+ isl_int_fdiv_r(node->sched->row[row + 1][0],
+ node->sched->row[row][0], gcd);
+ isl_int_fdiv_q(node->sched->row[row][0],
+ node->sched->row[row][0], gcd);
+ isl_int_mul(node->sched->row[row][0],
+ node->sched->row[row][0], gcd);
+ node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
if (!node->sched)
- return -1;
+ goto error;
node->band[graph->n_total_row] = graph->n_band;
}
graph->n_total_row++;
+ isl_int_clear(gcd);
return 0;
+error:
+ isl_int_clear(gcd);
+ return -1;
}
/* Construct a schedule row for each node such that as many dependences
if (update_schedule(graph, sol, 0, 0) < 0)
return -1;
- if (split_parallel(ctx, graph) < 0)
+ if (split_scaled(ctx, graph) < 0)
return -1;
return compute_next_band(ctx, graph);
return sort_statements(ctx, graph);
}
+/* Add a row to the schedules that separates the SCCs and move
+ * to the next band.
+ */
+static int split_on_scc(struct isl_sched_graph *graph)
+{
+ int i;
+
+ for (i = 0; i < graph->n; ++i) {
+ struct isl_sched_node *node = &graph->node[i];
+ int row = isl_mat_rows(node->sched);
+
+ isl_map_free(node->sched_map);
+ node->sched_map = NULL;
+ node->sched = isl_mat_add_zero_rows(node->sched, 1);
+ node->sched = isl_mat_set_element_si(node->sched, row, 0,
+ node->scc);
+ if (!node->sched)
+ return -1;
+ node->band[graph->n_total_row] = graph->n_band;
+ }
+
+ graph->n_total_row++;
+ next_band(graph);
+
+ return 0;
+}
+
/* Compute a schedule for each component (identified by node->scc)
* of the dependence graph separately and then combine the results.
+ * Depending on the setting of schedule_fuse, a component may be
+ * either weakly or strongly connected.
*
* The band_id is adjusted such that each component has a separate id.
* Note that the band_id may have already been set to a value different
int n_total_row, orig_total_row;
int n_band, orig_band;
+ if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN)
+ split_on_scc(graph);
+
n_total_row = 0;
orig_total_row = graph->n_total_row;
n_band = 0;
n++;
n_edge = 0;
for (i = 0; i < graph->n_edge; ++i)
- if (graph->edge[i].src->scc == wcc)
+ if (graph->edge[i].src->scc == wcc &&
+ graph->edge[i].dst->scc == wcc)
n_edge++;
if (compute_sub_schedule(ctx, graph, n, n_edge,
&node_scc_exactly,
- &edge_src_scc_exactly, wcc, 1) < 0)
+ &edge_scc_exactly, wcc, 1) < 0)
return -1;
if (graph->n_total_row > n_total_row)
n_total_row = graph->n_total_row;
/* Compute a schedule for the given dependence graph.
* We first check if the graph is connected (through validity dependences)
* and, if not, compute a schedule for each component separately.
+ * If schedule_fuse is set to minimal fusion, then we check for strongly
+ * connected components instead and compute a separate schedule for
+ * each such strongly connected component.
*/
static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
{
- if (detect_wccs(graph) < 0)
- return -1;
+ if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN) {
+ if (detect_sccs(graph) < 0)
+ return -1;
+ } else {
+ if (detect_wccs(graph) < 0)
+ return -1;
+ }
if (graph->scc > 1)
return compute_component_schedule(ctx, graph);