X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=isl_schedule.c;h=0807fcebf45dc120df9a5433c65ef89a961f74ba;hb=d07d9a44cffd76e6276dea1edccb0d8381deee18;hp=b279281786dd4bb16703c92ade55c7051b217fc8;hpb=dbb61f82cfb84f18d8df5d2aa114fd114d559d39;p=platform%2Fupstream%2Fisl.git diff --git a/isl_schedule.c b/isl_schedule.c index b279281..0807fce 100644 --- a/isl_schedule.c +++ b/isl_schedule.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * The scheduling algorithm implemented in this file was inspired by @@ -1041,7 +1042,8 @@ static int node_update_cmap(struct isl_sched_node *node) /* Count the number of equality and inequality constraints * that will be added for the given map. - * If once is set, then we count + * If carry is set, then we are counting the number of (validity) + * constraints that will be added in setup_carry_lp and we count * each edge exactly once. Otherwise, we count as follows * validity -> 1 (>= 0) * validity+proximity -> 2 (>= 0 and upper bound) @@ -1049,10 +1051,15 @@ static int node_update_cmap(struct isl_sched_node *node) */ static int count_map_constraints(struct isl_sched_graph *graph, struct isl_sched_edge *edge, __isl_take isl_map *map, - int *n_eq, int *n_ineq, int once) + int *n_eq, int *n_ineq, int carry) { isl_basic_set *coef; - int f = once ? 1 : edge->proximity ? 2 : 1; + int f = carry ? 1 : edge->proximity ? 2 : 1; + + if (carry && !edge->validity) { + isl_map_free(map); + return 0; + } if (edge->src == edge->dst) coef = intra_coefficients(graph, map); @@ -1069,14 +1076,13 @@ static int count_map_constraints(struct isl_sched_graph *graph, /* Count the number of equality and inequality constraints * that will be added to the main lp problem. - * If once is set, then we count - * each edge exactly once. Otherwise, we count as follows + * We count as follows * validity -> 1 (>= 0) * validity+proximity -> 2 (>= 0 and upper bound) * proximity -> 2 (lower and upper bound) */ static int count_constraints(struct isl_sched_graph *graph, - int *n_eq, int *n_ineq, int once) + int *n_eq, int *n_ineq) { int i; @@ -1086,13 +1092,50 @@ static int count_constraints(struct isl_sched_graph *graph, isl_map *map = isl_map_copy(edge->map); if (count_map_constraints(graph, edge, map, - n_eq, n_ineq, once) < 0) + n_eq, n_ineq, 0) < 0) return -1; } return 0; } +/* Add constraints that bound the values of the variable and parameter + * coefficients of the schedule. + * + * The maximal value of the coefficients is defined by the option + * 'schedule_max_coefficient'. + */ +static int add_bound_coefficient_constraints(isl_ctx *ctx, + struct isl_sched_graph *graph) +{ + int i, j, k; + int max_coefficient; + int total; + + max_coefficient = ctx->opt->schedule_max_coefficient; + + if (max_coefficient == -1) + return 0; + + total = isl_basic_set_total_dim(graph->lp); + + for (i = 0; i < graph->n; ++i) { + struct isl_sched_node *node = &graph->node[i]; + for (j = 0; j < 2 * node->nparam + 2 * node->nvar; ++j) { + int dim; + k = isl_basic_set_alloc_inequality(graph->lp); + if (k < 0) + return -1; + dim = 1 + node->start + 1 + j; + isl_seq_clr(graph->lp->ineq[k], 1 + total); + isl_int_set_si(graph->lp->ineq[k][dim], -1); + isl_int_set_si(graph->lp->ineq[k][0], max_coefficient); + } + } + + return 0; +} + /* Construct an ILP problem for finding schedule coefficients * that result in non-negative, but small dependence distances * over all dependences. @@ -1138,6 +1181,11 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph, int parametric; int param_pos; int n_eq, n_ineq; + int max_constant_term; + int max_coefficient; + + max_constant_term = ctx->opt->schedule_max_constant_term; + max_coefficient = ctx->opt->schedule_max_coefficient; parametric = ctx->opt->schedule_parametric; nparam = isl_space_dim(graph->node[0].dim, isl_dim_param); @@ -1151,12 +1199,19 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph, total += 1 + 2 * (node->nparam + node->nvar); } - if (count_constraints(graph, &n_eq, &n_ineq, 0) < 0) + if (count_constraints(graph, &n_eq, &n_ineq) < 0) return -1; dim = isl_space_set_alloc(ctx, 0, total); isl_basic_set_free(graph->lp); n_eq += 2 + parametric + force_zero; + if (max_constant_term != -1) + n_ineq += graph->n; + if (max_coefficient != -1) + for (i = 0; i < graph->n; ++i) + n_ineq += 2 * graph->node[i].nparam + + 2 * graph->node[i].nvar; + graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq); k = isl_basic_set_alloc_equality(graph->lp); @@ -1203,6 +1258,19 @@ static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph, isl_int_set_si(graph->lp->eq[k][pos + j], 1); } + if (max_constant_term != -1) + for (i = 0; i < graph->n; ++i) { + struct isl_sched_node *node = &graph->node[i]; + k = isl_basic_set_alloc_inequality(graph->lp); + if (k < 0) + return -1; + isl_seq_clr(graph->lp->ineq[k], 1 + total); + isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1); + isl_int_set_si(graph->lp->ineq[k][0], max_constant_term); + } + + if (add_bound_coefficient_constraints(ctx, graph) < 0) + return -1; if (add_all_validity_constraints(graph) < 0) return -1; if (add_all_proximity_constraints(graph) < 0) @@ -1488,7 +1556,7 @@ static void next_band(struct isl_sched_graph *graph) graph->n_band++; } -/* Topologically sort statements mapped to same schedule iteration +/* Topologically sort statements mapped to the same schedule iteration * and add a row to the schedule corresponding to this order. */ static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph) @@ -1624,6 +1692,9 @@ static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src, /* Copy non-empty edges that satisfy edge_pred from the src dependence graph * to the dst dependence graph. + * If the source or destination node of the edge is not in the destination + * graph, then it must be a backward proximity edge and it should simply + * be ignored. */ static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst, struct isl_sched_graph *src, @@ -1635,6 +1706,7 @@ static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst, for (i = 0; i < src->n_edge; ++i) { struct isl_sched_edge *edge = &src->edge[i]; isl_map *map; + struct isl_sched_node *dst_src, *dst_dst; if (!edge_pred(edge, data)) continue; @@ -1642,12 +1714,19 @@ static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst, if (isl_map_plain_is_empty(edge->map)) continue; + dst_src = graph_find_node(ctx, dst, edge->src->dim); + dst_dst = graph_find_node(ctx, dst, edge->dst->dim); + if (!dst_src || !dst_dst) { + if (edge->validity) + isl_die(ctx, isl_error_internal, + "backward validity edge", return -1); + continue; + } + map = isl_map_copy(edge->map); - dst->edge[dst->n_edge].src = - graph_find_node(ctx, dst, edge->src->dim); - dst->edge[dst->n_edge].dst = - graph_find_node(ctx, dst, edge->dst->dim); + dst->edge[dst->n_edge].src = dst_src; + dst->edge[dst->n_edge].dst = dst_dst; dst->edge[dst->n_edge].map = map; dst->edge[dst->n_edge].validity = edge->validity; dst->edge[dst->n_edge].proximity = edge->proximity; @@ -1776,9 +1855,9 @@ static int node_scc_at_least(struct isl_sched_node *node, int scc) return node->scc >= scc; } -static int edge_src_scc_exactly(struct isl_sched_edge *edge, int scc) +static int edge_scc_exactly(struct isl_sched_edge *edge, int scc) { - return edge->src->scc == scc; + return edge->src->scc == scc && edge->dst->scc == scc; } static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc) @@ -1828,6 +1907,9 @@ static int pad_schedule(struct isl_sched_graph *graph) * It would be possible to reuse them as the first rows in the next * band, but recomputing them may result in better rows as we are looking * at a smaller part of the dependence graph. + * compute_split_schedule is only called when no zero-distance schedule row + * could be found on the entire graph, so we wark the splitting row as + * non zero-distance. * * The band_id of the second group is set to n, where n is the number * of nodes in the first group. This ensures that the band_ids over @@ -1868,6 +1950,7 @@ static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph) node->sched = isl_mat_set_element_si(node->sched, row, j, 0); node->band[graph->n_total_row] = graph->n_band; + node->zero[graph->n_total_row] = 0; } e1 = e2 = 0; @@ -2032,8 +2115,8 @@ static int add_inter_constraints(struct isl_sched_graph *graph, return 0; } -/* Add constraints to graph->lp that force all dependence - * to be respected and attempt to carry it. +/* Add constraints to graph->lp that force all validity dependences + * to be respected and attempt to carry them. */ static int add_all_constraints(struct isl_sched_graph *graph) { @@ -2043,6 +2126,10 @@ static int add_all_constraints(struct isl_sched_graph *graph) pos = 0; for (i = 0; i < graph->n_edge; ++i) { struct isl_sched_edge *edge= &graph->edge[i]; + + if (!edge->validity) + continue; + for (j = 0; j < edge->map->n; ++j) { isl_basic_map *bmap; isl_map *map; @@ -2065,14 +2152,10 @@ static int add_all_constraints(struct isl_sched_graph *graph) /* Count the number of equality and inequality constraints * that will be added to the carry_lp problem. - * If once is set, then we count - * each edge exactly once. Otherwise, we count as follows - * validity -> 1 (>= 0) - * validity+proximity -> 2 (>= 0 and upper bound) - * proximity -> 2 (lower and upper bound) + * We count each edge exactly once. */ static int count_all_constraints(struct isl_sched_graph *graph, - int *n_eq, int *n_ineq, int once) + int *n_eq, int *n_ineq) { int i, j; @@ -2087,7 +2170,7 @@ static int count_all_constraints(struct isl_sched_graph *graph, map = isl_map_from_basic_map(bmap); if (count_map_constraints(graph, edge, map, - n_eq, n_ineq, once) < 0) + n_eq, n_ineq, 1) < 0) return -1; } } @@ -2124,7 +2207,7 @@ static int count_all_constraints(struct isl_sched_graph *graph, * - positive and negative parts of c_i_n (if parametric) * - positive and negative parts of c_i_x * - * The constraints are those from the edges plus three equalities + * The constraints are those from the (validity) edges plus three equalities * to express the sums and n_edge inequalities to express e_i <= 1. */ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph) @@ -2147,7 +2230,7 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph) total += 1 + 2 * (node->nparam + node->nvar); } - if (count_all_constraints(graph, &n_eq, &n_ineq, 1) < 0) + if (count_all_constraints(graph, &n_eq, &n_ineq) < 0) return -1; dim = isl_space_set_alloc(ctx, 0, total); @@ -2206,40 +2289,45 @@ static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph) return 0; } -/* If the schedule_split_parallel option is set and if the linear - * parts of the scheduling rows for all nodes in the graphs are the same, - * then split off the constant term from the linear part. +/* If the schedule_split_scaled option is set and if the linear + * parts of the scheduling rows for all nodes in the graphs have + * non-trivial common divisor, then split off the constant term + * from the linear part. * The constant term is then placed in a separate band and - * the linear part is simplified. + * the linear part is reduced. */ -static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph) +static int split_scaled(isl_ctx *ctx, struct isl_sched_graph *graph) { int i; - int equal = 1; - int row, cols; - struct isl_sched_node *node0; + int row; + isl_int gcd, gcd_i; - if (!ctx->opt->schedule_split_parallel) + if (!ctx->opt->schedule_split_scaled) return 0; if (graph->n <= 1) return 0; - node0 = &graph->node[0]; - row = isl_mat_rows(node0->sched) - 1; - cols = isl_mat_cols(node0->sched); - for (i = 1; i < graph->n; ++i) { + isl_int_init(gcd); + isl_int_init(gcd_i); + + isl_int_set_si(gcd, 0); + + row = isl_mat_rows(graph->node[0].sched) - 1; + + for (i = 0; i < graph->n; ++i) { struct isl_sched_node *node = &graph->node[i]; + int cols = isl_mat_cols(node->sched); - if (!isl_seq_eq(node0->sched->row[row] + 1, - node->sched->row[row] + 1, cols - 1)) - return 0; - if (equal && - isl_int_ne(node0->sched->row[row][0], - node->sched->row[row][0])) - equal = 0; + isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i); + isl_int_gcd(gcd, gcd, gcd_i); } - if (equal) + + isl_int_clear(gcd_i); + + if (isl_int_cmp_si(gcd, 1) <= 0) { + isl_int_clear(gcd); return 0; + } next_band(graph); @@ -2250,19 +2338,26 @@ static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph) node->sched_map = NULL; node->sched = isl_mat_add_zero_rows(node->sched, 1); if (!node->sched) - return -1; - isl_int_set(node->sched->row[row + 1][0], - node->sched->row[row][0]); - isl_int_set_si(node->sched->row[row][0], 0); - node->sched = isl_mat_normalize_row(node->sched, row); + goto error; + isl_int_fdiv_r(node->sched->row[row + 1][0], + node->sched->row[row][0], gcd); + isl_int_fdiv_q(node->sched->row[row][0], + node->sched->row[row][0], gcd); + isl_int_mul(node->sched->row[row][0], + node->sched->row[row][0], gcd); + node->sched = isl_mat_scale_down_row(node->sched, row, gcd); if (!node->sched) - return -1; + goto error; node->band[graph->n_total_row] = graph->n_band; } graph->n_total_row++; + isl_int_clear(gcd); return 0; +error: + isl_int_clear(gcd); + return -1; } /* Construct a schedule row for each node such that as many dependences @@ -2302,12 +2397,53 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph) if (update_schedule(graph, sol, 0, 0) < 0) return -1; - if (split_parallel(ctx, graph) < 0) + if (split_scaled(ctx, graph) < 0) return -1; return compute_next_band(ctx, graph); } +/* Are there any validity edges in the graph? + */ +static int has_validity_edges(struct isl_sched_graph *graph) +{ + int i; + + for (i = 0; i < graph->n_edge; ++i) + if (graph->edge[i].validity) + return 1; + + return 0; +} + +/* Should we apply a Feautrier step? + * That is, did the user request the Feautrier algorithm and are + * there any validity dependences (left)? + */ +static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph) +{ + if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER) + return 0; + + return has_validity_edges(graph); +} + +/* Compute a schedule for a connected dependence graph using Feautrier's + * multi-dimensional scheduling algorithm. + * The original algorithm is described in [1]. + * The main idea is to minimize the number of scheduling dimensions, by + * trying to satisfy as many dependences as possible per scheduling dimension. + * + * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling + * Problem, Part II: Multi-Dimensional Time. + * In Intl. Journal of Parallel Programming, 1992. + */ +static int compute_schedule_wcc_feautrier(isl_ctx *ctx, + struct isl_sched_graph *graph) +{ + return carry_dependences(ctx, graph); +} + /* Compute a schedule for a connected dependence graph. * We try to find a sequence of as many schedule rows as possible that result * in non-negative dependence distances (independent of the previous rows @@ -2320,6 +2456,10 @@ static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph) * - try to carry as many dependences as possible and continue with the next * band * + * If Feautrier's algorithm is selected, we first recursively try to satisfy + * as many validity dependences as possible. When all validity dependences + * are satisfied we extend the schedule to a full-dimensional schedule. + * * If we manage to complete the schedule, we finish off by topologically * sorting the statements based on the remaining dependences. * @@ -2339,6 +2479,9 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph) if (compute_maxvar(graph) < 0) return -1; + if (need_feautrier_step(ctx, graph)) + return compute_schedule_wcc_feautrier(ctx, graph); + if (ctx->opt->schedule_outer_zero_distance) force_zero = 1; @@ -2374,8 +2517,37 @@ static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph) return sort_statements(ctx, graph); } +/* Add a row to the schedules that separates the SCCs and move + * to the next band. + */ +static int split_on_scc(struct isl_sched_graph *graph) +{ + int i; + + for (i = 0; i < graph->n; ++i) { + struct isl_sched_node *node = &graph->node[i]; + int row = isl_mat_rows(node->sched); + + isl_map_free(node->sched_map); + node->sched_map = NULL; + node->sched = isl_mat_add_zero_rows(node->sched, 1); + node->sched = isl_mat_set_element_si(node->sched, row, 0, + node->scc); + if (!node->sched) + return -1; + node->band[graph->n_total_row] = graph->n_band; + } + + graph->n_total_row++; + next_band(graph); + + return 0; +} + /* Compute a schedule for each component (identified by node->scc) * of the dependence graph separately and then combine the results. + * Depending on the setting of schedule_fuse, a component may be + * either weakly or strongly connected. * * The band_id is adjusted such that each component has a separate id. * Note that the band_id may have already been set to a value different @@ -2389,6 +2561,9 @@ static int compute_component_schedule(isl_ctx *ctx, int n_total_row, orig_total_row; int n_band, orig_band; + if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN) + split_on_scc(graph); + n_total_row = 0; orig_total_row = graph->n_total_row; n_band = 0; @@ -2402,12 +2577,13 @@ static int compute_component_schedule(isl_ctx *ctx, n++; n_edge = 0; for (i = 0; i < graph->n_edge; ++i) - if (graph->edge[i].src->scc == wcc) + if (graph->edge[i].src->scc == wcc && + graph->edge[i].dst->scc == wcc) n_edge++; if (compute_sub_schedule(ctx, graph, n, n_edge, &node_scc_exactly, - &edge_src_scc_exactly, wcc, 1) < 0) + &edge_scc_exactly, wcc, 1) < 0) return -1; if (graph->n_total_row > n_total_row) n_total_row = graph->n_total_row; @@ -2425,12 +2601,20 @@ static int compute_component_schedule(isl_ctx *ctx, /* Compute a schedule for the given dependence graph. * We first check if the graph is connected (through validity dependences) - * and if so compute a schedule for each component separately. + * and, if not, compute a schedule for each component separately. + * If schedule_fuse is set to minimal fusion, then we check for strongly + * connected components instead and compute a separate schedule for + * each such strongly connected component. */ static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph) { - if (detect_wccs(graph) < 0) - return -1; + if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN) { + if (detect_sccs(graph) < 0) + return -1; + } else { + if (detect_wccs(graph) < 0) + return -1; + } if (graph->scc > 1) return compute_component_schedule(ctx, graph); @@ -2439,8 +2623,12 @@ static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph) } /* Compute a schedule for the given union of domains that respects - * all the validity dependences and tries to minimize the dependence - * distances over the proximity dependences. + * all the validity dependences. + * If the default isl scheduling algorithm is used, it tries to minimize + * the dependence distances over the proximity dependences. + * If Feautrier's scheduling algorithm is used, the proximity dependence + * distances are only minimized during the extension to a full-dimensional + * schedule. */ __isl_give isl_schedule *isl_union_set_compute_schedule( __isl_take isl_union_set *domain,