X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=isl_ast_codegen.c;h=5806c663b6bbfcaab93cd887bd2dc83a91d9a565;hb=63fb8a7f484648c3caa25351c8c94ac2395ec563;hp=7679760c6a9bc199c3e8f5dd31e9ab4e5556f957;hpb=14beb7bfa687199d1a515889fbc2459d5f6d235b;p=platform%2Fupstream%2Fisl.git diff --git a/isl_ast_codegen.c b/isl_ast_codegen.c index 7679760..5806c66 100644 --- a/isl_ast_codegen.c +++ b/isl_ast_codegen.c @@ -7,6 +7,7 @@ * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France */ +#include #include #include #include @@ -17,7 +18,6 @@ #include #include #include -#include /* Add the constraint to the list that "user" points to, if it is not * a div constraint. @@ -108,6 +108,7 @@ static int generate_non_single_valued(__isl_take isl_map *executed, identity = isl_set_identity(isl_map_range(isl_map_copy(executed))); executed = isl_map_domain_product(executed, identity); + build = isl_ast_build_set_single_valued(build, 1); list = generate_code(isl_union_map_from_map(executed), build, 1); @@ -151,9 +152,20 @@ static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft, * in generate_non_single_valued. * Note that the inverse schedule being single-valued may depend * on constraints that are only available in the original context - * domain specified by the user. If the bare inverse schedule - * is not single-valued, we double-check after introducing the constraints - * from data->build->domain. + * domain specified by the user. We therefore first introduce + * the constraints from data->build->domain. + * On the other hand, we only perform the test after having taken the gist + * of the domain as the resulting map is the one from which the call + * expression is constructed. Using this map to construct the call + * expression usually yields simpler results. + * Because we perform the single-valuedness test on the gisted map, + * we may in rare cases fail to recognize that the inverse schedule + * is single-valued. This becomes problematic if this happens + * from the recursive call through generate_non_single_valued + * as we would then end up in an infinite recursion. + * We therefore check if we are inside a call to generate_non_single_valued + * and revert to the ungisted map if the gisted map turns out not to be + * single-valued. * * Otherwise, we generate a call expression for the single executed * domain element and put a guard around it based on the (simplified) @@ -171,22 +183,22 @@ static int generate_domain(__isl_take isl_map *executed, void *user) isl_map *map; int sv; - sv = isl_map_is_single_valued(executed); - if (sv < 0) - goto error; - if (!sv) { - map = isl_map_copy(executed); - map = isl_map_intersect_domain(map, + executed = isl_map_intersect_domain(executed, isl_set_copy(data->build->domain)); - sv = isl_map_is_single_valued(map); - isl_map_free(map); - } - if (!sv) - return generate_non_single_valued(executed, data); executed = isl_map_coalesce(executed); map = isl_map_copy(executed); map = isl_ast_build_compute_gist_map_domain(data->build, map); + sv = isl_map_is_single_valued(map); + if (sv < 0) + goto error; + if (!sv) { + isl_map_free(map); + if (data->build->single_valued) + map = isl_map_copy(executed); + else + return generate_non_single_valued(executed, data); + } guard = isl_map_domain(isl_map_copy(map)); guard = isl_set_coalesce(guard); guard = isl_ast_build_compute_gist(data->build, guard); @@ -201,6 +213,7 @@ static int generate_domain(__isl_take isl_map *executed, void *user) return 0; error: + isl_map_free(map); isl_map_free(executed); return -1; } @@ -269,24 +282,49 @@ error: data.list = NULL; return data.list; } -/* Eliminate the schedule dimension "pos" from "executed" and return - * the result. +/* Call the before_each_for callback, if requested by the user. */ -static __isl_give isl_union_map *eliminate(__isl_take isl_union_map *executed, - int pos, __isl_keep isl_ast_build *build) +static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node, + __isl_keep isl_ast_build *build) { - isl_space *space; - isl_map *elim; + isl_id *id; - space = isl_ast_build_get_space(build, 1); - space = isl_space_map_from_set(space); - elim = isl_map_identity(space); - elim = isl_map_eliminate(elim, isl_dim_in, pos, 1); + if (!node || !build) + return isl_ast_node_free(node); + if (!build->before_each_for) + return node; + id = build->before_each_for(build, build->before_each_for_user); + node = isl_ast_node_set_annotation(node, id); + return node; +} - executed = isl_union_map_apply_domain(executed, - isl_union_map_from_map(elim)); +/* Call the after_each_for callback, if requested by the user. + */ +static __isl_give isl_ast_graft *after_each_for(__isl_keep isl_ast_graft *graft, + __isl_keep isl_ast_build *build) +{ + if (!graft || !build) + return isl_ast_graft_free(graft); + if (!build->after_each_for) + return graft; + graft->node = build->after_each_for(graft->node, build, + build->after_each_for_user); + if (!graft->node) + return isl_ast_graft_free(graft); + return graft; +} - return executed; +/* Plug in all the know values of the current and outer dimensions + * in the domain of "executed". In principle, we only need to plug + * in the known value of the current dimension since the values of + * outer dimensions have been plugged in already. + * However, it turns out to be easier to just plug in all known values. + */ +static __isl_give isl_union_map *plug_in_values( + __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build) +{ + return isl_ast_build_substitute_values_union_map_domain(build, + executed); } /* Check if the constraint "c" is a lower bound on dimension "pos", @@ -306,13 +344,12 @@ static int constraint_type(isl_constraint *c, int pos) * to be sorted before the lower bounds on "depth", which in * turn are sorted before the upper bounds on "depth". */ -static int cmp_constraint(const void *a, const void *b, void *user) +static int cmp_constraint(__isl_keep isl_constraint *a, + __isl_keep isl_constraint *b, void *user) { int *depth = user; - isl_constraint * const *c1 = a; - isl_constraint * const *c2 = b; - int t1 = constraint_type(*c1, *depth); - int t2 = constraint_type(*c2, *depth); + int t1 = constraint_type(a, *depth); + int t2 = constraint_type(b, *depth); return t1 - t2; } @@ -406,21 +443,23 @@ static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain, return pa; } -/* Return a list of "n" lower bounds on dimension "pos" - * extracted from the "n" constraints starting at "constraint". - * If "n" is zero, then we extract a lower bound from "domain" instead. +/* Extract a lower bound on dimension "pos" from each constraint + * in "constraints" and return the list of lower bounds. + * If "constraints" has zero elements, then we extract a lower bound + * from "domain" instead. */ static __isl_give isl_pw_aff_list *lower_bounds( - __isl_keep isl_constraint **constraint, int n, int pos, + __isl_keep isl_constraint_list *constraints, int pos, __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { isl_ctx *ctx; isl_pw_aff_list *list; - int i; + int i, n; if (!build) return NULL; + n = isl_constraint_list_n_constraint(constraints); if (n == 0) { isl_pw_aff *pa; pa = exact_bound(domain, build, 0); @@ -432,26 +471,31 @@ static __isl_give isl_pw_aff_list *lower_bounds( for (i = 0; i < n; ++i) { isl_aff *aff; + isl_constraint *c; - aff = lower_bound(constraint[i], pos, build); + c = isl_constraint_list_get_constraint(constraints, i); + aff = lower_bound(c, pos, build); + isl_constraint_free(c); list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff)); } return list; } -/* Return a list of "n" upper bounds on dimension "pos" - * extracted from the "n" constraints starting at "constraint". - * If "n" is zero, then we extract an upper bound from "domain" instead. +/* Extract an upper bound on dimension "pos" from each constraint + * in "constraints" and return the list of upper bounds. + * If "constraints" has zero elements, then we extract an upper bound + * from "domain" instead. */ static __isl_give isl_pw_aff_list *upper_bounds( - __isl_keep isl_constraint **constraint, int n, int pos, + __isl_keep isl_constraint_list *constraints, int pos, __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { isl_ctx *ctx; isl_pw_aff_list *list; - int i; + int i, n; + n = isl_constraint_list_n_constraint(constraints); if (n == 0) { isl_pw_aff *pa; pa = exact_bound(domain, build, 1); @@ -463,8 +507,11 @@ static __isl_give isl_pw_aff_list *upper_bounds( for (i = 0; i < n; ++i) { isl_aff *aff; + isl_constraint *c; - aff = isl_constraint_get_bound(constraint[i], isl_dim_set, pos); + c = isl_constraint_list_get_constraint(constraints, i); + aff = isl_constraint_get_bound(c, isl_dim_set, pos); + isl_constraint_free(c); aff = isl_aff_floor(aff); list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff)); } @@ -596,26 +643,26 @@ static __isl_give isl_ast_graft *refine_degenerate( return graft; } -/* Return the intersection of the "n" constraints starting at "constraint" - * as a set. +/* Return the intersection of constraints in "list" as a set. */ -static __isl_give isl_set *intersect_constraints(isl_ctx *ctx, - __isl_keep isl_constraint **constraint, int n) +static __isl_give isl_set *intersect_constraints( + __isl_keep isl_constraint_list *list) { - int i; + int i, n; isl_basic_set *bset; + n = isl_constraint_list_n_constraint(list); if (n < 1) - isl_die(ctx, isl_error_internal, + isl_die(isl_constraint_list_get_ctx(list), isl_error_internal, "expecting at least one constraint", return NULL); bset = isl_basic_set_from_constraint( - isl_constraint_copy(constraint[0])); + isl_constraint_list_get_constraint(list, 0)); for (i = 1; i < n; ++i) { isl_basic_set *bset_i; bset_i = isl_basic_set_from_constraint( - isl_constraint_copy(constraint[i])); + isl_constraint_list_get_constraint(list, i)); bset = isl_basic_set_intersect(bset, bset_i); } @@ -865,6 +912,8 @@ static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build) isl_ctx *ctx; isl_ast_expr *inc; + if (!build) + return NULL; ctx = isl_ast_build_get_ctx(build); depth = isl_ast_build_get_depth(build); @@ -951,7 +1000,7 @@ static __isl_give isl_ast_graft *set_for_node_expressions( /* Update "graft" based on "bounds" and "domain" for the generic, * non-degenerate, case. * - * "constraints" contains the "n_lower" lower and "n_upper" upper bounds + * "c_lower" and "c_upper" contain the lower and upper bounds * that the loop node should express. * "domain" is the subset of the intersection of the constraints * for which some code is executed. @@ -981,7 +1030,8 @@ static __isl_give isl_ast_graft *set_for_node_expressions( */ static __isl_give isl_ast_graft *refine_generic_bounds( __isl_take isl_ast_graft *graft, - __isl_keep isl_constraint **constraint, int n_lower, int n_upper, + __isl_take isl_constraint_list *c_lower, + __isl_take isl_constraint_list *c_upper, __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { int depth; @@ -990,23 +1040,25 @@ static __isl_give isl_ast_graft *refine_generic_bounds( int use_list; isl_set *upper_set = NULL; isl_pw_aff_list *upper_list = NULL; + int n_lower, n_upper; - if (!graft || !build) - return isl_ast_graft_free(graft); + if (!graft || !c_lower || !c_upper || !build) + goto error; depth = isl_ast_build_get_depth(build); ctx = isl_ast_graft_get_ctx(graft); + n_lower = isl_constraint_list_n_constraint(c_lower); + n_upper = isl_constraint_list_n_constraint(c_upper); + use_list = use_upper_bound_list(ctx, n_upper, domain, depth); - lower = lower_bounds(constraint, n_lower, depth, domain, build); + lower = lower_bounds(c_lower, depth, domain, build); if (use_list) - upper_list = upper_bounds(constraint + n_lower, n_upper, depth, - domain, build); + upper_list = upper_bounds(c_upper, depth, domain, build); else if (n_upper > 0) - upper_set = intersect_constraints(ctx, constraint + n_lower, - n_upper); + upper_set = intersect_constraints(c_upper); else upper_set = isl_set_universe(isl_set_get_space(domain)); @@ -1023,26 +1075,46 @@ static __isl_give isl_ast_graft *refine_generic_bounds( isl_pw_aff_list_free(lower); isl_pw_aff_list_free(upper_list); isl_set_free(upper_set); + isl_constraint_list_free(c_lower); + isl_constraint_list_free(c_upper); return graft; +error: + isl_constraint_list_free(c_lower); + isl_constraint_list_free(c_upper); + return isl_ast_graft_free(graft); } -/* How many constraints in the "constraint" array, starting at position "first" - * are of the give type? "n" represents the total number of elements - * in the array. +/* Internal data structure used inside count_constraints to keep + * track of the number of constraints that are independent of dimension "pos", + * the lower bounds in "pos" and the upper bounds in "pos". + */ +struct isl_ast_count_constraints_data { + int pos; + + int n_indep; + int n_lower; + int n_upper; +}; + +/* Increment data->n_indep, data->lower or data->upper depending + * on whether "c" is independenct of dimensions data->pos, + * a lower bound or an upper bound. */ -static int count_constraints(isl_constraint **constraint, int n, int first, - int pos, int type) +static int count_constraints(__isl_take isl_constraint *c, void *user) { - int i; + struct isl_ast_count_constraints_data *data = user; - constraint += first; + if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos)) + data->n_lower++; + else if (isl_constraint_is_upper_bound(c, isl_dim_set, data->pos)) + data->n_upper++; + else + data->n_indep++; - for (i = 0; first + i < n; i++) - if (constraint_type(constraint[i], pos) != type) - break; + isl_constraint_free(c); - return i; + return 0; } /* Update "graft" based on "bounds" and "domain" for the generic, @@ -1064,42 +1136,51 @@ static int count_constraints(isl_constraint **constraint, int n, int first, * where this guard is enforced. */ static __isl_give isl_ast_graft *refine_generic_split( - __isl_take isl_ast_graft *graft, __isl_keep isl_constraint_list *list, + __isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list, __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { - isl_ctx *ctx; isl_ast_build *for_build; isl_set *guard; - int n_indep, n_lower, n_upper; - int pos; - int n; + struct isl_ast_count_constraints_data data; + isl_constraint_list *lower; + isl_constraint_list *upper; if (!list) return isl_ast_graft_free(graft); - pos = isl_ast_build_get_depth(build); + data.pos = isl_ast_build_get_depth(build); - if (isl_sort(list->p, list->n, sizeof(isl_constraint *), - &cmp_constraint, &pos) < 0) + list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos); + if (!list) return isl_ast_graft_free(graft); - n = list->n; - n_indep = count_constraints(list->p, n, 0, pos, 0); - n_lower = count_constraints(list->p, n, n_indep, pos, 1); - n_upper = count_constraints(list->p, n, n_indep + n_lower, pos, 2); + data.n_indep = data.n_lower = data.n_upper = 0; + if (isl_constraint_list_foreach(list, &count_constraints, &data) < 0) { + isl_constraint_list_free(list); + return isl_ast_graft_free(graft); + } - if (n_indep == 0) - return refine_generic_bounds(graft, - list->p + n_indep, n_lower, n_upper, domain, build); + lower = isl_constraint_list_copy(list); + lower = isl_constraint_list_drop(lower, 0, data.n_indep); + upper = isl_constraint_list_copy(lower); + lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper); + upper = isl_constraint_list_drop(upper, 0, data.n_lower); - ctx = isl_ast_graft_get_ctx(graft); - guard = intersect_constraints(ctx, list->p, n_indep); + if (data.n_indep == 0) { + isl_constraint_list_free(list); + return refine_generic_bounds(graft, lower, upper, + domain, build); + } + + list = isl_constraint_list_drop(list, data.n_indep, + data.n_lower + data.n_upper); + guard = intersect_constraints(list); + isl_constraint_list_free(list); for_build = isl_ast_build_copy(build); for_build = isl_ast_build_restrict_pending(for_build, isl_set_copy(guard)); - graft = refine_generic_bounds(graft, - list->p + n_indep, n_lower, n_upper, domain, for_build); + graft = refine_generic_bounds(graft, lower, upper, domain, for_build); isl_ast_build_free(for_build); graft = isl_ast_graft_add_guard(graft, guard, build); @@ -1107,6 +1188,28 @@ static __isl_give isl_ast_graft *refine_generic_split( return graft; } +/* Add the guard implied by the current stride constraint (if any), + * but not (necessarily) enforced by the generated AST to "graft". + */ +static __isl_give isl_ast_graft *add_stride_guard( + __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build) +{ + int depth; + isl_set *dom; + + depth = isl_ast_build_get_depth(build); + if (!isl_ast_build_has_stride(build, depth)) + return graft; + + dom = isl_ast_build_get_stride_constraint(build); + dom = isl_set_eliminate(dom, isl_dim_set, depth, 1); + dom = isl_ast_build_compute_gist(build, dom); + + graft = isl_ast_graft_add_guard(graft, dom, build); + + return graft; +} + /* Update "graft" based on "bounds" and "domain" for the generic, * non-degenerate, case. * @@ -1133,8 +1236,8 @@ static __isl_give isl_ast_graft *refine_generic( list = isl_constraint_list_from_basic_set(bounds); graft = refine_generic_split(graft, list, domain, build); + graft = add_stride_guard(graft, build); - isl_constraint_list_free(list); return graft; } @@ -1177,6 +1280,9 @@ static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build, * we performed separation with explicit bounds. * The very first step is then to copy these constraints to "bounds". * + * Since we may be calling before_each_for and after_each_for + * callbacks, we record the current inverse schedule in the build. + * * We consider three builds, * "build" is the one in which the current level is created, * "body_build" is the build in which the next level is created, @@ -1198,8 +1304,13 @@ static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build, * it can be printed as an assignment of the single value to the loop * "iterator". * - * If the current level is eliminated, we eliminate the current dimension - * from the inverse schedule to make sure no inner dimensions depend + * If the current level is eliminated, we explicitly plug in the value + * for the current level found by isl_ast_build_set_loop_bounds in the + * inverse schedule. This ensures that if we are working on a slice + * of the domain based on information available in the inverse schedule + * and the build domain, that then this information is also reflected + * in the inverse schedule. This operation also eliminates the current + * dimension from the inverse schedule making sure no inner dimensions depend * on the current dimension. Otherwise, we create a for node, marking * it degenerate if appropriate. The initial for node is still incomplete * and will be completed in either refine_degenerate or refine_generic. @@ -1230,6 +1341,7 @@ static __isl_give isl_ast_graft *create_node_scaled( domain = isl_set_detect_equalities(domain); hull = isl_set_unshifted_simple_hull(isl_set_copy(domain)); bounds = isl_basic_set_intersect(bounds, hull); + build = isl_ast_build_set_executed(build, isl_union_map_copy(executed)); depth = isl_ast_build_get_depth(build); sub_build = isl_ast_build_copy(build); @@ -1241,16 +1353,18 @@ static __isl_give isl_ast_graft *create_node_scaled( if (degenerate < 0 || eliminated < 0) executed = isl_union_map_free(executed); if (eliminated) - executed = eliminate(executed, depth, build); + executed = plug_in_values(executed, sub_build); else node = create_for(build, degenerate); body_build = isl_ast_build_copy(sub_build); body_build = isl_ast_build_increase_depth(body_build); + if (!eliminated) + node = before_each_for(node, body_build); children = generate_next_level(executed, isl_ast_build_copy(body_build)); - graft = isl_ast_graft_alloc_level(children, sub_build); + graft = isl_ast_graft_alloc_level(children, build, sub_build); if (!eliminated) graft = isl_ast_graft_insert_for(graft, node); if (eliminated) @@ -1259,6 +1373,8 @@ static __isl_give isl_ast_graft *create_node_scaled( graft = refine_degenerate(graft, bounds, build, sub_build); else graft = refine_generic(graft, bounds, domain, build); + if (!eliminated) + graft = after_each_for(graft, body_build); isl_ast_build_free(body_build); isl_ast_build_free(sub_build); @@ -1416,7 +1532,7 @@ static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed, if (isl_aff_get_denominator(offset, &data.d) < 0) executed = isl_union_map_free(executed); - if (isl_int_is_divisible_by(data.m, data.d)) + if (executed && isl_int_is_divisible_by(data.m, data.d)) isl_int_divexact(data.m, data.m, data.d); else isl_int_set_si(data.m, 1); @@ -1551,49 +1667,105 @@ done: return list; } -struct isl_domain_follows_at_depth_data { - int depth; - isl_basic_set **piece; -}; - /* Does any element of i follow or coincide with any element of j - * at the current depth (data->depth) for equal values of the outer - * dimensions? + * at the current depth for equal values of the outer dimensions? */ -static int domain_follows_at_depth(int i, int j, void *user) +static int domain_follows_at_depth(__isl_keep isl_basic_set *i, + __isl_keep isl_basic_set *j, void *user) { - struct isl_domain_follows_at_depth_data *data = user; + int depth = *(int *) user; isl_basic_map *test; int empty; int l; - test = isl_basic_map_from_domain_and_range( - isl_basic_set_copy(data->piece[i]), - isl_basic_set_copy(data->piece[j])); - for (l = 0; l < data->depth; ++l) + test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i), + isl_basic_set_copy(j)); + for (l = 0; l < depth; ++l) test = isl_basic_map_equate(test, isl_dim_in, l, isl_dim_out, l); - test = isl_basic_map_order_ge(test, isl_dim_in, data->depth, - isl_dim_out, data->depth); + test = isl_basic_map_order_ge(test, isl_dim_in, depth, + isl_dim_out, depth); empty = isl_basic_map_is_empty(test); isl_basic_map_free(test); return empty < 0 ? -1 : !empty; } +/* Split up each element of "list" into a part that is related to "bset" + * according to "gt" and a part that is not. + * Return a list that consist of "bset" and all the pieces. + */ +static __isl_give isl_basic_set_list *add_split_on( + __isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset, + __isl_keep isl_basic_map *gt) +{ + int i, n; + isl_basic_set_list *res; + + gt = isl_basic_map_copy(gt); + gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset)); + n = isl_basic_set_list_n_basic_set(list); + res = isl_basic_set_list_from_basic_set(bset); + for (i = 0; res && i < n; ++i) { + isl_basic_set *bset; + isl_set *set1, *set2; + isl_basic_map *bmap; + int empty; + + bset = isl_basic_set_list_get_basic_set(list, i); + bmap = isl_basic_map_copy(gt); + bmap = isl_basic_map_intersect_range(bmap, bset); + bset = isl_basic_map_range(bmap); + empty = isl_basic_set_is_empty(bset); + if (empty < 0) + res = isl_basic_set_list_free(res); + if (empty) { + isl_basic_set_free(bset); + bset = isl_basic_set_list_get_basic_set(list, i); + res = isl_basic_set_list_add(res, bset); + continue; + } + + res = isl_basic_set_list_add(res, isl_basic_set_copy(bset)); + set1 = isl_set_from_basic_set(bset); + bset = isl_basic_set_list_get_basic_set(list, i); + set2 = isl_set_from_basic_set(bset); + set1 = isl_set_subtract(set2, set1); + set1 = isl_set_make_disjoint(set1); + + res = isl_basic_set_list_concat(res, + isl_basic_set_list_from_set(set1)); + } + isl_basic_map_free(gt); + isl_basic_set_list_free(list); + return res; +} + static __isl_give isl_ast_graft_list *generate_sorted_domains( __isl_keep isl_basic_set_list *domain_list, __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build); -/* Generate code for the "n" schedule domains in "domain_list" - * with positions specified by the entries of the "pos" array +/* Internal data structure for add_nodes. + * + * "executed" and "build" are extra arguments to be passed to add_node. + * "list" collects the results. + */ +struct isl_add_nodes_data { + isl_union_map *executed; + isl_ast_build *build; + + isl_ast_graft_list *list; +}; + +/* Generate code for the schedule domains in "scc" * and add the results to "list". * - * The "n" domains form a strongly connected component in the ordering. - * If n is larger than 1, then this means that we cannot determine a valid - * ordering for the n domains in the component. This should be fairly - * rare because the individual domains have been made disjoint first. + * The domains in "scc" form a strongly connected component in the ordering. + * If the number of domains in "scc" is larger than 1, then this means + * that we cannot determine a valid ordering for the domains in the component. + * This should be fairly rare because the individual domains + * have been made disjoint first. * The problem is that the domains may be integrally disjoint but not * rationally disjoint. For example, we may have domains * @@ -1608,52 +1780,51 @@ static __isl_give isl_ast_graft_list *generate_sorted_domains( * This may happen in particular in case of unrolling since the domain * of each slice is replaced by its simple hull. * - * We collect the basic sets in the component, call isl_set_make_disjoint - * and try again. Note that we rely here on isl_set_make_disjoint also - * making the basic sets rationally disjoint. If the basic sets - * are rationally disjoint, then the ordering problem does not occur. - * To see this, there can only be a problem if there are points - * (i,a) and (j,b) in one set and (i,c) and (j,d) in the other with - * a < c and b > d. This means that either the interval spanned - * by a en b lies inside that spanned by c and or the other way around. - * In either case, there is a point inside both intervals with the - * convex combination in terms of a and b and in terms of c and d. - * Taking the same combination of i and j gives a point in the intersection. - */ -static __isl_give isl_ast_graft_list *add_nodes( - __isl_take isl_ast_graft_list *list, int *pos, int n, - __isl_keep isl_basic_set_list *domain_list, - __isl_keep isl_union_map *executed, - __isl_keep isl_ast_build *build) + * For each basic set i in "scc" and for each of the following basic sets j, + * we split off that part of the basic set i that shares the outer dimensions + * with j and lies before j in the current dimension. + * We collect all the pieces in a new list that replaces "scc". + */ +static int add_nodes(__isl_take isl_basic_set_list *scc, void *user) { - int i; + struct isl_add_nodes_data *data = user; + int i, n, depth; isl_basic_set *bset; - isl_set *set; + isl_basic_set_list *list; + isl_space *space; + isl_basic_map *gt; + + n = isl_basic_set_list_n_basic_set(scc); + bset = isl_basic_set_list_get_basic_set(scc, 0); + if (n == 1) { + isl_basic_set_list_free(scc); + data->list = add_node(data->list, + isl_union_map_copy(data->executed), bset, + isl_ast_build_copy(data->build)); + return data->list ? 0 : -1; + } - bset = isl_basic_set_list_get_basic_set(domain_list, pos[0]); - if (n == 1) - return add_node(list, isl_union_map_copy(executed), bset, - isl_ast_build_copy(build)); + depth = isl_ast_build_get_depth(data->build); + space = isl_basic_set_get_space(bset); + space = isl_space_map_from_set(space); + gt = isl_basic_map_universe(space); + for (i = 0; i < depth; ++i) + gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i); + gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth); - set = isl_set_from_basic_set(bset); + list = isl_basic_set_list_from_basic_set(bset); for (i = 1; i < n; ++i) { - bset = isl_basic_set_list_get_basic_set(domain_list, pos[i]); - set = isl_set_union(set, isl_set_from_basic_set(bset)); + bset = isl_basic_set_list_get_basic_set(scc, i); + list = add_split_on(list, bset, gt); } + isl_basic_map_free(gt); + isl_basic_set_list_free(scc); + scc = list; + data->list = isl_ast_graft_list_concat(data->list, + generate_sorted_domains(scc, data->executed, data->build)); + isl_basic_set_list_free(scc); - set = isl_set_make_disjoint(set); - if (isl_set_n_basic_set(set) == n) - isl_die(isl_ast_graft_list_get_ctx(list), isl_error_internal, - "unable to separate loop parts", goto error); - domain_list = isl_basic_set_list_from_set(set); - list = isl_ast_graft_list_concat(list, - generate_sorted_domains(domain_list, executed, build)); - isl_basic_set_list_free(domain_list); - - return list; -error: - isl_set_free(set); - return isl_ast_graft_list_free(list); + return data->list ? 0 : -1; } /* Sort the domains in "domain_list" according to the execution order @@ -1674,69 +1845,47 @@ static __isl_give isl_ast_graft_list *generate_sorted_domains( __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) { isl_ctx *ctx; - isl_ast_graft_list *list; - struct isl_domain_follows_at_depth_data data; - struct isl_tarjan_graph *g; - int i, n; + struct isl_add_nodes_data data; + int depth; + int n; if (!domain_list) return NULL; ctx = isl_basic_set_list_get_ctx(domain_list); n = isl_basic_set_list_n_basic_set(domain_list); - list = isl_ast_graft_list_alloc(ctx, n); + data.list = isl_ast_graft_list_alloc(ctx, n); if (n == 0) - return list; + return data.list; if (n == 1) - return add_node(list, isl_union_map_copy(executed), + return add_node(data.list, isl_union_map_copy(executed), isl_basic_set_list_get_basic_set(domain_list, 0), isl_ast_build_copy(build)); - data.depth = isl_ast_build_get_depth(build); - data.piece = domain_list->p; - g = isl_tarjan_graph_init(ctx, n, &domain_follows_at_depth, &data); - - i = 0; - while (list && n) { - int first; - - if (g->order[i] == -1) - isl_die(ctx, isl_error_internal, "cannot happen", - goto error); - first = i; - while (g->order[i] != -1) { - ++i; --n; - } - list = add_nodes(list, g->order + first, i - first, - domain_list, executed, build); - ++i; - } - - if (0) -error: list = isl_ast_graft_list_free(list); - isl_tarjan_graph_free(g); + depth = isl_ast_build_get_depth(build); + data.executed = executed; + data.build = build; + if (isl_basic_set_list_foreach_scc(domain_list, + &domain_follows_at_depth, &depth, + &add_nodes, &data) < 0) + data.list = isl_ast_graft_list_free(data.list); - return list; + return data.list; } -struct isl_shared_outer_data { - int depth; - isl_basic_set **piece; -}; - -/* Do elements i and j share any values for the outer dimensions? +/* Do i and j share any values for the outer dimensions? */ -static int shared_outer(int i, int j, void *user) +static int shared_outer(__isl_keep isl_basic_set *i, + __isl_keep isl_basic_set *j, void *user) { - struct isl_shared_outer_data *data = user; + int depth = *(int *) user; isl_basic_map *test; int empty; int l; - test = isl_basic_map_from_domain_and_range( - isl_basic_set_copy(data->piece[i]), - isl_basic_set_copy(data->piece[j])); - for (l = 0; l < data->depth; ++l) + test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i), + isl_basic_set_copy(j)); + for (l = 0; l < depth; ++l) test = isl_basic_map_equate(test, isl_dim_in, l, isl_dim_out, l); empty = isl_basic_map_is_empty(test); @@ -1745,44 +1894,67 @@ static int shared_outer(int i, int j, void *user) return empty < 0 ? -1 : !empty; } -/* Call generate_sorted_domains on a list containing the elements - * of "domain_list indexed by the first "n" elements of "pos". +/* Internal data structure for generate_sorted_domains_wrap. + * + * "n" is the total number of basic sets + * "executed" and "build" are extra arguments to be passed + * to generate_sorted_domains. + * + * "single" is set to 1 by generate_sorted_domains_wrap if there + * is only a single component. + * "list" collects the results. */ -static __isl_give isl_ast_graft_list *generate_sorted_domains_part( - __isl_keep isl_basic_set_list *domain_list, int *pos, int n, - __isl_keep isl_union_map *executed, - __isl_keep isl_ast_build *build) -{ - int i; - isl_ctx *ctx; - isl_basic_set_list *slice; +struct isl_ast_generate_parallel_domains_data { + int n; + isl_union_map *executed; + isl_ast_build *build; + + int single; isl_ast_graft_list *list; +}; - ctx = isl_ast_build_get_ctx(build); - slice = isl_basic_set_list_alloc(ctx, n); - for (i = 0; i < n; ++i) { - isl_basic_set *bset; +/* Call generate_sorted_domains on "scc", fuse the result into a list + * with either zero or one graft and collect the these single element + * lists into data->list. + * + * If there is only one component, i.e., if the number of basic sets + * in the current component is equal to the total number of basic sets, + * then data->single is set to 1 and the result of generate_sorted_domains + * is not fused. + */ +static int generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc, + void *user) +{ + struct isl_ast_generate_parallel_domains_data *data = user; + isl_ast_graft_list *list; - bset = isl_basic_set_copy(domain_list->p[pos[i]]); - slice = isl_basic_set_list_add(slice, bset); - } + list = generate_sorted_domains(scc, data->executed, data->build); + data->single = isl_basic_set_list_n_basic_set(scc) == data->n; + if (!data->single) + list = isl_ast_graft_list_fuse(list, data->build); + if (!data->list) + data->list = list; + else + data->list = isl_ast_graft_list_concat(data->list, list); - list = generate_sorted_domains(slice, executed, build); - isl_basic_set_list_free(slice); + isl_basic_set_list_free(scc); + if (!data->list) + return -1; - return list; + return 0; } /* Look for any (weakly connected) components in the "domain_list" * of domains that share some values of the outer dimensions. * That is, domains in different components do not share any values * of the outer dimensions. This means that these components - * can be freely reorderd. + * can be freely reordered. * Within each of the components, we sort the domains according * to the execution order at the current depth. * - * We fuse the result of each call to generate_sorted_domains_part - * into a list with either zero or one graft and collect these (at most) + * If there is more than one component, then generate_sorted_domains_wrap + * fuses the result of each call to generate_sorted_domains + * into a list with either zero or one graft and collects these (at most) * single element lists into a bigger list. This means that the elements of the * final list can be freely reordered. In particular, we sort them * according to an arbitrary but fixed ordering to ease merging of @@ -1792,62 +1964,30 @@ static __isl_give isl_ast_graft_list *generate_parallel_domains( __isl_keep isl_basic_set_list *domain_list, __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) { - int i, n; - isl_ctx *ctx; - isl_ast_graft_list *list; - struct isl_shared_outer_data data; - struct isl_tarjan_graph *g; + int depth; + struct isl_ast_generate_parallel_domains_data data; if (!domain_list) return NULL; - n = isl_basic_set_list_n_basic_set(domain_list); - if (n <= 1) + data.n = isl_basic_set_list_n_basic_set(domain_list); + if (data.n <= 1) return generate_sorted_domains(domain_list, executed, build); - ctx = isl_basic_set_list_get_ctx(domain_list); - - data.depth = isl_ast_build_get_depth(build); - data.piece = domain_list->p; - g = isl_tarjan_graph_init(ctx, n, &shared_outer, &data); - if (!g) - return NULL; - - i = 0; - do { - int first; - isl_ast_graft_list *list_c; - - if (g->order[i] == -1) - isl_die(ctx, isl_error_internal, "cannot happen", - break); - first = i; - while (g->order[i] != -1) { - ++i; --n; - } - if (first == 0 && n == 0) { - isl_tarjan_graph_free(g); - return generate_sorted_domains(domain_list, - executed, build); - } - list_c = generate_sorted_domains_part(domain_list, - g->order + first, i - first, executed, build); - list_c = isl_ast_graft_list_fuse(list_c, build); - if (first == 0) - list = list_c; - else - list = isl_ast_graft_list_concat(list, list_c); - ++i; - } while (list && n); - - if (n > 0) - list = isl_ast_graft_list_free(list); - - list = isl_ast_graft_list_sort(list); + depth = isl_ast_build_get_depth(build); + data.list = NULL; + data.executed = executed; + data.build = build; + data.single = 0; + if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth, + &generate_sorted_domains_wrap, + &data) < 0) + data.list = isl_ast_graft_list_free(data.list); - isl_tarjan_graph_free(g); + if (!data.single) + data.list = isl_ast_graft_list_sort_guard(data.list); - return list; + return data.list; } /* Internal data for separate_domain. @@ -2033,7 +2173,8 @@ static int update_unrolling_lower_bound(struct isl_find_unroll_data *data, return 0; } - if (!data->lower || isl_int_cmp_si(data->tmp, *data->n) < 0) { + if (isl_int_cmp_si(data->tmp, INT_MAX) <= 0 && + (!data->lower || isl_int_cmp_si(data->tmp, *data->n) < 0)) { isl_aff_free(data->lower); data->lower = lower; *data->n = isl_int_get_si(data->tmp); @@ -2109,22 +2250,17 @@ error: return isl_aff_free(data.lower); } -/* Intersect "set" with the constraint +/* Return the constraint * * i_"depth" = aff + offset */ -static __isl_give isl_set *at_offset(__isl_take isl_set *set, int depth, - __isl_keep isl_aff *aff, int offset) +static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff, + int offset) { - isl_constraint *eq; - aff = isl_aff_copy(aff); aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1); aff = isl_aff_add_constant_si(aff, offset); - eq = isl_equality_from_aff(aff); - set = isl_set_add_constraint(set, eq); - - return set; + return isl_equality_from_aff(aff); } /* Return a list of basic sets, one for each value of the current dimension @@ -2150,7 +2286,10 @@ static __isl_give isl_set *at_offset(__isl_take isl_set *set, int depth, * * We compute the unshifted simple hull of each slice to ensure that * we have a single basic set per offset. The slicing constraint - * is preserved by taking the unshifted simple hull, so these basic sets + * may get simplified away before the unshifted simple hull is taken + * and may therefore in some rare cases disappear from the result. + * We therefore explicitly add the constraint back after computing + * the unshifted simple hull to ensure that the basic sets * remain disjoint. The constraints that are dropped by taking the hull * will be taken into account at the next level, as in the case of the * atomic option. @@ -2195,9 +2334,13 @@ static __isl_give isl_basic_set_list *do_unroll(__isl_take isl_set *domain, for (i = 0; list && i < n; ++i) { isl_set *set; isl_basic_set *bset; + isl_constraint *slice; - set = at_offset(isl_set_copy(domain), depth, lower, i); + slice = at_offset(depth, lower, i); + set = isl_set_copy(domain); + set = isl_set_add_constraint(set, isl_constraint_copy(slice)); bset = isl_set_unshifted_simple_hull(set); + bset = isl_basic_set_add_constraint(bset, slice); bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap)); list = isl_basic_set_list_add(list, bset); } @@ -2223,14 +2366,13 @@ static __isl_give isl_basic_set_list *do_unroll(__isl_take isl_set *domain, * the user, except that inner dimensions have been eliminated and * that they have been made pair-wise disjoint. * - * "includes_schedule_domain" is set if the "class_domain" (not stored - * in this structure, but passed to the various functions) has been - * intersected with "schedule_domain". - * * "sep_class" contains the user-specified split into separation classes * specialized to the current depth. - * "done" contains the union of th separation domains that have already + * "done" contains the union of the separation domains that have already * been handled. + * "atomic" contains the domain that has effectively been made atomic. + * This domain may be larger than the intersection of option[atomic] + * and the schedule domain. */ struct isl_codegen_domains { isl_basic_set_list *list; @@ -2241,10 +2383,9 @@ struct isl_codegen_domains { isl_set *option[3]; - int includes_schedule_domain; - isl_map *sep_class; isl_set *done; + isl_set *atomic; }; /* Add domains to domains->list for each individual value of the current @@ -2309,7 +2450,8 @@ static int compute_unroll_domains(struct isl_codegen_domains *domains, /* Construct a single basic set that includes the intersection of * the schedule domain, the atomic option domain and the class domain. - * Add the resulting basic set to domains->list. + * Add the resulting basic set to domains->list and save a copy + * in domains->atomic for use in compute_partial_domains. * * We construct a single domain rather than trying to combine * the schedule domains of individual domains because we are working @@ -2335,12 +2477,13 @@ static int compute_atomic_domain(struct isl_codegen_domains *domains, atomic_domain = isl_set_intersect(atomic_domain, isl_set_copy(domain)); empty = isl_set_is_empty(atomic_domain); if (empty < 0 || empty) { - isl_set_free(atomic_domain); + domains->atomic = atomic_domain; return empty < 0 ? -1 : 0; } atomic_domain = isl_set_coalesce(atomic_domain); bset = isl_set_unshifted_simple_hull(atomic_domain); + domains->atomic = isl_set_from_basic_set(isl_basic_set_copy(bset)); domains->list = isl_basic_set_list_add(domains->list, bset); return 0; @@ -2391,6 +2534,10 @@ static int compute_separate_domain(struct isl_codegen_domains *domains, * basic sets for which code should be generated separately * for the given separation class domain. * + * If any separation classes have been defined, then "class_domain" + * is the domain of the current class and does not refer to inner dimensions. + * Otherwise, "class_domain" is the universe domain. + * * We first make sure that the class domain is disjoint from * previously considered class domains. * @@ -2405,6 +2552,14 @@ static int compute_separate_domain(struct isl_codegen_domains *domains, * * For atomic and remainder domains, inner dimensions and divs involving * the current dimensions should be eliminated. + * In case we are working within a separation class, we need to intersect + * the result with the current "class_domain" to ensure that the domains + * are disjoint from those generated from other class domains. + * + * The domain that has been made atomic may be larger than specified + * by the user since it needs to be representable as a single basic set. + * This possibly larger domain is stored in domains->atomic by + * compute_atomic_domain. * * If anything is left after handling separate, unroll and atomic, * we split it up into basic sets and append the basic sets to domains->list. @@ -2413,42 +2568,46 @@ static int compute_partial_domains(struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) { isl_basic_set_list *list; + isl_set *domain; class_domain = isl_set_subtract(class_domain, isl_set_copy(domains->done)); domains->done = isl_set_union(domains->done, isl_set_copy(class_domain)); - if (compute_separate_domain(domains, class_domain) < 0) + domain = isl_set_copy(class_domain); + + if (compute_separate_domain(domains, domain) < 0) goto error; - class_domain = isl_set_subtract(class_domain, + domain = isl_set_subtract(domain, isl_set_copy(domains->option[separate])); - if (!domains->includes_schedule_domain) - class_domain = isl_set_intersect(class_domain, - isl_set_copy(domains->schedule_domain)); + domain = isl_set_intersect(domain, + isl_set_copy(domains->schedule_domain)); - if (compute_unroll_domains(domains, class_domain) < 0) + if (compute_unroll_domains(domains, domain) < 0) goto error; - class_domain = isl_set_subtract(class_domain, + domain = isl_set_subtract(domain, isl_set_copy(domains->option[unroll])); - class_domain = isl_ast_build_eliminate(domains->build, - class_domain); + domain = isl_ast_build_eliminate(domains->build, domain); + domain = isl_set_intersect(domain, isl_set_copy(class_domain)); - if (compute_atomic_domain(domains, class_domain) < 0) - goto error; - class_domain = isl_set_subtract(class_domain, - isl_set_copy(domains->option[atomic])); + if (compute_atomic_domain(domains, domain) < 0) + domain = isl_set_free(domain); + domain = isl_set_subtract(domain, domains->atomic); - class_domain = isl_set_coalesce(class_domain); - class_domain = isl_set_make_disjoint(class_domain); + domain = isl_set_coalesce(domain); + domain = isl_set_make_disjoint(domain); - list = isl_basic_set_list_from_set(class_domain); + list = isl_basic_set_list_from_set(domain); domains->list = isl_basic_set_list_concat(domains->list, list); + isl_set_free(class_domain); + return 0; error: + isl_set_free(domain); isl_set_free(class_domain); return -1; } @@ -2470,6 +2629,7 @@ static int compute_class_domains(__isl_take isl_point *pnt, void *user) class_set = isl_set_from_point(pnt); domain = isl_map_domain(isl_map_intersect_range( isl_map_copy(domains->sep_class), class_set)); + domain = isl_ast_build_compute_gist(domains->build, domain); domain = isl_ast_build_eliminate(domains->build, domain); disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain); @@ -2480,7 +2640,6 @@ static int compute_class_domains(__isl_take isl_point *pnt, void *user) return 0; } - domains->includes_schedule_domain = 0; return compute_partial_domains(domains, domain); } @@ -2522,9 +2681,8 @@ static void compute_domains_init_options(isl_set *option[3], * and split up the domain for each of them separately. * Finally, we consider the remainder. If no separation classes were * specified, then we call compute_partial_domains with the universe - * "class_domain". Otherwise, we take the "schedule_domain" as "class_domain" - * and set includes_schedule_domain to reflect that the schedule domain - * has already been taken into account. We do this because we want to + * "class_domain". Otherwise, we take the "schedule_domain" as "class_domain", + * with inner dimensions removed. We do this because we want to * avoid computing the complement of the class domains (i.e., the difference * between the universe and domains->done). */ @@ -2539,6 +2697,10 @@ static __isl_give isl_basic_set_list *compute_domains( isl_space *space; int n_param; enum isl_ast_build_domain_type type; + int empty; + + if (!executed) + return NULL; ctx = isl_union_map_get_ctx(executed); domains.list = isl_basic_set_list_alloc(ctx, 0); @@ -2563,12 +2725,15 @@ static __isl_give isl_basic_set_list *compute_domains( domains.list = isl_basic_set_list_free(domains.list); isl_set_free(classes); - if (!domains.done) + empty = isl_set_is_empty(domains.done); + if (empty < 0) { domains.list = isl_basic_set_list_free(domains.list); - domains.includes_schedule_domain = !isl_set_is_empty(domains.done); - if (!domains.includes_schedule_domain) { + domain = isl_set_free(domain); + } else if (empty) { isl_set_free(domain); domain = isl_set_universe(isl_set_get_space(domains.done)); + } else { + domain = isl_ast_build_eliminate(build, domain); } if (compute_partial_domains(&domains, domain) < 0) domains.list = isl_basic_set_list_free(domains.list); @@ -3573,9 +3738,12 @@ __isl_give isl_ast_node *isl_ast_build_ast_from_schedule( isl_ast_node *node; isl_union_map *executed; + build = isl_ast_build_copy(build); + build = isl_ast_build_set_single_valued(build, 0); executed = isl_union_map_reverse(schedule); list = generate_code(executed, isl_ast_build_copy(build), 0); node = isl_ast_node_from_graft_list(list, build); + isl_ast_build_free(build); return node; }