return !involves;
}
+/* Substitute the known values of all loop iterators into the domain
+ * of "umap".  These values are fixed affine expressions in terms of
+ * the parameters and the outer loop iterators.
+ *
+ * The substitution amounts to precomposing "umap" with build->values,
+ * i.e., taking the preimage of its domain under a copy of build->values.
+ * If "build" is NULL, then "umap" is freed and the result of
+ * isl_union_map_free is returned.
+ */
+__isl_give isl_union_map *isl_ast_build_substitute_values_union_map_domain(
+	__isl_keep isl_ast_build *build, __isl_take isl_union_map *umap)
+{
+	if (build)
+		return isl_union_map_preimage_domain_multi_aff(umap,
+					isl_multi_aff_copy(build->values));
+
+	return isl_union_map_free(umap);
+}
+
/* Is the current dimension known to attain only a single value?
*/
int isl_ast_build_has_value(__isl_keep isl_ast_build *build)
__isl_give isl_pw_multi_aff *isl_ast_build_compute_gist_pw_multi_aff(
__isl_keep isl_ast_build *build, __isl_take isl_pw_multi_aff *pma);
+__isl_give isl_union_map *isl_ast_build_substitute_values_union_map_domain(
+ __isl_keep isl_ast_build *build, __isl_take isl_union_map *umap);
+
int isl_ast_build_aff_is_nonneg(__isl_keep isl_ast_build *build,
__isl_keep isl_aff *aff);
return graft;
}
-/* Eliminate the schedule dimension "pos" from "executed" and return
- * the result.
+/* Plug in all the known values of the current and outer dimensions
+ * in the domain of "executed". In principle, we only need to plug
+ * in the known value of the current dimension since the values of
+ * outer dimensions have been plugged in already.
+ * However, it turns out to be easier to just plug in all known values.
+ *
+ * The substitution itself is performed by
+ * isl_ast_build_substitute_values_union_map_domain.
*/
-static __isl_give isl_union_map *eliminate(__isl_take isl_union_map *executed,
- int pos, __isl_keep isl_ast_build *build)
+static __isl_give isl_union_map *plug_in_values(
+ __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build)
{
- isl_space *space;
- isl_map *elim;
-
- space = isl_ast_build_get_space(build, 1);
- space = isl_space_map_from_set(space);
- elim = isl_map_identity(space);
- elim = isl_map_eliminate(elim, isl_dim_in, pos, 1);
-
- executed = isl_union_map_apply_domain(executed,
- isl_union_map_from_map(elim));
-
- return executed;
+ return isl_ast_build_substitute_values_union_map_domain(build,
+ executed);
}
/* Check if the constraint "c" is a lower bound on dimension "pos",
* it can be printed as an assignment of the single value to the loop
* "iterator".
*
- * If the current level is eliminated, we eliminate the current dimension
- * from the inverse schedule to make sure no inner dimensions depend
+ * If the current level is eliminated, we explicitly plug in the value
+ * for the current level found by isl_ast_build_set_loop_bounds in the
+ * inverse schedule. This ensures that if we are working on a slice
+ * of the domain based on information available in the inverse schedule
+ * and the build domain, then this information is also reflected
+ * in the inverse schedule. This operation also eliminates the current
+ * dimension from the inverse schedule making sure no inner dimensions depend
* on the current dimension. Otherwise, we create a for node, marking
* it degenerate if appropriate. The initial for node is still incomplete
* and will be completed in either refine_degenerate or refine_generic.
if (degenerate < 0 || eliminated < 0)
executed = isl_union_map_free(executed);
if (eliminated)
- executed = eliminate(executed, depth, build);
+ executed = plug_in_values(executed, sub_build);
else
node = create_for(build, degenerate);
return 0;
}
+/* Callback invoked on each leaf of the AST generated
+ * by test_ast_gen5.
+ *
+ * The AST generation is completed by extending the outer schedule
+ * with a zero-dimensional schedule.  A for loop in the result would
+ * indicate that not enough information about the outer schedule
+ * was passed along to the inner AST generation, so this case
+ * is reported as an error.
+ *
+ * "build" is freed before returning; "user" is not used.
+ */
+static __isl_give isl_ast_node *create_leaf(__isl_take isl_ast_build *build,
+	void *user)
+{
+	isl_union_map *outer, *zero_dim;
+	isl_ast_node *node;
+
+	outer = isl_ast_build_get_schedule(build);
+	zero_dim = isl_union_map_from_domain(
+			isl_union_map_domain(isl_union_map_copy(outer)));
+	outer = isl_union_map_range_product(outer, zero_dim);
+	node = isl_ast_build_ast_from_schedule(build, outer);
+	isl_ast_build_free(build);
+
+	if (node && isl_ast_node_get_type(node) == isl_ast_node_for)
+		isl_die(isl_ast_node_get_ctx(node), isl_error_unknown,
+			"code should not contain any for loop",
+			return isl_ast_node_free(node));
+
+	return node;
+}
+
+/* Check that we do not lose any information when going back and
+ * forth between internal and external schedule.
+ *
+ * In particular, we create an AST where we unroll the only
+ * non-constant dimension in the schedule. We therefore do
+ * not expect any for loops in the AST. However, older versions
+ * of isl would not pass along enough information about the outer
+ * schedule when performing an inner code generation from a create_leaf
+ * callback, resulting in the inner code generation producing a for loop.
+ *
+ * Return 0 on success and -1 if the AST generation failed
+ * (which includes the case where create_leaf reported a for loop).
+ */
+static int test_ast_gen5(isl_ctx *ctx)
+{
+	const char *str;
+	isl_set *set;
+	isl_union_map *schedule, *options;
+	isl_ast_build *build;
+	isl_ast_node *tree;
+
+	str = "{ A[] -> [1, 1, 2]; B[i] -> [1, i, 0] : i >= 1 and i <= 2 }";
+	schedule = isl_union_map_read_from_str(ctx, str);
+
+	str = "{ [a, b, c] -> unroll[1] : exists (e0 = [(a)/4]: "
+		"4e0 >= -1 + a - b and 4e0 <= -2 + a + b) }";
+	options = isl_union_map_read_from_str(ctx, str);
+
+	set = isl_set_universe(isl_space_params_alloc(ctx, 0));
+	build = isl_ast_build_from_context(set);
+	build = isl_ast_build_set_options(build, options);
+	build = isl_ast_build_set_create_leaf(build, &create_leaf, NULL);
+	tree = isl_ast_build_ast_from_schedule(build, schedule);
+	isl_ast_build_free(build);
+	/* Test for failure before releasing "tree": the value of
+	 * a pointer should not be inspected after it has been freed.
+	 */
+	if (!tree)
+		return -1;
+	isl_ast_node_free(tree);
+
+	return 0;
+}
+
static int test_ast_gen(isl_ctx *ctx)
{
if (test_ast_gen1(ctx) < 0)
return -1;
if (test_ast_gen4(ctx) < 0)
return -1;
+ if (test_ast_gen5(ctx) < 0)
+ return -1;
return 0;
}
for (int c0 = floord(n, 3); c0 <= 2 * floord(n, 3); c0 += 1)
for (int c1 = 0; c1 < n; c1 += 1)
for (int c3 = max(1, (n % 3) - n + 3 * c0); c3 <= min(n, (n % 3) - n + 3 * c0 + 2); c3 += 1)
- S2(c1 + 1, c3, 0, (n + 3) / 3 - 1, c0 - (n + 3) / 3 + 1);
+ S2(c1 + 1, c3, 0, n / 3, c0 - n / 3);
}
for (int c1 = max(2 * c0 - 3, c0 / 2); c1 <= min(c0 + 1, 3); c1 += 1)
for (int c2 = c0; c2 <= min(min(3, 2 * c0 - c1 + 1), 3 * c1 + 2); c2 += 1)
for (int c3 = max(max(max(c2 - (c2 + 2) / 3, c2 + floord(3 * c1 - c2 - 1, 6)), c1 - (-c1 + 3) / 3), c0 - (-c2 + 3) / 3); c3 <= min(c0 + c0 / 2 + 1, 3); c3 += 1)
- for (int c5 = max(max(max(max(2 * c3 - 4, c2 - (c2 + 3) / 3), 0), c3 - (c3 + 3) / 3), c1 - (-c1 + 3) / 3); c5 <= min(min(c1 + 1, -c2 + 2 * c3 - (c2 + 3) / 3 + 2), c3); c5 += 1)
- for (int c6 = max(max(max(max(max(667 * c0 - 333 * c1 - (c0 + c1 + 3) / 3 - 332, 333 * c2 + (c2 + 1) / 3), -200 * c1 + 400 * c3 - 199), 1000 * c0 - 500 * c5 - 501), 333 * c1 + c1 / 3), 250 * c3 + 1); c6 <= min(min(min(min(min(min(1000 * c0 - 500 * c5 + 997, 500 * c0 + 499), 333 * c2 - (-c2 + 3) / 3 + 333), 333 * c3 - (-c3 + 3) / 3 + 334), -200 * c1 + 400 * c3 + 400), 500 * c5 + 501), 1000); c6 += 1)
+ for (int c5 = max(max(max(max(c1 - (c1 - 2 * c3 + 5) / 5, 0), c3 - (c3 + 3) / 3), 2 * c3 - 4), c2 - (c2 + 3) / 3); c5 <= min(min(c1 + 1, c3), -c2 + 2 * c3 - (c2 + 3) / 3 + 2); c5 += 1)
+ for (int c6 = max(max(max(max(max(1000 * c0 - 500 * c5 - 501, -200 * c1 + 400 * c3 - 199), 333 * c1 + c1 / 3), 250 * c3 + 1), 667 * c0 - 333 * c1 - (c0 + c1 + 3) / 3 - 332), 333 * c2 + (c2 + 1) / 3); c6 <= min(min(min(min(min(min(-200 * c1 + 400 * c3 + 400, 500 * c0 + 499), 333 * c2 - (-c2 + 3) / 3 + 333), 333 * c3 - (-c3 + 3) / 3 + 334), 500 * c5 + 501), 1000), 1000 * c0 - 500 * c5 + 997); c6 += 1)
for (int c7 = max(max(max(max(c6, 500 * c5 + 2), 1000 * c3 - 2 * c6 + 2), 500 * c1 + (c6 + 1) / 2), 1000 * c0 - c6); c7 <= min(min(min(min(500 * c5 + 501, 2 * c6 + 1), 1000 * c0 - c6 + 999), 500 * c1 + (c6 + 1) / 2 + 499), 1000 * c3 - 2 * c6 + 1001); c7 += 1)
s0(c0, c1, c2, c3, c2 / 3, c5, c6, c7);
}
for (int c1 = 0; c1 < n; c1 += 8)
for (int c2 = 0; c2 < n % 8; c2 += 1)
- for (int c3 = 0; c3 <= min(7, n - c1 - 1); c3 += 1)
+ for (int c3 = 0; c3 <= min(n - c1 - 1, 7); c3 += 1)
A(-((n - 1) % 8) + n + c2 - 1, c1 + c3);
}
if (2 * (63 * t1 % 64) + t1 <= 134)
- S(2 * (63 * t1 % 64) + t1);
+ S(-(2 * ((t1 - 1) % 64)) + t1 + 126);
if (t1 % 3 == 0 && t2 >= 1 && t2 <= 2)
write_shared_A(3, (-t1 + 12) / 3, t2 + 32);
{
- int c3 = t2 <= 1 || (t2 >= 2 && ((3 * t1 + t2 + 3) % 4) + 1 >= t2) ? t2 + 32 : t2;
+ int c3 = ((t1 + 3) % 4) + 1 >= t2 && t2 >= 2 && t2 <= 33 ? t2 + 32 : ((t2 + 30) % 32) + 2;
if (c3 == t2 + 32 || (c3 == t2 && ((-t1 + 8) % 4) + t2 >= ((t2 + 1) % 2) + 5))
write_shared_A(3, ((t1 + 3) % 4) + 5, c3);
}
if (t1 % 3 == 0 && t2 >= 1 && t2 <= 2)
write_shared_A(4, (-t1 + 12) / 3, t2 + 32);
{
- int c3 = t2 <= 1 || (t2 >= 2 && ((3 * t1 + t2 + 3) % 4) + 1 >= t2) ? t2 + 32 : t2;
+ int c3 = ((t1 + 3) % 4) + 1 >= t2 && t2 >= 2 && t2 <= 33 ? t2 + 32 : ((t2 + 30) % 32) + 2;
if (c3 == t2 + 32 || (c3 == t2 && ((-t1 + 8) % 4) + t2 >= ((t2 + 1) % 2) + 5))
write_shared_A(4, ((t1 + 3) % 4) + 5, c3);
}