From ad90fe4edbcbc676e57585c7587a88ed04c66f1f Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Sat, 29 Sep 2012 20:11:32 +0200 Subject: [PATCH] isl_aff_normalize: plug in divs with unit coefficient in other divs The outer div expression can then be simplified and the inner div expression may be removed if it was only used inside the outer div expression. Signed-off-by: Sven Verdoolaege --- isl_aff.c | 45 +++++++++++++++++++++++++ test_inputs/codegen/cloog/nul_complex1.c | 2 +- test_inputs/codegen/cloog/reservoir-cholesky2.c | 5 ++- test_inputs/codegen/omega/dagstuhl1-0.c | 2 +- test_inputs/codegen/omega/floor_bound-0.c | 2 +- test_inputs/codegen/omega/substitution-1.c | 2 +- 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/isl_aff.c b/isl_aff.c index a77533f..fb57f24 100644 --- a/isl_aff.c +++ b/isl_aff.c @@ -785,6 +785,50 @@ error: return isl_aff_free(aff); } +/* Look for any divs j that appear with a unit coefficient inside + * the definitions of other divs i and plug them into the definitions + * of the divs i. + * + * In particular, an expression of the form + * + * floor((f(..) + floor(g(..)/n))/m) + * + * is simplified to + * + * floor((n * f(..) + g(..))/(n * m)) + * + * This simplification is correct because we can move the expression + * f(..) into the inner floor in the original expression to obtain + * + * floor(floor((n * f(..) + g(..))/n)/m) + * + * from which we can derive the simplified expression. + */ +static __isl_give isl_aff *plug_in_unit_divs(__isl_take isl_aff *aff) +{ + int i, j, n; + int off; + + if (!aff) + return NULL; + + n = isl_local_space_dim(aff->ls, isl_dim_div); + off = isl_local_space_offset(aff->ls, isl_dim_div); + for (i = 1; i < n; ++i) { + for (j = 0; j < i; ++j) { + if (!isl_int_is_one(aff->ls->div->row[i][1 + off + j])) + continue; + aff->ls = isl_local_space_substitute_seq(aff->ls, + isl_dim_div, j, aff->ls->div->row[j], + aff->v->size, i, 1); + if (!aff->ls) + return isl_aff_free(aff); + } + } + + return aff; +} + /* Swap divs "a" and "b" in "aff", which is assumed to be non-NULL. * * Even though this function is only called on isl_affs with a single @@ -889,6 +933,7 @@ __isl_give isl_aff *isl_aff_normalize(__isl_take isl_aff *aff) if (!aff->v) return isl_aff_free(aff); aff = plug_in_integral_divs(aff); + aff = plug_in_unit_divs(aff); aff = sort_divs(aff); aff = isl_aff_remove_unused_divs(aff); return aff; diff --git a/test_inputs/codegen/cloog/nul_complex1.c b/test_inputs/codegen/cloog/nul_complex1.c index 6395f8a..8dbd63f 100644 --- a/test_inputs/codegen/cloog/nul_complex1.c +++ b/test_inputs/codegen/cloog/nul_complex1.c @@ -1,3 +1,3 @@ for (int c0 = 0; c0 <= 5 * n; c0 += 1) - for (int c1 = max(-((n + c0 + 1) % 2) - n + c0 + 1, 2 * floord(c0 - (c0 + 3) / 3, 2) + 2); c1 <= min(n + c0 - (n + c0 + 2) / 3, c0); c1 += 2) + for (int c1 = max(-((n + c0 + 1) % 2) - n + c0 + 1, 2 * floord(c0 - 1, 3) + 2); c1 <= min(n + c0 - (n + c0 + 2) / 3, c0); c1 += 2) S1((-2 * c0 + 3 * c1) / 2, c0 - c1); diff --git a/test_inputs/codegen/cloog/reservoir-cholesky2.c b/test_inputs/codegen/cloog/reservoir-cholesky2.c index 1e60bb6..7ed1450 100644 --- a/test_inputs/codegen/cloog/reservoir-cholesky2.c +++ b/test_inputs/codegen/cloog/reservoir-cholesky2.c @@ -4,7 +4,6 @@ for (int c1 = 2; c1 < 3 * M; c1 += 1) { for (int c3 = (c1 + 1) / 3 + 1; c3 <= min(c1 - 2, M); c3 += 1) for (int c5 = -c3 + (c1 + c3 + 1) / 2 + 1; c5 <= min(c1 - c3, c3); c5 += 1) S3(c1 - c3 - c5 + 1, c3, c5); - if (2 * c1 + 1 >= 3 * ((c1 + c1 / 3 + 1) / 2) && 3 * ((c1 + c1 / 3 + 1) / 2) + 1 >= 2 * c1) - for (int c3 = -((c1 + c1 / 3 + 1) % 2) + c1 / 3 + 3; c3 <= min(c1, M); c3 += 2) - S2((c1 - c3 + 2) / 2, c3); + for (int c3 = -c1 + 2 * ((2 * c1 + 1) / 3) + 2; c3 <= min(c1, M); c3 += 2) + S2((c1 - c3 + 2) / 2, c3); } diff --git a/test_inputs/codegen/omega/dagstuhl1-0.c b/test_inputs/codegen/omega/dagstuhl1-0.c index 9a6333d..3e403dd 100644 --- a/test_inputs/codegen/omega/dagstuhl1-0.c +++ b/test_inputs/codegen/omega/dagstuhl1-0.c @@ -1,2 +1,2 @@ for (int c0 = 0; c0 <= 99; c0 += 1) - s0(c0 - 10 * (9 * c0 / 10 / 9), 9 * c0 / 10 / 9); + s0(c0 % 10, c0 / 10); diff --git a/test_inputs/codegen/omega/floor_bound-0.c b/test_inputs/codegen/omega/floor_bound-0.c index 959f268..db7f437 100644 --- a/test_inputs/codegen/omega/floor_bound-0.c +++ b/test_inputs/codegen/omega/floor_bound-0.c @@ -1,2 +1,2 @@ -for (int c0 = 4 * floord(floord(m - 1, 3), 4) + 4; c0 <= floord(n, 3); c0 += 4) +for (int c0 = 4 * floord(m - 1, 12) + 4; c0 <= floord(n, 3); c0 += 4) s0(c0); diff --git a/test_inputs/codegen/omega/substitution-1.c b/test_inputs/codegen/omega/substitution-1.c index 51ab952..486514c 100644 --- a/test_inputs/codegen/omega/substitution-1.c +++ b/test_inputs/codegen/omega/substitution-1.c @@ -1,3 +1,3 @@ for (int c0 = 0; c0 <= 14; c0 += 1) - for (int c1 = max(2 * c0 - 12, -c0 + 3 * ((c0 + 1) / 2)); c1 <= min(2 * c0, c0 / 2 + 9); c1 += 3) + for (int c1 = max(2 * c0 - 12, -c0 + 3 * floord(c0 - 1, 2) + 3); c1 <= min(2 * c0, c0 / 2 + 9); c1 += 3) s0((2 * c0 - c1) / 3, (-c0 + 2 * c1) / 3); -- 2.7.4