The split_parallel option was ill-conceived.
The condition split_parallel looked for was too specific
and the implementation was wrong. Part of this has been fixed
in 1b2fdb6 (schedule.c: split_parallel: avoid invalid memory accesses,
Mon Jan 30 15:23:18 2012 +0100), but the code could in principle
still produce an incorrect schedule because it didn't check the sizes
of the constant terms.
The new option essentially applies strip-mining and should therefore
be safe. In particular, it is applied when the linear parts of the
schedules have a non-trivial common divisor. The strip-mining is
then performed with respect to this common divisor.
Signed-off-by: Sven Verdoolaege <skimo@kotnet.org>
=back
+=head3 Changes since isl-0.09
+
+=over
+
+=item * The C<schedule_split_parallel> option has been replaced
+by the C<schedule_split_scaled> option.
+
+=back
+
=head1 Installation
The source of C<isl> can be obtained either as a tarball
isl_ctx *ctx, int val);
int isl_options_get_schedule_outer_zero_distance(
isl_ctx *ctx);
- int isl_options_set_schedule_split_parallel(
+ int isl_options_set_schedule_split_scaled(
isl_ctx *ctx, int val);
- int isl_options_get_schedule_split_parallel(
+ int isl_options_get_schedule_split_scaled(
isl_ctx *ctx);
int isl_options_set_schedule_algorithm(
isl_ctx *ctx, int val);
results in a zero dependence distance over the proximity
dependences.
-=item * schedule_split_parallel
+=item * schedule_split_scaled
If this option is set, then we try to construct schedules in which the
constant term is split off from the linear part if the linear parts of
-the scheduling rows for all nodes in the graphs are the same.
+the scheduling rows for all nodes in the graphs have a common non-trivial
+divisor.
The constant term is then placed in a separate band and the linear
-part is simplified.
+part is reduced.
=item * schedule_algorithm
int isl_options_set_schedule_outer_zero_distance(isl_ctx *ctx, int val);
int isl_options_get_schedule_outer_zero_distance(isl_ctx *ctx);
-int isl_options_set_schedule_split_parallel(isl_ctx *ctx, int val);
-int isl_options_get_schedule_split_parallel(isl_ctx *ctx);
+int isl_options_set_schedule_split_scaled(isl_ctx *ctx, int val);
+int isl_options_get_schedule_split_scaled(isl_ctx *ctx);
#define ISL_SCHEDULE_FUSE_MAX 0
#define ISL_SCHEDULE_FUSE_MIN 1
ISL_ARG_BOOL(struct isl_options, schedule_maximize_band_depth, 0,
"schedule-maximize-band-depth", 0,
"maximize the number of scheduling dimensions in a band")
-ISL_ARG_BOOL(struct isl_options, schedule_split_parallel, 0,
- "schedule-split-parallel", 1,
- "split non-tilable bands with parallel schedules")
+ISL_ARG_BOOL(struct isl_options, schedule_split_scaled, 0,
+ "schedule-split-scaled", 1,
+ "split non-tilable bands with scaled schedules")
ISL_ARG_CHOICE(struct isl_options, schedule_algorithm, 0,
"schedule-algorithm", isl_schedule_algorithm_choice,
ISL_SCHEDULE_ALGORITHM_ISL, "scheduling algorithm to use")
schedule_maximize_band_depth)
ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args,
- schedule_split_parallel)
+ schedule_split_scaled)
ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args,
- schedule_split_parallel)
+ schedule_split_scaled)
ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args,
schedule_outer_zero_distance)
int schedule_parametric;
int schedule_outer_zero_distance;
int schedule_maximize_band_depth;
- int schedule_split_parallel;
+ int schedule_split_scaled;
unsigned schedule_algorithm;
int schedule_fuse;
};
return 0;
}
-/* If the schedule_split_parallel option is set and if the linear
- * parts of the scheduling rows for all nodes in the graphs are the same,
- * then split off the constant term from the linear part.
+/* If the schedule_split_scaled option is set and if the linear
+ * parts of the scheduling rows for all nodes in the graphs have
+ * a non-trivial common divisor, then split off the constant term
+ * from the linear part.
* The constant term is then placed in a separate band and
- * the linear part is simplified.
+ * the linear part is reduced.
*/
-static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
+static int split_scaled(isl_ctx *ctx, struct isl_sched_graph *graph)
{
int i;
- int equal = 1;
- int row, cols;
- struct isl_sched_node *node0;
+ int row;
+ isl_int gcd, gcd_i;
- if (!ctx->opt->schedule_split_parallel)
+ if (!ctx->opt->schedule_split_scaled)
return 0;
if (graph->n <= 1)
return 0;
- node0 = &graph->node[0];
- row = isl_mat_rows(node0->sched) - 1;
- cols = isl_mat_cols(node0->sched);
- for (i = 1; i < graph->n; ++i) {
+ isl_int_init(gcd);
+ isl_int_init(gcd_i);
+
+ isl_int_set_si(gcd, 0);
+
+ row = isl_mat_rows(graph->node[0].sched) - 1;
+
+ for (i = 0; i < graph->n; ++i) {
struct isl_sched_node *node = &graph->node[i];
+ int cols = isl_mat_cols(node->sched);
- if (isl_mat_cols(node->sched) != cols)
- return 0;
- if (!isl_seq_eq(node0->sched->row[row] + 1,
- node->sched->row[row] + 1, cols - 1))
- return 0;
- if (equal &&
- isl_int_ne(node0->sched->row[row][0],
- node->sched->row[row][0]))
- equal = 0;
+ isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
+ isl_int_gcd(gcd, gcd, gcd_i);
}
- if (equal)
+
+ isl_int_clear(gcd_i);
+
+ if (isl_int_cmp_si(gcd, 1) <= 0) {
+ isl_int_clear(gcd);
return 0;
+ }
next_band(graph);
node->sched_map = NULL;
node->sched = isl_mat_add_zero_rows(node->sched, 1);
if (!node->sched)
- return -1;
- isl_int_set(node->sched->row[row + 1][0],
- node->sched->row[row][0]);
- isl_int_set_si(node->sched->row[row][0], 0);
- node->sched = isl_mat_normalize_row(node->sched, row);
+ goto error;
+ isl_int_fdiv_r(node->sched->row[row + 1][0],
+ node->sched->row[row][0], gcd);
+ isl_int_fdiv_q(node->sched->row[row][0],
+ node->sched->row[row][0], gcd);
+ isl_int_mul(node->sched->row[row][0],
+ node->sched->row[row][0], gcd);
+ node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
if (!node->sched)
- return -1;
+ goto error;
node->band[graph->n_total_row] = graph->n_band;
}
graph->n_total_row++;
+ isl_int_clear(gcd);
return 0;
+error:
+ isl_int_clear(gcd);
+ return -1;
}
/* Construct a schedule row for each node such that as many dependences
if (update_schedule(graph, sol, 0, 0) < 0)
return -1;
- if (split_parallel(ctx, graph) < 0)
+ if (split_scaled(ctx, graph) < 0)
return -1;
return compute_next_band(ctx, graph);