The split_parallel option was ill-conceived.
The condition that split_parallel looked for was too specific,
and the implementation was wrong. Part of this was fixed
in 1b2fdb6 (schedule.c: split_parallel: avoid invalid memory accesses,
Mon Jan 30 15:23:18 2012 +0100), but the code could in principle
still produce an incorrect schedule because it didn't check the sizes
of the constant terms.
The new option essentially applies strip-mining and should therefore
be safe. Specifically, it is applied when the linear parts of the
schedules have a non-trivial common divisor. The strip-mining is
then performed with respect to this common divisor.
Signed-off-by: Sven Verdoolaege <skimo@kotnet.org>
+=head3 Changes since isl-0.09
+
+=over
+
+=item * The C<schedule_split_parallel> option has been replaced
+by the C<schedule_split_scaled> option.
+
+=back
+
=head1 Installation
The source of C<isl> can be obtained either as a tarball
=head1 Installation
The source of C<isl> can be obtained either as a tarball
isl_ctx *ctx, int val);
int isl_options_get_schedule_outer_zero_distance(
isl_ctx *ctx);
isl_ctx *ctx, int val);
int isl_options_get_schedule_outer_zero_distance(
isl_ctx *ctx);
- int isl_options_set_schedule_split_parallel(
+ int isl_options_set_schedule_split_scaled(
- int isl_options_get_schedule_split_parallel(
+ int isl_options_get_schedule_split_scaled(
isl_ctx *ctx);
int isl_options_set_schedule_algorithm(
isl_ctx *ctx, int val);
isl_ctx *ctx);
int isl_options_set_schedule_algorithm(
isl_ctx *ctx, int val);
results in a zero dependence distance over the proximity
dependences.
results in a zero dependence distance over the proximity
dependences.
-=item * schedule_split_parallel
+=item * schedule_split_scaled
If this option is set, then we try to construct schedules in which the
constant term is split off from the linear part if the linear parts of
If this option is set, then we try to construct schedules in which the
constant term is split off from the linear part if the linear parts of
-the scheduling rows for all nodes in the graphs are the same.
+the scheduling rows for all nodes in the graphs have a non-trivial
+common divisor.
The constant term is then placed in a separate band and the linear
The constant term is then placed in a separate band and the linear
=item * schedule_algorithm
=item * schedule_algorithm
int isl_options_set_schedule_outer_zero_distance(isl_ctx *ctx, int val);
int isl_options_get_schedule_outer_zero_distance(isl_ctx *ctx);
int isl_options_set_schedule_outer_zero_distance(isl_ctx *ctx, int val);
int isl_options_get_schedule_outer_zero_distance(isl_ctx *ctx);
-int isl_options_set_schedule_split_parallel(isl_ctx *ctx, int val);
-int isl_options_get_schedule_split_parallel(isl_ctx *ctx);
+int isl_options_set_schedule_split_scaled(isl_ctx *ctx, int val);
+int isl_options_get_schedule_split_scaled(isl_ctx *ctx);
#define ISL_SCHEDULE_FUSE_MAX 0
#define ISL_SCHEDULE_FUSE_MIN 1
#define ISL_SCHEDULE_FUSE_MAX 0
#define ISL_SCHEDULE_FUSE_MIN 1
ISL_ARG_BOOL(struct isl_options, schedule_maximize_band_depth, 0,
"schedule-maximize-band-depth", 0,
"maximize the number of scheduling dimensions in a band")
ISL_ARG_BOOL(struct isl_options, schedule_maximize_band_depth, 0,
"schedule-maximize-band-depth", 0,
"maximize the number of scheduling dimensions in a band")
-ISL_ARG_BOOL(struct isl_options, schedule_split_parallel, 0,
- "schedule-split-parallel", 1,
- "split non-tilable bands with parallel schedules")
+ISL_ARG_BOOL(struct isl_options, schedule_split_scaled, 0,
+ "schedule-split-scaled", 1,
+ "split non-tilable bands with scaled schedules")
ISL_ARG_CHOICE(struct isl_options, schedule_algorithm, 0,
"schedule-algorithm", isl_schedule_algorithm_choice,
ISL_SCHEDULE_ALGORITHM_ISL, "scheduling algorithm to use")
ISL_ARG_CHOICE(struct isl_options, schedule_algorithm, 0,
"schedule-algorithm", isl_schedule_algorithm_choice,
ISL_SCHEDULE_ALGORITHM_ISL, "scheduling algorithm to use")
schedule_maximize_band_depth)
ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args,
schedule_maximize_band_depth)
ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args,
- schedule_split_parallel)
ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args,
ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args,
- schedule_split_parallel)
ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args,
schedule_outer_zero_distance)
ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args,
schedule_outer_zero_distance)
int schedule_parametric;
int schedule_outer_zero_distance;
int schedule_maximize_band_depth;
int schedule_parametric;
int schedule_outer_zero_distance;
int schedule_maximize_band_depth;
- int schedule_split_parallel;
+ int schedule_split_scaled;
unsigned schedule_algorithm;
int schedule_fuse;
};
unsigned schedule_algorithm;
int schedule_fuse;
};
-/* If the schedule_split_parallel option is set and if the linear
- * parts of the scheduling rows for all nodes in the graphs are the same,
- * then split off the constant term from the linear part.
+/* If the schedule_split_scaled option is set and if the linear
+ * parts of the scheduling rows for all nodes in the graphs have
+ * a non-trivial common divisor, then split off the constant term
+ * from the linear part.
* The constant term is then placed in a separate band and
* The constant term is then placed in a separate band and
- * the linear part is simplified.
+ * the linear part is reduced.
-static int split_parallel(isl_ctx *ctx, struct isl_sched_graph *graph)
+static int split_scaled(isl_ctx *ctx, struct isl_sched_graph *graph)
- int equal = 1;
- int row, cols;
- struct isl_sched_node *node0;
+ int row;
+ isl_int gcd, gcd_i;
- if (!ctx->opt->schedule_split_parallel)
+ if (!ctx->opt->schedule_split_scaled)
return 0;
if (graph->n <= 1)
return 0;
return 0;
if (graph->n <= 1)
return 0;
- node0 = &graph->node[0];
- row = isl_mat_rows(node0->sched) - 1;
- cols = isl_mat_cols(node0->sched);
- for (i = 1; i < graph->n; ++i) {
+ isl_int_init(gcd);
+ isl_int_init(gcd_i);
+
+ isl_int_set_si(gcd, 0);
+
+ row = isl_mat_rows(graph->node[0].sched) - 1;
+
+ for (i = 0; i < graph->n; ++i) {
struct isl_sched_node *node = &graph->node[i];
struct isl_sched_node *node = &graph->node[i];
+ int cols = isl_mat_cols(node->sched);
- if (isl_mat_cols(node->sched) != cols)
- return 0;
- if (!isl_seq_eq(node0->sched->row[row] + 1,
- node->sched->row[row] + 1, cols - 1))
- return 0;
- if (equal &&
- isl_int_ne(node0->sched->row[row][0],
- node->sched->row[row][0]))
- equal = 0;
+ isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
+ isl_int_gcd(gcd, gcd, gcd_i);
+
+ isl_int_clear(gcd_i);
+
+ if (isl_int_cmp_si(gcd, 1) <= 0) {
+ isl_int_clear(gcd);
node->sched_map = NULL;
node->sched = isl_mat_add_zero_rows(node->sched, 1);
if (!node->sched)
node->sched_map = NULL;
node->sched = isl_mat_add_zero_rows(node->sched, 1);
if (!node->sched)
- return -1;
- isl_int_set(node->sched->row[row + 1][0],
- node->sched->row[row][0]);
- isl_int_set_si(node->sched->row[row][0], 0);
- node->sched = isl_mat_normalize_row(node->sched, row);
+ goto error;
+ isl_int_fdiv_r(node->sched->row[row + 1][0],
+ node->sched->row[row][0], gcd);
+ isl_int_fdiv_q(node->sched->row[row][0],
+ node->sched->row[row][0], gcd);
+ isl_int_mul(node->sched->row[row][0],
+ node->sched->row[row][0], gcd);
+ node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
node->band[graph->n_total_row] = graph->n_band;
}
graph->n_total_row++;
node->band[graph->n_total_row] = graph->n_band;
}
graph->n_total_row++;
+error:
+ isl_int_clear(gcd);
+ return -1;
}
/* Construct a schedule row for each node such that as many dependences
}
/* Construct a schedule row for each node such that as many dependences
if (update_schedule(graph, sol, 0, 0) < 0)
return -1;
if (update_schedule(graph, sol, 0, 0) < 0)
return -1;
- if (split_parallel(ctx, graph) < 0)
+ if (split_scaled(ctx, graph) < 0)
return -1;
return compute_next_band(ctx, graph);
return -1;
return compute_next_band(ctx, graph);