#include "isl/map.h"
#include "isl/options.h"
#include "isl/schedule.h"
+#include "isl/schedule_node.h"
#include "isl/space.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/DependenceInfo.h"
/// @return True, if we believe @p NewSchedule is an improvement for @p S.
bool isProfitableSchedule(Scop &S, __isl_keep isl_union_map *NewSchedule);
- static void extendScattering(Scop &S, unsigned NewDimensions);
-
- /// @brief Create a map that describes a n-dimensonal tiling.
- ///
- /// getTileMap creates a map from a n-dimensional scattering space into an
- /// 2*n-dimensional scattering space. The map describes a rectangular
- /// tiling.
- ///
- /// Example:
- /// scheduleDimensions = 2, parameterDimensions = 1, TileSizes = <32, 64>
- ///
- /// tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
- /// t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
- /// t1 % 64 = 0 and t1 <= s1 < t1 + 64}
- ///
- /// Before tiling:
- ///
- /// for (i = 0; i < N; i++)
- /// for (j = 0; j < M; j++)
- /// S(i,j)
- ///
- /// After tiling:
- ///
- /// for (t_i = 0; t_i < N; i+=32)
- /// for (t_j = 0; t_j < M; j+=64)
- /// for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
- /// for (j = t_j; j < t_j + 64; j++) | Known that M % 64 = 0
- /// S(i,j)
- ///
- static isl_basic_map *getTileMap(isl_ctx *ctx, int scheduleDimensions);
-
- /// @brief Get the schedule for this band.
- ///
- /// Polly applies transformations like tiling on top of the isl calculated
- /// value. This can influence the number of scheduling dimension. The
- /// number of schedule dimensions is returned in the parameter 'Dimension'.
- static isl_union_map *getScheduleForBand(isl_band *Band, int *Dimensions);
-
/// @brief Create a map that pre-vectorizes one scheduling dimension.
///
/// getPrevectorMap creates a map that maps each input dimension to the same
int ScheduleDimensions,
int VectorWidth = 4);
- /// @brief Get the scheduling map for a list of bands.
+ /// @brief Apply additional optimizations on the bands in the schedule tree.
+ ///
+ /// We are looking for an innermost band node and apply the following
+ /// transformations:
+ ///
+ /// - Tile the band
+ /// - if the band is tileable
+ /// - if the band has more than one loop dimension
+ ///
+ /// - Prevectorize the point loop of the tile
+ /// - if vectorization is enabled
///
- /// Walk recursively the forest of bands to combine the schedules of the
- /// individual bands to the overall schedule. In case tiling is requested,
- /// the individual bands are tiled.
- static isl_union_map *getScheduleForBandList(isl_band_list *BandList);
+ /// @param Node The schedule node to (possibly) optimize.
+ /// @param User A pointer to forward some use information (currently unused).
+ static isl_schedule_node *optimizeBand(isl_schedule_node *Node, void *User);
static __isl_give isl_union_map *
getScheduleMap(__isl_keep isl_schedule *Schedule);
char IslScheduleOptimizer::ID = 0;
-void IslScheduleOptimizer::extendScattering(Scop &S, unsigned NewDimensions) {
- for (ScopStmt *Stmt : S) {
- unsigned OldDimensions = Stmt->getNumScattering();
- isl_space *Space;
- isl_map *Map, *New;
-
- Space = isl_space_alloc(Stmt->getIslCtx(), 0, OldDimensions, NewDimensions);
- Map = isl_map_universe(Space);
-
- for (unsigned i = 0; i < OldDimensions; i++)
- Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i);
-
- for (unsigned i = OldDimensions; i < NewDimensions; i++)
- Map = isl_map_fix_si(Map, isl_dim_out, i, 0);
-
- Map = isl_map_align_params(Map, S.getParamSpace());
- New = isl_map_apply_range(Stmt->getScattering(), Map);
- Stmt->setScattering(New);
- }
-}
-
-isl_basic_map *IslScheduleOptimizer::getTileMap(isl_ctx *ctx,
- int scheduleDimensions) {
- // We construct
- //
- // tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
- // s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 64 and
- // s1 = a1 * 64 and s1 = p1 and t1 <= p1 < t1 + 64}
- //
- // and project out the auxilary dimensions a0 and a1.
- isl_space *Space =
- isl_space_alloc(ctx, 0, scheduleDimensions, scheduleDimensions * 3);
- isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space));
-
- isl_local_space *LocalSpace = isl_local_space_from_space(Space);
-
- for (int x = 0; x < scheduleDimensions; x++) {
- int sX = x;
- int tX = x;
- int pX = scheduleDimensions + x;
- int aX = 2 * scheduleDimensions + x;
- int tileSize = (int)TileSizes.size() > x ? TileSizes[x] : DefaultTileSize;
- assert(tileSize > 0 && "Invalid tile size");
-
- isl_constraint *c;
-
- // sX = aX * tileSize;
- c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
- isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1);
- isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize);
- tileMap = isl_basic_map_add_constraint(tileMap, c);
-
- // pX = sX;
- c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
- isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
- isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1);
- tileMap = isl_basic_map_add_constraint(tileMap, c);
-
- // tX <= pX
- c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
- isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
- isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1);
- tileMap = isl_basic_map_add_constraint(tileMap, c);
-
- // pX <= tX + (tileSize - 1)
- c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
- isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1);
- isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1);
- isl_constraint_set_constant_si(c, tileSize - 1);
- tileMap = isl_basic_map_add_constraint(tileMap, c);
- }
-
- // Project out auxilary dimensions.
- //
- // The auxilary dimensions are transformed into existentially quantified ones.
- // This reduces the number of visible scattering dimensions and allows Cloog
- // to produces better code.
- tileMap = isl_basic_map_project_out(
- tileMap, isl_dim_out, 2 * scheduleDimensions, scheduleDimensions);
- isl_local_space_free(LocalSpace);
- return tileMap;
-}
-
-isl_union_map *IslScheduleOptimizer::getScheduleForBand(isl_band *Band,
- int *Dimensions) {
- isl_union_map *PartialSchedule;
- isl_ctx *ctx;
- isl_space *Space;
- isl_basic_map *TileMap;
- isl_union_map *TileUMap;
-
- PartialSchedule = isl_band_get_partial_schedule(Band);
- *Dimensions = isl_band_n_member(Band);
-
- if (DisableTiling)
- return PartialSchedule;
-
- // It does not make any sense to tile a band with just one dimension.
- if (*Dimensions == 1)
- return PartialSchedule;
-
- ctx = isl_union_map_get_ctx(PartialSchedule);
- Space = isl_union_map_get_space(PartialSchedule);
-
- TileMap = getTileMap(ctx, *Dimensions);
- TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap));
- TileUMap = isl_union_map_align_params(TileUMap, Space);
- *Dimensions = 2 * *Dimensions;
-
- return isl_union_map_apply_range(PartialSchedule, TileUMap);
-}
-
__isl_give isl_map *
IslScheduleOptimizer::getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
int ScheduleDimensions, int VectorWidth) {
return TilingMap;
}
-isl_union_map *
-IslScheduleOptimizer::getScheduleForBandList(isl_band_list *BandList) {
- int NumBands;
- isl_union_map *Schedule;
- isl_ctx *ctx;
-
- ctx = isl_band_list_get_ctx(BandList);
- NumBands = isl_band_list_n_band(BandList);
- Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0));
-
- for (int i = 0; i < NumBands; i++) {
- isl_band *Band;
- isl_union_map *PartialSchedule;
- int ScheduleDimensions;
- isl_space *Space;
-
- Band = isl_band_list_get_band(BandList, i);
- PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions);
- Space = isl_union_map_get_space(PartialSchedule);
-
- if (isl_band_has_children(Band)) {
- isl_band_list *Children;
- isl_union_map *SuffixSchedule;
-
- Children = isl_band_get_children(Band);
- SuffixSchedule = getScheduleForBandList(Children);
- PartialSchedule =
- isl_union_map_flat_range_product(PartialSchedule, SuffixSchedule);
- isl_band_list_free(Children);
- } else if (PollyVectorizerChoice != VECTORIZER_NONE) {
- // In case we are at the innermost band, we try to prepare for
- // vectorization. This means, we look for the innermost parallel loop
- // and strip mine this loop to the innermost level using a strip-mine
- // factor corresponding to the number of vector iterations.
- int NumDims = isl_band_n_member(Band);
- for (int j = NumDims - 1; j >= 0; j--) {
- if (isl_band_member_is_coincident(Band, j)) {
- isl_map *TileMap;
- isl_union_map *TileUMap;
-
- TileMap = getPrevectorMap(ctx, ScheduleDimensions - NumDims + j,
- ScheduleDimensions);
- TileUMap = isl_union_map_from_map(TileMap);
- TileUMap =
- isl_union_map_align_params(TileUMap, isl_space_copy(Space));
- PartialSchedule =
- isl_union_map_apply_range(PartialSchedule, TileUMap);
- break;
- }
- }
- }
+isl_schedule_node *IslScheduleOptimizer::optimizeBand(isl_schedule_node *Node,
+ void *User) {
+ if (isl_schedule_node_get_type(Node) != isl_schedule_node_band)
+ return Node;
+
+ if (isl_schedule_node_n_children(Node) != 1)
+ return Node;
- Schedule = isl_union_map_union(Schedule, PartialSchedule);
+ if (!isl_schedule_node_band_get_permutable(Node))
+ return Node;
- isl_band_free(Band);
+ auto Space = isl_schedule_node_band_get_space(Node);
+ auto Dims = isl_space_dim(Space, isl_dim_set);
+
+ if (Dims <= 1) {
isl_space_free(Space);
+ return Node;
}
- return Schedule;
+ auto Child = isl_schedule_node_get_child(Node, 0);
+ auto Type = isl_schedule_node_get_type(Child);
+ isl_schedule_node_free(Child);
+
+ if (Type != isl_schedule_node_leaf) {
+ isl_space_free(Space);
+ return Node;
+ }
+
+ auto Sizes = isl_multi_val_zero(Space);
+ auto Ctx = isl_schedule_node_get_ctx(Node);
+
+ for (unsigned i = 0; i < Dims; i++) {
+ auto tileSize = TileSizes.size() > i ? TileSizes[i] : DefaultTileSize;
+ Sizes = isl_multi_val_set_val(Sizes, i, isl_val_int_from_si(Ctx, tileSize));
+ }
+
+ auto Res = isl_schedule_node_band_tile(Node, Sizes);
+
+ if (PollyVectorizerChoice == VECTORIZER_NONE)
+ return Res;
+
+ Child = isl_schedule_node_get_child(Res, 0);
+ auto ChildSchedule = isl_schedule_node_band_get_partial_schedule(Child);
+
+ for (int i = Dims - 1; i >= 0; i--) {
+ if (isl_schedule_node_band_member_get_coincident(Child, i)) {
+ auto TileMap = IslScheduleOptimizer::getPrevectorMap(Ctx, i, Dims);
+ auto TileUMap = isl_union_map_from_map(TileMap);
+ auto ChildSchedule2 = isl_union_map_apply_range(
+ isl_union_map_from_multi_union_pw_aff(ChildSchedule), TileUMap);
+ ChildSchedule = isl_multi_union_pw_aff_from_union_map(ChildSchedule2);
+ break;
+ }
+ }
+
+ isl_schedule_node_free(Res);
+ Res = isl_schedule_node_delete(Child);
+ Res = isl_schedule_node_insert_partial_schedule(Res, ChildSchedule);
+ return Res;
}
__isl_give isl_union_map *
IslScheduleOptimizer::getScheduleMap(__isl_keep isl_schedule *Schedule) {
- isl_band_list *BandList = isl_schedule_get_band_forest(Schedule);
- isl_union_map *ScheduleMap = getScheduleForBandList(BandList);
- isl_band_list_free(BandList);
+ isl_schedule_node *Root = isl_schedule_get_root(Schedule);
+ Root = isl_schedule_node_map_descendant(
+ Root, IslScheduleOptimizer::optimizeBand, NULL);
+ auto ScheduleMap = isl_schedule_node_get_subtree_schedule_union_map(Root);
+ ScheduleMap = isl_union_map_detect_equalities(ScheduleMap);
+ isl_schedule_node_free(Root);
return ScheduleMap;
}
Stmt->setScattering(StmtSchedule);
}
+ isl_schedule_free(Schedule);
isl_union_map_free(NewSchedule);
- LastSchedule = Schedule;
-
- unsigned MaxScatDims = 0;
-
- for (ScopStmt *Stmt : S)
- MaxScatDims = std::max(Stmt->getNumScattering(), MaxScatDims);
-
- extendScattering(S, MaxScatDims);
return false;
}
--- /dev/null
+; RUN: opt %loadPolly -polly-opt-isl -polly-ast -analyze < %s | FileCheck %s
+;
+; void jacobi1d(long T, long N, float *A, float *B) {
+; long t, i, j;
+; for (t = 0; t < T; t++) {
+; for (i = 1; i < N - 1; i++)
+; B[i] = 0.33333 * (A[i - 1] + A[i] + A[i + 1]);
+; for (j = 1; j < N - 1; j++)
+; A[j] = 0.33333 * (B[i - 1] + B[i] + B[i + 1]);
+; }
+; }
+
+; Verify that we do not tile bands that have just a single dimension.
+
+; CHECK: for (int c0 = 0; c0 < T; c0 += 1) {
+; CHECK: for (int c2 = 0; c2 < N - 2; c2 += 1)
+; CHECK: Stmt_for_body3(c0, c2);
+; CHECK: for (int c2 = 0; c2 < N - 2; c2 += 1)
+; CHECK: Stmt_for_body15(c0, c2);
+; CHECK: }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @jacobi1d(i64 %T, i64 %N, float* %A, float* %B) {
+entry:
+ %tmp = add i64 %N, -1
+ %tmp1 = icmp sgt i64 %tmp, 1
+ %smax = select i1 %tmp1, i64 %tmp, i64 1
+ br label %for.cond
+
+for.cond: ; preds = %for.inc30, %entry
+ %t.0 = phi i64 [ 0, %entry ], [ %inc31, %for.inc30 ]
+ %cmp = icmp slt i64 %t.0, %T
+ br i1 %cmp, label %for.body, label %for.end32
+
+for.body: ; preds = %for.cond
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ %i.0 = phi i64 [ 1, %for.body ], [ %inc, %for.inc ]
+ %sub = add nsw i64 %N, -1
+ %cmp2 = icmp slt i64 %i.0, %sub
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ %sub4 = add nsw i64 %i.0, -1
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %sub4
+ %tmp2 = load float, float* %arrayidx, align 4
+ %arrayidx5 = getelementptr inbounds float, float* %A, i64 %i.0
+ %tmp3 = load float, float* %arrayidx5, align 4
+ %add = fadd float %tmp2, %tmp3
+ %add6 = add nuw nsw i64 %i.0, 1
+ %arrayidx7 = getelementptr inbounds float, float* %A, i64 %add6
+ %tmp4 = load float, float* %arrayidx7, align 4
+ %add8 = fadd float %add, %tmp4
+ %conv = fpext float %add8 to double
+ %mul = fmul double %conv, 3.333300e-01
+ %conv9 = fptrunc double %mul to float
+ %arrayidx10 = getelementptr inbounds float, float* %B, i64 %i.0
+ store float %conv9, float* %arrayidx10, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %inc = add nuw nsw i64 %i.0, 1
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.cond11
+
+for.cond11: ; preds = %for.inc27, %for.end
+ %j.0 = phi i64 [ 1, %for.end ], [ %inc28, %for.inc27 ]
+ %sub12 = add nsw i64 %N, -1
+ %cmp13 = icmp slt i64 %j.0, %sub12
+ br i1 %cmp13, label %for.body15, label %for.end29
+
+for.body15: ; preds = %for.cond11
+ %sub16 = add nsw i64 %smax, -1
+ %arrayidx17 = getelementptr inbounds float, float* %B, i64 %sub16
+ %tmp5 = load float, float* %arrayidx17, align 4
+ %arrayidx18 = getelementptr inbounds float, float* %B, i64 %smax
+ %tmp6 = load float, float* %arrayidx18, align 4
+ %add19 = fadd float %tmp5, %tmp6
+ %add20 = add nsw i64 %smax, 1
+ %arrayidx21 = getelementptr inbounds float, float* %B, i64 %add20
+ %tmp7 = load float, float* %arrayidx21, align 4
+ %add22 = fadd float %add19, %tmp7
+ %conv23 = fpext float %add22 to double
+ %mul24 = fmul double %conv23, 3.333300e-01
+ %conv25 = fptrunc double %mul24 to float
+ %arrayidx26 = getelementptr inbounds float, float* %A, i64 %j.0
+ store float %conv25, float* %arrayidx26, align 4
+ br label %for.inc27
+
+for.inc27: ; preds = %for.body15
+ %inc28 = add nuw nsw i64 %j.0, 1
+ br label %for.cond11
+
+for.end29: ; preds = %for.cond11
+ br label %for.inc30
+
+for.inc30: ; preds = %for.end29
+ %inc31 = add nuw nsw i64 %t.0, 1
+ br label %for.cond
+
+for.end32: ; preds = %for.cond
+ ret void
+}