1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
30 #include <isl/options.h>
33 #include "coretypes.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
42 #include "graphite-poly.h"
44 static isl_union_set *
45 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
49 isl_space *space = isl_set_get_space (scop->context);
50 isl_union_set *res = isl_union_set_empty (space);
52 FOR_EACH_VEC_ELT (poly_bb_p, scop->bbs, i, pbb)
53 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
58 static isl_union_map *
59 scop_get_dependences (scop_p scop)
61 isl_union_map *dependences;
64 compute_deps (scop, SCOP_BBS (scop),
65 &scop->must_raw, &scop->may_raw,
66 &scop->must_raw_no_source, &scop->may_raw_no_source,
67 &scop->must_war, &scop->may_war,
68 &scop->must_war_no_source, &scop->may_war_no_source,
69 &scop->must_waw, &scop->may_waw,
70 &scop->must_waw_no_source, &scop->may_waw_no_source);
72 dependences = isl_union_map_copy (scop->must_raw);
73 dependences = isl_union_map_union (dependences,
74 isl_union_map_copy (scop->must_war));
75 dependences = isl_union_map_union (dependences,
76 isl_union_map_copy (scop->must_waw));
77 dependences = isl_union_map_union (dependences,
78 isl_union_map_copy (scop->may_raw));
79 dependences = isl_union_map_union (dependences,
80 isl_union_map_copy (scop->may_war));
81 dependences = isl_union_map_union (dependences,
82 isl_union_map_copy (scop->may_waw));
87 /* getTileMap - Create a map that describes a n-dimensonal tiling.
89 getTileMap creates a map from a n-dimensional scattering space into an
90 2*n-dimensional scattering space. The map describes a rectangular tiling.
93 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
95 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
96 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
97 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
101 for (i = 0; i < N; i++)
102 for (j = 0; j < M; j++)
107 for (t_i = 0; t_i < N; i+=32)
108 for (t_j = 0; t_j < M; j+=32)
109 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
110 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
114 static isl_basic_map *
115 getTileMap(isl_ctx *ctx, int scheduleDimensions, int tileSize)
120 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
121 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
122 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
124 and project out the auxilary dimensions a0 and a1. */
125 isl_space *Space = isl_space_alloc(ctx, 0, scheduleDimensions,
126 scheduleDimensions * 3);
127 isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space));
129 isl_local_space *LocalSpace = isl_local_space_from_space(Space);
131 for (x = 0; x < scheduleDimensions; x++)
135 int pX = scheduleDimensions + x;
136 int aX = 2 * scheduleDimensions + x;
140 /* sX = aX * tileSize; */
141 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
142 isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1);
143 isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize);
144 tileMap = isl_basic_map_add_constraint(tileMap, c);
147 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
148 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
149 isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1);
150 tileMap = isl_basic_map_add_constraint(tileMap, c);
153 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
154 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
155 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1);
156 tileMap = isl_basic_map_add_constraint(tileMap, c);
158 /* pX <= tX + (tileSize - 1) */
159 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
160 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1);
161 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1);
162 isl_constraint_set_constant_si(c, tileSize - 1);
163 tileMap = isl_basic_map_add_constraint(tileMap, c);
166 /* Project out auxilary dimensions.
168 The auxilary dimensions are transformed into existentially quantified ones.
169 This reduces the number of visible scattering dimensions and allows Cloog
170 to produces better code. */
171 tileMap = isl_basic_map_project_out(tileMap, isl_dim_out,
172 2 * scheduleDimensions,
174 isl_local_space_free(LocalSpace);
178 /* getScheduleForBand - Get the schedule for this band.
180 Polly applies transformations like tiling on top of the isl calculated value.
181 This can influence the number of scheduling dimension. The number of
182 schedule dimensions is returned in the parameter 'Dimension'. */
183 static bool DisableTiling = false;
185 static isl_union_map *
186 getScheduleForBand(isl_band *Band, int *Dimensions)
188 isl_union_map *PartialSchedule;
191 isl_basic_map *TileMap;
192 isl_union_map *TileUMap;
194 PartialSchedule = isl_band_get_partial_schedule(Band);
195 *Dimensions = isl_band_n_member(Band);
198 return PartialSchedule;
200 /* It does not make any sense to tile a band with just one dimension. */
201 if (*Dimensions == 1)
202 return PartialSchedule;
204 ctx = isl_union_map_get_ctx(PartialSchedule);
205 Space = isl_union_map_get_space(PartialSchedule);
207 TileMap = getTileMap(ctx, *Dimensions, 32);
208 TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap));
209 TileUMap = isl_union_map_align_params(TileUMap, Space);
210 *Dimensions = 2 * *Dimensions;
212 return isl_union_map_apply_range(PartialSchedule, TileUMap);
215 /* Create a map that pre-vectorizes one scheduling dimension.
217 getPrevectorMap creates a map that maps each input dimension to the same
218 output dimension, except for the dimension DimToVectorize. DimToVectorize is
219 strip mined by 'VectorWidth' and the newly created point loop of
220 DimToVectorize is moved to the innermost level.
222 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
224 | Before transformation
228 | for (i = 0; i < 128; i++)
229 | for (j = 0; j < 128; j++)
233 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
235 | After transformation:
237 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
239 | for (it = 0; it < 128; it+=4)
240 | for (j = 0; j < 128; j++)
241 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
244 The goal of this transformation is to create a trivially vectorizable loop.
245 This means a parallel loop at the innermost level that has a constant number
246 of iterations corresponding to the target vector width.
248 This transformation creates a loop at the innermost level. The loop has a
249 constant number of iterations, if the number of loop iterations at
250 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
251 currently constant and not yet target specific. This function does not reason
252 about parallelism. */
254 getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
255 int ScheduleDimensions,
259 isl_local_space *LocalSpace, *LocalSpaceRange;
264 int PointDimension; /* ip */
265 int TileDimension; /* it */
266 isl_int VectorWidthMP;
269 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
271 Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
272 TilingMap = isl_map_universe(isl_space_copy(Space));
273 LocalSpace = isl_local_space_from_space(Space);
274 PointDimension = ScheduleDimensions;
275 TileDimension = DimToVectorize;
277 /* Create an identity map for everything except DimToVectorize and map
278 DimToVectorize to the point loop at the innermost dimension. */
279 for (i = 0; i < ScheduleDimensions; i++)
281 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
282 isl_constraint_set_coefficient_si(c, isl_dim_in, i, -1);
284 if (i == DimToVectorize)
285 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
287 isl_constraint_set_coefficient_si(c, isl_dim_out, i, 1);
289 TilingMap = isl_map_add_constraint(TilingMap, c);
292 /* it % 'VectorWidth' = 0 */
293 LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace));
294 Aff = isl_aff_zero_on_domain(LocalSpaceRange);
295 Aff = isl_aff_set_constant_si(Aff, VectorWidth);
296 Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1);
297 isl_int_init(VectorWidthMP);
298 isl_int_set_si(VectorWidthMP, VectorWidth);
299 Aff = isl_aff_mod(Aff, VectorWidthMP);
300 isl_int_clear(VectorWidthMP);
301 Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff));
302 TilingMap = isl_map_intersect_range(TilingMap, Modulo);
305 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
306 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, -1);
307 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
308 TilingMap = isl_map_add_constraint(TilingMap, c);
310 /* ip <= it + ('VectorWidth' - 1) */
311 c = isl_inequality_alloc(LocalSpace);
312 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1);
313 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1);
314 isl_constraint_set_constant_si(c, VectorWidth - 1);
315 TilingMap = isl_map_add_constraint(TilingMap, c);
317 isl_map_dump(TilingMap);
322 static bool EnablePollyVector = false;
324 /* getScheduleForBandList - Get the scheduling map for a list of bands.
326 We walk recursively the forest of bands to combine the schedules of the
327 individual bands to the overall schedule. In case tiling is requested,
328 the individual bands are tiled. */
329 static isl_union_map *
330 getScheduleForBandList(isl_band_list *BandList)
333 isl_union_map *Schedule;
336 ctx = isl_band_list_get_ctx(BandList);
337 NumBands = isl_band_list_n_band(BandList);
338 Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0));
340 for (i = 0; i < NumBands; i++)
343 isl_union_map *PartialSchedule;
344 int ScheduleDimensions;
347 Band = isl_band_list_get_band(BandList, i);
348 PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions);
349 Space = isl_union_map_get_space(PartialSchedule);
351 if (isl_band_has_children(Band))
353 isl_band_list *Children;
354 isl_union_map *SuffixSchedule;
356 Children = isl_band_get_children(Band);
357 SuffixSchedule = getScheduleForBandList(Children);
358 PartialSchedule = isl_union_map_flat_range_product(PartialSchedule,
360 isl_band_list_free(Children);
362 else if (EnablePollyVector)
364 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
366 if (isl_band_member_is_zero_distance(Band, i))
369 isl_union_map *TileUMap;
371 TileMap = getPrevectorMap(ctx, i, ScheduleDimensions, 4);
372 TileUMap = isl_union_map_from_map(TileMap);
373 TileUMap = isl_union_map_align_params(TileUMap,
374 isl_space_copy(Space));
375 PartialSchedule = isl_union_map_apply_range(PartialSchedule,
382 Schedule = isl_union_map_union(Schedule, PartialSchedule);
385 isl_space_free(Space);
391 static isl_union_map *
392 getScheduleMap(isl_schedule *Schedule)
394 isl_band_list *BandList = isl_schedule_get_band_forest(Schedule);
395 isl_union_map *ScheduleMap = getScheduleForBandList(BandList);
396 isl_band_list_free(BandList);
401 getSingleMap(__isl_take isl_map *map, void *user)
403 isl_map **singleMap = (isl_map **) user;
410 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
415 FOR_EACH_VEC_ELT (poly_bb_p, scop->bbs, i, pbb)
417 isl_set *domain = isl_set_copy (pbb->domain);
418 isl_union_map *stmtBand;
419 isl_map *stmtSchedule;
421 stmtBand = isl_union_map_intersect_domain(isl_union_map_copy(schedule_map),
422 isl_union_set_from_set(domain));
423 isl_union_map_foreach_map(stmtBand, getSingleMap, &stmtSchedule);
424 isl_map_free(pbb->transformed);
425 pbb->transformed = stmtSchedule;
426 isl_union_map_free(stmtBand);
430 static const int CONSTANT_BOUND = 20;
433 optimize_isl (scop_p scop)
436 isl_schedule *schedule;
437 isl_union_set *domain;
438 isl_union_map *validity, *proximity, *dependences;
439 isl_union_map *schedule_map;
441 domain = scop_get_domains (scop);
442 dependences = scop_get_dependences (scop);
443 dependences = isl_union_map_gist_domain(dependences,
444 isl_union_set_copy(domain));
445 dependences = isl_union_map_gist_range(dependences,
446 isl_union_set_copy(domain));
447 validity = dependences;
449 proximity = isl_union_map_copy (validity);
451 isl_options_set_schedule_max_constant_term(scop->ctx, CONSTANT_BOUND);
452 isl_options_set_schedule_maximize_band_depth(scop->ctx, 1);
453 isl_options_set_schedule_fuse(scop->ctx, ISL_SCHEDULE_FUSE_MIN);
454 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_CONTINUE);
455 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
456 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_ABORT);
461 schedule_map = getScheduleMap (schedule);
463 apply_schedule_map_to_scop (scop, schedule_map);
465 isl_schedule_free (schedule);
466 isl_union_map_free (schedule_map);