1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
24 #include <isl/constraint.h>
26 #include <isl/union_set.h>
28 #include <isl/union_map.h>
29 #include <isl/schedule.h>
32 #include <isl/options.h>
36 #include "coretypes.h"
40 #include "double-int.h"
48 #include "fold-const.h"
51 #include "hard-reg-set.h"
54 #include "dominance.h"
56 #include "basic-block.h"
57 #include "tree-ssa-alias.h"
58 #include "internal-fn.h"
59 #include "gimple-expr.h"
62 #include "gimple-iterator.h"
63 #include "tree-ssa-loop.h"
66 #include "tree-chrec.h"
67 #include "tree-data-ref.h"
68 #include "tree-scalar-evolution.h"
72 #include "graphite-poly.h"
74 static isl_union_set *
75 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
79 isl_space *space = isl_set_get_space (scop->context);
80 isl_union_set *res = isl_union_set_empty (space);
82 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
83 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
88 /* getTileMap - Create a map that describes a n-dimensonal tiling.
90 getTileMap creates a map from a n-dimensional scattering space into an
91 2*n-dimensional scattering space. The map describes a rectangular tiling.
94 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
96 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
97 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
98 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
102 for (i = 0; i < N; i++)
103 for (j = 0; j < M; j++)
108 for (t_i = 0; t_i < N; i+=32)
109 for (t_j = 0; t_j < M; j+=32)
110 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
111 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
115 static isl_basic_map *
116 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
121 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
122 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
123 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
125 and project out the auxilary dimensions a0 and a1. */
126 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
127 scheduleDimensions * 3);
128 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
130 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
132 for (x = 0; x < scheduleDimensions; x++)
136 int pX = scheduleDimensions + x;
137 int aX = 2 * scheduleDimensions + x;
141 /* sX = aX * tileSize; */
142 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
143 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
144 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
145 tileMap = isl_basic_map_add_constraint (tileMap, c);
148 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
149 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
150 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
151 tileMap = isl_basic_map_add_constraint (tileMap, c);
154 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
155 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
156 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
157 tileMap = isl_basic_map_add_constraint (tileMap, c);
159 /* pX <= tX + (tileSize - 1) */
160 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
161 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
162 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
163 isl_constraint_set_constant_si (c, tileSize - 1);
164 tileMap = isl_basic_map_add_constraint (tileMap, c);
167 /* Project out auxiliary dimensions.
169 The auxiliary dimensions are transformed into existentially quantified ones.
170 This reduces the number of visible scattering dimensions and allows Cloog
171 to produces better code. */
172 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
173 2 * scheduleDimensions,
175 isl_local_space_free (LocalSpace);
179 /* getScheduleForBand - Get the schedule for this band.
181 Polly applies transformations like tiling on top of the isl calculated value.
182 This can influence the number of scheduling dimension. The number of
183 schedule dimensions is returned in the parameter 'Dimension'. */
184 static bool DisableTiling = false;
186 static isl_union_map *
187 getScheduleForBand (isl_band *Band, int *Dimensions)
189 isl_union_map *PartialSchedule;
192 isl_basic_map *TileMap;
193 isl_union_map *TileUMap;
195 PartialSchedule = isl_band_get_partial_schedule (Band);
196 *Dimensions = isl_band_n_member (Band);
198 if (DisableTiling || flag_loop_unroll_jam)
199 return PartialSchedule;
201 /* It does not make any sense to tile a band with just one dimension. */
202 if (*Dimensions == 1)
203 return PartialSchedule;
205 ctx = isl_union_map_get_ctx (PartialSchedule);
206 Space = isl_union_map_get_space (PartialSchedule);
208 TileMap = getTileMap (ctx, *Dimensions, 32);
209 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
210 TileUMap = isl_union_map_align_params (TileUMap, Space);
211 *Dimensions = 2 * *Dimensions;
213 return isl_union_map_apply_range (PartialSchedule, TileUMap);
216 /* Create a map that pre-vectorizes one scheduling dimension.
218 getPrevectorMap creates a map that maps each input dimension to the same
219 output dimension, except for the dimension DimToVectorize. DimToVectorize is
220 strip mined by 'VectorWidth' and the newly created point loop of
221 DimToVectorize is moved to the innermost level.
223 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
225 | Before transformation
229 | for (i = 0; i < 128; i++)
230 | for (j = 0; j < 128; j++)
234 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
236 | After transformation:
238 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
240 | for (it = 0; it < 128; it+=4)
241 | for (j = 0; j < 128; j++)
242 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
245 The goal of this transformation is to create a trivially vectorizable loop.
246 This means a parallel loop at the innermost level that has a constant number
247 of iterations corresponding to the target vector width.
249 This transformation creates a loop at the innermost level. The loop has a
250 constant number of iterations, if the number of loop iterations at
251 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
252 currently constant and not yet target specific. This function does not reason
257 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
258 int ScheduleDimensions,
262 isl_local_space *LocalSpace, *LocalSpaceRange;
267 int PointDimension; /* ip */
268 int TileDimension; /* it */
269 isl_val *VectorWidthMP;
272 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
274 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
275 TilingMap = isl_map_universe (isl_space_copy (Space));
276 LocalSpace = isl_local_space_from_space (Space);
277 PointDimension = ScheduleDimensions;
278 TileDimension = DimToVectorize;
280 /* Create an identity map for everything except DimToVectorize and map
281 DimToVectorize to the point loop at the innermost dimension. */
282 for (i = 0; i < ScheduleDimensions; i++)
284 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
285 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
287 if (i == DimToVectorize)
288 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
290 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
292 TilingMap = isl_map_add_constraint (TilingMap, c);
295 /* it % 'VectorWidth' = 0 */
296 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
297 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
298 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
299 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
301 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
302 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
303 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
304 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
307 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
308 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
309 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
310 TilingMap = isl_map_add_constraint (TilingMap, c);
312 /* ip <= it + ('VectorWidth' - 1) */
313 c = isl_inequality_alloc (LocalSpace);
314 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
315 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
316 isl_constraint_set_constant_si (c, VectorWidth - 1);
317 TilingMap = isl_map_add_constraint (TilingMap, c);
322 /* Compute an auxiliary map to getPrevectorMap, for computing the separating
323 class defined by full tiles. Used in graphite_isl_ast_to_gimple.c to set the
324 corresponding option for AST build.
326 The map (for VectorWidth=4):
328 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i and
331 The image of this map is the separation class. The range of this map includes
332 all the i multiple of 4 in the domain such as i + 3 is in the domain too.
336 getPrevectorMap_full (isl_ctx *ctx, int DimToVectorize,
337 int ScheduleDimensions,
341 isl_local_space *LocalSpace, *LocalSpaceRange;
346 int PointDimension; /* ip */
347 int TileDimension; /* it */
348 isl_val *VectorWidthMP;
351 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
353 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
354 TilingMap = isl_map_universe (isl_space_copy (Space));
355 LocalSpace = isl_local_space_from_space (Space);
356 PointDimension = ScheduleDimensions;
357 TileDimension = DimToVectorize;
359 /* Create an identity map for everything except DimToVectorize and the
361 for (i = 0; i < ScheduleDimensions; i++)
363 if (i == DimToVectorize)
366 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
368 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
369 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
371 TilingMap = isl_map_add_constraint (TilingMap, c);
374 /* it % 'VectorWidth' = 0 */
375 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
376 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
377 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
378 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
380 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
381 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
382 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
383 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
385 /* it + ('VectorWidth' - 1) = i0 */
386 c = isl_equality_alloc (isl_local_space_copy(LocalSpace));
387 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension,-1);
388 isl_constraint_set_coefficient_si (c, isl_dim_in, TileDimension, 1);
389 isl_constraint_set_constant_si (c, -VectorWidth + 1);
390 TilingMap = isl_map_add_constraint (TilingMap, c);
393 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
394 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
395 isl_constraint_set_constant_si (c, 0);
396 TilingMap = isl_map_add_constraint (TilingMap, c);
399 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
400 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
401 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
402 TilingMap = isl_map_add_constraint (TilingMap, c);
404 /* ip <= it + ('VectorWidth' - 1) */
405 c = isl_inequality_alloc (LocalSpace);
406 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
407 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
408 isl_constraint_set_constant_si (c, VectorWidth - 1);
409 TilingMap = isl_map_add_constraint (TilingMap, c);
414 static bool EnablePollyVector = false;
416 /* getScheduleForBandList - Get the scheduling map for a list of bands.
418 We walk recursively the forest of bands to combine the schedules of the
419 individual bands to the overall schedule. In case tiling is requested,
420 the individual bands are tiled.
421 For unroll and jam the map the schedule for full tiles of the unrolled
422 dimnesion is computed. */
423 static isl_union_map *
424 getScheduleForBandList (isl_band_list *BandList, isl_union_map **map_sepcl)
427 isl_union_map *Schedule;
430 ctx = isl_band_list_get_ctx (BandList);
431 NumBands = isl_band_list_n_band (BandList);
432 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
434 for (i = 0; i < NumBands; i++)
437 isl_union_map *PartialSchedule;
438 int ScheduleDimensions;
441 isl_union_map *PartialSchedule_f;
443 Band = isl_band_list_get_band (BandList, i);
444 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
445 Space = isl_union_map_get_space (PartialSchedule);
447 PartialSchedule_f = NULL;
449 if (isl_band_has_children (Band))
451 isl_band_list *Children;
452 isl_union_map *SuffixSchedule;
454 Children = isl_band_get_children (Band);
455 SuffixSchedule = getScheduleForBandList (Children, map_sepcl);
456 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
458 isl_band_list_free (Children);
460 else if (EnablePollyVector || flag_loop_unroll_jam)
465 depth = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
467 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
469 if (flag_loop_unroll_jam && (i != (ScheduleDimensions - depth)))
472 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
473 if (isl_band_member_is_coincident (Band, i))
475 if (isl_band_member_is_zero_distance (Band, i))
479 isl_union_map *TileUMap;
482 stride = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE);
484 TileMap = getPrevectorMap_full (ctx, i, ScheduleDimensions,
486 TileUMap = isl_union_map_from_map (TileMap);
487 TileUMap = isl_union_map_align_params
488 (TileUMap, isl_space_copy (Space));
489 PartialSchedule_f = isl_union_map_apply_range
490 (isl_union_map_copy (PartialSchedule), TileUMap);
492 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, stride);
493 TileUMap = isl_union_map_from_map (TileMap);
494 TileUMap = isl_union_map_align_params
495 (TileUMap, isl_space_copy (Space));
496 PartialSchedule = isl_union_map_apply_range
497 (PartialSchedule, TileUMap);
502 Schedule = isl_union_map_union (Schedule,
503 isl_union_map_copy(PartialSchedule));
505 isl_band_free (Band);
506 isl_space_free (Space);
508 if (!flag_loop_unroll_jam)
510 isl_union_map_free (PartialSchedule);
514 if (PartialSchedule_f)
516 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule_f);
517 isl_union_map_free (PartialSchedule);
520 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule);
526 static isl_union_map *
527 getScheduleMap (isl_schedule *Schedule, isl_union_map **map_sepcl)
529 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
530 isl_union_map *ScheduleMap = getScheduleForBandList (BandList, map_sepcl);
531 isl_band_list_free (BandList);
536 getSingleMap (__isl_take isl_map *map, void *user)
538 isl_map **singleMap = (isl_map **) user;
545 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map, bool sepcl)
550 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
552 isl_set *domain = isl_set_copy (pbb->domain);
553 isl_union_map *stmtBand;
554 isl_map *stmtSchedule;
556 stmtBand = isl_union_map_intersect_domain
557 (isl_union_map_copy (schedule_map),
558 isl_union_set_from_set (domain));
559 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
563 isl_map_free (pbb->transformed);
564 pbb->transformed = stmtSchedule;
567 pbb->map_sepclass = stmtSchedule;
569 isl_union_map_free (stmtBand);
573 static const int CONSTANT_BOUND = 20;
576 optimize_isl (scop_p scop)
579 isl_schedule *schedule;
580 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
581 isl_schedule_constraints *schedule_constraints;
583 isl_union_set *domain;
584 isl_union_map *validity, *proximity, *dependences;
585 isl_union_map *schedule_map;
586 isl_union_map *schedule_map_f;
588 domain = scop_get_domains (scop);
589 dependences = scop_get_dependences (scop);
590 dependences = isl_union_map_gist_domain (dependences,
591 isl_union_set_copy (domain));
592 dependences = isl_union_map_gist_range (dependences,
593 isl_union_set_copy (domain));
594 validity = dependences;
596 proximity = isl_union_map_copy (validity);
598 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
599 schedule_constraints = isl_schedule_constraints_on_domain (domain);
601 = isl_schedule_constraints_set_proximity (schedule_constraints,
604 = isl_schedule_constraints_set_validity (schedule_constraints,
605 isl_union_map_copy (validity));
607 = isl_schedule_constraints_set_coincidence (schedule_constraints,
611 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
612 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
613 #ifdef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS
614 isl_options_set_schedule_serialize_sccs (scop->ctx, 1);
616 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
618 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
620 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
621 schedule = isl_schedule_constraints_compute_schedule(schedule_constraints);
623 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
626 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
631 schedule_map_f = isl_union_map_empty (isl_space_params_alloc (scop->ctx, 0));
632 schedule_map = getScheduleMap (schedule, &schedule_map_f);
634 apply_schedule_map_to_scop (scop, schedule_map, false);
635 if (!isl_union_map_is_empty (schedule_map_f))
636 apply_schedule_map_to_scop (scop, schedule_map_f, true);
637 isl_union_map_free (schedule_map_f);
639 isl_schedule_free (schedule);
640 isl_union_map_free (schedule_map);