gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 3, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING3.  If not see
  19     <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "ggc.h"
  26 #include "tree.h"
  27 #include "target.h"
  28 #include "rtl.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "obstack.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "expr.h"
  37 #include "optabs.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-pass.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "vec.h"
  43 #include "lambda.h"
  44 #include "vecprim.h"
  45 #include "pointer-set.h"
  46
  47 /* This loop nest code generation is based on non-singular matrix
  48    math.
  49
  50  A little terminology and a general sketch of the algorithm.  See "A singular
  51  loop transformation framework based on non-singular matrices" by Wei Li and
  52  Keshav Pingali for formal proofs that the various statements below are
  53  correct.
  54
  55  A loop iteration space represents the points traversed by the loop.  A point in the
  56  iteration space can be represented by a vector of size <loop depth>.  You can
  57  therefore represent the iteration space as an integral combinations of a set
  58  of basis vectors.
  59
  60  A loop iteration space is dense if every integer point between the loop
  61  bounds is a point in the iteration space.  Every loop with a step of 1
  62  therefore has a dense iteration space.
  63
  64  for i = 1 to 3, step 1 is a dense iteration space.
  65
  66  A loop iteration space is sparse if it is not dense.  That is, the iteration
  67  space skips integer points that are within the loop bounds.
  68
  69  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  70  2 is skipped.
  71
  72  Dense source spaces are easy to transform, because they don't skip any
  73  points to begin with.  Thus we can compute the exact bounds of the target
  74  space using min/max and floor/ceil.
  75
  76  For a dense source space, we take the transformation matrix, decompose it
  77  into a lower triangular part (H) and a unimodular part (U).
  78  We then compute the auxiliary space from the unimodular part (source loop
  79  nest . U = auxiliary space) , which has two important properties:
  80   1. It traverses the iterations in the same lexicographic order as the source
  81   space.
  82   2. It is a dense space when the source is a dense space (even if the target
  83   space is going to be sparse).
  84
  85  Given the auxiliary space, we use the lower triangular part to compute the
  86  bounds in the target space by simple matrix multiplication.
  87  The gaps in the target space (IE the new loop step sizes) will be the
  88  diagonals of the H matrix.
  89
  90  Sparse source spaces require another step, because you can't directly compute
  91  the exact bounds of the auxiliary and target space from the sparse space.
  92  Rather than try to come up with a separate algorithm to handle sparse source
  93  spaces directly, we just find a legal transformation matrix that gives you
  94  the sparse source space, from a dense space, and then transform the dense
  95  space.
  96
  97  For a regular sparse space, you can represent the source space as an integer
  98  lattice, and the base space of that lattice will always be dense.  Thus, we
  99  effectively use the lattice to figure out the transformation from the lattice
 100  base space, to the sparse iteration space (IE what transform was applied to
 101  the dense space to make it sparse).  We then compose this transform with the
 102  transformation matrix specified by the user (since our matrix transformations
 103  are closed under composition, this is okay).  We can then use the base space
 104  (which is dense) plus the composed transformation matrix, to compute the rest
 105  of the transform using the dense space algorithm above.
 106
 107  In other words, our sparse source space (B) is decomposed into a dense base
 108  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 109  We then compute the composition of L and the user transformation matrix (T),
 110  so that T is now a transform from A to the result, instead of from B to the
 111  result.
 112  IE A.(LT) = result instead of B.T = result
 113  Since A is now a dense source space, we can use the dense source space
 114  algorithm above to compute the result of applying transform (LT) to A.
 115
 116  Fourier-Motzkin elimination is used to compute the bounds of the base space
 117  of the lattice.  */
 118
 119 static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
 120                              VEC(tree,heap) *, VEC(int,heap) *,
 121                              VEC(tree,heap) *);
 122 /* Lattice stuff that is internal to the code generation algorithm.  */
 123
 124 typedef struct lambda_lattice_s
 125 {
 126   /* Lattice base matrix.  */
 127   lambda_matrix base;
 128   /* Lattice dimension.  */
 129   int dimension;
 130   /* Origin vector for the coefficients.  */
 131   lambda_vector origin;
 132   /* Origin matrix for the invariants.  */
 133   lambda_matrix origin_invariants;
 134   /* Number of invariants.  */
 135   int invariants;
 136 } *lambda_lattice;
 137
 138 #define LATTICE_BASE(T) ((T)->base)
 139 #define LATTICE_DIMENSION(T) ((T)->dimension)
 140 #define LATTICE_ORIGIN(T) ((T)->origin)
 141 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 142 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 143
 144 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 145                        int, int);
 146 static lambda_lattice lambda_lattice_new (int, int, struct obstack *);
 147 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
 148                                                    struct obstack *);
 149
 150 static bool can_convert_to_perfect_nest (struct loop *);
 151
 152 /* Create a new lambda body vector.  */
 153
 154 lambda_body_vector
 155 lambda_body_vector_new (int size, struct obstack * lambda_obstack)
 156 {
 157   lambda_body_vector ret;
 158
 159   ret = (lambda_body_vector)obstack_alloc (lambda_obstack, sizeof (*ret));
 160   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 161   LBV_SIZE (ret) = size;
 162   LBV_DENOMINATOR (ret) = 1;
 163   return ret;
 164 }
 165
 166 /* Compute the new coefficients for the vector based on the
 167   *inverse* of the transformation matrix.  */
 168
 169 lambda_body_vector
 170 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 171                                 lambda_body_vector vect,
 172                                 struct obstack * lambda_obstack)
 173 {
 174   lambda_body_vector temp;
 175   int depth;
 176
 177   /* Make sure the matrix is square.  */
 178   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 179
 180   depth = LTM_ROWSIZE (transform);
 181
 182   temp = lambda_body_vector_new (depth, lambda_obstack);
 183   LBV_DENOMINATOR (temp) =
 184     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 185   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 186                              LTM_MATRIX (transform), depth,
 187                              LBV_COEFFICIENTS (temp));
 188   LBV_SIZE (temp) = LBV_SIZE (vect);
 189   return temp;
 190 }
 191
 192 /* Print out a lambda body vector.  */
 193
 194 void
 195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 196 {
 197   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 198 }
 199
 200 /* Return TRUE if two linear expressions are equal.  */
 201
 202 static bool
 203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 204            int depth, int invariants)
 205 {
 206   int i;
 207
 208   if (lle1 == NULL || lle2 == NULL)
 209     return false;
 210   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 211     return false;
 212   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 213     return false;
 214   for (i = 0; i < depth; i++)
 215     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 216       return false;
 217   for (i = 0; i < invariants; i++)
 218     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 219         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 220       return false;
 221   return true;
 222 }
 223
 224 /* Create a new linear expression with dimension DIM, and total number
 225    of invariants INVARIANTS.  */
 226
 227 lambda_linear_expression
 228 lambda_linear_expression_new (int dim, int invariants,
 229                               struct obstack * lambda_obstack)
 230 {
 231   lambda_linear_expression ret;
 232
 233   ret = (lambda_linear_expression)obstack_alloc (lambda_obstack,
 234                                                  sizeof (*ret));
 235   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 236   LLE_CONSTANT (ret) = 0;
 237   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 238   LLE_DENOMINATOR (ret) = 1;
 239   LLE_NEXT (ret) = NULL;
 240
 241   return ret;
 242 }
 243
 244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 245    The starting letter used for variable names is START.  */
 246
 247 static void
 248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 249                          char start)
 250 {
 251   int i;
 252   bool first = true;
 253   for (i = 0; i < size; i++)
 254     {
 255       if (expr[i] != 0)
 256         {
 257           if (first)
 258             {
 259               if (expr[i] < 0)
 260                 fprintf (outfile, "-");
 261               first = false;
 262             }
 263           else if (expr[i] > 0)
 264             fprintf (outfile, " + ");
 265           else
 266             fprintf (outfile, " - ");
 267           if (abs (expr[i]) == 1)
 268             fprintf (outfile, "%c", start + i);
 269           else
 270             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 271         }
 272     }
 273 }
 274
 275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 276    depth/number of coefficients is given by DEPTH, the number of invariants is
 277    given by INVARIANTS, and the character to start variable names with is given
 278    by START.  */
 279
 280 void
 281 print_lambda_linear_expression (FILE * outfile,
 282                                 lambda_linear_expression expr,
 283                                 int depth, int invariants, char start)
 284 {
 285   fprintf (outfile, "\tLinear expression: ");
 286   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 287   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 288   fprintf (outfile, "  invariants: ");
 289   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 290                            invariants, 'A');
 291   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 292 }
 293
 294 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 295    coefficients is given by DEPTH, the number of invariants is
 296    given by INVARIANTS, and the character to start variable names with is given
 297    by START.  */
 298
 299 void
 300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 301                    int invariants, char start)
 302 {
 303   int step;
 304   lambda_linear_expression expr;
 305
 306   gcc_assert (loop);
 307
 308   expr = LL_LINEAR_OFFSET (loop);
 309   step = LL_STEP (loop);
 310   fprintf (outfile, "  step size = %d \n", step);
 311
 312   if (expr)
 313     {
 314       fprintf (outfile, "  linear offset: \n");
 315       print_lambda_linear_expression (outfile, expr, depth, invariants,
 316                                       start);
 317     }
 318
 319   fprintf (outfile, "  lower bound: \n");
 320   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 321     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 322   fprintf (outfile, "  upper bound: \n");
 323   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 324     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 325 }
 326
 327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 328    number of invariants.  */
 329
 330 lambda_loopnest
 331 lambda_loopnest_new (int depth, int invariants,
 332                      struct obstack * lambda_obstack)
 333 {
 334   lambda_loopnest ret;
 335   ret = (lambda_loopnest)obstack_alloc (lambda_obstack, sizeof (*ret));
 336
 337   LN_LOOPS (ret) = (lambda_loop *)
 338       obstack_alloc (lambda_obstack, depth * sizeof(LN_LOOPS(ret)));
 339   LN_DEPTH (ret) = depth;
 340   LN_INVARIANTS (ret) = invariants;
 341
 342   return ret;
 343 }
 344
 345 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 346    character to use for loop names is given by START.  */
 347
 348 void
 349 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 350 {
 351   int i;
 352   for (i = 0; i < LN_DEPTH (nest); i++)
 353     {
 354       fprintf (outfile, "Loop %c\n", start + i);
 355       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 356                          LN_INVARIANTS (nest), 'i');
 357       fprintf (outfile, "\n");
 358     }
 359 }
 360
 361 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 362    of invariants.  */
 363
 364 static lambda_lattice
 365 lambda_lattice_new (int depth, int invariants, struct obstack * lambda_obstack)
 366 {
 367   lambda_lattice ret
 368       = (lambda_lattice)obstack_alloc (lambda_obstack, sizeof (*ret));
 369   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 370   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 371   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 372   LATTICE_DIMENSION (ret) = depth;
 373   LATTICE_INVARIANTS (ret) = invariants;
 374   return ret;
 375 }
 376
 377 /* Compute the lattice base for NEST.  The lattice base is essentially a
 378    non-singular transform from a dense base space to a sparse iteration space.
 379    We use it so that we don't have to specially handle the case of a sparse
 380    iteration space in other parts of the algorithm.  As a result, this routine
 381    only does something interesting (IE produce a matrix that isn't the
 382    identity matrix) if NEST is a sparse space.  */
 383
 384 static lambda_lattice
 385 lambda_lattice_compute_base (lambda_loopnest nest,
 386                              struct obstack * lambda_obstack)
 387 {
 388   lambda_lattice ret;
 389   int depth, invariants;
 390   lambda_matrix base;
 391
 392   int i, j, step;
 393   lambda_loop loop;
 394   lambda_linear_expression expression;
 395
 396   depth = LN_DEPTH (nest);
 397   invariants = LN_INVARIANTS (nest);
 398
 399   ret = lambda_lattice_new (depth, invariants, lambda_obstack);
 400   base = LATTICE_BASE (ret);
 401   for (i = 0; i < depth; i++)
 402     {
 403       loop = LN_LOOPS (nest)[i];
 404       gcc_assert (loop);
 405       step = LL_STEP (loop);
 406       /* If we have a step of 1, then the base is one, and the
 407          origin and invariant coefficients are 0.  */
 408       if (step == 1)
 409         {
 410           for (j = 0; j < depth; j++)
 411             base[i][j] = 0;
 412           base[i][i] = 1;
 413           LATTICE_ORIGIN (ret)[i] = 0;
 414           for (j = 0; j < invariants; j++)
 415             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 416         }
 417       else
 418         {
 419           /* Otherwise, we need the lower bound expression (which must
 420              be an affine function)  to determine the base.  */
 421           expression = LL_LOWER_BOUND (loop);
 422           gcc_assert (expression && !LLE_NEXT (expression)
 423                       && LLE_DENOMINATOR (expression) == 1);
 424
 425           /* The lower triangular portion of the base is going to be the
 426              coefficient times the step */
 427           for (j = 0; j < i; j++)
 428             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 429               * LL_STEP (LN_LOOPS (nest)[j]);
 430           base[i][i] = step;
 431           for (j = i + 1; j < depth; j++)
 432             base[i][j] = 0;
 433
 434           /* Origin for this loop is the constant of the lower bound
 435              expression.  */
 436           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 437
 438           /* Coefficient for the invariants are equal to the invariant
 439              coefficients in the expression.  */
 440           for (j = 0; j < invariants; j++)
 441             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 442               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 443         }
 444     }
 445   return ret;
 446 }
 447
 448 /* Compute the least common multiple of two numbers A and B .  */
 449
 450 int
 451 least_common_multiple (int a, int b)
 452 {
 453   return (abs (a) * abs (b) / gcd (a, b));
 454 }
 455
 456 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 457    auxiliary nest.
 458    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 459    it is easy to calculate the answer and bounds.
 460    A sketch of how it works:
 461    Given a system of linear inequalities, ai * xj >= bk, you can always
 462    rewrite the constraints so they are all of the form
 463    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 464    in b1 ... bk, and some a in a1...ai)
 465    You can then eliminate this x from the non-constant inequalities by
 466    rewriting these as a <= b, x >= constant, and delete the x variable.
 467    You can then repeat this for any remaining x variables, and then we have
 468    an easy to use variable <= constant (or no variables at all) form that we
 469    can construct our bounds from.
 470
 471    In our case, each time we eliminate, we construct part of the bound from
 472    the ith variable, then delete the ith variable.
 473
 474    Remember the constant are in our vector a, our coefficient matrix is A,
 475    and our invariant coefficient matrix is B.
 476
 477    SIZE is the size of the matrices being passed.
 478    DEPTH is the loop nest depth.
 479    INVARIANTS is the number of loop invariants.
 480    A, B, and a are the coefficient matrix, invariant coefficient, and a
 481    vector of constants, respectively.  */
 482
 483 static lambda_loopnest
 484 compute_nest_using_fourier_motzkin (int size,
 485                                     int depth,
 486                                     int invariants,
 487                                     lambda_matrix A,
 488                                     lambda_matrix B,
 489                                     lambda_vector a,
 490                                     struct obstack * lambda_obstack)
 491 {
 492
 493   int multiple, f1, f2;
 494   int i, j, k;
 495   lambda_linear_expression expression;
 496   lambda_loop loop;
 497   lambda_loopnest auxillary_nest;
 498   lambda_matrix swapmatrix, A1, B1;
 499   lambda_vector swapvector, a1;
 500   int newsize;
 501
 502   A1 = lambda_matrix_new (128, depth);
 503   B1 = lambda_matrix_new (128, invariants);
 504   a1 = lambda_vector_new (128);
 505
 506   auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 507
 508   for (i = depth - 1; i >= 0; i--)
 509     {
 510       loop = lambda_loop_new ();
 511       LN_LOOPS (auxillary_nest)[i] = loop;
 512       LL_STEP (loop) = 1;
 513
 514       for (j = 0; j < size; j++)
 515         {
 516           if (A[j][i] < 0)
 517             {
 518               /* Any linear expression in the matrix with a coefficient less
 519                  than 0 becomes part of the new lower bound.  */
 520               expression = lambda_linear_expression_new (depth, invariants,
 521                                                          lambda_obstack);
 522
 523               for (k = 0; k < i; k++)
 524                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 525
 526               for (k = 0; k < invariants; k++)
 527                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 528
 529               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 530               LLE_CONSTANT (expression) = -1 * a[j];
 531
 532               /* Ignore if identical to the existing lower bound.  */
 533               if (!lle_equal (LL_LOWER_BOUND (loop),
 534                               expression, depth, invariants))
 535                 {
 536                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 537                   LL_LOWER_BOUND (loop) = expression;
 538                 }
 539
 540             }
 541           else if (A[j][i] > 0)
 542             {
 543               /* Any linear expression with a coefficient greater than 0
 544                  becomes part of the new upper bound.  */
 545               expression = lambda_linear_expression_new (depth, invariants,
 546                                                          lambda_obstack);
 547               for (k = 0; k < i; k++)
 548                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 549
 550               for (k = 0; k < invariants; k++)
 551                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 552
 553               LLE_DENOMINATOR (expression) = A[j][i];
 554               LLE_CONSTANT (expression) = a[j];
 555
 556               /* Ignore if identical to the existing upper bound.  */
 557               if (!lle_equal (LL_UPPER_BOUND (loop),
 558                               expression, depth, invariants))
 559                 {
 560                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 561                   LL_UPPER_BOUND (loop) = expression;
 562                 }
 563
 564             }
 565         }
 566
 567       /* This portion creates a new system of linear inequalities by deleting
 568          the i'th variable, reducing the system by one variable.  */
 569       newsize = 0;
 570       for (j = 0; j < size; j++)
 571         {
 572           /* If the coefficient for the i'th variable is 0, then we can just
 573              eliminate the variable straightaway.  Otherwise, we have to
 574              multiply through by the coefficients we are eliminating.  */
 575           if (A[j][i] == 0)
 576             {
 577               lambda_vector_copy (A[j], A1[newsize], depth);
 578               lambda_vector_copy (B[j], B1[newsize], invariants);
 579               a1[newsize] = a[j];
 580               newsize++;
 581             }
 582           else if (A[j][i] > 0)
 583             {
 584               for (k = 0; k < size; k++)
 585                 {
 586                   if (A[k][i] < 0)
 587                     {
 588                       multiple = least_common_multiple (A[j][i], A[k][i]);
 589                       f1 = multiple / A[j][i];
 590                       f2 = -1 * multiple / A[k][i];
 591
 592                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 593                                             A1[newsize], depth);
 594                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 595                                             B1[newsize], invariants);
 596                       a1[newsize] = f1 * a[j] + f2 * a[k];
 597                       newsize++;
 598                     }
 599                 }
 600             }
 601         }
 602
 603       swapmatrix = A;
 604       A = A1;
 605       A1 = swapmatrix;
 606
 607       swapmatrix = B;
 608       B = B1;
 609       B1 = swapmatrix;
 610
 611       swapvector = a;
 612       a = a1;
 613       a1 = swapvector;
 614
 615       size = newsize;
 616     }
 617
 618   return auxillary_nest;
 619 }
 620
 621 /* Compute the loop bounds for the auxiliary space NEST.
 622    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 623    Given the original nest, this function will
 624    1. Convert the nest into matrix form, which consists of a matrix for the
 625    coefficients, a matrix for the
 626    invariant coefficients, and a vector for the constants.
 627    2. Use the matrix form to calculate the lattice base for the nest (which is
 628    a dense space)
 629    3. Compose the dense space transform with the user specified transform, to
 630    get a transform we can easily calculate transformed bounds for.
 631    4. Multiply the composed transformation matrix times the matrix form of the
 632    loop.
 633    5. Transform the newly created matrix (from step 4) back into a loop nest
 634    using Fourier-Motzkin elimination to figure out the bounds.  */
 635
 636 static lambda_loopnest
 637 lambda_compute_auxillary_space (lambda_loopnest nest,
 638                                 lambda_trans_matrix trans,
 639                                 struct obstack * lambda_obstack)
 640 {
 641   lambda_matrix A, B, A1, B1;
 642   lambda_vector a, a1;
 643   lambda_matrix invertedtrans;
 644   int depth, invariants, size;
 645   int i, j;
 646   lambda_loop loop;
 647   lambda_linear_expression expression;
 648   lambda_lattice lattice;
 649
 650   depth = LN_DEPTH (nest);
 651   invariants = LN_INVARIANTS (nest);
 652
 653   /* Unfortunately, we can't know the number of constraints we'll have
 654      ahead of time, but this should be enough even in ridiculous loop nest
 655      cases. We must not go over this limit.  */
 656   A = lambda_matrix_new (128, depth);
 657   B = lambda_matrix_new (128, invariants);
 658   a = lambda_vector_new (128);
 659
 660   A1 = lambda_matrix_new (128, depth);
 661   B1 = lambda_matrix_new (128, invariants);
 662   a1 = lambda_vector_new (128);
 663
 664   /* Store the bounds in the equation matrix A, constant vector a, and
 665      invariant matrix B, so that we have Ax <= a + B.
 666      This requires a little equation rearranging so that everything is on the
 667      correct side of the inequality.  */
 668   size = 0;
 669   for (i = 0; i < depth; i++)
 670     {
 671       loop = LN_LOOPS (nest)[i];
 672
 673       /* First we do the lower bound.  */
 674       if (LL_STEP (loop) > 0)
 675         expression = LL_LOWER_BOUND (loop);
 676       else
 677         expression = LL_UPPER_BOUND (loop);
 678
 679       for (; expression != NULL; expression = LLE_NEXT (expression))
 680         {
 681           /* Fill in the coefficient.  */
 682           for (j = 0; j < i; j++)
 683             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 684
 685           /* And the invariant coefficient.  */
 686           for (j = 0; j < invariants; j++)
 687             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 688
 689           /* And the constant.  */
 690           a[size] = LLE_CONSTANT (expression);
 691
 692           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 693              constants and single variables on   */
 694           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 695           a[size] *= -1;
 696           for (j = 0; j < invariants; j++)
 697             B[size][j] *= -1;
 698
 699           size++;
 700           /* Need to increase matrix sizes above.  */
 701           gcc_assert (size <= 127);
 702
 703         }
 704
 705       /* Then do the exact same thing for the upper bounds.  */
 706       if (LL_STEP (loop) > 0)
 707         expression = LL_UPPER_BOUND (loop);
 708       else
 709         expression = LL_LOWER_BOUND (loop);
 710
 711       for (; expression != NULL; expression = LLE_NEXT (expression))
 712         {
 713           /* Fill in the coefficient.  */
 714           for (j = 0; j < i; j++)
 715             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 716
 717           /* And the invariant coefficient.  */
 718           for (j = 0; j < invariants; j++)
 719             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 720
 721           /* And the constant.  */
 722           a[size] = LLE_CONSTANT (expression);
 723
 724           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 725           for (j = 0; j < i; j++)
 726             A[size][j] *= -1;
 727           A[size][i] = LLE_DENOMINATOR (expression);
 728           size++;
 729           /* Need to increase matrix sizes above.  */
 730           gcc_assert (size <= 127);
 731
 732         }
 733     }
 734
 735   /* Compute the lattice base x = base * y + origin, where y is the
 736      base space.  */
 737   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
 738
 739   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 740
 741   /* A1 = A * L */
 742   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 743
 744   /* a1 = a - A * origin constant.  */
 745   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 746   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 747
 748   /* B1 = B - A * origin invariant.  */
 749   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 750                       invariants);
 751   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 752
 753   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 754      it by A1, then performing Fourier-Motzkin.  */
 755
 756   invertedtrans = lambda_matrix_new (depth, depth);
 757
 758   /* Compute the inverse of U.  */
 759   lambda_matrix_inverse (LTM_MATRIX (trans),
 760                          invertedtrans, depth);
 761
 762   /* A = A1 inv(U).  */
 763   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 764
 765   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 766                                              A, B1, a1, lambda_obstack);
 767 }
 768
 769 /* Compute the loop bounds for the target space, using the bounds of
 770    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 771    The target space loop bounds are computed by multiplying the triangular
 772    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 773    the loop steps (positive or negative) is then used to swap the bounds if
 774    the loop counts downwards.
 775    Return the target loopnest.  */
 776
 777 static lambda_loopnest
 778 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 779                              lambda_trans_matrix H, lambda_vector stepsigns,
 780                              struct obstack * lambda_obstack)
 781 {
 782   lambda_matrix inverse, H1;
 783   int determinant, i, j;
 784   int gcd1, gcd2;
 785   int factor;
 786
 787   lambda_loopnest target_nest;
 788   int depth, invariants;
 789   lambda_matrix target;
 790
 791   lambda_loop auxillary_loop, target_loop;
 792   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 793
 794   depth = LN_DEPTH (auxillary_nest);
 795   invariants = LN_INVARIANTS (auxillary_nest);
 796
 797   inverse = lambda_matrix_new (depth, depth);
 798   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 799
 800   /* H1 is H excluding its diagonal.  */
 801   H1 = lambda_matrix_new (depth, depth);
 802   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 803
 804   for (i = 0; i < depth; i++)
 805     H1[i][i] = 0;
 806
 807   /* Computes the linear offsets of the loop bounds.  */
 808   target = lambda_matrix_new (depth, depth);
 809   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 810
 811   target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 812
 813   for (i = 0; i < depth; i++)
 814     {
 815
 816       /* Get a new loop structure.  */
 817       target_loop = lambda_loop_new ();
 818       LN_LOOPS (target_nest)[i] = target_loop;
 819
 820       /* Computes the gcd of the coefficients of the linear part.  */
 821       gcd1 = lambda_vector_gcd (target[i], i);
 822
 823       /* Include the denominator in the GCD.  */
 824       gcd1 = gcd (gcd1, determinant);
 825
 826       /* Now divide through by the gcd.  */
 827       for (j = 0; j < i; j++)
 828         target[i][j] = target[i][j] / gcd1;
 829
 830       expression = lambda_linear_expression_new (depth, invariants,
 831                                                  lambda_obstack);
 832       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 833       LLE_DENOMINATOR (expression) = determinant / gcd1;
 834       LLE_CONSTANT (expression) = 0;
 835       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 836                            invariants);
 837       LL_LINEAR_OFFSET (target_loop) = expression;
 838     }
 839
 840   /* For each loop, compute the new bounds from H.  */
 841   for (i = 0; i < depth; i++)
 842     {
 843       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 844       target_loop = LN_LOOPS (target_nest)[i];
 845       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 846       factor = LTM_MATRIX (H)[i][i];
 847
 848       /* First we do the lower bound.  */
 849       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 850
 851       for (; auxillary_expr != NULL;
 852            auxillary_expr = LLE_NEXT (auxillary_expr))
 853         {
 854           target_expr = lambda_linear_expression_new (depth, invariants,
 855                                                       lambda_obstack);
 856           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 857                                      depth, inverse, depth,
 858                                      LLE_COEFFICIENTS (target_expr));
 859           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 860                                     LLE_COEFFICIENTS (target_expr), depth,
 861                                     factor);
 862
 863           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 864           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 865                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 866                               invariants);
 867           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 868                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 869                                     invariants, factor);
 870           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 871
 872           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 873             {
 874               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 875                 * determinant;
 876               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 877                                         (target_expr),
 878                                         LLE_INVARIANT_COEFFICIENTS
 879                                         (target_expr), invariants,
 880                                         determinant);
 881               LLE_DENOMINATOR (target_expr) =
 882                 LLE_DENOMINATOR (target_expr) * determinant;
 883             }
 884           /* Find the gcd and divide by it here, rather than doing it
 885              at the tree level.  */
 886           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 887           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 888                                     invariants);
 889           gcd1 = gcd (gcd1, gcd2);
 890           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 891           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 892           for (j = 0; j < depth; j++)
 893             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 894           for (j = 0; j < invariants; j++)
 895             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 896           LLE_CONSTANT (target_expr) /= gcd1;
 897           LLE_DENOMINATOR (target_expr) /= gcd1;
 898           /* Ignore if identical to existing bound.  */
 899           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 900                           invariants))
 901             {
 902               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 903               LL_LOWER_BOUND (target_loop) = target_expr;
 904             }
 905         }
 906       /* Now do the upper bound.  */
 907       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 908
 909       for (; auxillary_expr != NULL;
 910            auxillary_expr = LLE_NEXT (auxillary_expr))
 911         {
 912           target_expr = lambda_linear_expression_new (depth, invariants,
 913                                                       lambda_obstack);
 914           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 915                                      depth, inverse, depth,
 916                                      LLE_COEFFICIENTS (target_expr));
 917           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 918                                     LLE_COEFFICIENTS (target_expr), depth,
 919                                     factor);
 920           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 921           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 922                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 923                               invariants);
 924           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 925                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 926                                     invariants, factor);
 927           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 928
 929           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 930             {
 931               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 932                 * determinant;
 933               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 934                                         (target_expr),
 935                                         LLE_INVARIANT_COEFFICIENTS
 936                                         (target_expr), invariants,
 937                                         determinant);
 938               LLE_DENOMINATOR (target_expr) =
 939                 LLE_DENOMINATOR (target_expr) * determinant;
 940             }
 941           /* Find the gcd and divide by it here, instead of at the
 942              tree level.  */
 943           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 944           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 945                                     invariants);
 946           gcd1 = gcd (gcd1, gcd2);
 947           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 948           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 949           for (j = 0; j < depth; j++)
 950             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 951           for (j = 0; j < invariants; j++)
 952             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 953           LLE_CONSTANT (target_expr) /= gcd1;
 954           LLE_DENOMINATOR (target_expr) /= gcd1;
 955           /* Ignore if equal to existing bound.  */
 956           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 957                           invariants))
 958             {
 959               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 960               LL_UPPER_BOUND (target_loop) = target_expr;
 961             }
 962         }
 963     }
 964   for (i = 0; i < depth; i++)
 965     {
 966       target_loop = LN_LOOPS (target_nest)[i];
 967       /* If necessary, exchange the upper and lower bounds and negate
 968          the step size.  */
 969       if (stepsigns[i] < 0)
 970         {
 971           LL_STEP (target_loop) *= -1;
 972           tmp_expr = LL_LOWER_BOUND (target_loop);
 973           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 974           LL_UPPER_BOUND (target_loop) = tmp_expr;
 975         }
 976     }
 977   return target_nest;
 978 }
 979
 980 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 981    result.  */
 982
 983 static lambda_vector
 984 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
 985 {
 986   lambda_matrix matrix, H;
 987   int size;
 988   lambda_vector newsteps;
 989   int i, j, factor, minimum_column;
 990   int temp;
 991
 992   matrix = LTM_MATRIX (trans);
 993   size = LTM_ROWSIZE (trans);
 994   H = lambda_matrix_new (size, size);
 995
 996   newsteps = lambda_vector_new (size);
 997   lambda_vector_copy (stepsigns, newsteps, size);
 998
 999   lambda_matrix_copy (matrix, H, size, size);
1000
1001   for (j = 0; j < size; j++)
1002     {
1003       lambda_vector row;
1004       row = H[j];
1005       for (i = j; i < size; i++)
1006         if (row[i] < 0)
1007           lambda_matrix_col_negate (H, size, i);
1008       while (lambda_vector_first_nz (row, size, j + 1) < size)
1009         {
1010           minimum_column = lambda_vector_min_nz (row, size, j);
1011           lambda_matrix_col_exchange (H, size, j, minimum_column);
1012
1013           temp = newsteps[j];
1014           newsteps[j] = newsteps[minimum_column];
1015           newsteps[minimum_column] = temp;
1016
1017           for (i = j + 1; i < size; i++)
1018             {
1019               factor = row[i] / row[j];
1020               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1021             }
1022         }
1023     }
1024   return newsteps;
1025 }
1026
1027 /* Transform NEST according to TRANS, and return the new loopnest.
1028    This involves
1029    1. Computing a lattice base for the transformation
1030    2. Composing the dense base with the specified transformation (TRANS)
1031    3. Decomposing the combined transformation into a lower triangular portion,
1032    and a unimodular portion.
1033    4. Computing the auxiliary nest using the unimodular portion.
1034    5. Computing the target nest using the auxiliary nest and the lower
1035    triangular portion.  */
1036
1037 lambda_loopnest
1038 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
1039                            struct obstack * lambda_obstack)
1040 {
1041   lambda_loopnest auxillary_nest, target_nest;
1042
1043   int depth, invariants;
1044   int i, j;
1045   lambda_lattice lattice;
1046   lambda_trans_matrix trans1, H, U;
1047   lambda_loop loop;
1048   lambda_linear_expression expression;
1049   lambda_vector origin;
1050   lambda_matrix origin_invariants;
1051   lambda_vector stepsigns;
1052   int f;
1053
1054   depth = LN_DEPTH (nest);
1055   invariants = LN_INVARIANTS (nest);
1056
1057   /* Keep track of the signs of the loop steps.  */
1058   stepsigns = lambda_vector_new (depth);
1059   for (i = 0; i < depth; i++)
1060     {
1061       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1062         stepsigns[i] = 1;
1063       else
1064         stepsigns[i] = -1;
1065     }
1066
1067   /* Compute the lattice base.  */
1068   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
1069   trans1 = lambda_trans_matrix_new (depth, depth);
1070
1071   /* Multiply the transformation matrix by the lattice base.  */
1072
1073   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1074                       LTM_MATRIX (trans1), depth, depth, depth);
1075
1076   /* Compute the Hermite normal form for the new transformation matrix.  */
1077   H = lambda_trans_matrix_new (depth, depth);
1078   U = lambda_trans_matrix_new (depth, depth);
1079   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1080                          LTM_MATRIX (U));
1081
1082   /* Compute the auxiliary loop nest's space from the unimodular
1083      portion.  */
1084   auxillary_nest = lambda_compute_auxillary_space (nest, U, lambda_obstack);
1085
1086   /* Compute the loop step signs from the old step signs and the
1087      transformation matrix.  */
1088   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1089
1090   /* Compute the target loop nest space from the auxiliary nest and
1091      the lower triangular matrix H.  */
1092   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns,
1093                                              lambda_obstack);
1094   origin = lambda_vector_new (depth);
1095   origin_invariants = lambda_matrix_new (depth, invariants);
1096   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1097                              LATTICE_ORIGIN (lattice), origin);
1098   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1099                       origin_invariants, depth, depth, invariants);
1100
1101   for (i = 0; i < depth; i++)
1102     {
1103       loop = LN_LOOPS (target_nest)[i];
1104       expression = LL_LINEAR_OFFSET (loop);
1105       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1106         f = 1;
1107       else
1108         f = LLE_DENOMINATOR (expression);
1109
1110       LLE_CONSTANT (expression) += f * origin[i];
1111
1112       for (j = 0; j < invariants; j++)
1113         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1114           f * origin_invariants[i][j];
1115     }
1116
1117   return target_nest;
1118
1119 }
1120
1121 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1122    return the new expression.  DEPTH is the depth of the loopnest.
1123    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1124    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1125    is the amount we have to add/subtract from the expression because of the
1126    type of comparison it is used in.  */
1127
1128 static lambda_linear_expression
1129 gcc_tree_to_linear_expression (int depth, tree expr,
1130                                VEC(tree,heap) *outerinductionvars,
1131                                VEC(tree,heap) *invariants, int extra,
1132                                struct obstack * lambda_obstack)
1133 {
1134   lambda_linear_expression lle = NULL;
1135   switch (TREE_CODE (expr))
1136     {
1137     case INTEGER_CST:
1138       {
1139         lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
1140         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1141         if (extra != 0)
1142           LLE_CONSTANT (lle) += extra;
1143
1144         LLE_DENOMINATOR (lle) = 1;
1145       }
1146       break;
1147     case SSA_NAME:
1148       {
1149         tree iv, invar;
1150         size_t i;
1151         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1152           if (iv != NULL)
1153             {
1154               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1155                 {
1156                   lle = lambda_linear_expression_new (depth, 2 * depth,
1157                                                       lambda_obstack);
1158                   LLE_COEFFICIENTS (lle)[i] = 1;
1159                   if (extra != 0)
1160                     LLE_CONSTANT (lle) = extra;
1161
1162                   LLE_DENOMINATOR (lle) = 1;
1163                 }
1164             }
1165         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1166           if (invar != NULL)
1167             {
1168               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1169                 {
1170                   lle = lambda_linear_expression_new (depth, 2 * depth,
1171                                                       lambda_obstack);
1172                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1173                   if (extra != 0)
1174                     LLE_CONSTANT (lle) = extra;
1175                   LLE_DENOMINATOR (lle) = 1;
1176                 }
1177             }
1178       }
1179       break;
1180     default:
1181       return NULL;
1182     }
1183
1184   return lle;
1185 }
1186
1187 /* Return the depth of the loopnest NEST */
1188
1189 static int
1190 depth_of_nest (struct loop *nest)
1191 {
1192   size_t depth = 0;
1193   while (nest)
1194     {
1195       depth++;
1196       nest = nest->inner;
1197     }
1198   return depth;
1199 }
1200
1201
1202 /* Return true if OP is invariant in LOOP and all outer loops.  */
1203
1204 static bool
1205 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1206 {
1207   if (is_gimple_min_invariant (op))
1208     return true;
1209   if (loop_depth (loop) == 0)
1210     return true;
1211   if (!expr_invariant_in_loop_p (loop, op))
1212     return false;
1213   if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
1214     return false;
1215   return true;
1216 }
1217
1218 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1219    or NULL if it could not be converted.
1220    DEPTH is the depth of the loop.
1221    INVARIANTS is a pointer to the array of loop invariants.
1222    The induction variable for this loop should be stored in the parameter
1223    OURINDUCTIONVAR.
1224    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1225
1226 static lambda_loop
1227 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1228                          VEC(tree,heap) ** invariants,
1229                          tree * ourinductionvar,
1230                          VEC(tree,heap) * outerinductionvars,
1231                          VEC(tree,heap) ** lboundvars,
1232                          VEC(tree,heap) ** uboundvars,
1233                          VEC(int,heap) ** steps,
1234                          struct obstack * lambda_obstack)
1235 {
1236   gimple phi;
1237   gimple exit_cond;
1238   tree access_fn, inductionvar;
1239   tree step;
1240   lambda_loop lloop = NULL;
1241   lambda_linear_expression lbound, ubound;
1242   tree test_lhs, test_rhs;
1243   int stepint;
1244   int extra = 0;
1245   tree lboundvar, uboundvar, uboundresult;
1246
1247   /* Find out induction var and exit condition.  */
1248   inductionvar = find_induction_var_from_exit_cond (loop);
1249   exit_cond = get_loop_exit_condition (loop);
1250
1251   if (inductionvar == NULL || exit_cond == NULL)
1252     {
1253       if (dump_file && (dump_flags & TDF_DETAILS))
1254         fprintf (dump_file,
1255                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1256       return NULL;
1257     }
1258
1259   if (SSA_NAME_DEF_STMT (inductionvar) == NULL)
1260     {
1261
1262       if (dump_file && (dump_flags & TDF_DETAILS))
1263         fprintf (dump_file,
1264                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1265
1266       return NULL;
1267     }
1268
1269   phi = SSA_NAME_DEF_STMT (inductionvar);
1270   if (gimple_code (phi) != GIMPLE_PHI)
1271     {
1272       tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1273       if (!op)
1274         {
1275
1276           if (dump_file && (dump_flags & TDF_DETAILS))
1277             fprintf (dump_file,
1278                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1279
1280           return NULL;
1281         }
1282
1283       phi = SSA_NAME_DEF_STMT (op);
1284       if (gimple_code (phi) != GIMPLE_PHI)
1285         {
1286           if (dump_file && (dump_flags & TDF_DETAILS))
1287             fprintf (dump_file,
1288                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1289           return NULL;
1290         }
1291     }
1292
1293   /* The induction variable name/version we want to put in the array is the
1294      result of the induction variable phi node.  */
1295   *ourinductionvar = PHI_RESULT (phi);
1296   access_fn = instantiate_parameters
1297     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1298   if (access_fn == chrec_dont_know)
1299     {
1300       if (dump_file && (dump_flags & TDF_DETAILS))
1301         fprintf (dump_file,
1302                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1303
1304       return NULL;
1305     }
1306
1307   step = evolution_part_in_loop_num (access_fn, loop->num);
1308   if (!step || step == chrec_dont_know)
1309     {
1310       if (dump_file && (dump_flags & TDF_DETAILS))
1311         fprintf (dump_file,
1312                  "Unable to convert loop: Cannot determine step of loop.\n");
1313
1314       return NULL;
1315     }
1316   if (TREE_CODE (step) != INTEGER_CST)
1317     {
1318
1319       if (dump_file && (dump_flags & TDF_DETAILS))
1320         fprintf (dump_file,
1321                  "Unable to convert loop: Step of loop is not integer.\n");
1322       return NULL;
1323     }
1324
1325   stepint = TREE_INT_CST_LOW (step);
1326
1327   /* Only want phis for induction vars, which will have two
1328      arguments.  */
1329   if (gimple_phi_num_args (phi) != 2)
1330     {
1331       if (dump_file && (dump_flags & TDF_DETAILS))
1332         fprintf (dump_file,
1333                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1334       return NULL;
1335     }
1336
1337   /* Another induction variable check. One argument's source should be
1338      in the loop, one outside the loop.  */
1339   if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src)
1340       && flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 1)->src))
1341     {
1342
1343       if (dump_file && (dump_flags & TDF_DETAILS))
1344         fprintf (dump_file,
1345                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1346
1347       return NULL;
1348     }
1349
1350   if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src))
1351     {
1352       lboundvar = PHI_ARG_DEF (phi, 1);
1353       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1354                                               outerinductionvars, *invariants,
1355                                               0, lambda_obstack);
1356     }
1357   else
1358     {
1359       lboundvar = PHI_ARG_DEF (phi, 0);
1360       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1361                                               outerinductionvars, *invariants,
1362                                               0, lambda_obstack);
1363     }
1364
1365   if (!lbound)
1366     {
1367
1368       if (dump_file && (dump_flags & TDF_DETAILS))
1369         fprintf (dump_file,
1370                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1371
1372       return NULL;
1373     }
1374   /* One part of the test may be a loop invariant tree.  */
1375   VEC_reserve (tree, heap, *invariants, 1);
1376   test_lhs = gimple_cond_lhs (exit_cond);
1377   test_rhs = gimple_cond_rhs (exit_cond);
1378
1379   if (TREE_CODE (test_rhs) == SSA_NAME
1380       && invariant_in_loop_and_outer_loops (loop, test_rhs))
1381     VEC_quick_push (tree, *invariants, test_rhs);
1382   else if (TREE_CODE (test_lhs) == SSA_NAME
1383            && invariant_in_loop_and_outer_loops (loop, test_lhs))
1384     VEC_quick_push (tree, *invariants, test_lhs);
1385
1386   /* The non-induction variable part of the test is the upper bound variable.
1387    */
1388   if (test_lhs == inductionvar)
1389     uboundvar = test_rhs;
1390   else
1391     uboundvar = test_lhs;
1392
1393   /* We only size the vectors assuming we have, at max, 2 times as many
1394      invariants as we do loops (one for each bound).
1395      This is just an arbitrary number, but it has to be matched against the
1396      code below.  */
1397   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1398
1399
1400   /* We might have some leftover.  */
1401   if (gimple_cond_code (exit_cond) == LT_EXPR)
1402     extra = -1 * stepint;
1403   else if (gimple_cond_code (exit_cond) == NE_EXPR)
1404     extra = -1 * stepint;
1405   else if (gimple_cond_code (exit_cond) == GT_EXPR)
1406     extra = -1 * stepint;
1407   else if (gimple_cond_code (exit_cond) == EQ_EXPR)
1408     extra = 1 * stepint;
1409
1410   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1411                                           outerinductionvars,
1412                                           *invariants, extra, lambda_obstack);
1413   uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1414                          build_int_cst (TREE_TYPE (uboundvar), extra));
1415   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1416   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1417   VEC_safe_push (int, heap, *steps, stepint);
1418   if (!ubound)
1419     {
1420       if (dump_file && (dump_flags & TDF_DETAILS))
1421         fprintf (dump_file,
1422                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1423       return NULL;
1424     }
1425
1426   lloop = lambda_loop_new ();
1427   LL_STEP (lloop) = stepint;
1428   LL_LOWER_BOUND (lloop) = lbound;
1429   LL_UPPER_BOUND (lloop) = ubound;
1430   return lloop;
1431 }
1432
1433 /* Given a LOOP, find the induction variable it is testing against in the exit
1434    condition.  Return the induction variable if found, NULL otherwise.  */
1435
1436 tree
1437 find_induction_var_from_exit_cond (struct loop *loop)
1438 {
1439   gimple expr = get_loop_exit_condition (loop);
1440   tree ivarop;
1441   tree test_lhs, test_rhs;
1442   if (expr == NULL)
1443     return NULL_TREE;
1444   if (gimple_code (expr) != GIMPLE_COND)
1445     return NULL_TREE;
1446   test_lhs = gimple_cond_lhs (expr);
1447   test_rhs = gimple_cond_rhs (expr);
1448
1449   /* Find the side that is invariant in this loop. The ivar must be the other
1450      side.  */
1451
1452   if (expr_invariant_in_loop_p (loop, test_lhs))
1453       ivarop = test_rhs;
1454   else if (expr_invariant_in_loop_p (loop, test_rhs))
1455       ivarop = test_lhs;
1456   else
1457     return NULL_TREE;
1458
1459   if (TREE_CODE (ivarop) != SSA_NAME)
1460     return NULL_TREE;
1461   return ivarop;
1462 }
1463
1464 DEF_VEC_P(lambda_loop);
1465 DEF_VEC_ALLOC_P(lambda_loop,heap);
1466
1467 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1468    Return the new loop nest.
1469    INDUCTIONVARS is a pointer to an array of induction variables for the
1470    loopnest that will be filled in during this process.
1471    INVARIANTS is a pointer to an array of invariants that will be filled in
1472    during this process.  */
1473
1474 lambda_loopnest
1475 gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
1476                                  VEC(tree,heap) **inductionvars,
1477                                  VEC(tree,heap) **invariants,
1478                                  struct obstack * lambda_obstack)
1479 {
1480   lambda_loopnest ret = NULL;
1481   struct loop *temp = loop_nest;
1482   int depth = depth_of_nest (loop_nest);
1483   size_t i;
1484   VEC(lambda_loop,heap) *loops = NULL;
1485   VEC(tree,heap) *uboundvars = NULL;
1486   VEC(tree,heap) *lboundvars  = NULL;
1487   VEC(int,heap) *steps = NULL;
1488   lambda_loop newloop;
1489   tree inductionvar = NULL;
1490   bool perfect_nest = perfect_nest_p (loop_nest);
1491
1492   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1493     goto fail;
1494
1495   while (temp)
1496     {
1497       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1498                                          &inductionvar, *inductionvars,
1499                                          &lboundvars, &uboundvars,
1500                                          &steps, lambda_obstack);
1501       if (!newloop)
1502         goto fail;
1503
1504       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1505       VEC_safe_push (lambda_loop, heap, loops, newloop);
1506       temp = temp->inner;
1507     }
1508
1509   if (!perfect_nest)
1510     {
1511       if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1512                             *inductionvars))
1513         {
1514           if (dump_file)
1515             fprintf (dump_file,
1516                      "Not a perfect loop nest and couldn't convert to one.\n");
1517           goto fail;
1518         }
1519       else if (dump_file)
1520         fprintf (dump_file,
1521                  "Successfully converted loop nest to perfect loop nest.\n");
1522     }
1523
1524   ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack);
1525
1526   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1527     LN_LOOPS (ret)[i] = newloop;
1528
1529  fail:
1530   VEC_free (lambda_loop, heap, loops);
1531   VEC_free (tree, heap, uboundvars);
1532   VEC_free (tree, heap, lboundvars);
1533   VEC_free (int, heap, steps);
1534
1535   return ret;
1536 }
1537
1538 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1539    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1540    inserted for us are stored.  INDUCTION_VARS is the array of induction
1541    variables for the loop this LBV is from.  TYPE is the tree type to use for
1542    the variables and trees involved.  */
1543
1544 static tree
1545 lbv_to_gcc_expression (lambda_body_vector lbv,
1546                        tree type, VEC(tree,heap) *induction_vars,
1547                        gimple_seq *stmts_to_insert)
1548 {
1549   int k;
1550   tree resvar;
1551   tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1552
1553   k = LBV_DENOMINATOR (lbv);
1554   gcc_assert (k != 0);
1555   if (k != 1)
1556     expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));
1557
1558   resvar = create_tmp_var (type, "lbvtmp");
1559   add_referenced_var (resvar);
1560   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1561 }
1562
1563 /* Convert a linear expression from coefficient and constant form to a
1564    gcc tree.
1565    Return the tree that represents the final value of the expression.
1566    LLE is the linear expression to convert.
1567    OFFSET is the linear offset to apply to the expression.
1568    TYPE is the tree type to use for the variables and math.
1569    INDUCTION_VARS is a vector of induction variables for the loops.
1570    INVARIANTS is a vector of the loop nest invariants.
1571    WRAP specifies what tree code to wrap the results in, if there is more than
1572    one (it is either MAX_EXPR, or MIN_EXPR).
1573    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1574    statements that need to be inserted for the linear expression.  */
1575
1576 static tree
1577 lle_to_gcc_expression (lambda_linear_expression lle,
1578                        lambda_linear_expression offset,
1579                        tree type,
1580                        VEC(tree,heap) *induction_vars,
1581                        VEC(tree,heap) *invariants,
1582                        enum tree_code wrap, gimple_seq *stmts_to_insert)
1583 {
1584   int k;
1585   tree resvar;
1586   tree expr = NULL_TREE;
1587   VEC(tree,heap) *results = NULL;
1588
1589   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1590
1591   /* Build up the linear expressions.  */
1592   for (; lle != NULL; lle = LLE_NEXT (lle))
1593     {
1594       expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1595       expr = fold_build2 (PLUS_EXPR, type, expr,
1596                           build_linear_expr (type,
1597                                              LLE_INVARIANT_COEFFICIENTS (lle),
1598                                              invariants));
1599
1600       k = LLE_CONSTANT (lle);
1601       if (k)
1602         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1603
1604       k = LLE_CONSTANT (offset);
1605       if (k)
1606         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1607
1608       k = LLE_DENOMINATOR (lle);
1609       if (k != 1)
1610         expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1611                             type, expr, build_int_cst (type, k));
1612
1613       expr = fold (expr);
1614       VEC_safe_push (tree, heap, results, expr);
1615     }
1616
1617   gcc_assert (expr);
1618
1619   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1620   if (VEC_length (tree, results) > 1)
1621     {
1622       size_t i;
1623       tree op;
1624
1625       expr = VEC_index (tree, results, 0);
1626       for (i = 1; VEC_iterate (tree, results, i, op); i++)
1627         expr = fold_build2 (wrap, type, expr, op);
1628     }
1629
1630   VEC_free (tree, heap, results);
1631
1632   resvar = create_tmp_var (type, "lletmp");
1633   add_referenced_var (resvar);
1634   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1635 }
1636
1637 /* Remove the induction variable defined at IV_STMT.  */
1638
1639 void
1640 remove_iv (gimple iv_stmt)
1641 {
1642   gimple_stmt_iterator si = gsi_for_stmt (iv_stmt);
1643
1644   if (gimple_code (iv_stmt) == GIMPLE_PHI)
1645     {
1646       unsigned i;
1647
1648       for (i = 0; i < gimple_phi_num_args (iv_stmt); i++)
1649         {
1650           gimple stmt;
1651           imm_use_iterator imm_iter;
1652           tree arg = gimple_phi_arg_def (iv_stmt, i);
1653           bool used = false;
1654
1655           if (TREE_CODE (arg) != SSA_NAME)
1656             continue;
1657
1658           FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
1659             if (stmt != iv_stmt)
1660               used = true;
1661
1662           if (!used)
1663             remove_iv (SSA_NAME_DEF_STMT (arg));
1664         }
1665
1666       remove_phi_node (&si, true);
1667     }
1668   else
1669     {
1670       gsi_remove (&si, true);
1671       release_defs (iv_stmt);
1672     }
1673 }
1674
1675 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1676    it, back into gcc code.  This changes the
1677    loops, their induction variables, and their bodies, so that they
1678    match the transformed loopnest.
1679    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1680    loopnest.
1681    OLD_IVS is a vector of induction variables from the old loopnest.
1682    INVARIANTS is a vector of loop invariants from the old loopnest.
1683    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1684    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1685    NEW_LOOPNEST.  */
1686
1687 void
1688 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1689                                  VEC(tree,heap) *old_ivs,
1690                                  VEC(tree,heap) *invariants,
1691                                  VEC(gimple,heap) **remove_ivs,
1692                                  lambda_loopnest new_loopnest,
1693                                  lambda_trans_matrix transform,
1694                                  struct obstack * lambda_obstack)
1695 {
1696   struct loop *temp;
1697   size_t i = 0;
1698   unsigned j;
1699   size_t depth = 0;
1700   VEC(tree,heap) *new_ivs = NULL;
1701   tree oldiv;
1702   gimple_stmt_iterator bsi;
1703
1704   transform = lambda_trans_matrix_inverse (transform);
1705
1706   if (dump_file)
1707     {
1708       fprintf (dump_file, "Inverse of transformation matrix:\n");
1709       print_lambda_trans_matrix (dump_file, transform);
1710     }
1711   depth = depth_of_nest (old_loopnest);
1712   temp = old_loopnest;
1713
1714   while (temp)
1715     {
1716       lambda_loop newloop;
1717       basic_block bb;
1718       edge exit;
1719       tree ivvar, ivvarinced;
1720       gimple exitcond;
1721       gimple_seq stmts;
1722       enum tree_code testtype;
1723       tree newupperbound, newlowerbound;
1724       lambda_linear_expression offset;
1725       tree type;
1726       bool insert_after;
1727       gimple inc_stmt;
1728
1729       oldiv = VEC_index (tree, old_ivs, i);
1730       type = TREE_TYPE (oldiv);
1731
1732       /* First, build the new induction variable temporary  */
1733
1734       ivvar = create_tmp_var (type, "lnivtmp");
1735       add_referenced_var (ivvar);
1736
1737       VEC_safe_push (tree, heap, new_ivs, ivvar);
1738
1739       newloop = LN_LOOPS (new_loopnest)[i];
1740
1741       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1742          cases for now.  */
1743       offset = LL_LINEAR_OFFSET (newloop);
1744
1745       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1746                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1747
1748       /* Now build the  new lower bounds, and insert the statements
1749          necessary to generate it on the loop preheader.  */
1750       stmts = NULL;
1751       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1752                                              LL_LINEAR_OFFSET (newloop),
1753                                              type,
1754                                              new_ivs,
1755                                              invariants, MAX_EXPR, &stmts);
1756
1757       if (stmts)
1758         {
1759           gsi_insert_seq_on_edge (loop_preheader_edge (temp), stmts);
1760           gsi_commit_edge_inserts ();
1761         }
1762       /* Build the new upper bound and insert its statements in the
1763          basic block of the exit condition */
1764       stmts = NULL;
1765       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1766                                              LL_LINEAR_OFFSET (newloop),
1767                                              type,
1768                                              new_ivs,
1769                                              invariants, MIN_EXPR, &stmts);
1770       exit = single_exit (temp);
1771       exitcond = get_loop_exit_condition (temp);
1772       bb = gimple_bb (exitcond);
1773       bsi = gsi_after_labels (bb);
1774       if (stmts)
1775         gsi_insert_seq_before (&bsi, stmts, GSI_NEW_STMT);
1776
1777       /* Create the new iv.  */
1778
1779       standard_iv_increment_position (temp, &bsi, &insert_after);
1780       create_iv (newlowerbound,
1781                  build_int_cst (type, LL_STEP (newloop)),
1782                  ivvar, temp, &bsi, insert_after, &ivvar,
1783                  NULL);
1784
1785       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1786          dominate the block containing the exit condition.
1787          So we simply create our own incremented iv to use in the new exit
1788          test,  and let redundancy elimination sort it out.  */
1789       inc_stmt = gimple_build_assign_with_ops (PLUS_EXPR, SSA_NAME_VAR (ivvar),
1790                                                ivvar,
1791                                                build_int_cst (type, LL_STEP (newloop)));
1792
1793       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1794       gimple_assign_set_lhs (inc_stmt, ivvarinced);
1795       bsi = gsi_for_stmt (exitcond);
1796       gsi_insert_before (&bsi, inc_stmt, GSI_SAME_STMT);
1797
1798       /* Replace the exit condition with the new upper bound
1799          comparison.  */
1800
1801       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1802
1803       /* We want to build a conditional where true means exit the loop, and
1804          false means continue the loop.
1805          So swap the testtype if this isn't the way things are.*/
1806
1807       if (exit->flags & EDGE_FALSE_VALUE)
1808         testtype = swap_tree_comparison (testtype);
1809
1810       gimple_cond_set_condition (exitcond, testtype, newupperbound, ivvarinced);
1811       update_stmt (exitcond);
1812       VEC_replace (tree, new_ivs, i, ivvar);
1813
1814       i++;
1815       temp = temp->inner;
1816     }
1817
1818   /* Rewrite uses of the old ivs so that they are now specified in terms of
1819      the new ivs.  */
1820
1821   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1822     {
1823       imm_use_iterator imm_iter;
1824       use_operand_p use_p;
1825       tree oldiv_def;
1826       gimple oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1827       gimple stmt;
1828
1829       if (gimple_code (oldiv_stmt) == GIMPLE_PHI)
1830         oldiv_def = PHI_RESULT (oldiv_stmt);
1831       else
1832         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1833       gcc_assert (oldiv_def != NULL_TREE);
1834
1835       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1836         {
1837           tree newiv;
1838           gimple_seq stmts;
1839           lambda_body_vector lbv, newlbv;
1840
1841           /* Compute the new expression for the induction
1842              variable.  */
1843           depth = VEC_length (tree, new_ivs);
1844           lbv = lambda_body_vector_new (depth, lambda_obstack);
1845           LBV_COEFFICIENTS (lbv)[i] = 1;
1846
1847           newlbv = lambda_body_vector_compute_new (transform, lbv,
1848                                                    lambda_obstack);
1849
1850           stmts = NULL;
1851           newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1852                                          new_ivs, &stmts);
1853
1854           if (stmts && gimple_code (stmt) != GIMPLE_PHI)
1855             {
1856               bsi = gsi_for_stmt (stmt);
1857               gsi_insert_seq_before (&bsi, stmts, GSI_SAME_STMT);
1858             }
1859
1860           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1861             propagate_value (use_p, newiv);
1862
1863           if (stmts && gimple_code (stmt) == GIMPLE_PHI)
1864             for (j = 0; j < gimple_phi_num_args (stmt); j++)
1865               if (gimple_phi_arg_def (stmt, j) == newiv)
1866                 gsi_insert_seq_on_edge (gimple_phi_arg_edge (stmt, j), stmts);
1867
1868           update_stmt (stmt);
1869         }
1870
1871       /* Remove the now unused induction variable.  */
1872       VEC_safe_push (gimple, heap, *remove_ivs, oldiv_stmt);
1873     }
1874   VEC_free (tree, heap, new_ivs);
1875 }
1876
1877 /* Return TRUE if this is not interesting statement from the perspective of
1878    determining if we have a perfect loop nest.  */
1879
1880 static bool
1881 not_interesting_stmt (gimple stmt)
1882 {
1883   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1884      loop, we would have already failed the number of exits tests.  */
1885   if (gimple_code (stmt) == GIMPLE_LABEL
1886       || gimple_code (stmt) == GIMPLE_GOTO
1887       || gimple_code (stmt) == GIMPLE_COND)
1888     return true;
1889   return false;
1890 }
1891
1892 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1893
1894 static bool
1895 phi_loop_edge_uses_def (struct loop *loop, gimple phi, tree def)
1896 {
1897   unsigned i;
1898   for (i = 0; i < gimple_phi_num_args (phi); i++)
1899     if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, i)->src))
1900       if (PHI_ARG_DEF (phi, i) == def)
1901         return true;
1902   return false;
1903 }
1904
1905 /* Return TRUE if STMT is a use of PHI_RESULT.  */
1906
1907 static bool
1908 stmt_uses_phi_result (gimple stmt, tree phi_result)
1909 {
1910   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1911
1912   /* This is conservatively true, because we only want SIMPLE bumpers
1913      of the form x +- constant for our pass.  */
1914   return (use == phi_result);
1915 }
1916
1917 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
1918    in-loop-edge in a phi node, and the operand it uses is the result of that
1919    phi node.
1920    I.E. i_29 = i_3 + 1
1921         i_3 = PHI (0, i_29);  */
1922
1923 static bool
1924 stmt_is_bumper_for_loop (struct loop *loop, gimple stmt)
1925 {
1926   gimple use;
1927   tree def;
1928   imm_use_iterator iter;
1929   use_operand_p use_p;
1930
1931   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1932   if (!def)
1933     return false;
1934
1935   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1936     {
1937       use = USE_STMT (use_p);
1938       if (gimple_code (use) == GIMPLE_PHI)
1939         {
1940           if (phi_loop_edge_uses_def (loop, use, def))
1941             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1942               return true;
1943         }
1944     }
1945   return false;
1946 }
1947
1948
1949 /* Return true if LOOP is a perfect loop nest.
1950    Perfect loop nests are those loop nests where all code occurs in the
1951    innermost loop body.
1952    If S is a program statement, then
1953
1954    i.e.
1955    DO I = 1, 20
1956        S1
1957        DO J = 1, 20
1958        ...
1959        END DO
1960    END DO
1961    is not a perfect loop nest because of S1.
1962
1963    DO I = 1, 20
1964       DO J = 1, 20
1965         S1
1966         ...
1967       END DO
1968    END DO
1969    is a perfect loop nest.
1970
1971    Since we don't have high level loops anymore, we basically have to walk our
1972    statements and ignore those that are there because the loop needs them (IE
1973    the induction variable increment, and jump back to the top of the loop).  */
1974
1975 bool
1976 perfect_nest_p (struct loop *loop)
1977 {
1978   basic_block *bbs;
1979   size_t i;
1980   gimple exit_cond;
1981
1982   /* Loops at depth 0 are perfect nests.  */
1983   if (!loop->inner)
1984     return true;
1985
1986   bbs = get_loop_body (loop);
1987   exit_cond = get_loop_exit_condition (loop);
1988
1989   for (i = 0; i < loop->num_nodes; i++)
1990     {
1991       if (bbs[i]->loop_father == loop)
1992         {
1993           gimple_stmt_iterator bsi;
1994
1995           for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi); gsi_next (&bsi))
1996             {
1997               gimple stmt = gsi_stmt (bsi);
1998
1999               if (gimple_code (stmt) == GIMPLE_COND
2000                   && exit_cond != stmt)
2001                 goto non_perfectly_nested;
2002
2003               if (stmt == exit_cond
2004                   || not_interesting_stmt (stmt)
2005                   || stmt_is_bumper_for_loop (loop, stmt))
2006                 continue;
2007
2008             non_perfectly_nested:
2009               free (bbs);
2010               return false;
2011             }
2012         }
2013     }
2014
2015   free (bbs);
2016
2017   return perfect_nest_p (loop->inner);
2018 }
2019
2020 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2021    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2022    avoid creating duplicate temporaries and FIRSTBSI is statement
2023    iterator where new temporaries should be inserted at the beginning
2024    of body basic block.  */
2025
2026 static void
2027 replace_uses_equiv_to_x_with_y (struct loop *loop, gimple stmt, tree x,
2028                                 int xstep, tree y, tree yinit,
2029                                 htab_t replacements,
2030                                 gimple_stmt_iterator *firstbsi)
2031 {
2032   ssa_op_iter iter;
2033   use_operand_p use_p;
2034
2035   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2036     {
2037       tree use = USE_FROM_PTR (use_p);
2038       tree step = NULL_TREE;
2039       tree scev, init, val, var;
2040       gimple setstmt;
2041       struct tree_map *h, in;
2042       void **loc;
2043
2044       /* Replace uses of X with Y right away.  */
2045       if (use == x)
2046         {
2047           SET_USE (use_p, y);
2048           continue;
2049         }
2050
2051       scev = instantiate_parameters (loop,
2052                                      analyze_scalar_evolution (loop, use));
2053
2054       if (scev == NULL || scev == chrec_dont_know)
2055         continue;
2056
2057       step = evolution_part_in_loop_num (scev, loop->num);
2058       if (step == NULL
2059           || step == chrec_dont_know
2060           || TREE_CODE (step) != INTEGER_CST
2061           || int_cst_value (step) != xstep)
2062         continue;
2063
2064       /* Use REPLACEMENTS hash table to cache already created
2065          temporaries.  */
2066       in.hash = htab_hash_pointer (use);
2067       in.base.from = use;
2068       h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
2069       if (h != NULL)
2070         {
2071           SET_USE (use_p, h->to);
2072           continue;
2073         }
2074
2075       /* USE which has the same step as X should be replaced
2076          with a temporary set to Y + YINIT - INIT.  */
2077       init = initial_condition_in_loop_num (scev, loop->num);
2078       gcc_assert (init != NULL && init != chrec_dont_know);
2079       if (TREE_TYPE (use) == TREE_TYPE (y))
2080         {
2081           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2082           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2083           if (val == y)
2084             {
2085               /* If X has the same type as USE, the same step
2086                  and same initial value, it can be replaced by Y.  */
2087               SET_USE (use_p, y);
2088               continue;
2089             }
2090         }
2091       else
2092         {
2093           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2094           val = fold_convert (TREE_TYPE (use), val);
2095           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2096         }
2097
2098       /* Create a temporary variable and insert it at the beginning
2099          of the loop body basic block, right after the PHI node
2100          which sets Y.  */
2101       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2102       add_referenced_var (var);
2103       val = force_gimple_operand_gsi (firstbsi, val, false, NULL,
2104                                       true, GSI_SAME_STMT);
2105       setstmt = gimple_build_assign (var, val);
2106       var = make_ssa_name (var, setstmt);
2107       gimple_assign_set_lhs (setstmt, var);
2108       gsi_insert_before (firstbsi, setstmt, GSI_SAME_STMT);
2109       update_stmt (setstmt);
2110       SET_USE (use_p, var);
2111       h = GGC_NEW (struct tree_map);
2112       h->hash = in.hash;
2113       h->base.from = use;
2114       h->to = var;
2115       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2116       gcc_assert ((*(struct tree_map **)loc) == NULL);
2117       *(struct tree_map **) loc = h;
2118     }
2119 }
2120
2121 /* Return true if STMT is an exit PHI for LOOP */
2122
2123 static bool
2124 exit_phi_for_loop_p (struct loop *loop, gimple stmt)
2125 {
2126   if (gimple_code (stmt) != GIMPLE_PHI
2127       || gimple_phi_num_args (stmt) != 1
2128       || gimple_bb (stmt) != single_exit (loop)->dest)
2129     return false;
2130
2131   return true;
2132 }
2133
2134 /* Return true if STMT can be put back into the loop INNER, by
2135    copying it to the beginning of that loop and changing the uses.  */
2136
2137 static bool
2138 can_put_in_inner_loop (struct loop *inner, gimple stmt)
2139 {
2140   imm_use_iterator imm_iter;
2141   use_operand_p use_p;
2142
2143   gcc_assert (is_gimple_assign (stmt));
2144   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2145       || !stmt_invariant_in_loop_p (inner, stmt))
2146     return false;
2147
2148   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2149     {
2150       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2151         {
2152           basic_block immbb = gimple_bb (USE_STMT (use_p));
2153
2154           if (!flow_bb_inside_loop_p (inner, immbb))
2155             return false;
2156         }
2157     }
2158   return true;
2159 }
2160
2161 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2162
2163 static bool
2164 can_put_after_inner_loop (struct loop *loop, gimple stmt)
2165 {
2166   imm_use_iterator imm_iter;
2167   use_operand_p use_p;
2168
2169   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2170     return false;
2171
2172   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2173     {
2174       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2175         {
2176           basic_block immbb = gimple_bb (USE_STMT (use_p));
2177
2178           if (!dominated_by_p (CDI_DOMINATORS,
2179                                immbb,
2180                                loop->inner->header)
2181               && !can_put_in_inner_loop (loop->inner, stmt))
2182             return false;
2183         }
2184     }
2185   return true;
2186 }
2187
2188 /* Return true when the induction variable IV is simple enough to be
2189    re-synthesized.  */
2190
2191 static bool
2192 can_duplicate_iv (tree iv, struct loop *loop)
2193 {
2194   tree scev = instantiate_parameters
2195     (loop, analyze_scalar_evolution (loop, iv));
2196
2197   if (!automatically_generated_chrec_p (scev))
2198     {
2199       tree step = evolution_part_in_loop_num (scev, loop->num);
2200
2201       if (step && step != chrec_dont_know && TREE_CODE (step) == INTEGER_CST)
2202         return true;
2203     }
2204
2205   return false;
2206 }
2207
2208 /* If this is a scalar operation that can be put back into the inner
2209    loop, or after the inner loop, through copying, then do so. This
2210    works on the theory that any amount of scalar code we have to
2211    reduplicate into or after the loops is less expensive that the win
2212    we get from rearranging the memory walk the loop is doing so that
2213    it has better cache behavior.  */
2214
2215 static bool
2216 cannot_convert_modify_to_perfect_nest (gimple stmt, struct loop *loop)
2217 {
2218   use_operand_p use_a, use_b;
2219   imm_use_iterator imm_iter;
2220   ssa_op_iter op_iter, op_iter1;
2221   tree op0 = gimple_assign_lhs (stmt);
2222
2223   /* The statement should not define a variable used in the inner
2224      loop.  */
2225   if (TREE_CODE (op0) == SSA_NAME
2226       && !can_duplicate_iv (op0, loop))
2227     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2228       if (gimple_bb (USE_STMT (use_a))->loop_father == loop->inner)
2229         return true;
2230
2231   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2232     {
2233       gimple node;
2234       tree op = USE_FROM_PTR (use_a);
2235
2236       /* The variables should not be used in both loops.  */
2237       if (!can_duplicate_iv (op, loop))
2238         FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2239           if (gimple_bb (USE_STMT (use_b))->loop_father == loop->inner)
2240             return true;
2241
2242       /* The statement should not use the value of a scalar that was
2243          modified in the loop.  */
2244       node = SSA_NAME_DEF_STMT (op);
2245       if (gimple_code (node) == GIMPLE_PHI)
2246         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2247           {
2248             tree arg = USE_FROM_PTR (use_b);
2249
2250             if (TREE_CODE (arg) == SSA_NAME)
2251               {
2252                 gimple arg_stmt = SSA_NAME_DEF_STMT (arg);
2253
2254                 if (gimple_bb (arg_stmt)
2255                     && (gimple_bb (arg_stmt)->loop_father == loop->inner))
2256                   return true;
2257               }
2258           }
2259     }
2260
2261   return false;
2262 }
2263 /* Return true when BB contains statements that can harm the transform
2264    to a perfect loop nest.  */
2265
2266 static bool
2267 cannot_convert_bb_to_perfect_nest (basic_block bb, struct loop *loop)
2268 {
2269   gimple_stmt_iterator bsi;
2270   gimple exit_condition = get_loop_exit_condition (loop);
2271
2272   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2273     {
2274       gimple stmt = gsi_stmt (bsi);
2275
2276       if (stmt == exit_condition
2277           || not_interesting_stmt (stmt)
2278           || stmt_is_bumper_for_loop (loop, stmt))
2279         continue;
2280
2281       if (is_gimple_assign (stmt))
2282         {
2283           if (cannot_convert_modify_to_perfect_nest (stmt, loop))
2284             return true;
2285
2286           if (can_duplicate_iv (gimple_assign_lhs (stmt), loop))
2287             continue;
2288
2289           if (can_put_in_inner_loop (loop->inner, stmt)
2290               || can_put_after_inner_loop (loop, stmt))
2291             continue;
2292         }
2293
2294       /* If the bb of a statement we care about isn't dominated by the
2295          header of the inner loop, then we can't handle this case
2296          right now.  This test ensures that the statement comes
2297          completely *after* the inner loop.  */
2298       if (!dominated_by_p (CDI_DOMINATORS,
2299                            gimple_bb (stmt),
2300                            loop->inner->header))
2301         return true;
2302     }
2303
2304   return false;
2305 }
2306
2307
2308 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2309    perfect one.  At the moment, we only handle imperfect nests of
2310    depth 2, where all of the statements occur after the inner loop.  */
2311
2312 static bool
2313 can_convert_to_perfect_nest (struct loop *loop)
2314 {
2315   basic_block *bbs;
2316   size_t i;
2317   gimple_stmt_iterator si;
2318
2319   /* Can't handle triply nested+ loops yet.  */
2320   if (!loop->inner || loop->inner->inner)
2321     return false;
2322
2323   bbs = get_loop_body (loop);
2324   for (i = 0; i < loop->num_nodes; i++)
2325     if (bbs[i]->loop_father == loop
2326         && cannot_convert_bb_to_perfect_nest (bbs[i], loop))
2327       goto fail;
2328
2329   /* We also need to make sure the loop exit only has simple copy phis in it,
2330      otherwise we don't know how to transform it into a perfect nest.  */
2331   for (si = gsi_start_phis (single_exit (loop)->dest);
2332        !gsi_end_p (si);
2333        gsi_next (&si))
2334     if (gimple_phi_num_args (gsi_stmt (si)) != 1)
2335       goto fail;
2336
2337   free (bbs);
2338   return true;
2339
2340  fail:
2341   free (bbs);
2342   return false;
2343 }
2344
2345 /* Transform the loop nest into a perfect nest, if possible.
2346    LOOP is the loop nest to transform into a perfect nest
2347    LBOUNDS are the lower bounds for the loops to transform
2348    UBOUNDS are the upper bounds for the loops to transform
2349    STEPS is the STEPS for the loops to transform.
2350    LOOPIVS is the induction variables for the loops to transform.
2351
2352    Basically, for the case of
2353
2354    FOR (i = 0; i < 50; i++)
2355     {
2356      FOR (j =0; j < 50; j++)
2357      {
2358         <whatever>
2359      }
2360      <some code>
2361     }
2362
2363    This function will transform it into a perfect loop nest by splitting the
2364    outer loop into two loops, like so:
2365
2366    FOR (i = 0; i < 50; i++)
2367    {
2368      FOR (j = 0; j < 50; j++)
2369      {
2370          <whatever>
2371      }
2372    }
2373
2374    FOR (i = 0; i < 50; i ++)
2375    {
2376     <some code>
2377    }
2378
2379    Return FALSE if we can't make this loop into a perfect nest.  */
2380
2381 static bool
2382 perfect_nestify (struct loop *loop,
2383                  VEC(tree,heap) *lbounds,
2384                  VEC(tree,heap) *ubounds,
2385                  VEC(int,heap) *steps,
2386                  VEC(tree,heap) *loopivs)
2387 {
2388   basic_block *bbs;
2389   gimple exit_condition;
2390   gimple cond_stmt;
2391   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2392   int i;
2393   gimple_stmt_iterator bsi, firstbsi;
2394   bool insert_after;
2395   edge e;
2396   struct loop *newloop;
2397   gimple phi;
2398   tree uboundvar;
2399   gimple stmt;
2400   tree oldivvar, ivvar, ivvarinced;
2401   VEC(tree,heap) *phis = NULL;
2402   htab_t replacements = NULL;
2403
2404   /* Create the new loop.  */
2405   olddest = single_exit (loop)->dest;
2406   preheaderbb = split_edge (single_exit (loop));
2407   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2408
2409   /* Push the exit phi nodes that we are moving.  */
2410   for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); gsi_next (&bsi))
2411     {
2412       phi = gsi_stmt (bsi);
2413       VEC_reserve (tree, heap, phis, 2);
2414       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2415       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2416     }
2417   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2418
2419   /* Remove the exit phis from the old basic block.  */
2420   for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); )
2421     remove_phi_node (&bsi, false);
2422
2423   /* and add them back to the new basic block.  */
2424   while (VEC_length (tree, phis) != 0)
2425     {
2426       tree def;
2427       tree phiname;
2428       def = VEC_pop (tree, phis);
2429       phiname = VEC_pop (tree, phis);
2430       phi = create_phi_node (phiname, preheaderbb);
2431       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2432     }
2433   flush_pending_stmts (e);
2434   VEC_free (tree, heap, phis);
2435
2436   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2437   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2438   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2439   cond_stmt = gimple_build_cond (NE_EXPR, integer_one_node, integer_zero_node,
2440                                  NULL_TREE, NULL_TREE);
2441   bsi = gsi_start_bb (bodybb);
2442   gsi_insert_after (&bsi, cond_stmt, GSI_NEW_STMT);
2443   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2444   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2445   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2446
2447   /* Update the loop structures.  */
2448   newloop = duplicate_loop (loop, olddest->loop_father);
2449   newloop->header = headerbb;
2450   newloop->latch = latchbb;
2451   add_bb_to_loop (latchbb, newloop);
2452   add_bb_to_loop (bodybb, newloop);
2453   add_bb_to_loop (headerbb, newloop);
2454   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2455   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2456   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2457                            single_exit (loop)->src);
2458   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2459   set_immediate_dominator (CDI_DOMINATORS, olddest,
2460                            recompute_dominator (CDI_DOMINATORS, olddest));
2461   /* Create the new iv.  */
2462   oldivvar = VEC_index (tree, loopivs, 0);
2463   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2464   add_referenced_var (ivvar);
2465   standard_iv_increment_position (newloop, &bsi, &insert_after);
2466   create_iv (VEC_index (tree, lbounds, 0),
2467              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2468              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2469
2470   /* Create the new upper bound.  This may be not just a variable, so we copy
2471      it to one just in case.  */
2472
2473   exit_condition = get_loop_exit_condition (newloop);
2474   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2475   add_referenced_var (uboundvar);
2476   stmt = gimple_build_assign (uboundvar, VEC_index (tree, ubounds, 0));
2477   uboundvar = make_ssa_name (uboundvar, stmt);
2478   gimple_assign_set_lhs (stmt, uboundvar);
2479
2480   if (insert_after)
2481     gsi_insert_after (&bsi, stmt, GSI_SAME_STMT);
2482   else
2483     gsi_insert_before (&bsi, stmt, GSI_SAME_STMT);
2484   update_stmt (stmt);
2485   gimple_cond_set_condition (exit_condition, GE_EXPR, uboundvar, ivvarinced);
2486   update_stmt (exit_condition);
2487   replacements = htab_create_ggc (20, tree_map_hash,
2488                                   tree_map_eq, NULL);
2489   bbs = get_loop_body_in_dom_order (loop);
2490   /* Now move the statements, and replace the induction variable in the moved
2491      statements with the correct loop induction variable.  */
2492   oldivvar = VEC_index (tree, loopivs, 0);
2493   firstbsi = gsi_start_bb (bodybb);
2494   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2495     {
2496       gimple_stmt_iterator tobsi = gsi_last_bb (bodybb);
2497       if (bbs[i]->loop_father == loop)
2498         {
2499           /* If this is true, we are *before* the inner loop.
2500              If this isn't true, we are *after* it.
2501
2502              The only time can_convert_to_perfect_nest returns true when we
2503              have statements before the inner loop is if they can be moved
2504              into the inner loop.
2505
2506              The only time can_convert_to_perfect_nest returns true when we
2507              have statements after the inner loop is if they can be moved into
2508              the new split loop.  */
2509
2510           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2511             {
2512               gimple_stmt_iterator header_bsi
2513                 = gsi_after_labels (loop->inner->header);
2514
2515               for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2516                 {
2517                   gimple stmt = gsi_stmt (bsi);
2518
2519                   if (stmt == exit_condition
2520                       || not_interesting_stmt (stmt)
2521                       || stmt_is_bumper_for_loop (loop, stmt))
2522                     {
2523                       gsi_next (&bsi);
2524                       continue;
2525                     }
2526
2527                   gsi_move_before (&bsi, &header_bsi);
2528                 }
2529             }
2530           else
2531             {
2532               /* Note that the bsi only needs to be explicitly incremented
2533                  when we don't move something, since it is automatically
2534                  incremented when we do.  */
2535               for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2536                 {
2537                   ssa_op_iter i;
2538                   tree n;
2539                   gimple stmt = gsi_stmt (bsi);
2540
2541                   if (stmt == exit_condition
2542                       || not_interesting_stmt (stmt)
2543                       || stmt_is_bumper_for_loop (loop, stmt))
2544                     {
2545                       gsi_next (&bsi);
2546                       continue;
2547                     }
2548
2549                   replace_uses_equiv_to_x_with_y
2550                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2551                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2552
2553                   gsi_move_before (&bsi, &tobsi);
2554
2555                   /* If the statement has any virtual operands, they may
2556                      need to be rewired because the original loop may
2557                      still reference them.  */
2558                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2559                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2560                 }
2561             }
2562
2563         }
2564     }
2565
2566   free (bbs);
2567   htab_delete (replacements);
2568   return perfect_nest_p (loop);
2569 }
2570
2571 /* Return true if TRANS is a legal transformation matrix that respects
2572    the dependence vectors in DISTS and DIRS.  The conservative answer
2573    is false.
2574
2575    "Wolfe proves that a unimodular transformation represented by the
2576    matrix T is legal when applied to a loop nest with a set of
2577    lexicographically non-negative distance vectors RDG if and only if
2578    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2579    i.e.: if and only if it transforms the lexicographically positive
2580    distance vectors to lexicographically positive vectors.  Note that
2581    a unimodular matrix must transform the zero vector (and only it) to
2582    the zero vector." S.Muchnick.  */
2583
2584 bool
2585 lambda_transform_legal_p (lambda_trans_matrix trans,
2586                           int nb_loops,
2587                           VEC (ddr_p, heap) *dependence_relations)
2588 {
2589   unsigned int i, j;
2590   lambda_vector distres;
2591   struct data_dependence_relation *ddr;
2592
2593   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2594               && LTM_ROWSIZE (trans) == nb_loops);
2595
2596   /* When there are no dependences, the transformation is correct.  */
2597   if (VEC_length (ddr_p, dependence_relations) == 0)
2598     return true;
2599
2600   ddr = VEC_index (ddr_p, dependence_relations, 0);
2601   if (ddr == NULL)
2602     return true;
2603
2604   /* When there is an unknown relation in the dependence_relations, we
2605      know that it is no worth looking at this loop nest: give up.  */
2606   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2607     return false;
2608
2609   distres = lambda_vector_new (nb_loops);
2610
2611   /* For each distance vector in the dependence graph.  */
2612   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2613     {
2614       /* Don't care about relations for which we know that there is no
2615          dependence, nor about read-read (aka. output-dependences):
2616          these data accesses can happen in any order.  */
2617       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2618           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2619         continue;
2620
2621       /* Conservatively answer: "this transformation is not valid".  */
2622       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2623         return false;
2624
2625       /* If the dependence could not be captured by a distance vector,
2626          conservatively answer that the transform is not valid.  */
2627       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2628         return false;
2629
2630       /* Compute trans.dist_vect */
2631       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2632         {
2633           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2634                                      DDR_DIST_VECT (ddr, j), distres);
2635
2636           if (!lambda_vector_lexico_pos (distres, nb_loops))
2637             return false;
2638         }
2639     }
2640   return true;
2641 }
2642
2643
2644 /* Collects parameters from affine function ACCESS_FUNCTION, and push
2645    them in PARAMETERS.  */
2646
2647 static void
2648 lambda_collect_parameters_from_af (tree access_function,
2649                                    struct pointer_set_t *param_set,
2650                                    VEC (tree, heap) **parameters)
2651 {
2652   if (access_function == NULL)
2653     return;
2654
2655   if (TREE_CODE (access_function) == SSA_NAME
2656       && pointer_set_contains (param_set, access_function) == 0)
2657     {
2658       pointer_set_insert (param_set, access_function);
2659       VEC_safe_push (tree, heap, *parameters, access_function);
2660     }
2661   else
2662     {
2663       int i, num_operands = tree_operand_length (access_function);
2664
2665       for (i = 0; i < num_operands; i++)
2666         lambda_collect_parameters_from_af (TREE_OPERAND (access_function, i),
2667                                            param_set, parameters);
2668     }
2669 }
2670
2671 /* Collects parameters from DATAREFS, and push them in PARAMETERS.  */
2672
2673 void
2674 lambda_collect_parameters (VEC (data_reference_p, heap) *datarefs,
2675                            VEC (tree, heap) **parameters)
2676 {
2677   unsigned i, j;
2678   struct pointer_set_t *parameter_set = pointer_set_create ();
2679   data_reference_p data_reference;
2680
2681   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, data_reference); i++)
2682     for (j = 0; j < DR_NUM_DIMENSIONS (data_reference); j++)
2683       lambda_collect_parameters_from_af (DR_ACCESS_FN (data_reference, j),
2684                                          parameter_set, parameters);
2685 }
2686
2687 /* Translates BASE_EXPR to vector CY.  AM is needed for inferring
2688    indexing positions in the data access vector.  CST is the analyzed
2689    integer constant.  */
2690
2691 static bool
2692 av_for_af_base (tree base_expr, lambda_vector cy, struct access_matrix *am,
2693                 int cst)
2694 {
2695   bool result = true;
2696
2697   switch (TREE_CODE (base_expr))
2698     {
2699     case INTEGER_CST:
2700       /* Constant part.  */
2701       cy[AM_CONST_COLUMN_INDEX (am)] += int_cst_value (base_expr) * cst;
2702       return true;
2703
2704     case SSA_NAME:
2705       {
2706         int param_index =
2707           access_matrix_get_index_for_parameter (base_expr, am);
2708
2709         if (param_index >= 0)
2710           {
2711             cy[param_index] = cst + cy[param_index];
2712             return true;
2713           }
2714
2715         return false;
2716       }
2717
2718     case PLUS_EXPR:
2719       return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2720         && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, cst);
2721
2722     case MINUS_EXPR:
2723       return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2724         && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, -1 * cst);
2725
2726     case MULT_EXPR:
2727       if (TREE_CODE (TREE_OPERAND (base_expr, 0)) == INTEGER_CST)
2728         result = av_for_af_base (TREE_OPERAND (base_expr, 1),
2729                                  cy, am, cst *
2730                                  int_cst_value (TREE_OPERAND (base_expr, 0)));
2731       else if (TREE_CODE (TREE_OPERAND (base_expr, 1)) == INTEGER_CST)
2732         result = av_for_af_base (TREE_OPERAND (base_expr, 0),
2733                                  cy, am, cst *
2734                                  int_cst_value (TREE_OPERAND (base_expr, 1)));
2735       else
2736         result = false;
2737
2738       return result;
2739
2740     case NEGATE_EXPR:
2741       return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, -1 * cst);
2742
2743     default:
2744       return false;
2745     }
2746
2747   return result;
2748 }
2749
2750 /* Translates ACCESS_FUN to vector CY.  AM is needed for inferring
2751    indexing positions in the data access vector.  */
2752
2753 static bool
2754 av_for_af (tree access_fun, lambda_vector cy, struct access_matrix *am)
2755 {
2756   switch (TREE_CODE (access_fun))
2757     {
2758     case POLYNOMIAL_CHREC:
2759       {
2760         tree left = CHREC_LEFT (access_fun);
2761         tree right = CHREC_RIGHT (access_fun);
2762         unsigned var;
2763
2764         if (TREE_CODE (right) != INTEGER_CST)
2765           return false;
2766
2767         var = am_vector_index_for_loop (am, CHREC_VARIABLE (access_fun));
2768         cy[var] = int_cst_value (right);
2769
2770         if (TREE_CODE (left) == POLYNOMIAL_CHREC)
2771           return av_for_af (left, cy, am);
2772         else
2773           return av_for_af_base (left, cy, am, 1);
2774       }
2775
2776     case INTEGER_CST:
2777       /* Constant part.  */
2778       return av_for_af_base (access_fun, cy, am, 1);
2779
2780     default:
2781       return false;
2782     }
2783 }
2784
2785 /* Initializes the access matrix for DATA_REFERENCE.  */
2786
2787 static bool
2788 build_access_matrix (data_reference_p data_reference,
2789                      VEC (tree, heap) *parameters, VEC (loop_p, heap) *nest)
2790 {
2791   struct access_matrix *am = GGC_NEW (struct access_matrix);
2792   unsigned i, ndim = DR_NUM_DIMENSIONS (data_reference);
2793   unsigned nivs = VEC_length (loop_p, nest);
2794   unsigned lambda_nb_columns;
2795   lambda_vector_vec_p matrix;
2796
2797   AM_LOOP_NEST (am) = nest;
2798   AM_NB_INDUCTION_VARS (am) = nivs;
2799   AM_PARAMETERS (am) = parameters;
2800
2801   lambda_nb_columns = AM_NB_COLUMNS (am);
2802   matrix = VEC_alloc (lambda_vector, heap, lambda_nb_columns);
2803   AM_MATRIX (am) = matrix;
2804
2805   for (i = 0; i < ndim; i++)
2806     {
2807       lambda_vector access_vector = lambda_vector_new (lambda_nb_columns);
2808       tree access_function = DR_ACCESS_FN (data_reference, i);
2809
2810       if (!av_for_af (access_function, access_vector, am))
2811         return false;
2812
2813       VEC_safe_push (lambda_vector, heap, matrix, access_vector);
2814     }
2815
2816   DR_ACCESS_MATRIX (data_reference) = am;
2817   return true;
2818 }
2819
2820 /* Returns false when one of the access matrices cannot be built.  */
2821
2822 bool
2823 lambda_compute_access_matrices (VEC (data_reference_p, heap) *datarefs,
2824                                 VEC (tree, heap) *parameters,
2825                                 VEC (loop_p, heap) *nest)
2826 {
2827   data_reference_p dataref;
2828   unsigned ix;
2829
2830   for (ix = 0; VEC_iterate (data_reference_p, datarefs, ix, dataref); ix++)
2831     if (!build_access_matrix (dataref, parameters, nest))
2832       return false;
2833
2834   return true;
2835 }