gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "tm.h"
  80 #include "ggc.h"
  81 #include "tree.h"
  82
  83 /* These RTL headers are needed for basic-block.h.  */
  84 #include "rtl.h"
  85 #include "basic-block.h"
  86 #include "diagnostic.h"
  87 #include "tree-flow.h"
  88 #include "tree-dump.h"
  89 #include "timevar.h"
  90 #include "cfgloop.h"
  91 #include "tree-data-ref.h"
  92 #include "tree-scalar-evolution.h"
  93 #include "tree-pass.h"
  94 #include "langhooks.h"
  95
  96 static struct datadep_stats
  97 {
  98   int num_dependence_tests;
  99   int num_dependence_dependent;
 100   int num_dependence_independent;
 101   int num_dependence_undetermined;
 102
 103   int num_subscript_tests;
 104   int num_subscript_undetermined;
 105   int num_same_subscript_function;
 106
 107   int num_ziv;
 108   int num_ziv_independent;
 109   int num_ziv_dependent;
 110   int num_ziv_unimplemented;
 111
 112   int num_siv;
 113   int num_siv_independent;
 114   int num_siv_dependent;
 115   int num_siv_unimplemented;
 116
 117   int num_miv;
 118   int num_miv_independent;
 119   int num_miv_dependent;
 120   int num_miv_unimplemented;
 121 } dependence_stats;
 122
 123 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 124                                            struct data_reference *,
 125                                            struct data_reference *,
 126                                            struct loop *);
 127 /* Returns true iff A divides B.  */
 128
 129 static inline bool
 130 tree_fold_divides_p (const_tree a, const_tree b)
 131 {
 132   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 133   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 134   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
 135 }
 136
 137 /* Returns true iff A divides B.  */
 138
 139 static inline bool
 140 int_divides_p (int a, int b)
 141 {
 142   return ((b % a) == 0);
 143 }
 144
 145 \f
 146
 147 /* Dump into FILE all the data references from DATAREFS.  */
 148
 149 void
 150 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
 151 {
 152   unsigned int i;
 153   struct data_reference *dr;
 154
 155   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
 156     dump_data_reference (file, dr);
 157 }
 158
 159 /* Dump to STDERR all the dependence relations from DDRS.  */
 160
 161 void
 162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
 163 {
 164   dump_data_dependence_relations (stderr, ddrs);
 165 }
 166
 167 /* Dump into FILE all the dependence relations from DDRS.  */
 168
 169 void
 170 dump_data_dependence_relations (FILE *file,
 171                                 VEC (ddr_p, heap) *ddrs)
 172 {
 173   unsigned int i;
 174   struct data_dependence_relation *ddr;
 175
 176   for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
 177     dump_data_dependence_relation (file, ddr);
 178 }
 179
 180 /* Dump function for a DATA_REFERENCE structure.  */
 181
 182 void
 183 dump_data_reference (FILE *outf,
 184                      struct data_reference *dr)
 185 {
 186   unsigned int i;
 187
 188   fprintf (outf, "(Data Ref: \n  stmt: ");
 189   print_generic_stmt (outf, DR_STMT (dr), 0);
 190   fprintf (outf, "  ref: ");
 191   print_generic_stmt (outf, DR_REF (dr), 0);
 192   fprintf (outf, "  base_object: ");
 193   print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
 194
 195   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 196     {
 197       fprintf (outf, "  Access function %d: ", i);
 198       print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
 199     }
 200   fprintf (outf, ")\n");
 201 }
 202
 203 /* Dumps the affine function described by FN to the file OUTF.  */
 204
 205 static void
 206 dump_affine_function (FILE *outf, affine_fn fn)
 207 {
 208   unsigned i;
 209   tree coef;
 210
 211   print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
 212   for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
 213     {
 214       fprintf (outf, " + ");
 215       print_generic_expr (outf, coef, TDF_SLIM);
 216       fprintf (outf, " * x_%u", i);
 217     }
 218 }
 219
 220 /* Dumps the conflict function CF to the file OUTF.  */
 221
 222 static void
 223 dump_conflict_function (FILE *outf, conflict_function *cf)
 224 {
 225   unsigned i;
 226
 227   if (cf->n == NO_DEPENDENCE)
 228     fprintf (outf, "no dependence\n");
 229   else if (cf->n == NOT_KNOWN)
 230     fprintf (outf, "not known\n");
 231   else
 232     {
 233       for (i = 0; i < cf->n; i++)
 234         {
 235           fprintf (outf, "[");
 236           dump_affine_function (outf, cf->fns[i]);
 237           fprintf (outf, "]\n");
 238         }
 239     }
 240 }
 241
 242 /* Dump function for a SUBSCRIPT structure.  */
 243
 244 void
 245 dump_subscript (FILE *outf, struct subscript *subscript)
 246 {
 247   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 248
 249   fprintf (outf, "\n (subscript \n");
 250   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 251   dump_conflict_function (outf, cf);
 252   if (CF_NONTRIVIAL_P (cf))
 253     {
 254       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 255       fprintf (outf, "  last_conflict: ");
 256       print_generic_stmt (outf, last_iteration, 0);
 257     }
 258
 259   cf = SUB_CONFLICTS_IN_B (subscript);
 260   fprintf (outf, "  iterations_that_access_an_element_twice_in_B: ");
 261   dump_conflict_function (outf, cf);
 262   if (CF_NONTRIVIAL_P (cf))
 263     {
 264       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 265       fprintf (outf, "  last_conflict: ");
 266       print_generic_stmt (outf, last_iteration, 0);
 267     }
 268
 269   fprintf (outf, "  (Subscript distance: ");
 270   print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
 271   fprintf (outf, "  )\n");
 272   fprintf (outf, " )\n");
 273 }
 274
 275 /* Print the classic direction vector DIRV to OUTF.  */
 276
 277 void
 278 print_direction_vector (FILE *outf,
 279                         lambda_vector dirv,
 280                         int length)
 281 {
 282   int eq;
 283
 284   for (eq = 0; eq < length; eq++)
 285     {
 286       enum data_dependence_direction dir = dirv[eq];
 287
 288       switch (dir)
 289         {
 290         case dir_positive:
 291           fprintf (outf, "    +");
 292           break;
 293         case dir_negative:
 294           fprintf (outf, "    -");
 295           break;
 296         case dir_equal:
 297           fprintf (outf, "    =");
 298           break;
 299         case dir_positive_or_equal:
 300           fprintf (outf, "   +=");
 301           break;
 302         case dir_positive_or_negative:
 303           fprintf (outf, "   +-");
 304           break;
 305         case dir_negative_or_equal:
 306           fprintf (outf, "   -=");
 307           break;
 308         case dir_star:
 309           fprintf (outf, "    *");
 310           break;
 311         default:
 312           fprintf (outf, "indep");
 313           break;
 314         }
 315     }
 316   fprintf (outf, "\n");
 317 }
 318
 319 /* Print a vector of direction vectors.  */
 320
 321 void
 322 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
 323                    int length)
 324 {
 325   unsigned j;
 326   lambda_vector v;
 327
 328   for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
 329     print_direction_vector (outf, v, length);
 330 }
 331
 332 /* Print a vector of distance vectors.  */
 333
 334 void
 335 print_dist_vectors  (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
 336                      int length)
 337 {
 338   unsigned j;
 339   lambda_vector v;
 340
 341   for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
 342     print_lambda_vector (outf, v, length);
 343 }
 344
 345 /* Debug version.  */
 346
 347 void
 348 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 349 {
 350   dump_data_dependence_relation (stderr, ddr);
 351 }
 352
 353 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 354
 355 void
 356 dump_data_dependence_relation (FILE *outf,
 357                                struct data_dependence_relation *ddr)
 358 {
 359   struct data_reference *dra, *drb;
 360
 361   fprintf (outf, "(Data Dep: \n");
 362
 363   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 364     {
 365       fprintf (outf, "    (don't know)\n)\n");
 366       return;
 367     }
 368
 369   dra = DDR_A (ddr);
 370   drb = DDR_B (ddr);
 371   dump_data_reference (outf, dra);
 372   dump_data_reference (outf, drb);
 373
 374   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 375     fprintf (outf, "    (no dependence)\n");
 376
 377   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 378     {
 379       unsigned int i;
 380       struct loop *loopi;
 381
 382       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
 383         {
 384           fprintf (outf, "  access_fn_A: ");
 385           print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
 386           fprintf (outf, "  access_fn_B: ");
 387           print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
 388           dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
 389         }
 390
 391       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 392       fprintf (outf, "  loop nest: (");
 393       for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
 394         fprintf (outf, "%d ", loopi->num);
 395       fprintf (outf, ")\n");
 396
 397       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 398         {
 399           fprintf (outf, "  distance_vector: ");
 400           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 401                                DDR_NB_LOOPS (ddr));
 402         }
 403
 404       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 405         {
 406           fprintf (outf, "  direction_vector: ");
 407           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 408                                   DDR_NB_LOOPS (ddr));
 409         }
 410     }
 411
 412   fprintf (outf, ")\n");
 413 }
 414
 415 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure.  */
 416
 417 void
 418 dump_data_dependence_direction (FILE *file,
 419                                 enum data_dependence_direction dir)
 420 {
 421   switch (dir)
 422     {
 423     case dir_positive:
 424       fprintf (file, "+");
 425       break;
 426
 427     case dir_negative:
 428       fprintf (file, "-");
 429       break;
 430
 431     case dir_equal:
 432       fprintf (file, "=");
 433       break;
 434
 435     case dir_positive_or_negative:
 436       fprintf (file, "+-");
 437       break;
 438
 439     case dir_positive_or_equal:
 440       fprintf (file, "+=");
 441       break;
 442
 443     case dir_negative_or_equal:
 444       fprintf (file, "-=");
 445       break;
 446
 447     case dir_star:
 448       fprintf (file, "*");
 449       break;
 450
 451     default:
 452       break;
 453     }
 454 }
 455
 456 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 457    the dependence relations, and VECT_SIZE is the size of the
 458    dependence vectors, or in other words the number of loops in the
 459    considered nest.  */
 460
 461 void
 462 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
 463 {
 464   unsigned int i, j;
 465   struct data_dependence_relation *ddr;
 466   lambda_vector v;
 467
 468   for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
 469     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 470       {
 471         for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
 472           {
 473             fprintf (file, "DISTANCE_V (");
 474             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 475             fprintf (file, ")\n");
 476           }
 477
 478         for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
 479           {
 480             fprintf (file, "DIRECTION_V (");
 481             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 482             fprintf (file, ")\n");
 483           }
 484       }
 485
 486   fprintf (file, "\n\n");
 487 }
 488
 489 /* Dumps the data dependence relations DDRS in FILE.  */
 490
 491 void
 492 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
 493 {
 494   unsigned int i;
 495   struct data_dependence_relation *ddr;
 496
 497   for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
 498     dump_data_dependence_relation (file, ddr);
 499
 500   fprintf (file, "\n\n");
 501 }
 502
 503 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 504    will be ssizetype.  */
 505
 506 void
 507 split_constant_offset (tree exp, tree *var, tree *off)
 508 {
 509   tree type = TREE_TYPE (exp), otype;
 510   tree var0, var1;
 511   tree off0, off1;
 512   enum tree_code code;
 513
 514   *var = exp;
 515   STRIP_NOPS (exp);
 516   otype = TREE_TYPE (exp);
 517   code = TREE_CODE (exp);
 518
 519   switch (code)
 520     {
 521     case INTEGER_CST:
 522       *var = build_int_cst (type, 0);
 523       *off = fold_convert (ssizetype, exp);
 524       return;
 525
 526     case POINTER_PLUS_EXPR:
 527       code = PLUS_EXPR;
 528       /* FALLTHROUGH */
 529     case PLUS_EXPR:
 530     case MINUS_EXPR:
 531       split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
 532       split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
 533       *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
 534                                               var0, var1));
 535       *off = size_binop (code, off0, off1);
 536       return;
 537
 538     case MULT_EXPR:
 539       off1 = TREE_OPERAND (exp, 1);
 540       if (TREE_CODE (off1) != INTEGER_CST)
 541         break;
 542
 543       split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
 544       *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
 545                                               var0, off1));
 546       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
 547       return;
 548
 549     case ADDR_EXPR:
 550       {
 551         tree op, base, poffset;
 552         HOST_WIDE_INT pbitsize, pbitpos;
 553         enum machine_mode pmode;
 554         int punsignedp, pvolatilep;
 555
 556         op = TREE_OPERAND (exp, 0);
 557         if (!handled_component_p (op))
 558           break;
 559
 560         base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
 561                                     &pmode, &punsignedp, &pvolatilep, false);
 562
 563         if (pbitpos % BITS_PER_UNIT != 0)
 564           break;
 565         base = build_fold_addr_expr (base);
 566         off0 = ssize_int (pbitpos / BITS_PER_UNIT);
 567
 568         if (poffset)
 569           {
 570             split_constant_offset (poffset, &poffset, &off1);
 571             off0 = size_binop (PLUS_EXPR, off0, off1);
 572             if (POINTER_TYPE_P (TREE_TYPE (base)))
 573               base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
 574                                   base, fold_convert (sizetype, poffset));
 575             else
 576               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 577                                   fold_convert (TREE_TYPE (base), poffset));
 578           }
 579
 580         var0 = fold_convert (type, base);
 581
 582         /* If variable length types are involved, punt, otherwise casts
 583            might be converted into ARRAY_REFs in gimplify_conversion.
 584            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 585            possibly no longer appears in current GIMPLE, might resurface.
 586            This perhaps could run
 587            if (TREE_CODE (var0) == NOP_EXPR
 588                || TREE_CODE (var0) == CONVERT_EXPR)
 589              {
 590                gimplify_conversion (&var0);
 591                // Attempt to fill in any within var0 found ARRAY_REF's
 592                // element size from corresponding op embedded ARRAY_REF,
 593                // if unsuccessful, just punt.
 594              }  */
 595         while (POINTER_TYPE_P (type))
 596           type = TREE_TYPE (type);
 597         if (int_size_in_bytes (type) < 0)
 598           break;
 599
 600         *var = var0;
 601         *off = off0;
 602         return;
 603       }
 604
 605     case SSA_NAME:
 606       {
 607         tree def_stmt = SSA_NAME_DEF_STMT (exp);
 608         if (TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
 609           {
 610             tree def_stmt_rhs = GIMPLE_STMT_OPERAND (def_stmt, 1);
 611
 612             if (!TREE_SIDE_EFFECTS (def_stmt_rhs)
 613                 && EXPR_P (def_stmt_rhs)
 614                 && !REFERENCE_CLASS_P (def_stmt_rhs)
 615                 && !get_call_expr_in (def_stmt_rhs))
 616               {
 617                 split_constant_offset (def_stmt_rhs, &var0, &off0);
 618                 var0 = fold_convert (type, var0);
 619                 *var = var0;
 620                 *off = off0;
 621                 return;
 622               }
 623           }
 624         break;
 625       }
 626
 627     default:
 628       break;
 629     }
 630
 631   *off = ssize_int (0);
 632 }
 633
 634 /* Returns the address ADDR of an object in a canonical shape (without nop
 635    casts, and with type of pointer to the object).  */
 636
 637 static tree
 638 canonicalize_base_object_address (tree addr)
 639 {
 640   tree orig = addr;
 641
 642   STRIP_NOPS (addr);
 643
 644   /* The base address may be obtained by casting from integer, in that case
 645      keep the cast.  */
 646   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 647     return orig;
 648
 649   if (TREE_CODE (addr) != ADDR_EXPR)
 650     return addr;
 651
 652   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 653 }
 654
 655 /* Analyzes the behavior of the memory reference DR in the innermost loop that
 656    contains it.  */
 657
 658 void
 659 dr_analyze_innermost (struct data_reference *dr)
 660 {
 661   tree stmt = DR_STMT (dr);
 662   struct loop *loop = loop_containing_stmt (stmt);
 663   tree ref = DR_REF (dr);
 664   HOST_WIDE_INT pbitsize, pbitpos;
 665   tree base, poffset;
 666   enum machine_mode pmode;
 667   int punsignedp, pvolatilep;
 668   affine_iv base_iv, offset_iv;
 669   tree init, dinit, step;
 670
 671   if (dump_file && (dump_flags & TDF_DETAILS))
 672     fprintf (dump_file, "analyze_innermost: ");
 673
 674   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
 675                               &pmode, &punsignedp, &pvolatilep, false);
 676   gcc_assert (base != NULL_TREE);
 677
 678   if (pbitpos % BITS_PER_UNIT != 0)
 679     {
 680       if (dump_file && (dump_flags & TDF_DETAILS))
 681         fprintf (dump_file, "failed: bit offset alignment.\n");
 682       return;
 683     }
 684
 685   base = build_fold_addr_expr (base);
 686   if (!simple_iv (loop, stmt, base, &base_iv, false))
 687     {
 688       if (dump_file && (dump_flags & TDF_DETAILS))
 689         fprintf (dump_file, "failed: evolution of base is not affine.\n");
 690       return;
 691     }
 692   if (!poffset)
 693     {
 694       offset_iv.base = ssize_int (0);
 695       offset_iv.step = ssize_int (0);
 696     }
 697   else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
 698     {
 699       if (dump_file && (dump_flags & TDF_DETAILS))
 700         fprintf (dump_file, "failed: evolution of offset is not affine.\n");
 701       return;
 702     }
 703
 704   init = ssize_int (pbitpos / BITS_PER_UNIT);
 705   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 706   init =  size_binop (PLUS_EXPR, init, dinit);
 707   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 708   init =  size_binop (PLUS_EXPR, init, dinit);
 709
 710   step = size_binop (PLUS_EXPR,
 711                      fold_convert (ssizetype, base_iv.step),
 712                      fold_convert (ssizetype, offset_iv.step));
 713
 714   DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
 715
 716   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
 717   DR_INIT (dr) = init;
 718   DR_STEP (dr) = step;
 719
 720   DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
 721
 722   if (dump_file && (dump_flags & TDF_DETAILS))
 723     fprintf (dump_file, "success.\n");
 724 }
 725
 726 /* Determines the base object and the list of indices of memory reference
 727    DR, analyzed in loop nest NEST.  */
 728
 729 static void
 730 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
 731 {
 732   tree stmt = DR_STMT (dr);
 733   struct loop *loop = loop_containing_stmt (stmt);
 734   VEC (tree, heap) *access_fns = NULL;
 735   tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
 736   tree base, off, access_fn;
 737
 738   while (handled_component_p (aref))
 739     {
 740       if (TREE_CODE (aref) == ARRAY_REF)
 741         {
 742           op = TREE_OPERAND (aref, 1);
 743           access_fn = analyze_scalar_evolution (loop, op);
 744           access_fn = resolve_mixers (nest, access_fn);
 745           VEC_safe_push (tree, heap, access_fns, access_fn);
 746
 747           TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
 748         }
 749
 750       aref = TREE_OPERAND (aref, 0);
 751     }
 752
 753   if (INDIRECT_REF_P (aref))
 754     {
 755       op = TREE_OPERAND (aref, 0);
 756       access_fn = analyze_scalar_evolution (loop, op);
 757       access_fn = resolve_mixers (nest, access_fn);
 758       base = initial_condition (access_fn);
 759       split_constant_offset (base, &base, &off);
 760       access_fn = chrec_replace_initial_condition (access_fn,
 761                         fold_convert (TREE_TYPE (base), off));
 762
 763       TREE_OPERAND (aref, 0) = base;
 764       VEC_safe_push (tree, heap, access_fns, access_fn);
 765     }
 766
 767   DR_BASE_OBJECT (dr) = ref;
 768   DR_ACCESS_FNS (dr) = access_fns;
 769 }
 770
 771 /* Extracts the alias analysis information from the memory reference DR.  */
 772
 773 static void
 774 dr_analyze_alias (struct data_reference *dr)
 775 {
 776   tree stmt = DR_STMT (dr);
 777   tree ref = DR_REF (dr);
 778   tree base = get_base_address (ref), addr, smt = NULL_TREE;
 779   ssa_op_iter it;
 780   tree op;
 781   bitmap vops;
 782
 783   if (DECL_P (base))
 784     smt = base;
 785   else if (INDIRECT_REF_P (base))
 786     {
 787       addr = TREE_OPERAND (base, 0);
 788       if (TREE_CODE (addr) == SSA_NAME)
 789         {
 790           smt = symbol_mem_tag (SSA_NAME_VAR (addr));
 791           DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
 792         }
 793     }
 794
 795   DR_SYMBOL_TAG (dr) = smt;
 796
 797   vops = BITMAP_ALLOC (NULL);
 798   FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
 799     {
 800       bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
 801     }
 802
 803   DR_VOPS (dr) = vops;
 804 }
 805
 806 /* Returns true if the address of DR is invariant.  */
 807
 808 static bool
 809 dr_address_invariant_p (struct data_reference *dr)
 810 {
 811   unsigned i;
 812   tree idx;
 813
 814   for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
 815     if (tree_contains_chrecs (idx, NULL))
 816       return false;
 817
 818   return true;
 819 }
 820
 821 /* Frees data reference DR.  */
 822
 823 void
 824 free_data_ref (data_reference_p dr)
 825 {
 826   BITMAP_FREE (DR_VOPS (dr));
 827   VEC_free (tree, heap, DR_ACCESS_FNS (dr));
 828   free (dr);
 829 }
 830
 831 /* Analyzes memory reference MEMREF accessed in STMT.  The reference
 832    is read if IS_READ is true, write otherwise.  Returns the
 833    data_reference description of MEMREF.  NEST is the outermost loop of the
 834    loop nest in that the reference should be analyzed.  */
 835
 836 struct data_reference *
 837 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
 838 {
 839   struct data_reference *dr;
 840
 841   if (dump_file && (dump_flags & TDF_DETAILS))
 842     {
 843       fprintf (dump_file, "Creating dr for ");
 844       print_generic_expr (dump_file, memref, TDF_SLIM);
 845       fprintf (dump_file, "\n");
 846     }
 847
 848   dr = XCNEW (struct data_reference);
 849   DR_STMT (dr) = stmt;
 850   DR_REF (dr) = memref;
 851   DR_IS_READ (dr) = is_read;
 852
 853   dr_analyze_innermost (dr);
 854   dr_analyze_indices (dr, nest);
 855   dr_analyze_alias (dr);
 856
 857   if (dump_file && (dump_flags & TDF_DETAILS))
 858     {
 859       fprintf (dump_file, "\tbase_address: ");
 860       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
 861       fprintf (dump_file, "\n\toffset from base address: ");
 862       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
 863       fprintf (dump_file, "\n\tconstant offset from base address: ");
 864       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
 865       fprintf (dump_file, "\n\tstep: ");
 866       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
 867       fprintf (dump_file, "\n\taligned to: ");
 868       print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
 869       fprintf (dump_file, "\n\tbase_object: ");
 870       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
 871       fprintf (dump_file, "\n\tsymbol tag: ");
 872       print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
 873       fprintf (dump_file, "\n");
 874     }
 875
 876   return dr;
 877 }
 878
 879 /* Returns true if FNA == FNB.  */
 880
 881 static bool
 882 affine_function_equal_p (affine_fn fna, affine_fn fnb)
 883 {
 884   unsigned i, n = VEC_length (tree, fna);
 885
 886   if (n != VEC_length (tree, fnb))
 887     return false;
 888
 889   for (i = 0; i < n; i++)
 890     if (!operand_equal_p (VEC_index (tree, fna, i),
 891                           VEC_index (tree, fnb, i), 0))
 892       return false;
 893
 894   return true;
 895 }
 896
 897 /* If all the functions in CF are the same, returns one of them,
 898    otherwise returns NULL.  */
 899
 900 static affine_fn
 901 common_affine_function (conflict_function *cf)
 902 {
 903   unsigned i;
 904   affine_fn comm;
 905
 906   if (!CF_NONTRIVIAL_P (cf))
 907     return NULL;
 908
 909   comm = cf->fns[0];
 910
 911   for (i = 1; i < cf->n; i++)
 912     if (!affine_function_equal_p (comm, cf->fns[i]))
 913       return NULL;
 914
 915   return comm;
 916 }
 917
 918 /* Returns the base of the affine function FN.  */
 919
 920 static tree
 921 affine_function_base (affine_fn fn)
 922 {
 923   return VEC_index (tree, fn, 0);
 924 }
 925
 926 /* Returns true if FN is a constant.  */
 927
 928 static bool
 929 affine_function_constant_p (affine_fn fn)
 930 {
 931   unsigned i;
 932   tree coef;
 933
 934   for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
 935     if (!integer_zerop (coef))
 936       return false;
 937
 938   return true;
 939 }
 940
 941 /* Returns true if FN is the zero constant function.  */
 942
 943 static bool
 944 affine_function_zero_p (affine_fn fn)
 945 {
 946   return (integer_zerop (affine_function_base (fn))
 947           && affine_function_constant_p (fn));
 948 }
 949
 950 /* Returns a signed integer type with the largest precision from TA
 951    and TB.  */
 952
 953 static tree
 954 signed_type_for_types (tree ta, tree tb)
 955 {
 956   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
 957     return signed_type_for (ta);
 958   else
 959     return signed_type_for (tb);
 960 }
 961
 962 /* Applies operation OP on affine functions FNA and FNB, and returns the
 963    result.  */
 964
 965 static affine_fn
 966 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
 967 {
 968   unsigned i, n, m;
 969   affine_fn ret;
 970   tree coef;
 971
 972   if (VEC_length (tree, fnb) > VEC_length (tree, fna))
 973     {
 974       n = VEC_length (tree, fna);
 975       m = VEC_length (tree, fnb);
 976     }
 977   else
 978     {
 979       n = VEC_length (tree, fnb);
 980       m = VEC_length (tree, fna);
 981     }
 982
 983   ret = VEC_alloc (tree, heap, m);
 984   for (i = 0; i < n; i++)
 985     {
 986       tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
 987                                          TREE_TYPE (VEC_index (tree, fnb, i)));
 988
 989       VEC_quick_push (tree, ret,
 990                       fold_build2 (op, type,
 991                                    VEC_index (tree, fna, i),
 992                                    VEC_index (tree, fnb, i)));
 993     }
 994
 995   for (; VEC_iterate (tree, fna, i, coef); i++)
 996     VEC_quick_push (tree, ret,
 997                     fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
 998                                  coef, integer_zero_node));
 999   for (; VEC_iterate (tree, fnb, i, coef); i++)
1000     VEC_quick_push (tree, ret,
1001                     fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1002                                  integer_zero_node, coef));
1003
1004   return ret;
1005 }
1006
1007 /* Returns the sum of affine functions FNA and FNB.  */
1008
1009 static affine_fn
1010 affine_fn_plus (affine_fn fna, affine_fn fnb)
1011 {
1012   return affine_fn_op (PLUS_EXPR, fna, fnb);
1013 }
1014
1015 /* Returns the difference of affine functions FNA and FNB.  */
1016
1017 static affine_fn
1018 affine_fn_minus (affine_fn fna, affine_fn fnb)
1019 {
1020   return affine_fn_op (MINUS_EXPR, fna, fnb);
1021 }
1022
1023 /* Frees affine function FN.  */
1024
1025 static void
1026 affine_fn_free (affine_fn fn)
1027 {
1028   VEC_free (tree, heap, fn);
1029 }
1030
1031 /* Determine for each subscript in the data dependence relation DDR
1032    the distance.  */
1033
1034 static void
1035 compute_subscript_distance (struct data_dependence_relation *ddr)
1036 {
1037   conflict_function *cf_a, *cf_b;
1038   affine_fn fn_a, fn_b, diff;
1039
1040   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1041     {
1042       unsigned int i;
1043
1044       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1045         {
1046           struct subscript *subscript;
1047
1048           subscript = DDR_SUBSCRIPT (ddr, i);
1049           cf_a = SUB_CONFLICTS_IN_A (subscript);
1050           cf_b = SUB_CONFLICTS_IN_B (subscript);
1051
1052           fn_a = common_affine_function (cf_a);
1053           fn_b = common_affine_function (cf_b);
1054           if (!fn_a || !fn_b)
1055             {
1056               SUB_DISTANCE (subscript) = chrec_dont_know;
1057               return;
1058             }
1059           diff = affine_fn_minus (fn_a, fn_b);
1060
1061           if (affine_function_constant_p (diff))
1062             SUB_DISTANCE (subscript) = affine_function_base (diff);
1063           else
1064             SUB_DISTANCE (subscript) = chrec_dont_know;
1065
1066           affine_fn_free (diff);
1067         }
1068     }
1069 }
1070
1071 /* Returns the conflict function for "unknown".  */
1072
1073 static conflict_function *
1074 conflict_fn_not_known (void)
1075 {
1076   conflict_function *fn = XCNEW (conflict_function);
1077   fn->n = NOT_KNOWN;
1078
1079   return fn;
1080 }
1081
1082 /* Returns the conflict function for "independent".  */
1083
1084 static conflict_function *
1085 conflict_fn_no_dependence (void)
1086 {
1087   conflict_function *fn = XCNEW (conflict_function);
1088   fn->n = NO_DEPENDENCE;
1089
1090   return fn;
1091 }
1092
1093 /* Returns true if the address of OBJ is invariant in LOOP.  */
1094
1095 static bool
1096 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1097 {
1098   while (handled_component_p (obj))
1099     {
1100       if (TREE_CODE (obj) == ARRAY_REF)
1101         {
1102           /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1103              need to check the stride and the lower bound of the reference.  */
1104           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1105                                                       loop->num)
1106               || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1107                                                          loop->num))
1108             return false;
1109         }
1110       else if (TREE_CODE (obj) == COMPONENT_REF)
1111         {
1112           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1113                                                       loop->num))
1114             return false;
1115         }
1116       obj = TREE_OPERAND (obj, 0);
1117     }
1118
1119   if (!INDIRECT_REF_P (obj))
1120     return true;
1121
1122   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1123                                                   loop->num);
1124 }
1125
1126 /* Returns true if A and B are accesses to different objects, or to different
1127    fields of the same object.  */
1128
1129 static bool
1130 disjoint_objects_p (tree a, tree b)
1131 {
1132   tree base_a, base_b;
1133   VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1134   bool ret;
1135
1136   base_a = get_base_address (a);
1137   base_b = get_base_address (b);
1138
1139   if (DECL_P (base_a)
1140       && DECL_P (base_b)
1141       && base_a != base_b)
1142     return true;
1143
1144   if (!operand_equal_p (base_a, base_b, 0))
1145     return false;
1146
1147   /* Compare the component references of A and B.  We must start from the inner
1148      ones, so record them to the vector first.  */
1149   while (handled_component_p (a))
1150     {
1151       VEC_safe_push (tree, heap, comp_a, a);
1152       a = TREE_OPERAND (a, 0);
1153     }
1154   while (handled_component_p (b))
1155     {
1156       VEC_safe_push (tree, heap, comp_b, b);
1157       b = TREE_OPERAND (b, 0);
1158     }
1159
1160   ret = false;
1161   while (1)
1162     {
1163       if (VEC_length (tree, comp_a) == 0
1164           || VEC_length (tree, comp_b) == 0)
1165         break;
1166
1167       a = VEC_pop (tree, comp_a);
1168       b = VEC_pop (tree, comp_b);
1169
1170       /* Real and imaginary part of a variable do not alias.  */
1171       if ((TREE_CODE (a) == REALPART_EXPR
1172            && TREE_CODE (b) == IMAGPART_EXPR)
1173           || (TREE_CODE (a) == IMAGPART_EXPR
1174               && TREE_CODE (b) == REALPART_EXPR))
1175         {
1176           ret = true;
1177           break;
1178         }
1179
1180       if (TREE_CODE (a) != TREE_CODE (b))
1181         break;
1182
1183       /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1184          DR_BASE_OBJECT are always zero.  */
1185       if (TREE_CODE (a) == ARRAY_REF)
1186         continue;
1187       else if (TREE_CODE (a) == COMPONENT_REF)
1188         {
1189           if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1190             continue;
1191
1192           /* Different fields of unions may overlap.  */
1193           base_a = TREE_OPERAND (a, 0);
1194           if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1195             break;
1196
1197           /* Different fields of structures cannot.  */
1198           ret = true;
1199           break;
1200         }
1201       else
1202         break;
1203     }
1204
1205   VEC_free (tree, heap, comp_a);
1206   VEC_free (tree, heap, comp_b);
1207
1208   return ret;
1209 }
1210
1211 /* Returns false if we can prove that data references A and B do not alias,
1212    true otherwise.  */
1213
1214 static bool
1215 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1216 {
1217   const_tree addr_a = DR_BASE_ADDRESS (a);
1218   const_tree addr_b = DR_BASE_ADDRESS (b);
1219   const_tree type_a, type_b;
1220   const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1221
1222   /* If the sets of virtual operands are disjoint, the memory references do not
1223      alias.  */
1224   if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1225     return false;
1226
1227   /* If the accessed objects are disjoint, the memory references do not
1228      alias.  */
1229   if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1230     return false;
1231
1232   if (!addr_a || !addr_b)
1233     return true;
1234
1235   /* If the references are based on different static objects, they cannot alias
1236      (PTA should be able to disambiguate such accesses, but often it fails to,
1237      since currently we cannot distinguish between pointer and offset in pointer
1238      arithmetics).  */
1239   if (TREE_CODE (addr_a) == ADDR_EXPR
1240       && TREE_CODE (addr_b) == ADDR_EXPR)
1241     return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1242
1243   /* An instruction writing through a restricted pointer is "independent" of any
1244      instruction reading or writing through a different restricted pointer,
1245      in the same block/scope.  */
1246
1247   type_a = TREE_TYPE (addr_a);
1248   type_b = TREE_TYPE (addr_b);
1249   gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1250
1251   if (TREE_CODE (addr_a) == SSA_NAME)
1252     decl_a = SSA_NAME_VAR (addr_a);
1253   if (TREE_CODE (addr_b) == SSA_NAME)
1254     decl_b = SSA_NAME_VAR (addr_b);
1255
1256   if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1257       && (!DR_IS_READ (a) || !DR_IS_READ (b))
1258       && decl_a && DECL_P (decl_a)
1259       && decl_b && DECL_P (decl_b)
1260       && decl_a != decl_b
1261       && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1262       && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1263     return false;
1264
1265   return true;
1266 }
1267
1268 /* Initialize a data dependence relation between data accesses A and
1269    B.  NB_LOOPS is the number of loops surrounding the references: the
1270    size of the classic distance/direction vectors.  */
1271
1272 static struct data_dependence_relation *
1273 initialize_data_dependence_relation (struct data_reference *a,
1274                                      struct data_reference *b,
1275                                      VEC (loop_p, heap) *loop_nest)
1276 {
1277   struct data_dependence_relation *res;
1278   unsigned int i;
1279
1280   res = XNEW (struct data_dependence_relation);
1281   DDR_A (res) = a;
1282   DDR_B (res) = b;
1283   DDR_LOOP_NEST (res) = NULL;
1284   DDR_REVERSED_P (res) = false;
1285   DDR_SUBSCRIPTS (res) = NULL;
1286   DDR_DIR_VECTS (res) = NULL;
1287   DDR_DIST_VECTS (res) = NULL;
1288
1289   if (a == NULL || b == NULL)
1290     {
1291       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1292       return res;
1293     }
1294
1295   /* If the data references do not alias, then they are independent.  */
1296   if (!dr_may_alias_p (a, b))
1297     {
1298       DDR_ARE_DEPENDENT (res) = chrec_known;
1299       return res;
1300     }
1301
1302   /* If the references do not access the same object, we do not know
1303      whether they alias or not.  */
1304   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1305     {
1306       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1307       return res;
1308     }
1309
1310   /* If the base of the object is not invariant in the loop nest, we cannot
1311      analyze it.  TODO -- in fact, it would suffice to record that there may
1312      be arbitrary dependences in the loops where the base object varies.  */
1313   if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1314                                            DR_BASE_OBJECT (a)))
1315     {
1316       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1317       return res;
1318     }
1319
1320   gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1321
1322   DDR_AFFINE_P (res) = true;
1323   DDR_ARE_DEPENDENT (res) = NULL_TREE;
1324   DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1325   DDR_LOOP_NEST (res) = loop_nest;
1326   DDR_INNER_LOOP (res) = 0;
1327
1328   for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1329     {
1330       struct subscript *subscript;
1331
1332       subscript = XNEW (struct subscript);
1333       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1334       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1335       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1336       SUB_DISTANCE (subscript) = chrec_dont_know;
1337       VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1338     }
1339
1340   return res;
1341 }
1342
1343 /* Frees memory used by the conflict function F.  */
1344
1345 static void
1346 free_conflict_function (conflict_function *f)
1347 {
1348   unsigned i;
1349
1350   if (CF_NONTRIVIAL_P (f))
1351     {
1352       for (i = 0; i < f->n; i++)
1353         affine_fn_free (f->fns[i]);
1354     }
1355   free (f);
1356 }
1357
1358 /* Frees memory used by SUBSCRIPTS.  */
1359
1360 static void
1361 free_subscripts (VEC (subscript_p, heap) *subscripts)
1362 {
1363   unsigned i;
1364   subscript_p s;
1365
1366   for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1367     {
1368       free_conflict_function (s->conflicting_iterations_in_a);
1369       free_conflict_function (s->conflicting_iterations_in_b);
1370     }
1371   VEC_free (subscript_p, heap, subscripts);
1372 }
1373
1374 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1375    description.  */
1376
1377 static inline void
1378 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1379                         tree chrec)
1380 {
1381   if (dump_file && (dump_flags & TDF_DETAILS))
1382     {
1383       fprintf (dump_file, "(dependence classified: ");
1384       print_generic_expr (dump_file, chrec, 0);
1385       fprintf (dump_file, ")\n");
1386     }
1387
1388   DDR_ARE_DEPENDENT (ddr) = chrec;
1389   free_subscripts (DDR_SUBSCRIPTS (ddr));
1390   DDR_SUBSCRIPTS (ddr) = NULL;
1391 }
1392
1393 /* The dependence relation DDR cannot be represented by a distance
1394    vector.  */
1395
1396 static inline void
1397 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1398 {
1399   if (dump_file && (dump_flags & TDF_DETAILS))
1400     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1401
1402   DDR_AFFINE_P (ddr) = false;
1403 }
1404
1405 \f
1406
1407 /* This section contains the classic Banerjee tests.  */
1408
1409 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1410    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
1411
1412 static inline bool
1413 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1414 {
1415   return (evolution_function_is_constant_p (chrec_a)
1416           && evolution_function_is_constant_p (chrec_b));
1417 }
1418
1419 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1420    variable, i.e., if the SIV (Single Index Variable) test is true.  */
1421
1422 static bool
1423 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1424 {
1425   if ((evolution_function_is_constant_p (chrec_a)
1426        && evolution_function_is_univariate_p (chrec_b))
1427       || (evolution_function_is_constant_p (chrec_b)
1428           && evolution_function_is_univariate_p (chrec_a)))
1429     return true;
1430
1431   if (evolution_function_is_univariate_p (chrec_a)
1432       && evolution_function_is_univariate_p (chrec_b))
1433     {
1434       switch (TREE_CODE (chrec_a))
1435         {
1436         case POLYNOMIAL_CHREC:
1437           switch (TREE_CODE (chrec_b))
1438             {
1439             case POLYNOMIAL_CHREC:
1440               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1441                 return false;
1442
1443             default:
1444               return true;
1445             }
1446
1447         default:
1448           return true;
1449         }
1450     }
1451
1452   return false;
1453 }
1454
1455 /* Creates a conflict function with N dimensions.  The affine functions
1456    in each dimension follow.  */
1457
1458 static conflict_function *
1459 conflict_fn (unsigned n, ...)
1460 {
1461   unsigned i;
1462   conflict_function *ret = XCNEW (conflict_function);
1463   va_list ap;
1464
1465   gcc_assert (0 < n && n <= MAX_DIM);
1466   va_start(ap, n);
1467
1468   ret->n = n;
1469   for (i = 0; i < n; i++)
1470     ret->fns[i] = va_arg (ap, affine_fn);
1471   va_end(ap);
1472
1473   return ret;
1474 }
1475
1476 /* Returns constant affine function with value CST.  */
1477
1478 static affine_fn
1479 affine_fn_cst (tree cst)
1480 {
1481   affine_fn fn = VEC_alloc (tree, heap, 1);
1482   VEC_quick_push (tree, fn, cst);
1483   return fn;
1484 }
1485
1486 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
1487
1488 static affine_fn
1489 affine_fn_univar (tree cst, unsigned dim, tree coef)
1490 {
1491   affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1492   unsigned i;
1493
1494   gcc_assert (dim > 0);
1495   VEC_quick_push (tree, fn, cst);
1496   for (i = 1; i < dim; i++)
1497     VEC_quick_push (tree, fn, integer_zero_node);
1498   VEC_quick_push (tree, fn, coef);
1499   return fn;
1500 }
1501
1502 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
1503    *OVERLAPS_B are initialized to the functions that describe the
1504    relation between the elements accessed twice by CHREC_A and
1505    CHREC_B.  For k >= 0, the following property is verified:
1506
1507    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1508
1509 static void
1510 analyze_ziv_subscript (tree chrec_a,
1511                        tree chrec_b,
1512                        conflict_function **overlaps_a,
1513                        conflict_function **overlaps_b,
1514                        tree *last_conflicts)
1515 {
1516   tree type, difference;
1517   dependence_stats.num_ziv++;
1518
1519   if (dump_file && (dump_flags & TDF_DETAILS))
1520     fprintf (dump_file, "(analyze_ziv_subscript \n");
1521
1522   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1523   chrec_a = chrec_convert (type, chrec_a, NULL_TREE);
1524   chrec_b = chrec_convert (type, chrec_b, NULL_TREE);
1525   difference = chrec_fold_minus (type, chrec_a, chrec_b);
1526
1527   switch (TREE_CODE (difference))
1528     {
1529     case INTEGER_CST:
1530       if (integer_zerop (difference))
1531         {
1532           /* The difference is equal to zero: the accessed index
1533              overlaps for each iteration in the loop.  */
1534           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1535           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1536           *last_conflicts = chrec_dont_know;
1537           dependence_stats.num_ziv_dependent++;
1538         }
1539       else
1540         {
1541           /* The accesses do not overlap.  */
1542           *overlaps_a = conflict_fn_no_dependence ();
1543           *overlaps_b = conflict_fn_no_dependence ();
1544           *last_conflicts = integer_zero_node;
1545           dependence_stats.num_ziv_independent++;
1546         }
1547       break;
1548
1549     default:
1550       /* We're not sure whether the indexes overlap.  For the moment,
1551          conservatively answer "don't know".  */
1552       if (dump_file && (dump_flags & TDF_DETAILS))
1553         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1554
1555       *overlaps_a = conflict_fn_not_known ();
1556       *overlaps_b = conflict_fn_not_known ();
1557       *last_conflicts = chrec_dont_know;
1558       dependence_stats.num_ziv_unimplemented++;
1559       break;
1560     }
1561
1562   if (dump_file && (dump_flags & TDF_DETAILS))
1563     fprintf (dump_file, ")\n");
1564 }
1565
1566 /* Sets NIT to the estimated number of executions of the statements in
1567    LOOP.  If CONSERVATIVE is true, we must be sure that NIT is at least as
1568    large as the number of iterations.  If we have no reliable estimate,
1569    the function returns false, otherwise returns true.  */
1570
1571 bool
1572 estimated_loop_iterations (struct loop *loop, bool conservative,
1573                            double_int *nit)
1574 {
1575   estimate_numbers_of_iterations_loop (loop);
1576   if (conservative)
1577     {
1578       if (!loop->any_upper_bound)
1579         return false;
1580
1581       *nit = loop->nb_iterations_upper_bound;
1582     }
1583   else
1584     {
1585       if (!loop->any_estimate)
1586         return false;
1587
1588       *nit = loop->nb_iterations_estimate;
1589     }
1590
1591   return true;
1592 }
1593
1594 /* Similar to estimated_loop_iterations, but returns the estimate only
1595    if it fits to HOST_WIDE_INT.  If this is not the case, or the estimate
1596    on the number of iterations of LOOP could not be derived, returns -1.  */
1597
1598 HOST_WIDE_INT
1599 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1600 {
1601   double_int nit;
1602   HOST_WIDE_INT hwi_nit;
1603
1604   if (!estimated_loop_iterations (loop, conservative, &nit))
1605     return -1;
1606
1607   if (!double_int_fits_in_shwi_p (nit))
1608     return -1;
1609   hwi_nit = double_int_to_shwi (nit);
1610
1611   return hwi_nit < 0 ? -1 : hwi_nit;
1612 }
1613
1614 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1615    and only if it fits to the int type.  If this is not the case, or the
1616    estimate on the number of iterations of LOOP could not be derived, returns
1617    chrec_dont_know.  */
1618
1619 static tree
1620 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1621 {
1622   double_int nit;
1623   tree type;
1624
1625   if (!estimated_loop_iterations (loop, conservative, &nit))
1626     return chrec_dont_know;
1627
1628   type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1629   if (!double_int_fits_to_tree_p (type, nit))
1630     return chrec_dont_know;
1631
1632   return double_int_to_tree (type, nit);
1633 }
1634
1635 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1636    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
1637    *OVERLAPS_B are initialized to the functions that describe the
1638    relation between the elements accessed twice by CHREC_A and
1639    CHREC_B.  For k >= 0, the following property is verified:
1640
1641    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1642
1643 static void
1644 analyze_siv_subscript_cst_affine (tree chrec_a,
1645                                   tree chrec_b,
1646                                   conflict_function **overlaps_a,
1647                                   conflict_function **overlaps_b,
1648                                   tree *last_conflicts)
1649 {
1650   bool value0, value1, value2;
1651   tree type, difference, tmp;
1652
1653   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1654   chrec_a = chrec_convert (type, chrec_a, NULL_TREE);
1655   chrec_b = chrec_convert (type, chrec_b, NULL_TREE);
1656   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1657
1658   if (!chrec_is_positive (initial_condition (difference), &value0))
1659     {
1660       if (dump_file && (dump_flags & TDF_DETAILS))
1661         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1662
1663       dependence_stats.num_siv_unimplemented++;
1664       *overlaps_a = conflict_fn_not_known ();
1665       *overlaps_b = conflict_fn_not_known ();
1666       *last_conflicts = chrec_dont_know;
1667       return;
1668     }
1669   else
1670     {
1671       if (value0 == false)
1672         {
1673           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1674             {
1675               if (dump_file && (dump_flags & TDF_DETAILS))
1676                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1677
1678               *overlaps_a = conflict_fn_not_known ();
1679               *overlaps_b = conflict_fn_not_known ();
1680               *last_conflicts = chrec_dont_know;
1681               dependence_stats.num_siv_unimplemented++;
1682               return;
1683             }
1684           else
1685             {
1686               if (value1 == true)
1687                 {
1688                   /* Example:
1689                      chrec_a = 12
1690                      chrec_b = {10, +, 1}
1691                   */
1692
1693                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1694                     {
1695                       HOST_WIDE_INT numiter;
1696                       struct loop *loop = get_chrec_loop (chrec_b);
1697
1698                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1699                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
1700                                          fold_build1 (ABS_EXPR, type, difference),
1701                                          CHREC_RIGHT (chrec_b));
1702                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1703                       *last_conflicts = integer_one_node;
1704
1705
1706                       /* Perform weak-zero siv test to see if overlap is
1707                          outside the loop bounds.  */
1708                       numiter = estimated_loop_iterations_int (loop, false);
1709
1710                       if (numiter >= 0
1711                           && compare_tree_int (tmp, numiter) > 0)
1712                         {
1713                           free_conflict_function (*overlaps_a);
1714                           free_conflict_function (*overlaps_b);
1715                           *overlaps_a = conflict_fn_no_dependence ();
1716                           *overlaps_b = conflict_fn_no_dependence ();
1717                           *last_conflicts = integer_zero_node;
1718                           dependence_stats.num_siv_independent++;
1719                           return;
1720                         }
1721                       dependence_stats.num_siv_dependent++;
1722                       return;
1723                     }
1724
1725                   /* When the step does not divide the difference, there are
1726                      no overlaps.  */
1727                   else
1728                     {
1729                       *overlaps_a = conflict_fn_no_dependence ();
1730                       *overlaps_b = conflict_fn_no_dependence ();
1731                       *last_conflicts = integer_zero_node;
1732                       dependence_stats.num_siv_independent++;
1733                       return;
1734                     }
1735                 }
1736
1737               else
1738                 {
1739                   /* Example:
1740                      chrec_a = 12
1741                      chrec_b = {10, +, -1}
1742
1743                      In this case, chrec_a will not overlap with chrec_b.  */
1744                   *overlaps_a = conflict_fn_no_dependence ();
1745                   *overlaps_b = conflict_fn_no_dependence ();
1746                   *last_conflicts = integer_zero_node;
1747                   dependence_stats.num_siv_independent++;
1748                   return;
1749                 }
1750             }
1751         }
1752       else
1753         {
1754           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1755             {
1756               if (dump_file && (dump_flags & TDF_DETAILS))
1757                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1758
1759               *overlaps_a = conflict_fn_not_known ();
1760               *overlaps_b = conflict_fn_not_known ();
1761               *last_conflicts = chrec_dont_know;
1762               dependence_stats.num_siv_unimplemented++;
1763               return;
1764             }
1765           else
1766             {
1767               if (value2 == false)
1768                 {
1769                   /* Example:
1770                      chrec_a = 3
1771                      chrec_b = {10, +, -1}
1772                   */
1773                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1774                     {
1775                       HOST_WIDE_INT numiter;
1776                       struct loop *loop = get_chrec_loop (chrec_b);
1777
1778                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1779                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1780                                          CHREC_RIGHT (chrec_b));
1781                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1782                       *last_conflicts = integer_one_node;
1783
1784                       /* Perform weak-zero siv test to see if overlap is
1785                          outside the loop bounds.  */
1786                       numiter = estimated_loop_iterations_int (loop, false);
1787
1788                       if (numiter >= 0
1789                           && compare_tree_int (tmp, numiter) > 0)
1790                         {
1791                           free_conflict_function (*overlaps_a);
1792                           free_conflict_function (*overlaps_b);
1793                           *overlaps_a = conflict_fn_no_dependence ();
1794                           *overlaps_b = conflict_fn_no_dependence ();
1795                           *last_conflicts = integer_zero_node;
1796                           dependence_stats.num_siv_independent++;
1797                           return;
1798                         }
1799                       dependence_stats.num_siv_dependent++;
1800                       return;
1801                     }
1802
1803                   /* When the step does not divide the difference, there
1804                      are no overlaps.  */
1805                   else
1806                     {
1807                       *overlaps_a = conflict_fn_no_dependence ();
1808                       *overlaps_b = conflict_fn_no_dependence ();
1809                       *last_conflicts = integer_zero_node;
1810                       dependence_stats.num_siv_independent++;
1811                       return;
1812                     }
1813                 }
1814               else
1815                 {
1816                   /* Example:
1817                      chrec_a = 3
1818                      chrec_b = {4, +, 1}
1819
1820                      In this case, chrec_a will not overlap with chrec_b.  */
1821                   *overlaps_a = conflict_fn_no_dependence ();
1822                   *overlaps_b = conflict_fn_no_dependence ();
1823                   *last_conflicts = integer_zero_node;
1824                   dependence_stats.num_siv_independent++;
1825                   return;
1826                 }
1827             }
1828         }
1829     }
1830 }
1831
1832 /* Helper recursive function for initializing the matrix A.  Returns
1833    the initial value of CHREC.  */
1834
1835 static HOST_WIDE_INT
1836 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1837 {
1838   gcc_assert (chrec);
1839
1840   if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1841     return int_cst_value (chrec);
1842
1843   A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1844   return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1845 }
1846
1847 #define FLOOR_DIV(x,y) ((x) / (y))
1848
1849 /* Solves the special case of the Diophantine equation:
1850    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1851
1852    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
1853    number of iterations that loops X and Y run.  The overlaps will be
1854    constructed as evolutions in dimension DIM.  */
1855
1856 static void
1857 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1858                                          affine_fn *overlaps_a,
1859                                          affine_fn *overlaps_b,
1860                                          tree *last_conflicts, int dim)
1861 {
1862   if (((step_a > 0 && step_b > 0)
1863        || (step_a < 0 && step_b < 0)))
1864     {
1865       int step_overlaps_a, step_overlaps_b;
1866       int gcd_steps_a_b, last_conflict, tau2;
1867
1868       gcd_steps_a_b = gcd (step_a, step_b);
1869       step_overlaps_a = step_b / gcd_steps_a_b;
1870       step_overlaps_b = step_a / gcd_steps_a_b;
1871
1872       if (niter > 0)
1873         {
1874           tau2 = FLOOR_DIV (niter, step_overlaps_a);
1875           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1876           last_conflict = tau2;
1877           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1878         }
1879       else
1880         *last_conflicts = chrec_dont_know;
1881
1882       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1883                                       build_int_cst (NULL_TREE,
1884                                                      step_overlaps_a));
1885       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1886                                       build_int_cst (NULL_TREE,
1887                                                      step_overlaps_b));
1888     }
1889
1890   else
1891     {
1892       *overlaps_a = affine_fn_cst (integer_zero_node);
1893       *overlaps_b = affine_fn_cst (integer_zero_node);
1894       *last_conflicts = integer_zero_node;
1895     }
1896 }
1897
1898 /* Solves the special case of a Diophantine equation where CHREC_A is
1899    an affine bivariate function, and CHREC_B is an affine univariate
1900    function.  For example,
1901
1902    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1903
1904    has the following overlapping functions:
1905
1906    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1907    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1908    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1909
1910    FORNOW: This is a specialized implementation for a case occurring in
1911    a common benchmark.  Implement the general algorithm.  */
1912
1913 static void
1914 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1915                                       conflict_function **overlaps_a,
1916                                       conflict_function **overlaps_b,
1917                                       tree *last_conflicts)
1918 {
1919   bool xz_p, yz_p, xyz_p;
1920   int step_x, step_y, step_z;
1921   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1922   affine_fn overlaps_a_xz, overlaps_b_xz;
1923   affine_fn overlaps_a_yz, overlaps_b_yz;
1924   affine_fn overlaps_a_xyz, overlaps_b_xyz;
1925   affine_fn ova1, ova2, ovb;
1926   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1927
1928   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1929   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1930   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1931
1932   niter_x =
1933     estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1934                                    false);
1935   niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1936   niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1937
1938   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1939     {
1940       if (dump_file && (dump_flags & TDF_DETAILS))
1941         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1942
1943       *overlaps_a = conflict_fn_not_known ();
1944       *overlaps_b = conflict_fn_not_known ();
1945       *last_conflicts = chrec_dont_know;
1946       return;
1947     }
1948
1949   niter = MIN (niter_x, niter_z);
1950   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1951                                            &overlaps_a_xz,
1952                                            &overlaps_b_xz,
1953                                            &last_conflicts_xz, 1);
1954   niter = MIN (niter_y, niter_z);
1955   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1956                                            &overlaps_a_yz,
1957                                            &overlaps_b_yz,
1958                                            &last_conflicts_yz, 2);
1959   niter = MIN (niter_x, niter_z);
1960   niter = MIN (niter_y, niter);
1961   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1962                                            &overlaps_a_xyz,
1963                                            &overlaps_b_xyz,
1964                                            &last_conflicts_xyz, 3);
1965
1966   xz_p = !integer_zerop (last_conflicts_xz);
1967   yz_p = !integer_zerop (last_conflicts_yz);
1968   xyz_p = !integer_zerop (last_conflicts_xyz);
1969
1970   if (xz_p || yz_p || xyz_p)
1971     {
1972       ova1 = affine_fn_cst (integer_zero_node);
1973       ova2 = affine_fn_cst (integer_zero_node);
1974       ovb = affine_fn_cst (integer_zero_node);
1975       if (xz_p)
1976         {
1977           affine_fn t0 = ova1;
1978           affine_fn t2 = ovb;
1979
1980           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1981           ovb = affine_fn_plus (ovb, overlaps_b_xz);
1982           affine_fn_free (t0);
1983           affine_fn_free (t2);
1984           *last_conflicts = last_conflicts_xz;
1985         }
1986       if (yz_p)
1987         {
1988           affine_fn t0 = ova2;
1989           affine_fn t2 = ovb;
1990
1991           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1992           ovb = affine_fn_plus (ovb, overlaps_b_yz);
1993           affine_fn_free (t0);
1994           affine_fn_free (t2);
1995           *last_conflicts = last_conflicts_yz;
1996         }
1997       if (xyz_p)
1998         {
1999           affine_fn t0 = ova1;
2000           affine_fn t2 = ova2;
2001           affine_fn t4 = ovb;
2002
2003           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2004           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2005           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2006           affine_fn_free (t0);
2007           affine_fn_free (t2);
2008           affine_fn_free (t4);
2009           *last_conflicts = last_conflicts_xyz;
2010         }
2011       *overlaps_a = conflict_fn (2, ova1, ova2);
2012       *overlaps_b = conflict_fn (1, ovb);
2013     }
2014   else
2015     {
2016       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2017       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2018       *last_conflicts = integer_zero_node;
2019     }
2020
2021   affine_fn_free (overlaps_a_xz);
2022   affine_fn_free (overlaps_b_xz);
2023   affine_fn_free (overlaps_a_yz);
2024   affine_fn_free (overlaps_b_yz);
2025   affine_fn_free (overlaps_a_xyz);
2026   affine_fn_free (overlaps_b_xyz);
2027 }
2028
2029 /* Determines the overlapping elements due to accesses CHREC_A and
2030    CHREC_B, that are affine functions.  This function cannot handle
2031    symbolic evolution functions, ie. when initial conditions are
2032    parameters, because it uses lambda matrices of integers.  */
2033
2034 static void
2035 analyze_subscript_affine_affine (tree chrec_a,
2036                                  tree chrec_b,
2037                                  conflict_function **overlaps_a,
2038                                  conflict_function **overlaps_b,
2039                                  tree *last_conflicts)
2040 {
2041   unsigned nb_vars_a, nb_vars_b, dim;
2042   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2043   lambda_matrix A, U, S;
2044
2045   if (eq_evolutions_p (chrec_a, chrec_b))
2046     {
2047       /* The accessed index overlaps for each iteration in the
2048          loop.  */
2049       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2050       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2051       *last_conflicts = chrec_dont_know;
2052       return;
2053     }
2054   if (dump_file && (dump_flags & TDF_DETAILS))
2055     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2056
2057   /* For determining the initial intersection, we have to solve a
2058      Diophantine equation.  This is the most time consuming part.
2059
2060      For answering to the question: "Is there a dependence?" we have
2061      to prove that there exists a solution to the Diophantine
2062      equation, and that the solution is in the iteration domain,
2063      i.e. the solution is positive or zero, and that the solution
2064      happens before the upper bound loop.nb_iterations.  Otherwise
2065      there is no dependence.  This function outputs a description of
2066      the iterations that hold the intersections.  */
2067
2068   nb_vars_a = nb_vars_in_chrec (chrec_a);
2069   nb_vars_b = nb_vars_in_chrec (chrec_b);
2070
2071   dim = nb_vars_a + nb_vars_b;
2072   U = lambda_matrix_new (dim, dim);
2073   A = lambda_matrix_new (dim, 1);
2074   S = lambda_matrix_new (dim, 1);
2075
2076   init_a = initialize_matrix_A (A, chrec_a, 0, 1);
2077   init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
2078   gamma = init_b - init_a;
2079
2080   /* Don't do all the hard work of solving the Diophantine equation
2081      when we already know the solution: for example,
2082      | {3, +, 1}_1
2083      | {3, +, 4}_2
2084      | gamma = 3 - 3 = 0.
2085      Then the first overlap occurs during the first iterations:
2086      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2087   */
2088   if (gamma == 0)
2089     {
2090       if (nb_vars_a == 1 && nb_vars_b == 1)
2091         {
2092           HOST_WIDE_INT step_a, step_b;
2093           HOST_WIDE_INT niter, niter_a, niter_b;
2094           affine_fn ova, ovb;
2095
2096           niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2097                                                    false);
2098           niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2099                                                    false);
2100           niter = MIN (niter_a, niter_b);
2101           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2102           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2103
2104           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2105                                                    &ova, &ovb,
2106                                                    last_conflicts, 1);
2107           *overlaps_a = conflict_fn (1, ova);
2108           *overlaps_b = conflict_fn (1, ovb);
2109         }
2110
2111       else if (nb_vars_a == 2 && nb_vars_b == 1)
2112         compute_overlap_steps_for_affine_1_2
2113           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2114
2115       else if (nb_vars_a == 1 && nb_vars_b == 2)
2116         compute_overlap_steps_for_affine_1_2
2117           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2118
2119       else
2120         {
2121           if (dump_file && (dump_flags & TDF_DETAILS))
2122             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2123           *overlaps_a = conflict_fn_not_known ();
2124           *overlaps_b = conflict_fn_not_known ();
2125           *last_conflicts = chrec_dont_know;
2126         }
2127       goto end_analyze_subs_aa;
2128     }
2129
2130   /* U.A = S */
2131   lambda_matrix_right_hermite (A, dim, 1, S, U);
2132
2133   if (S[0][0] < 0)
2134     {
2135       S[0][0] *= -1;
2136       lambda_matrix_row_negate (U, dim, 0);
2137     }
2138   gcd_alpha_beta = S[0][0];
2139
2140   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2141      but that is a quite strange case.  Instead of ICEing, answer
2142      don't know.  */
2143   if (gcd_alpha_beta == 0)
2144     {
2145       *overlaps_a = conflict_fn_not_known ();
2146       *overlaps_b = conflict_fn_not_known ();
2147       *last_conflicts = chrec_dont_know;
2148       goto end_analyze_subs_aa;
2149     }
2150
2151   /* The classic "gcd-test".  */
2152   if (!int_divides_p (gcd_alpha_beta, gamma))
2153     {
2154       /* The "gcd-test" has determined that there is no integer
2155          solution, i.e. there is no dependence.  */
2156       *overlaps_a = conflict_fn_no_dependence ();
2157       *overlaps_b = conflict_fn_no_dependence ();
2158       *last_conflicts = integer_zero_node;
2159     }
2160
2161   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
2162   else if (nb_vars_a == 1 && nb_vars_b == 1)
2163     {
2164       /* Both functions should have the same evolution sign.  */
2165       if (((A[0][0] > 0 && -A[1][0] > 0)
2166            || (A[0][0] < 0 && -A[1][0] < 0)))
2167         {
2168           /* The solutions are given by:
2169              |
2170              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
2171              |                           [u21 u22]    [y0]
2172
2173              For a given integer t.  Using the following variables,
2174
2175              | i0 = u11 * gamma / gcd_alpha_beta
2176              | j0 = u12 * gamma / gcd_alpha_beta
2177              | i1 = u21
2178              | j1 = u22
2179
2180              the solutions are:
2181
2182              | x0 = i0 + i1 * t,
2183              | y0 = j0 + j1 * t.  */
2184           HOST_WIDE_INT i0, j0, i1, j1;
2185
2186           i0 = U[0][0] * gamma / gcd_alpha_beta;
2187           j0 = U[0][1] * gamma / gcd_alpha_beta;
2188           i1 = U[1][0];
2189           j1 = U[1][1];
2190
2191           if ((i1 == 0 && i0 < 0)
2192               || (j1 == 0 && j0 < 0))
2193             {
2194               /* There is no solution.
2195                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2196                  falls in here, but for the moment we don't look at the
2197                  upper bound of the iteration domain.  */
2198               *overlaps_a = conflict_fn_no_dependence ();
2199               *overlaps_b = conflict_fn_no_dependence ();
2200               *last_conflicts = integer_zero_node;
2201               goto end_analyze_subs_aa;
2202             }
2203
2204           if (i1 > 0 && j1 > 0)
2205             {
2206               HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2207                 (get_chrec_loop (chrec_a), false);
2208               HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2209                 (get_chrec_loop (chrec_b), false);
2210               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2211
2212               /* (X0, Y0) is a solution of the Diophantine equation:
2213                  "chrec_a (X0) = chrec_b (Y0)".  */
2214               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2215                                         CEIL (-j0, j1));
2216               HOST_WIDE_INT x0 = i1 * tau1 + i0;
2217               HOST_WIDE_INT y0 = j1 * tau1 + j0;
2218
2219               /* (X1, Y1) is the smallest positive solution of the eq
2220                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2221                  first conflict occurs.  */
2222               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2223               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2224               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2225
2226               if (niter > 0)
2227                 {
2228                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2229                                             FLOOR_DIV (niter - j0, j1));
2230                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2231
2232                   /* If the overlap occurs outside of the bounds of the
2233                      loop, there is no dependence.  */
2234                   if (x1 > niter || y1 > niter)
2235                     {
2236                       *overlaps_a = conflict_fn_no_dependence ();
2237                       *overlaps_b = conflict_fn_no_dependence ();
2238                       *last_conflicts = integer_zero_node;
2239                       goto end_analyze_subs_aa;
2240                     }
2241                   else
2242                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2243                 }
2244               else
2245                 *last_conflicts = chrec_dont_know;
2246
2247               *overlaps_a
2248                 = conflict_fn (1,
2249                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
2250                                                  1,
2251                                                  build_int_cst (NULL_TREE, i1)));
2252               *overlaps_b
2253                 = conflict_fn (1,
2254                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
2255                                                  1,
2256                                                  build_int_cst (NULL_TREE, j1)));
2257             }
2258           else
2259             {
2260               /* FIXME: For the moment, the upper bound of the
2261                  iteration domain for i and j is not checked.  */
2262               if (dump_file && (dump_flags & TDF_DETAILS))
2263                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2264               *overlaps_a = conflict_fn_not_known ();
2265               *overlaps_b = conflict_fn_not_known ();
2266               *last_conflicts = chrec_dont_know;
2267             }
2268         }
2269       else
2270         {
2271           if (dump_file && (dump_flags & TDF_DETAILS))
2272             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2273           *overlaps_a = conflict_fn_not_known ();
2274           *overlaps_b = conflict_fn_not_known ();
2275           *last_conflicts = chrec_dont_know;
2276         }
2277     }
2278   else
2279     {
2280       if (dump_file && (dump_flags & TDF_DETAILS))
2281         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2282       *overlaps_a = conflict_fn_not_known ();
2283       *overlaps_b = conflict_fn_not_known ();
2284       *last_conflicts = chrec_dont_know;
2285     }
2286
2287 end_analyze_subs_aa:
2288   if (dump_file && (dump_flags & TDF_DETAILS))
2289     {
2290       fprintf (dump_file, "  (overlaps_a = ");
2291       dump_conflict_function (dump_file, *overlaps_a);
2292       fprintf (dump_file, ")\n  (overlaps_b = ");
2293       dump_conflict_function (dump_file, *overlaps_b);
2294       fprintf (dump_file, ")\n");
2295       fprintf (dump_file, ")\n");
2296     }
2297 }
2298
2299 /* Returns true when analyze_subscript_affine_affine can be used for
2300    determining the dependence relation between chrec_a and chrec_b,
2301    that contain symbols.  This function modifies chrec_a and chrec_b
2302    such that the analysis result is the same, and such that they don't
2303    contain symbols, and then can safely be passed to the analyzer.
2304
2305    Example: The analysis of the following tuples of evolutions produce
2306    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2307    vs. {0, +, 1}_1
2308
2309    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2310    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2311 */
2312
2313 static bool
2314 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2315 {
2316   tree diff, type, left_a, left_b, right_b;
2317
2318   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2319       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2320     /* FIXME: For the moment not handled.  Might be refined later.  */
2321     return false;
2322
2323   type = chrec_type (*chrec_a);
2324   left_a = CHREC_LEFT (*chrec_a);
2325   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2326   diff = chrec_fold_minus (type, left_a, left_b);
2327
2328   if (!evolution_function_is_constant_p (diff))
2329     return false;
2330
2331   if (dump_file && (dump_flags & TDF_DETAILS))
2332     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2333
2334   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2335                                      diff, CHREC_RIGHT (*chrec_a));
2336   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2337   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2338                                      build_int_cst (type, 0),
2339                                      right_b);
2340   return true;
2341 }
2342
2343 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
2344    *OVERLAPS_B are initialized to the functions that describe the
2345    relation between the elements accessed twice by CHREC_A and
2346    CHREC_B.  For k >= 0, the following property is verified:
2347
2348    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2349
2350 static void
2351 analyze_siv_subscript (tree chrec_a,
2352                        tree chrec_b,
2353                        conflict_function **overlaps_a,
2354                        conflict_function **overlaps_b,
2355                        tree *last_conflicts)
2356 {
2357   dependence_stats.num_siv++;
2358
2359   if (dump_file && (dump_flags & TDF_DETAILS))
2360     fprintf (dump_file, "(analyze_siv_subscript \n");
2361
2362   if (evolution_function_is_constant_p (chrec_a)
2363       && evolution_function_is_affine_p (chrec_b))
2364     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2365                                       overlaps_a, overlaps_b, last_conflicts);
2366
2367   else if (evolution_function_is_affine_p (chrec_a)
2368            && evolution_function_is_constant_p (chrec_b))
2369     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2370                                       overlaps_b, overlaps_a, last_conflicts);
2371
2372   else if (evolution_function_is_affine_p (chrec_a)
2373            && evolution_function_is_affine_p (chrec_b))
2374     {
2375       if (!chrec_contains_symbols (chrec_a)
2376           && !chrec_contains_symbols (chrec_b))
2377         {
2378           analyze_subscript_affine_affine (chrec_a, chrec_b,
2379                                            overlaps_a, overlaps_b,
2380                                            last_conflicts);
2381
2382           if (CF_NOT_KNOWN_P (*overlaps_a)
2383               || CF_NOT_KNOWN_P (*overlaps_b))
2384             dependence_stats.num_siv_unimplemented++;
2385           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2386                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2387             dependence_stats.num_siv_independent++;
2388           else
2389             dependence_stats.num_siv_dependent++;
2390         }
2391       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2392                                                         &chrec_b))
2393         {
2394           analyze_subscript_affine_affine (chrec_a, chrec_b,
2395                                            overlaps_a, overlaps_b,
2396                                            last_conflicts);
2397
2398           if (CF_NOT_KNOWN_P (*overlaps_a)
2399               || CF_NOT_KNOWN_P (*overlaps_b))
2400             dependence_stats.num_siv_unimplemented++;
2401           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2402                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2403             dependence_stats.num_siv_independent++;
2404           else
2405             dependence_stats.num_siv_dependent++;
2406         }
2407       else
2408         goto siv_subscript_dontknow;
2409     }
2410
2411   else
2412     {
2413     siv_subscript_dontknow:;
2414       if (dump_file && (dump_flags & TDF_DETAILS))
2415         fprintf (dump_file, "siv test failed: unimplemented.\n");
2416       *overlaps_a = conflict_fn_not_known ();
2417       *overlaps_b = conflict_fn_not_known ();
2418       *last_conflicts = chrec_dont_know;
2419       dependence_stats.num_siv_unimplemented++;
2420     }
2421
2422   if (dump_file && (dump_flags & TDF_DETAILS))
2423     fprintf (dump_file, ")\n");
2424 }
2425
2426 /* Returns false if we can prove that the greatest common divisor of the steps
2427    of CHREC does not divide CST, false otherwise.  */
2428
2429 static bool
2430 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2431 {
2432   HOST_WIDE_INT cd = 0, val;
2433   tree step;
2434
2435   if (!host_integerp (cst, 0))
2436     return true;
2437   val = tree_low_cst (cst, 0);
2438
2439   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2440     {
2441       step = CHREC_RIGHT (chrec);
2442       if (!host_integerp (step, 0))
2443         return true;
2444       cd = gcd (cd, tree_low_cst (step, 0));
2445       chrec = CHREC_LEFT (chrec);
2446     }
2447
2448   return val % cd == 0;
2449 }
2450
2451 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2452    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
2453    functions that describe the relation between the elements accessed
2454    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
2455    is verified:
2456
2457    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2458
2459 static void
2460 analyze_miv_subscript (tree chrec_a,
2461                        tree chrec_b,
2462                        conflict_function **overlaps_a,
2463                        conflict_function **overlaps_b,
2464                        tree *last_conflicts,
2465                        struct loop *loop_nest)
2466 {
2467   /* FIXME:  This is a MIV subscript, not yet handled.
2468      Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2469      (A[i] vs. A[j]).
2470
2471      In the SIV test we had to solve a Diophantine equation with two
2472      variables.  In the MIV case we have to solve a Diophantine
2473      equation with 2*n variables (if the subscript uses n IVs).
2474   */
2475   tree type, difference;
2476
2477   dependence_stats.num_miv++;
2478   if (dump_file && (dump_flags & TDF_DETAILS))
2479     fprintf (dump_file, "(analyze_miv_subscript \n");
2480
2481   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2482   chrec_a = chrec_convert (type, chrec_a, NULL_TREE);
2483   chrec_b = chrec_convert (type, chrec_b, NULL_TREE);
2484   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2485
2486   if (eq_evolutions_p (chrec_a, chrec_b))
2487     {
2488       /* Access functions are the same: all the elements are accessed
2489          in the same order.  */
2490       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2491       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2492       *last_conflicts = estimated_loop_iterations_tree
2493                                 (get_chrec_loop (chrec_a), true);
2494       dependence_stats.num_miv_dependent++;
2495     }
2496
2497   else if (evolution_function_is_constant_p (difference)
2498            /* For the moment, the following is verified:
2499               evolution_function_is_affine_multivariate_p (chrec_a,
2500               loop_nest->num) */
2501            && !gcd_of_steps_may_divide_p (chrec_a, difference))
2502     {
2503       /* testsuite/.../ssa-chrec-33.c
2504          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
2505
2506          The difference is 1, and all the evolution steps are multiples
2507          of 2, consequently there are no overlapping elements.  */
2508       *overlaps_a = conflict_fn_no_dependence ();
2509       *overlaps_b = conflict_fn_no_dependence ();
2510       *last_conflicts = integer_zero_node;
2511       dependence_stats.num_miv_independent++;
2512     }
2513
2514   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2515            && !chrec_contains_symbols (chrec_a)
2516            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2517            && !chrec_contains_symbols (chrec_b))
2518     {
2519       /* testsuite/.../ssa-chrec-35.c
2520          {0, +, 1}_2  vs.  {0, +, 1}_3
2521          the overlapping elements are respectively located at iterations:
2522          {0, +, 1}_x and {0, +, 1}_x,
2523          in other words, we have the equality:
2524          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2525
2526          Other examples:
2527          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2528          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2529
2530          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2531          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2532       */
2533       analyze_subscript_affine_affine (chrec_a, chrec_b,
2534                                        overlaps_a, overlaps_b, last_conflicts);
2535
2536       if (CF_NOT_KNOWN_P (*overlaps_a)
2537           || CF_NOT_KNOWN_P (*overlaps_b))
2538         dependence_stats.num_miv_unimplemented++;
2539       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2540                || CF_NO_DEPENDENCE_P (*overlaps_b))
2541         dependence_stats.num_miv_independent++;
2542       else
2543         dependence_stats.num_miv_dependent++;
2544     }
2545
2546   else
2547     {
2548       /* When the analysis is too difficult, answer "don't know".  */
2549       if (dump_file && (dump_flags & TDF_DETAILS))
2550         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2551
2552       *overlaps_a = conflict_fn_not_known ();
2553       *overlaps_b = conflict_fn_not_known ();
2554       *last_conflicts = chrec_dont_know;
2555       dependence_stats.num_miv_unimplemented++;
2556     }
2557
2558   if (dump_file && (dump_flags & TDF_DETAILS))
2559     fprintf (dump_file, ")\n");
2560 }
2561
2562 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2563    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
2564    OVERLAP_ITERATIONS_B are initialized with two functions that
2565    describe the iterations that contain conflicting elements.
2566
2567    Remark: For an integer k >= 0, the following equality is true:
2568
2569    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2570 */
2571
2572 static void
2573 analyze_overlapping_iterations (tree chrec_a,
2574                                 tree chrec_b,
2575                                 conflict_function **overlap_iterations_a,
2576                                 conflict_function **overlap_iterations_b,
2577                                 tree *last_conflicts, struct loop *loop_nest)
2578 {
2579   unsigned int lnn = loop_nest->num;
2580
2581   dependence_stats.num_subscript_tests++;
2582
2583   if (dump_file && (dump_flags & TDF_DETAILS))
2584     {
2585       fprintf (dump_file, "(analyze_overlapping_iterations \n");
2586       fprintf (dump_file, "  (chrec_a = ");
2587       print_generic_expr (dump_file, chrec_a, 0);
2588       fprintf (dump_file, ")\n  (chrec_b = ");
2589       print_generic_expr (dump_file, chrec_b, 0);
2590       fprintf (dump_file, ")\n");
2591     }
2592
2593   if (chrec_a == NULL_TREE
2594       || chrec_b == NULL_TREE
2595       || chrec_contains_undetermined (chrec_a)
2596       || chrec_contains_undetermined (chrec_b))
2597     {
2598       dependence_stats.num_subscript_undetermined++;
2599
2600       *overlap_iterations_a = conflict_fn_not_known ();
2601       *overlap_iterations_b = conflict_fn_not_known ();
2602     }
2603
2604   /* If they are the same chrec, and are affine, they overlap
2605      on every iteration.  */
2606   else if (eq_evolutions_p (chrec_a, chrec_b)
2607            && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2608     {
2609       dependence_stats.num_same_subscript_function++;
2610       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2611       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2612       *last_conflicts = chrec_dont_know;
2613     }
2614
2615   /* If they aren't the same, and aren't affine, we can't do anything
2616      yet. */
2617   else if ((chrec_contains_symbols (chrec_a)
2618             || chrec_contains_symbols (chrec_b))
2619            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2620                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2621     {
2622       dependence_stats.num_subscript_undetermined++;
2623       *overlap_iterations_a = conflict_fn_not_known ();
2624       *overlap_iterations_b = conflict_fn_not_known ();
2625     }
2626
2627   else if (ziv_subscript_p (chrec_a, chrec_b))
2628     analyze_ziv_subscript (chrec_a, chrec_b,
2629                            overlap_iterations_a, overlap_iterations_b,
2630                            last_conflicts);
2631
2632   else if (siv_subscript_p (chrec_a, chrec_b))
2633     analyze_siv_subscript (chrec_a, chrec_b,
2634                            overlap_iterations_a, overlap_iterations_b,
2635                            last_conflicts);
2636
2637   else
2638     analyze_miv_subscript (chrec_a, chrec_b,
2639                            overlap_iterations_a, overlap_iterations_b,
2640                            last_conflicts, loop_nest);
2641
2642   if (dump_file && (dump_flags & TDF_DETAILS))
2643     {
2644       fprintf (dump_file, "  (overlap_iterations_a = ");
2645       dump_conflict_function (dump_file, *overlap_iterations_a);
2646       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
2647       dump_conflict_function (dump_file, *overlap_iterations_b);
2648       fprintf (dump_file, ")\n");
2649       fprintf (dump_file, ")\n");
2650     }
2651 }
2652
2653 /* Helper function for uniquely inserting distance vectors.  */
2654
2655 static void
2656 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2657 {
2658   unsigned i;
2659   lambda_vector v;
2660
2661   for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2662     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2663       return;
2664
2665   VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2666 }
2667
2668 /* Helper function for uniquely inserting direction vectors.  */
2669
2670 static void
2671 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2672 {
2673   unsigned i;
2674   lambda_vector v;
2675
2676   for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2677     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2678       return;
2679
2680   VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2681 }
2682
2683 /* Add a distance of 1 on all the loops outer than INDEX.  If we
2684    haven't yet determined a distance for this outer loop, push a new
2685    distance vector composed of the previous distance, and a distance
2686    of 1 for this outer loop.  Example:
2687
2688    | loop_1
2689    |   loop_2
2690    |     A[10]
2691    |   endloop_2
2692    | endloop_1
2693
2694    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
2695    save (0, 1), then we have to save (1, 0).  */
2696
2697 static void
2698 add_outer_distances (struct data_dependence_relation *ddr,
2699                      lambda_vector dist_v, int index)
2700 {
2701   /* For each outer loop where init_v is not set, the accesses are
2702      in dependence of distance 1 in the loop.  */
2703   while (--index >= 0)
2704     {
2705       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2706       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2707       save_v[index] = 1;
2708       save_dist_v (ddr, save_v);
2709     }
2710 }
2711
2712 /* Return false when fail to represent the data dependence as a
2713    distance vector.  INIT_B is set to true when a component has been
2714    added to the distance vector DIST_V.  INDEX_CARRY is then set to
2715    the index in DIST_V that carries the dependence.  */
2716
2717 static bool
2718 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2719                              struct data_reference *ddr_a,
2720                              struct data_reference *ddr_b,
2721                              lambda_vector dist_v, bool *init_b,
2722                              int *index_carry)
2723 {
2724   unsigned i;
2725   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2726
2727   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2728     {
2729       tree access_fn_a, access_fn_b;
2730       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2731
2732       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2733         {
2734           non_affine_dependence_relation (ddr);
2735           return false;
2736         }
2737
2738       access_fn_a = DR_ACCESS_FN (ddr_a, i);
2739       access_fn_b = DR_ACCESS_FN (ddr_b, i);
2740
2741       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2742           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2743         {
2744           int dist, index;
2745           int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2746                                             DDR_LOOP_NEST (ddr));
2747           int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2748                                             DDR_LOOP_NEST (ddr));
2749
2750           /* The dependence is carried by the outermost loop.  Example:
2751              | loop_1
2752              |   A[{4, +, 1}_1]
2753              |   loop_2
2754              |     A[{5, +, 1}_2]
2755              |   endloop_2
2756              | endloop_1
2757              In this case, the dependence is carried by loop_1.  */
2758           index = index_a < index_b ? index_a : index_b;
2759           *index_carry = MIN (index, *index_carry);
2760
2761           if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2762             {
2763               non_affine_dependence_relation (ddr);
2764               return false;
2765             }
2766
2767           dist = int_cst_value (SUB_DISTANCE (subscript));
2768
2769           /* This is the subscript coupling test.  If we have already
2770              recorded a distance for this loop (a distance coming from
2771              another subscript), it should be the same.  For example,
2772              in the following code, there is no dependence:
2773
2774              | loop i = 0, N, 1
2775              |   T[i+1][i] = ...
2776              |   ... = T[i][i]
2777              | endloop
2778           */
2779           if (init_v[index] != 0 && dist_v[index] != dist)
2780             {
2781               finalize_ddr_dependent (ddr, chrec_known);
2782               return false;
2783             }
2784
2785           dist_v[index] = dist;
2786           init_v[index] = 1;
2787           *init_b = true;
2788         }
2789       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2790         {
2791           /* This can be for example an affine vs. constant dependence
2792              (T[i] vs. T[3]) that is not an affine dependence and is
2793              not representable as a distance vector.  */
2794           non_affine_dependence_relation (ddr);
2795           return false;
2796         }
2797     }
2798
2799   return true;
2800 }
2801
2802 /* Return true when the DDR contains only constant access functions.  */
2803
2804 static bool
2805 constant_access_functions (const struct data_dependence_relation *ddr)
2806 {
2807   unsigned i;
2808
2809   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2810     if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2811         || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2812       return false;
2813
2814   return true;
2815 }
2816
2817 /* Helper function for the case where DDR_A and DDR_B are the same
2818    multivariate access function with a constant step.  For an example
2819    see pr34635-1.c.  */
2820
2821 static void
2822 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2823 {
2824   int x_1, x_2;
2825   tree c_1 = CHREC_LEFT (c_2);
2826   tree c_0 = CHREC_LEFT (c_1);
2827   lambda_vector dist_v;
2828   int v1, v2, cd;
2829
2830   /* Polynomials with more than 2 variables are not handled yet.  When
2831      the evolution steps are parameters, it is not possible to
2832      represent the dependence using classical distance vectors.  */
2833   if (TREE_CODE (c_0) != INTEGER_CST
2834       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2835       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2836     {
2837       DDR_AFFINE_P (ddr) = false;
2838       return;
2839     }
2840
2841   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2842   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2843
2844   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
2845   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2846   v1 = int_cst_value (CHREC_RIGHT (c_1));
2847   v2 = int_cst_value (CHREC_RIGHT (c_2));
2848   cd = gcd (v1, v2);
2849   v1 /= cd;
2850   v2 /= cd;
2851
2852   if (v2 < 0)
2853     {
2854       v2 = -v2;
2855       v1 = -v1;
2856     }
2857
2858   dist_v[x_1] = v2;
2859   dist_v[x_2] = -v1;
2860   save_dist_v (ddr, dist_v);
2861
2862   add_outer_distances (ddr, dist_v, x_1);
2863 }
2864
2865 /* Helper function for the case where DDR_A and DDR_B are the same
2866    access functions.  */
2867
2868 static void
2869 add_other_self_distances (struct data_dependence_relation *ddr)
2870 {
2871   lambda_vector dist_v;
2872   unsigned i;
2873   int index_carry = DDR_NB_LOOPS (ddr);
2874
2875   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2876     {
2877       tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2878
2879       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2880         {
2881           if (!evolution_function_is_univariate_p (access_fun))
2882             {
2883               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2884                 {
2885                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2886                   return;
2887                 }
2888
2889               access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2890
2891               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2892                 add_multivariate_self_dist (ddr, access_fun);
2893               else
2894                 /* The evolution step is not constant: it varies in
2895                    the outer loop, so this cannot be represented by a
2896                    distance vector.  For example in pr34635.c the
2897                    evolution is {0, +, {0, +, 4}_1}_2.  */
2898                 DDR_AFFINE_P (ddr) = false;
2899
2900               return;
2901             }
2902
2903           index_carry = MIN (index_carry,
2904                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
2905                                                  DDR_LOOP_NEST (ddr)));
2906         }
2907     }
2908
2909   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2910   add_outer_distances (ddr, dist_v, index_carry);
2911 }
2912
2913 static void
2914 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2915 {
2916   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2917
2918   dist_v[DDR_INNER_LOOP (ddr)] = 1;
2919   save_dist_v (ddr, dist_v);
2920 }
2921
2922 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
2923    is the case for example when access functions are the same and
2924    equal to a constant, as in:
2925
2926    | loop_1
2927    |   A[3] = ...
2928    |   ... = A[3]
2929    | endloop_1
2930
2931    in which case the distance vectors are (0) and (1).  */
2932
2933 static void
2934 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2935 {
2936   unsigned i, j;
2937
2938   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2939     {
2940       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2941       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2942       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2943
2944       for (j = 0; j < ca->n; j++)
2945         if (affine_function_zero_p (ca->fns[j]))
2946           {
2947             insert_innermost_unit_dist_vector (ddr);
2948             return;
2949           }
2950
2951       for (j = 0; j < cb->n; j++)
2952         if (affine_function_zero_p (cb->fns[j]))
2953           {
2954             insert_innermost_unit_dist_vector (ddr);
2955             return;
2956           }
2957     }
2958 }
2959
2960 /* Compute the classic per loop distance vector.  DDR is the data
2961    dependence relation to build a vector from.  Return false when fail
2962    to represent the data dependence as a distance vector.  */
2963
2964 static bool
2965 build_classic_dist_vector (struct data_dependence_relation *ddr,
2966                            struct loop *loop_nest)
2967 {
2968   bool init_b = false;
2969   int index_carry = DDR_NB_LOOPS (ddr);
2970   lambda_vector dist_v;
2971
2972   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2973     return false;
2974
2975   if (same_access_functions (ddr))
2976     {
2977       /* Save the 0 vector.  */
2978       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2979       save_dist_v (ddr, dist_v);
2980
2981       if (constant_access_functions (ddr))
2982         add_distance_for_zero_overlaps (ddr);
2983
2984       if (DDR_NB_LOOPS (ddr) > 1)
2985         add_other_self_distances (ddr);
2986
2987       return true;
2988     }
2989
2990   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2991   if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2992                                     dist_v, &init_b, &index_carry))
2993     return false;
2994
2995   /* Save the distance vector if we initialized one.  */
2996   if (init_b)
2997     {
2998       /* Verify a basic constraint: classic distance vectors should
2999          always be lexicographically positive.
3000
3001          Data references are collected in the order of execution of
3002          the program, thus for the following loop
3003
3004          | for (i = 1; i < 100; i++)
3005          |   for (j = 1; j < 100; j++)
3006          |     {
3007          |       t = T[j+1][i-1];  // A
3008          |       T[j][i] = t + 2;  // B
3009          |     }
3010
3011          references are collected following the direction of the wind:
3012          A then B.  The data dependence tests are performed also
3013          following this order, such that we're looking at the distance
3014          separating the elements accessed by A from the elements later
3015          accessed by B.  But in this example, the distance returned by
3016          test_dep (A, B) is lexicographically negative (-1, 1), that
3017          means that the access A occurs later than B with respect to
3018          the outer loop, ie. we're actually looking upwind.  In this
3019          case we solve test_dep (B, A) looking downwind to the
3020          lexicographically positive solution, that returns the
3021          distance vector (1, -1).  */
3022       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3023         {
3024           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3025           if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3026                                               loop_nest))
3027             return false;
3028           compute_subscript_distance (ddr);
3029           if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3030                                             save_v, &init_b, &index_carry))
3031             return false;
3032           save_dist_v (ddr, save_v);
3033           DDR_REVERSED_P (ddr) = true;
3034
3035           /* In this case there is a dependence forward for all the
3036              outer loops:
3037
3038              | for (k = 1; k < 100; k++)
3039              |  for (i = 1; i < 100; i++)
3040              |   for (j = 1; j < 100; j++)
3041              |     {
3042              |       t = T[j+1][i-1];  // A
3043              |       T[j][i] = t + 2;  // B
3044              |     }
3045
3046              the vectors are:
3047              (0,  1, -1)
3048              (1,  1, -1)
3049              (1, -1,  1)
3050           */
3051           if (DDR_NB_LOOPS (ddr) > 1)
3052             {
3053               add_outer_distances (ddr, save_v, index_carry);
3054               add_outer_distances (ddr, dist_v, index_carry);
3055             }
3056         }
3057       else
3058         {
3059           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3060           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3061
3062           if (DDR_NB_LOOPS (ddr) > 1)
3063             {
3064               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3065
3066               if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3067                                                   DDR_A (ddr), loop_nest))
3068                 return false;
3069               compute_subscript_distance (ddr);
3070               if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3071                                                 opposite_v, &init_b,
3072                                                 &index_carry))
3073                 return false;
3074
3075               save_dist_v (ddr, save_v);
3076               add_outer_distances (ddr, dist_v, index_carry);
3077               add_outer_distances (ddr, opposite_v, index_carry);
3078             }
3079           else
3080             save_dist_v (ddr, save_v);
3081         }
3082     }
3083   else
3084     {
3085       /* There is a distance of 1 on all the outer loops: Example:
3086          there is a dependence of distance 1 on loop_1 for the array A.
3087
3088          | loop_1
3089          |   A[5] = ...
3090          | endloop
3091       */
3092       add_outer_distances (ddr, dist_v,
3093                            lambda_vector_first_nz (dist_v,
3094                                                    DDR_NB_LOOPS (ddr), 0));
3095     }
3096
3097   if (dump_file && (dump_flags & TDF_DETAILS))
3098     {
3099       unsigned i;
3100
3101       fprintf (dump_file, "(build_classic_dist_vector\n");
3102       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3103         {
3104           fprintf (dump_file, "  dist_vector = (");
3105           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3106                                DDR_NB_LOOPS (ddr));
3107           fprintf (dump_file, "  )\n");
3108         }
3109       fprintf (dump_file, ")\n");
3110     }
3111
3112   return true;
3113 }
3114
3115 /* Return the direction for a given distance.
3116    FIXME: Computing dir this way is suboptimal, since dir can catch
3117    cases that dist is unable to represent.  */
3118
3119 static inline enum data_dependence_direction
3120 dir_from_dist (int dist)
3121 {
3122   if (dist > 0)
3123     return dir_positive;
3124   else if (dist < 0)
3125     return dir_negative;
3126   else
3127     return dir_equal;
3128 }
3129
3130 /* Compute the classic per loop direction vector.  DDR is the data
3131    dependence relation to build a vector from.  */
3132
3133 static void
3134 build_classic_dir_vector (struct data_dependence_relation *ddr)
3135 {
3136   unsigned i, j;
3137   lambda_vector dist_v;
3138
3139   for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3140     {
3141       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3142
3143       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3144         dir_v[j] = dir_from_dist (dist_v[j]);
3145
3146       save_dir_v (ddr, dir_v);
3147     }
3148 }
3149
3150 /* Helper function.  Returns true when there is a dependence between
3151    data references DRA and DRB.  */
3152
3153 static bool
3154 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3155                                struct data_reference *dra,
3156                                struct data_reference *drb,
3157                                struct loop *loop_nest)
3158 {
3159   unsigned int i;
3160   tree last_conflicts;
3161   struct subscript *subscript;
3162
3163   for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3164        i++)
3165     {
3166       conflict_function *overlaps_a, *overlaps_b;
3167
3168       analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3169                                       DR_ACCESS_FN (drb, i),
3170                                       &overlaps_a, &overlaps_b,
3171                                       &last_conflicts, loop_nest);
3172
3173       if (CF_NOT_KNOWN_P (overlaps_a)
3174           || CF_NOT_KNOWN_P (overlaps_b))
3175         {
3176           finalize_ddr_dependent (ddr, chrec_dont_know);
3177           dependence_stats.num_dependence_undetermined++;
3178           free_conflict_function (overlaps_a);
3179           free_conflict_function (overlaps_b);
3180           return false;
3181         }
3182
3183       else if (CF_NO_DEPENDENCE_P (overlaps_a)
3184                || CF_NO_DEPENDENCE_P (overlaps_b))
3185         {
3186           finalize_ddr_dependent (ddr, chrec_known);
3187           dependence_stats.num_dependence_independent++;
3188           free_conflict_function (overlaps_a);
3189           free_conflict_function (overlaps_b);
3190           return false;
3191         }
3192
3193       else
3194         {
3195           if (SUB_CONFLICTS_IN_A (subscript))
3196             free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3197           if (SUB_CONFLICTS_IN_B (subscript))
3198             free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3199
3200           SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3201           SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3202           SUB_LAST_CONFLICT (subscript) = last_conflicts;
3203         }
3204     }
3205
3206   return true;
3207 }
3208
3209 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
3210
3211 static void
3212 subscript_dependence_tester (struct data_dependence_relation *ddr,
3213                              struct loop *loop_nest)
3214 {
3215
3216   if (dump_file && (dump_flags & TDF_DETAILS))
3217     fprintf (dump_file, "(subscript_dependence_tester \n");
3218
3219   if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3220     dependence_stats.num_dependence_dependent++;
3221
3222   compute_subscript_distance (ddr);
3223   if (build_classic_dist_vector (ddr, loop_nest))
3224     build_classic_dir_vector (ddr);
3225
3226   if (dump_file && (dump_flags & TDF_DETAILS))
3227     fprintf (dump_file, ")\n");
3228 }
3229
3230 /* Returns true when all the access functions of A are affine or
3231    constant with respect to LOOP_NEST.  */
3232
3233 static bool
3234 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3235                                            const struct loop *loop_nest)
3236 {
3237   unsigned int i;
3238   VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3239   tree t;
3240
3241   for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3242     if (!evolution_function_is_invariant_p (t, loop_nest->num)
3243         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3244       return false;
3245
3246   return true;
3247 }
3248
3249 /* Initializes an equation for an OMEGA problem using the information
3250    contained in the ACCESS_FUN.  Returns true when the operation
3251    succeeded.
3252
3253    PB is the omega constraint system.
3254    EQ is the number of the equation to be initialized.
3255    OFFSET is used for shifting the variables names in the constraints:
3256    a constrain is composed of 2 * the number of variables surrounding
3257    dependence accesses.  OFFSET is set either to 0 for the first n variables,
3258    then it is set to n.
3259    ACCESS_FUN is expected to be an affine chrec.  */
3260
3261 static bool
3262 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3263                        unsigned int offset, tree access_fun,
3264                        struct data_dependence_relation *ddr)
3265 {
3266   switch (TREE_CODE (access_fun))
3267     {
3268     case POLYNOMIAL_CHREC:
3269       {
3270         tree left = CHREC_LEFT (access_fun);
3271         tree right = CHREC_RIGHT (access_fun);
3272         int var = CHREC_VARIABLE (access_fun);
3273         unsigned var_idx;
3274
3275         if (TREE_CODE (right) != INTEGER_CST)
3276           return false;
3277
3278         var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3279         pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3280
3281         /* Compute the innermost loop index.  */
3282         DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3283
3284         if (offset == 0)
3285           pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3286             += int_cst_value (right);
3287
3288         switch (TREE_CODE (left))
3289           {
3290           case POLYNOMIAL_CHREC:
3291             return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3292
3293           case INTEGER_CST:
3294             pb->eqs[eq].coef[0] += int_cst_value (left);
3295             return true;
3296
3297           default:
3298             return false;
3299           }
3300       }
3301
3302     case INTEGER_CST:
3303       pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3304       return true;
3305
3306     default:
3307       return false;
3308     }
3309 }
3310
3311 /* As explained in the comments preceding init_omega_for_ddr, we have
3312    to set up a system for each loop level, setting outer loops
3313    variation to zero, and current loop variation to positive or zero.
3314    Save each lexico positive distance vector.  */
3315
3316 static void
3317 omega_extract_distance_vectors (omega_pb pb,
3318                                 struct data_dependence_relation *ddr)
3319 {
3320   int eq, geq;
3321   unsigned i, j;
3322   struct loop *loopi, *loopj;
3323   enum omega_result res;
3324
3325   /* Set a new problem for each loop in the nest.  The basis is the
3326      problem that we have initialized until now.  On top of this we
3327      add new constraints.  */
3328   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3329          && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3330     {
3331       int dist = 0;
3332       omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3333                                            DDR_NB_LOOPS (ddr));
3334
3335       omega_copy_problem (copy, pb);
3336
3337       /* For all the outer loops "loop_j", add "dj = 0".  */
3338       for (j = 0;
3339            j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3340         {
3341           eq = omega_add_zero_eq (copy, omega_black);
3342           copy->eqs[eq].coef[j + 1] = 1;
3343         }
3344
3345       /* For "loop_i", add "0 <= di".  */
3346       geq = omega_add_zero_geq (copy, omega_black);
3347       copy->geqs[geq].coef[i + 1] = 1;
3348
3349       /* Reduce the constraint system, and test that the current
3350          problem is feasible.  */
3351       res = omega_simplify_problem (copy);
3352       if (res == omega_false
3353           || res == omega_unknown
3354           || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3355         goto next_problem;
3356
3357       for (eq = 0; eq < copy->num_subs; eq++)
3358         if (copy->subs[eq].key == (int) i + 1)
3359           {
3360             dist = copy->subs[eq].coef[0];
3361             goto found_dist;
3362           }
3363
3364       if (dist == 0)
3365         {
3366           /* Reinitialize problem...  */
3367           omega_copy_problem (copy, pb);
3368           for (j = 0;
3369                j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3370             {
3371               eq = omega_add_zero_eq (copy, omega_black);
3372               copy->eqs[eq].coef[j + 1] = 1;
3373             }
3374
3375           /* ..., but this time "di = 1".  */
3376           eq = omega_add_zero_eq (copy, omega_black);
3377           copy->eqs[eq].coef[i + 1] = 1;
3378           copy->eqs[eq].coef[0] = -1;
3379
3380           res = omega_simplify_problem (copy);
3381           if (res == omega_false
3382               || res == omega_unknown
3383               || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3384             goto next_problem;
3385
3386           for (eq = 0; eq < copy->num_subs; eq++)
3387             if (copy->subs[eq].key == (int) i + 1)
3388               {
3389                 dist = copy->subs[eq].coef[0];
3390                 goto found_dist;
3391               }
3392         }
3393
3394     found_dist:;
3395       /* Save the lexicographically positive distance vector.  */
3396       if (dist >= 0)
3397         {
3398           lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3399           lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3400
3401           dist_v[i] = dist;
3402
3403           for (eq = 0; eq < copy->num_subs; eq++)
3404             if (copy->subs[eq].key > 0)
3405               {
3406                 dist = copy->subs[eq].coef[0];
3407                 dist_v[copy->subs[eq].key - 1] = dist;
3408               }
3409
3410           for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3411             dir_v[j] = dir_from_dist (dist_v[j]);
3412
3413           save_dist_v (ddr, dist_v);
3414           save_dir_v (ddr, dir_v);
3415         }
3416
3417     next_problem:;
3418       omega_free_problem (copy);
3419     }
3420 }
3421
3422 /* This is called for each subscript of a tuple of data references:
3423    insert an equality for representing the conflicts.  */
3424
3425 static bool
3426 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3427                        struct data_dependence_relation *ddr,
3428                        omega_pb pb, bool *maybe_dependent)
3429 {
3430   int eq;
3431   tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3432                                      TREE_TYPE (access_fun_b));
3433   tree fun_a = chrec_convert (type, access_fun_a, NULL_TREE);
3434   tree fun_b = chrec_convert (type, access_fun_b, NULL_TREE);
3435   tree difference = chrec_fold_minus (type, fun_a, fun_b);
3436
3437   /* When the fun_a - fun_b is not constant, the dependence is not
3438      captured by the classic distance vector representation.  */
3439   if (TREE_CODE (difference) != INTEGER_CST)
3440     return false;
3441
3442   /* ZIV test.  */
3443   if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3444     {
3445       /* There is no dependence.  */
3446       *maybe_dependent = false;
3447       return true;
3448     }
3449
3450   fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3451
3452   eq = omega_add_zero_eq (pb, omega_black);
3453   if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3454       || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3455     /* There is probably a dependence, but the system of
3456        constraints cannot be built: answer "don't know".  */
3457     return false;
3458
3459   /* GCD test.  */
3460   if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3461       && !int_divides_p (lambda_vector_gcd
3462                          ((lambda_vector) &(pb->eqs[eq].coef[1]),
3463                           2 * DDR_NB_LOOPS (ddr)),
3464                          pb->eqs[eq].coef[0]))
3465     {
3466       /* There is no dependence.  */
3467       *maybe_dependent = false;
3468       return true;
3469     }
3470
3471   return true;
3472 }
3473
3474 /* Helper function, same as init_omega_for_ddr but specialized for
3475    data references A and B.  */
3476
3477 static bool
3478 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3479                       struct data_dependence_relation *ddr,
3480                       omega_pb pb, bool *maybe_dependent)
3481 {
3482   unsigned i;
3483   int ineq;
3484   struct loop *loopi;
3485   unsigned nb_loops = DDR_NB_LOOPS (ddr);
3486
3487   /* Insert an equality per subscript.  */
3488   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3489     {
3490       if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3491                                   ddr, pb, maybe_dependent))
3492         return false;
3493       else if (*maybe_dependent == false)
3494         {
3495           /* There is no dependence.  */
3496           DDR_ARE_DEPENDENT (ddr) = chrec_known;
3497           return true;
3498         }
3499     }
3500
3501   /* Insert inequalities: constraints corresponding to the iteration
3502      domain, i.e. the loops surrounding the references "loop_x" and
3503      the distance variables "dx".  The layout of the OMEGA
3504      representation is as follows:
3505      - coef[0] is the constant
3506      - coef[1..nb_loops] are the protected variables that will not be
3507      removed by the solver: the "dx"
3508      - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3509   */
3510   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3511          && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3512     {
3513       HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3514
3515       /* 0 <= loop_x */
3516       ineq = omega_add_zero_geq (pb, omega_black);
3517       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3518
3519       /* 0 <= loop_x + dx */
3520       ineq = omega_add_zero_geq (pb, omega_black);
3521       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3522       pb->geqs[ineq].coef[i + 1] = 1;
3523
3524       if (nbi != -1)
3525         {
3526           /* loop_x <= nb_iters */
3527           ineq = omega_add_zero_geq (pb, omega_black);
3528           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3529           pb->geqs[ineq].coef[0] = nbi;
3530
3531           /* loop_x + dx <= nb_iters */
3532           ineq = omega_add_zero_geq (pb, omega_black);
3533           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3534           pb->geqs[ineq].coef[i + 1] = -1;
3535           pb->geqs[ineq].coef[0] = nbi;
3536
3537           /* A step "dx" bigger than nb_iters is not feasible, so
3538              add "0 <= nb_iters + dx",  */
3539           ineq = omega_add_zero_geq (pb, omega_black);
3540           pb->geqs[ineq].coef[i + 1] = 1;
3541           pb->geqs[ineq].coef[0] = nbi;
3542           /* and "dx <= nb_iters".  */
3543           ineq = omega_add_zero_geq (pb, omega_black);
3544           pb->geqs[ineq].coef[i + 1] = -1;
3545           pb->geqs[ineq].coef[0] = nbi;
3546         }
3547     }
3548
3549   omega_extract_distance_vectors (pb, ddr);
3550
3551   return true;
3552 }
3553
3554 /* Sets up the Omega dependence problem for the data dependence
3555    relation DDR.  Returns false when the constraint system cannot be
3556    built, ie. when the test answers "don't know".  Returns true
3557    otherwise, and when independence has been proved (using one of the
3558    trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3559    set MAYBE_DEPENDENT to true.
3560
3561    Example: for setting up the dependence system corresponding to the
3562    conflicting accesses
3563
3564    | loop_i
3565    |   loop_j
3566    |     A[i, i+1] = ...
3567    |     ... A[2*j, 2*(i + j)]
3568    |   endloop_j
3569    | endloop_i
3570
3571    the following constraints come from the iteration domain:
3572
3573    0 <= i <= Ni
3574    0 <= i + di <= Ni
3575    0 <= j <= Nj
3576    0 <= j + dj <= Nj
3577
3578    where di, dj are the distance variables.  The constraints
3579    representing the conflicting elements are:
3580
3581    i = 2 * (j + dj)
3582    i + 1 = 2 * (i + di + j + dj)
3583
3584    For asking that the resulting distance vector (di, dj) be
3585    lexicographically positive, we insert the constraint "di >= 0".  If
3586    "di = 0" in the solution, we fix that component to zero, and we
3587    look at the inner loops: we set a new problem where all the outer
3588    loop distances are zero, and fix this inner component to be
3589    positive.  When one of the components is positive, we save that
3590    distance, and set a new problem where the distance on this loop is
3591    zero, searching for other distances in the inner loops.  Here is
3592    the classic example that illustrates that we have to set for each
3593    inner loop a new problem:
3594
3595    | loop_1
3596    |   loop_2
3597    |     A[10]
3598    |   endloop_2
3599    | endloop_1
3600
3601    we have to save two distances (1, 0) and (0, 1).
3602
3603    Given two array references, refA and refB, we have to set the
3604    dependence problem twice, refA vs. refB and refB vs. refA, and we
3605    cannot do a single test, as refB might occur before refA in the
3606    inner loops, and the contrary when considering outer loops: ex.
3607
3608    | loop_0
3609    |   loop_1
3610    |     loop_2
3611    |       T[{1,+,1}_2][{1,+,1}_1]  // refA
3612    |       T[{2,+,1}_2][{0,+,1}_1]  // refB
3613    |     endloop_2
3614    |   endloop_1
3615    | endloop_0
3616
3617    refB touches the elements in T before refA, and thus for the same
3618    loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3619    but for successive loop_0 iterations, we have (1, -1, 1)
3620
3621    The Omega solver expects the distance variables ("di" in the
3622    previous example) to come first in the constraint system (as
3623    variables to be protected, or "safe" variables), the constraint
3624    system is built using the following layout:
3625
3626    "cst | distance vars | index vars".
3627 */
3628
3629 static bool
3630 init_omega_for_ddr (struct data_dependence_relation *ddr,
3631                     bool *maybe_dependent)
3632 {
3633   omega_pb pb;
3634   bool res = false;
3635
3636   *maybe_dependent = true;
3637
3638   if (same_access_functions (ddr))
3639     {
3640       unsigned j;
3641       lambda_vector dir_v;
3642
3643       /* Save the 0 vector.  */
3644       save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3645       dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3646       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3647         dir_v[j] = dir_equal;
3648       save_dir_v (ddr, dir_v);
3649
3650       /* Save the dependences carried by outer loops.  */
3651       pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3652       res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3653                                   maybe_dependent);
3654       omega_free_problem (pb);
3655       return res;
3656     }
3657
3658   /* Omega expects the protected variables (those that have to be kept
3659      after elimination) to appear first in the constraint system.
3660      These variables are the distance variables.  In the following
3661      initialization we declare NB_LOOPS safe variables, and the total
3662      number of variables for the constraint system is 2*NB_LOOPS.  */
3663   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3664   res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3665                               maybe_dependent);
3666   omega_free_problem (pb);
3667
3668   /* Stop computation if not decidable, or no dependence.  */
3669   if (res == false || *maybe_dependent == false)
3670     return res;
3671
3672   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3673   res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3674                               maybe_dependent);
3675   omega_free_problem (pb);
3676
3677   return res;
3678 }
3679
3680 /* Return true when DDR contains the same information as that stored
3681    in DIR_VECTS and in DIST_VECTS, return false otherwise.   */
3682
3683 static bool
3684 ddr_consistent_p (FILE *file,
3685                   struct data_dependence_relation *ddr,
3686                   VEC (lambda_vector, heap) *dist_vects,
3687                   VEC (lambda_vector, heap) *dir_vects)
3688 {
3689   unsigned int i, j;
3690
3691   /* If dump_file is set, output there.  */
3692   if (dump_file && (dump_flags & TDF_DETAILS))
3693     file = dump_file;
3694
3695   if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3696     {
3697       lambda_vector b_dist_v;
3698       fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3699                VEC_length (lambda_vector, dist_vects),
3700                DDR_NUM_DIST_VECTS (ddr));
3701
3702       fprintf (file, "Banerjee dist vectors:\n");
3703       for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3704         print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3705
3706       fprintf (file, "Omega dist vectors:\n");
3707       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3708         print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3709
3710       fprintf (file, "data dependence relation:\n");
3711       dump_data_dependence_relation (file, ddr);
3712
3713       fprintf (file, ")\n");
3714       return false;
3715     }
3716
3717   if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3718     {
3719       fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3720                VEC_length (lambda_vector, dir_vects),
3721                DDR_NUM_DIR_VECTS (ddr));
3722       return false;
3723     }
3724
3725   for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3726     {
3727       lambda_vector a_dist_v;
3728       lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3729
3730       /* Distance vectors are not ordered in the same way in the DDR
3731          and in the DIST_VECTS: search for a matching vector.  */
3732       for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3733         if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3734           break;
3735
3736       if (j == VEC_length (lambda_vector, dist_vects))
3737         {
3738           fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3739           print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3740           fprintf (file, "not found in Omega dist vectors:\n");
3741           print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3742           fprintf (file, "data dependence relation:\n");
3743           dump_data_dependence_relation (file, ddr);
3744           fprintf (file, ")\n");
3745         }
3746     }
3747
3748   for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3749     {
3750       lambda_vector a_dir_v;
3751       lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3752
3753       /* Direction vectors are not ordered in the same way in the DDR
3754          and in the DIR_VECTS: search for a matching vector.  */
3755       for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3756         if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3757           break;
3758
3759       if (j == VEC_length (lambda_vector, dist_vects))
3760         {
3761           fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3762           print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3763           fprintf (file, "not found in Omega dir vectors:\n");
3764           print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3765           fprintf (file, "data dependence relation:\n");
3766           dump_data_dependence_relation (file, ddr);
3767           fprintf (file, ")\n");
3768         }
3769     }
3770
3771   return true;
3772 }
3773
3774 /* This computes the affine dependence relation between A and B with
3775    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
3776    independence between two accesses, while CHREC_DONT_KNOW is used
3777    for representing the unknown relation.
3778
3779    Note that it is possible to stop the computation of the dependence
3780    relation the first time we detect a CHREC_KNOWN element for a given
3781    subscript.  */
3782
3783 static void
3784 compute_affine_dependence (struct data_dependence_relation *ddr,
3785                            struct loop *loop_nest)
3786 {
3787   struct data_reference *dra = DDR_A (ddr);
3788   struct data_reference *drb = DDR_B (ddr);
3789
3790   if (dump_file && (dump_flags & TDF_DETAILS))
3791     {
3792       fprintf (dump_file, "(compute_affine_dependence\n");
3793       fprintf (dump_file, "  (stmt_a = \n");
3794       print_generic_expr (dump_file, DR_STMT (dra), 0);
3795       fprintf (dump_file, ")\n  (stmt_b = \n");
3796       print_generic_expr (dump_file, DR_STMT (drb), 0);
3797       fprintf (dump_file, ")\n");
3798     }
3799
3800   /* Analyze only when the dependence relation is not yet known.  */
3801   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3802     {
3803       dependence_stats.num_dependence_tests++;
3804
3805       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3806           && access_functions_are_affine_or_constant_p (drb, loop_nest))
3807         {
3808           if (flag_check_data_deps)
3809             {
3810               /* Compute the dependences using the first algorithm.  */
3811               subscript_dependence_tester (ddr, loop_nest);
3812
3813               if (dump_file && (dump_flags & TDF_DETAILS))
3814                 {
3815                   fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3816                   dump_data_dependence_relation (dump_file, ddr);
3817                 }
3818
3819               if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3820                 {
3821                   bool maybe_dependent;
3822                   VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3823
3824                   /* Save the result of the first DD analyzer.  */
3825                   dist_vects = DDR_DIST_VECTS (ddr);
3826                   dir_vects = DDR_DIR_VECTS (ddr);
3827
3828                   /* Reset the information.  */
3829                   DDR_DIST_VECTS (ddr) = NULL;
3830                   DDR_DIR_VECTS (ddr) = NULL;
3831
3832                   /* Compute the same information using Omega.  */
3833                   if (!init_omega_for_ddr (ddr, &maybe_dependent))
3834                     goto csys_dont_know;
3835
3836                   if (dump_file && (dump_flags & TDF_DETAILS))
3837                     {
3838                       fprintf (dump_file, "Omega Analyzer\n");
3839                       dump_data_dependence_relation (dump_file, ddr);
3840                     }
3841
3842                   /* Check that we get the same information.  */
3843                   if (maybe_dependent)
3844                     gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3845                                                   dir_vects));
3846                 }
3847             }
3848           else
3849             subscript_dependence_tester (ddr, loop_nest);
3850         }
3851
3852       /* As a last case, if the dependence cannot be determined, or if
3853          the dependence is considered too difficult to determine, answer
3854          "don't know".  */
3855       else
3856         {
3857         csys_dont_know:;
3858           dependence_stats.num_dependence_undetermined++;
3859
3860           if (dump_file && (dump_flags & TDF_DETAILS))
3861             {
3862               fprintf (dump_file, "Data ref a:\n");
3863               dump_data_reference (dump_file, dra);
3864               fprintf (dump_file, "Data ref b:\n");
3865               dump_data_reference (dump_file, drb);
3866               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3867             }
3868           finalize_ddr_dependent (ddr, chrec_dont_know);
3869         }
3870     }
3871
3872   if (dump_file && (dump_flags & TDF_DETAILS))
3873     fprintf (dump_file, ")\n");
3874 }
3875
3876 /* This computes the dependence relation for the same data
3877    reference into DDR.  */
3878
3879 static void
3880 compute_self_dependence (struct data_dependence_relation *ddr)
3881 {
3882   unsigned int i;
3883   struct subscript *subscript;
3884
3885   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3886     return;
3887
3888   for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3889        i++)
3890     {
3891       if (SUB_CONFLICTS_IN_A (subscript))
3892         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3893       if (SUB_CONFLICTS_IN_B (subscript))
3894         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3895
3896       /* The accessed index overlaps for each iteration.  */
3897       SUB_CONFLICTS_IN_A (subscript)
3898         = conflict_fn (1, affine_fn_cst (integer_zero_node));
3899       SUB_CONFLICTS_IN_B (subscript)
3900         = conflict_fn (1, affine_fn_cst (integer_zero_node));
3901       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3902     }
3903
3904   /* The distance vector is the zero vector.  */
3905   save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3906   save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3907 }
3908
3909 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3910    the data references in DATAREFS, in the LOOP_NEST.  When
3911    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3912    relations.  */
3913
3914 void
3915 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3916                          VEC (ddr_p, heap) **dependence_relations,
3917                          VEC (loop_p, heap) *loop_nest,
3918                          bool compute_self_and_rr)
3919 {
3920   struct data_dependence_relation *ddr;
3921   struct data_reference *a, *b;
3922   unsigned int i, j;
3923
3924   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3925     for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3926       if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3927         {
3928           ddr = initialize_data_dependence_relation (a, b, loop_nest);
3929           VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3930           compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3931         }
3932
3933   if (compute_self_and_rr)
3934     for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3935       {
3936         ddr = initialize_data_dependence_relation (a, a, loop_nest);
3937         VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3938         compute_self_dependence (ddr);
3939       }
3940 }
3941
3942 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
3943    true if STMT clobbers memory, false otherwise.  */
3944
3945 bool
3946 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3947 {
3948   bool clobbers_memory = false;
3949   data_ref_loc *ref;
3950   tree *op0, *op1, call;
3951
3952   *references = NULL;
3953
3954   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3955      Calls have side-effects, except those to const or pure
3956      functions.  */
3957   call = get_call_expr_in (stmt);
3958   if ((call
3959        && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3960       || (TREE_CODE (stmt) == ASM_EXPR
3961           && ASM_VOLATILE_P (stmt)))
3962     clobbers_memory = true;
3963
3964   if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3965     return clobbers_memory;
3966
3967   if (TREE_CODE (stmt) ==  GIMPLE_MODIFY_STMT)
3968     {
3969       tree base;
3970       op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3971       op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3972
3973       if (DECL_P (*op1)
3974           || (REFERENCE_CLASS_P (*op1)
3975               && (base = get_base_address (*op1))
3976               && TREE_CODE (base) != SSA_NAME))
3977         {
3978           ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3979           ref->pos = op1;
3980           ref->is_read = true;
3981         }
3982
3983       if (DECL_P (*op0)
3984           || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3985         {
3986           ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3987           ref->pos = op0;
3988           ref->is_read = false;
3989         }
3990     }
3991
3992   if (call)
3993     {
3994       unsigned i, n = call_expr_nargs (call);
3995
3996       for (i = 0; i < n; i++)
3997         {
3998           op0 = &CALL_EXPR_ARG (call, i);
3999
4000           if (DECL_P (*op0)
4001               || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4002             {
4003               ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4004               ref->pos = op0;
4005               ref->is_read = true;
4006             }
4007         }
4008     }
4009
4010   return clobbers_memory;
4011 }
4012
4013 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4014    reference, returns false, otherwise returns true.  NEST is the outermost
4015    loop of the loop nest in that the references should be analyzed.  */
4016
4017 static bool
4018 find_data_references_in_stmt (struct loop *nest, tree stmt,
4019                               VEC (data_reference_p, heap) **datarefs)
4020 {
4021   unsigned i;
4022   VEC (data_ref_loc, heap) *references;
4023   data_ref_loc *ref;
4024   bool ret = true;
4025   data_reference_p dr;
4026
4027   if (get_references_in_stmt (stmt, &references))
4028     {
4029       VEC_free (data_ref_loc, heap, references);
4030       return false;
4031     }
4032
4033   for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4034     {
4035       dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4036       gcc_assert (dr != NULL);
4037
4038       /* FIXME -- data dependence analysis does not work correctly for objects with
4039          invariant addresses.  Let us fail here until the problem is fixed.  */
4040       if (dr_address_invariant_p (dr))
4041         {
4042           free_data_ref (dr);
4043           if (dump_file && (dump_flags & TDF_DETAILS))
4044             fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4045           ret = false;
4046           break;
4047         }
4048
4049       VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4050     }
4051   VEC_free (data_ref_loc, heap, references);
4052   return ret;
4053 }
4054
4055 /* Search the data references in LOOP, and record the information into
4056    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4057    difficult case, returns NULL_TREE otherwise.
4058
4059    TODO: This function should be made smarter so that it can handle address
4060    arithmetic as if they were array accesses, etc.  */
4061
4062 static tree
4063 find_data_references_in_loop (struct loop *loop,
4064                               VEC (data_reference_p, heap) **datarefs)
4065 {
4066   basic_block bb, *bbs;
4067   unsigned int i;
4068   block_stmt_iterator bsi;
4069
4070   bbs = get_loop_body_in_dom_order (loop);
4071
4072   for (i = 0; i < loop->num_nodes; i++)
4073     {
4074       bb = bbs[i];
4075
4076       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4077         {
4078           tree stmt = bsi_stmt (bsi);
4079
4080           if (!find_data_references_in_stmt (loop, stmt, datarefs))
4081             {
4082               struct data_reference *res;
4083               res = XCNEW (struct data_reference);
4084               VEC_safe_push (data_reference_p, heap, *datarefs, res);
4085
4086               free (bbs);
4087               return chrec_dont_know;
4088             }
4089         }
4090     }
4091   free (bbs);
4092
4093   return NULL_TREE;
4094 }
4095
4096 /* Recursive helper function.  */
4097
4098 static bool
4099 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4100 {
4101   /* Inner loops of the nest should not contain siblings.  Example:
4102      when there are two consecutive loops,
4103
4104      | loop_0
4105      |   loop_1
4106      |     A[{0, +, 1}_1]
4107      |   endloop_1
4108      |   loop_2
4109      |     A[{0, +, 1}_2]
4110      |   endloop_2
4111      | endloop_0
4112
4113      the dependence relation cannot be captured by the distance
4114      abstraction.  */
4115   if (loop->next)
4116     return false;
4117
4118   VEC_safe_push (loop_p, heap, *loop_nest, loop);
4119   if (loop->inner)
4120     return find_loop_nest_1 (loop->inner, loop_nest);
4121   return true;
4122 }
4123
4124 /* Return false when the LOOP is not well nested.  Otherwise return
4125    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4126    contain the loops from the outermost to the innermost, as they will
4127    appear in the classic distance vector.  */
4128
4129 bool
4130 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4131 {
4132   VEC_safe_push (loop_p, heap, *loop_nest, loop);
4133   if (loop->inner)
4134     return find_loop_nest_1 (loop->inner, loop_nest);
4135   return true;
4136 }
4137
4138 /* Given a loop nest LOOP, the following vectors are returned:
4139    DATAREFS is initialized to all the array elements contained in this loop,
4140    DEPENDENCE_RELATIONS contains the relations between the data references.
4141    Compute read-read and self relations if
4142    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4143
4144 void
4145 compute_data_dependences_for_loop (struct loop *loop,
4146                                    bool compute_self_and_read_read_dependences,
4147                                    VEC (data_reference_p, heap) **datarefs,
4148                                    VEC (ddr_p, heap) **dependence_relations)
4149 {
4150   VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4151
4152   memset (&dependence_stats, 0, sizeof (dependence_stats));
4153
4154   /* If the loop nest is not well formed, or one of the data references
4155      is not computable, give up without spending time to compute other
4156      dependences.  */
4157   if (!loop
4158       || !find_loop_nest (loop, &vloops)
4159       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4160     {
4161       struct data_dependence_relation *ddr;
4162
4163       /* Insert a single relation into dependence_relations:
4164          chrec_dont_know.  */
4165       ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4166       VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4167     }
4168   else
4169     compute_all_dependences (*datarefs, dependence_relations, vloops,
4170                              compute_self_and_read_read_dependences);
4171
4172   if (dump_file && (dump_flags & TDF_STATS))
4173     {
4174       fprintf (dump_file, "Dependence tester statistics:\n");
4175
4176       fprintf (dump_file, "Number of dependence tests: %d\n",
4177                dependence_stats.num_dependence_tests);
4178       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4179                dependence_stats.num_dependence_dependent);
4180       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4181                dependence_stats.num_dependence_independent);
4182       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4183                dependence_stats.num_dependence_undetermined);
4184
4185       fprintf (dump_file, "Number of subscript tests: %d\n",
4186                dependence_stats.num_subscript_tests);
4187       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4188                dependence_stats.num_subscript_undetermined);
4189       fprintf (dump_file, "Number of same subscript function: %d\n",
4190                dependence_stats.num_same_subscript_function);
4191
4192       fprintf (dump_file, "Number of ziv tests: %d\n",
4193                dependence_stats.num_ziv);
4194       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4195                dependence_stats.num_ziv_dependent);
4196       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4197                dependence_stats.num_ziv_independent);
4198       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4199                dependence_stats.num_ziv_unimplemented);
4200
4201       fprintf (dump_file, "Number of siv tests: %d\n",
4202                dependence_stats.num_siv);
4203       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4204                dependence_stats.num_siv_dependent);
4205       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4206                dependence_stats.num_siv_independent);
4207       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4208                dependence_stats.num_siv_unimplemented);
4209
4210       fprintf (dump_file, "Number of miv tests: %d\n",
4211                dependence_stats.num_miv);
4212       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4213                dependence_stats.num_miv_dependent);
4214       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4215                dependence_stats.num_miv_independent);
4216       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4217                dependence_stats.num_miv_unimplemented);
4218     }
4219 }
4220
4221 /* Entry point (for testing only).  Analyze all the data references
4222    and the dependence relations in LOOP.
4223
4224    The data references are computed first.
4225
4226    A relation on these nodes is represented by a complete graph.  Some
4227    of the relations could be of no interest, thus the relations can be
4228    computed on demand.
4229
4230    In the following function we compute all the relations.  This is
4231    just a first implementation that is here for:
4232    - for showing how to ask for the dependence relations,
4233    - for the debugging the whole dependence graph,
4234    - for the dejagnu testcases and maintenance.
4235
4236    It is possible to ask only for a part of the graph, avoiding to
4237    compute the whole dependence graph.  The computed dependences are
4238    stored in a knowledge base (KB) such that later queries don't
4239    recompute the same information.  The implementation of this KB is
4240    transparent to the optimizer, and thus the KB can be changed with a
4241    more efficient implementation, or the KB could be disabled.  */
4242 static void
4243 analyze_all_data_dependences (struct loop *loop)
4244 {
4245   unsigned int i;
4246   int nb_data_refs = 10;
4247   VEC (data_reference_p, heap) *datarefs =
4248     VEC_alloc (data_reference_p, heap, nb_data_refs);
4249   VEC (ddr_p, heap) *dependence_relations =
4250     VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4251
4252   /* Compute DDs on the whole function.  */
4253   compute_data_dependences_for_loop (loop, false, &datarefs,
4254                                      &dependence_relations);
4255
4256   if (dump_file)
4257     {
4258       dump_data_dependence_relations (dump_file, dependence_relations);
4259       fprintf (dump_file, "\n\n");
4260
4261       if (dump_flags & TDF_DETAILS)
4262         dump_dist_dir_vectors (dump_file, dependence_relations);
4263
4264       if (dump_flags & TDF_STATS)
4265         {
4266           unsigned nb_top_relations = 0;
4267           unsigned nb_bot_relations = 0;
4268           unsigned nb_basename_differ = 0;
4269           unsigned nb_chrec_relations = 0;
4270           struct data_dependence_relation *ddr;
4271
4272           for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4273             {
4274               if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4275                 nb_top_relations++;
4276
4277               else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4278                 {
4279                   struct data_reference *a = DDR_A (ddr);
4280                   struct data_reference *b = DDR_B (ddr);
4281
4282                   if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4283                     nb_basename_differ++;
4284                   else
4285                     nb_bot_relations++;
4286                 }
4287
4288               else
4289                 nb_chrec_relations++;
4290             }
4291
4292           gather_stats_on_scev_database ();
4293         }
4294     }
4295
4296   free_dependence_relations (dependence_relations);
4297   free_data_refs (datarefs);
4298 }
4299
4300 /* Computes all the data dependences and check that the results of
4301    several analyzers are the same.  */
4302
4303 void
4304 tree_check_data_deps (void)
4305 {
4306   loop_iterator li;
4307   struct loop *loop_nest;
4308
4309   FOR_EACH_LOOP (li, loop_nest, 0)
4310     analyze_all_data_dependences (loop_nest);
4311 }
4312
4313 /* Free the memory used by a data dependence relation DDR.  */
4314
4315 void
4316 free_dependence_relation (struct data_dependence_relation *ddr)
4317 {
4318   if (ddr == NULL)
4319     return;
4320
4321   if (DDR_SUBSCRIPTS (ddr))
4322     free_subscripts (DDR_SUBSCRIPTS (ddr));
4323   if (DDR_DIST_VECTS (ddr))
4324     VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4325   if (DDR_DIR_VECTS (ddr))
4326     VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4327
4328   free (ddr);
4329 }
4330
4331 /* Free the memory used by the data dependence relations from
4332    DEPENDENCE_RELATIONS.  */
4333
4334 void
4335 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4336 {
4337   unsigned int i;
4338   struct data_dependence_relation *ddr;
4339   VEC (loop_p, heap) *loop_nest = NULL;
4340
4341   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4342     {
4343       if (ddr == NULL)
4344         continue;
4345       if (loop_nest == NULL)
4346         loop_nest = DDR_LOOP_NEST (ddr);
4347       else
4348         gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4349                     || DDR_LOOP_NEST (ddr) == loop_nest);
4350       free_dependence_relation (ddr);
4351     }
4352
4353   if (loop_nest)
4354     VEC_free (loop_p, heap, loop_nest);
4355   VEC_free (ddr_p, heap, dependence_relations);
4356 }
4357
4358 /* Free the memory used by the data references from DATAREFS.  */
4359
4360 void
4361 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4362 {
4363   unsigned int i;
4364   struct data_reference *dr;
4365
4366   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4367     free_data_ref (dr);
4368   VEC_free (data_reference_p, heap, datarefs);
4369 }
4370
4371 \f
4372
4373 /* Dump vertex I in RDG to FILE.  */
4374
4375 void
4376 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4377 {
4378   struct vertex *v = &(rdg->vertices[i]);
4379   struct graph_edge *e;
4380
4381   fprintf (file, "(vertex %d: (%s%s) (in:", i,
4382            RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4383            RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4384
4385   if (v->pred)
4386     for (e = v->pred; e; e = e->pred_next)
4387       fprintf (file, " %d", e->src);
4388
4389   fprintf (file, ") (out:");
4390
4391   if (v->succ)
4392     for (e = v->succ; e; e = e->succ_next)
4393       fprintf (file, " %d", e->dest);
4394
4395   fprintf (file, ") \n");
4396   print_generic_stmt (file, RDGV_STMT (v), TDF_VOPS|TDF_MEMSYMS);
4397   fprintf (file, ")\n");
4398 }
4399
4400 /* Call dump_rdg_vertex on stderr.  */
4401
4402 void
4403 debug_rdg_vertex (struct graph *rdg, int i)
4404 {
4405   dump_rdg_vertex (stderr, rdg, i);
4406 }
4407
4408 /* Dump component C of RDG to FILE.  If DUMPED is non-null, set the
4409    dumped vertices to that bitmap.  */
4410
4411 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4412 {
4413   int i;
4414
4415   fprintf (file, "(%d\n", c);
4416
4417   for (i = 0; i < rdg->n_vertices; i++)
4418     if (rdg->vertices[i].component == c)
4419       {
4420         if (dumped)
4421           bitmap_set_bit (dumped, i);
4422
4423         dump_rdg_vertex (file, rdg, i);
4424       }
4425
4426   fprintf (file, ")\n");
4427 }
4428
4429 /* Call dump_rdg_vertex on stderr.  */
4430
4431 void
4432 debug_rdg_component (struct graph *rdg, int c)
4433 {
4434   dump_rdg_component (stderr, rdg, c, NULL);
4435 }
4436
4437 /* Dump the reduced dependence graph RDG to FILE.  */
4438
4439 void
4440 dump_rdg (FILE *file, struct graph *rdg)
4441 {
4442   int i;
4443   bitmap dumped = BITMAP_ALLOC (NULL);
4444
4445   fprintf (file, "(rdg\n");
4446
4447   for (i = 0; i < rdg->n_vertices; i++)
4448     if (!bitmap_bit_p (dumped, i))
4449       dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4450
4451   fprintf (file, ")\n");
4452   BITMAP_FREE (dumped);
4453 }
4454
4455 /* Call dump_rdg on stderr.  */
4456
4457 void
4458 debug_rdg (struct graph *rdg)
4459 {
4460   dump_rdg (stderr, rdg);
4461 }
4462
4463 static void
4464 dot_rdg_1 (FILE *file, struct graph *rdg)
4465 {
4466   int i;
4467
4468   fprintf (file, "digraph RDG {\n");
4469
4470   for (i = 0; i < rdg->n_vertices; i++)
4471     {
4472       struct vertex *v = &(rdg->vertices[i]);
4473       struct graph_edge *e;
4474
4475       /* Highlight reads from memory.  */
4476       if (RDG_MEM_READS_STMT (rdg, i))
4477         fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4478
4479       /* Highlight stores to memory.  */
4480       if (RDG_MEM_WRITE_STMT (rdg, i))
4481         fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4482
4483       if (v->succ)
4484         for (e = v->succ; e; e = e->succ_next)
4485           switch (RDGE_TYPE (e))
4486             {
4487             case input_dd:
4488               fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4489               break;
4490
4491             case output_dd:
4492               fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4493               break;
4494
4495             case flow_dd:
4496               /* These are the most common dependences: don't print these. */
4497               fprintf (file, "%d -> %d \n", i, e->dest);
4498               break;
4499
4500             case anti_dd:
4501               fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4502               break;
4503
4504             default:
4505               gcc_unreachable ();
4506             }
4507     }
4508
4509   fprintf (file, "}\n\n");
4510 }
4511
4512 /* Display SCOP using dotty.  */
4513
4514 void
4515 dot_rdg (struct graph *rdg)
4516 {
4517   FILE *file = fopen ("/tmp/rdg.dot", "w");
4518   gcc_assert (file != NULL);
4519
4520   dot_rdg_1 (file, rdg);
4521   fclose (file);
4522
4523   system ("dotty /tmp/rdg.dot");
4524 }
4525
4526
4527 /* This structure is used for recording the mapping statement index in
4528    the RDG.  */
4529
4530 struct rdg_vertex_info GTY(())
4531 {
4532   tree stmt;
4533   int index;
4534 };
4535
4536 /* Returns the index of STMT in RDG.  */
4537
4538 int
4539 rdg_vertex_for_stmt (struct graph *rdg, tree stmt)
4540 {
4541   struct rdg_vertex_info rvi, *slot;
4542
4543   rvi.stmt = stmt;
4544   slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4545
4546   if (!slot)
4547     return -1;
4548
4549   return slot->index;
4550 }
4551
4552 /* Creates an edge in RDG for each distance vector from DDR.  The
4553    order that we keep track of in the RDG is the order in which
4554    statements have to be executed.  */
4555
4556 static void
4557 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4558 {
4559   struct graph_edge *e;
4560   int va, vb;
4561   data_reference_p dra = DDR_A (ddr);
4562   data_reference_p drb = DDR_B (ddr);
4563   unsigned level = ddr_dependence_level (ddr);
4564
4565   /* For non scalar dependences, when the dependence is REVERSED,
4566      statement B has to be executed before statement A.  */
4567   if (level > 0
4568       && !DDR_REVERSED_P (ddr))
4569     {
4570       data_reference_p tmp = dra;
4571       dra = drb;
4572       drb = tmp;
4573     }
4574
4575   va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4576   vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4577
4578   if (va < 0 || vb < 0)
4579     return;
4580
4581   e = add_edge (rdg, va, vb);
4582   e->data = XNEW (struct rdg_edge);
4583
4584   RDGE_LEVEL (e) = level;
4585
4586   /* Determines the type of the data dependence.  */
4587   if (DR_IS_READ (dra) && DR_IS_READ (drb))
4588     RDGE_TYPE (e) = input_dd;
4589   else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4590     RDGE_TYPE (e) = output_dd;
4591   else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4592     RDGE_TYPE (e) = flow_dd;
4593   else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4594     RDGE_TYPE (e) = anti_dd;
4595 }
4596
4597 /* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
4598    the index of DEF in RDG.  */
4599
4600 static void
4601 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4602 {
4603   use_operand_p imm_use_p;
4604   imm_use_iterator iterator;
4605
4606   FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4607     {
4608       struct graph_edge *e;
4609       int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4610
4611       if (use < 0)
4612         continue;
4613
4614       e = add_edge (rdg, idef, use);
4615       e->data = XNEW (struct rdg_edge);
4616       RDGE_TYPE (e) = flow_dd;
4617     }
4618 }
4619
4620 /* Creates the edges of the reduced dependence graph RDG.  */
4621
4622 static void
4623 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4624 {
4625   int i;
4626   struct data_dependence_relation *ddr;
4627   def_operand_p def_p;
4628   ssa_op_iter iter;
4629
4630   for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4631     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4632       create_rdg_edge_for_ddr (rdg, ddr);
4633
4634   for (i = 0; i < rdg->n_vertices; i++)
4635     FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4636                               iter, SSA_OP_DEF)
4637       create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4638 }
4639
4640 /* Build the vertices of the reduced dependence graph RDG.  */
4641
4642 static void
4643 create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
4644 {
4645   int i, j;
4646   tree stmt;
4647
4648   for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
4649     {
4650       VEC (data_ref_loc, heap) *references;
4651       data_ref_loc *ref;
4652       struct vertex *v = &(rdg->vertices[i]);
4653       struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4654       struct rdg_vertex_info **slot;
4655
4656       rvi->stmt = stmt;
4657       rvi->index = i;
4658       slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4659
4660       if (!*slot)
4661         *slot = rvi;
4662       else
4663         free (rvi);
4664
4665       v->data = XNEW (struct rdg_vertex);
4666       RDG_STMT (rdg, i) = stmt;
4667
4668       RDG_MEM_WRITE_STMT (rdg, i) = false;
4669       RDG_MEM_READS_STMT (rdg, i) = false;
4670       if (TREE_CODE (stmt) == PHI_NODE)
4671         continue;
4672
4673       get_references_in_stmt (stmt, &references);
4674       for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
4675         if (!ref->is_read)
4676           RDG_MEM_WRITE_STMT (rdg, i) = true;
4677         else
4678           RDG_MEM_READS_STMT (rdg, i) = true;
4679
4680       VEC_free (data_ref_loc, heap, references);
4681     }
4682 }
4683
4684 /* Initialize STMTS with all the statements of LOOP.  When
4685    INCLUDE_PHIS is true, include also the PHI nodes.  The order in
4686    which we discover statements is important as
4687    generate_loops_for_partition is using the same traversal for
4688    identifying statements. */
4689
4690 static void
4691 stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4692 {
4693   unsigned int i;
4694   basic_block *bbs = get_loop_body_in_dom_order (loop);
4695
4696   for (i = 0; i < loop->num_nodes; i++)
4697     {
4698       tree phi, stmt;
4699       basic_block bb = bbs[i];
4700       block_stmt_iterator bsi;
4701
4702       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4703         VEC_safe_push (tree, heap, *stmts, phi);
4704
4705       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4706         if (TREE_CODE (stmt = bsi_stmt (bsi)) != LABEL_EXPR)
4707           VEC_safe_push (tree, heap, *stmts, stmt);
4708     }
4709
4710   free (bbs);
4711 }
4712
4713 /* Returns true when all the dependences are computable.  */
4714
4715 static bool
4716 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4717 {
4718   ddr_p ddr;
4719   unsigned int i;
4720
4721   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4722     if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4723       return false;
4724
4725   return true;
4726 }
4727
4728 /* Computes a hash function for element ELT.  */
4729
4730 static hashval_t
4731 hash_stmt_vertex_info (const void *elt)
4732 {
4733   struct rdg_vertex_info *rvi = (struct rdg_vertex_info *) elt;
4734   tree stmt = rvi->stmt;
4735
4736   return htab_hash_pointer (stmt);
4737 }
4738
4739 /* Compares database elements E1 and E2.  */
4740
4741 static int
4742 eq_stmt_vertex_info (const void *e1, const void *e2)
4743 {
4744   const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4745   const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4746
4747   return elt1->stmt == elt2->stmt;
4748 }
4749
4750 /* Free the element E.  */
4751
4752 static void
4753 hash_stmt_vertex_del (void *e)
4754 {
4755   free (e);
4756 }
4757
4758 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4759    statement of the loop nest, and one edge per data dependence or
4760    scalar dependence.  */
4761
4762 struct graph *
4763 build_rdg (struct loop *loop)
4764 {
4765   int nb_data_refs = 10;
4766   struct graph *rdg = NULL;
4767   VEC (ddr_p, heap) *dependence_relations;
4768   VEC (data_reference_p, heap) *datarefs;
4769   VEC (tree, heap) *stmts = VEC_alloc (tree, heap, nb_data_refs);
4770
4771   dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4772   datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4773   compute_data_dependences_for_loop (loop,
4774                                      false,
4775                                      &datarefs,
4776                                      &dependence_relations);
4777
4778   if (!known_dependences_p (dependence_relations))
4779     goto end_rdg;
4780
4781   stmts_from_loop (loop, &stmts);
4782   rdg = new_graph (VEC_length (tree, stmts));
4783
4784   rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4785                               eq_stmt_vertex_info, hash_stmt_vertex_del);
4786   create_rdg_vertices (rdg, stmts);
4787   create_rdg_edges (rdg, dependence_relations);
4788
4789  end_rdg:
4790   free_dependence_relations (dependence_relations);
4791   free_data_refs (datarefs);
4792   VEC_free (tree, heap, stmts);
4793
4794   return rdg;
4795 }
4796
4797 /* Free the reduced dependence graph RDG.  */
4798
4799 void
4800 free_rdg (struct graph *rdg)
4801 {
4802   int i;
4803
4804   for (i = 0; i < rdg->n_vertices; i++)
4805     free (rdg->vertices[i].data);
4806
4807   htab_delete (rdg->indices);
4808   free_graph (rdg);
4809 }
4810
4811 /* Initialize STMTS with all the statements of LOOP that contain a
4812    store to memory.  */
4813
4814 void
4815 stores_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4816 {
4817   unsigned int i;
4818   basic_block *bbs = get_loop_body_in_dom_order (loop);
4819
4820   for (i = 0; i < loop->num_nodes; i++)
4821     {
4822       basic_block bb = bbs[i];
4823       block_stmt_iterator bsi;
4824
4825       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4826         if (!ZERO_SSA_OPERANDS (bsi_stmt (bsi), SSA_OP_VDEF))
4827           VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
4828     }
4829
4830   free (bbs);
4831 }
4832
4833 /* For a data reference REF, return the declaration of its base
4834    address or NULL_TREE if the base is not determined.  */
4835
4836 static inline tree
4837 ref_base_address (tree stmt, data_ref_loc *ref)
4838 {
4839   tree base = NULL_TREE;
4840   tree base_address;
4841   struct data_reference *dr = XCNEW (struct data_reference);
4842
4843   DR_STMT (dr) = stmt;
4844   DR_REF (dr) = *ref->pos;
4845   dr_analyze_innermost (dr);
4846   base_address = DR_BASE_ADDRESS (dr);
4847
4848   if (!base_address)
4849     goto end;
4850
4851   switch (TREE_CODE (base_address))
4852     {
4853     case ADDR_EXPR:
4854       base = TREE_OPERAND (base_address, 0);
4855       break;
4856
4857     default:
4858       base = base_address;
4859       break;
4860     }
4861
4862  end:
4863   free_data_ref (dr);
4864   return base;
4865 }
4866
4867 /* Determines whether the statement from vertex V of the RDG has a
4868    definition used outside the loop that contains this statement.  */
4869
4870 bool
4871 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
4872 {
4873   tree stmt = RDG_STMT (rdg, v);
4874   struct loop *loop = loop_containing_stmt (stmt);
4875   use_operand_p imm_use_p;
4876   imm_use_iterator iterator;
4877   ssa_op_iter it;
4878   def_operand_p def_p;
4879
4880   if (!loop)
4881     return true;
4882
4883   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
4884     {
4885       FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
4886         {
4887           if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
4888             return true;
4889         }
4890     }
4891
4892   return false;
4893 }
4894
4895 /* Determines whether statements S1 and S2 access to similar memory
4896    locations.  Two memory accesses are considered similar when they
4897    have the same base address declaration, i.e. when their
4898    ref_base_address is the same.  */
4899
4900 bool
4901 have_similar_memory_accesses (tree s1, tree s2)
4902 {
4903   bool res = false;
4904   unsigned i, j;
4905   VEC (data_ref_loc, heap) *refs1, *refs2;
4906   data_ref_loc *ref1, *ref2;
4907
4908   get_references_in_stmt (s1, &refs1);
4909   get_references_in_stmt (s2, &refs2);
4910
4911   for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
4912     {
4913       tree base1 = ref_base_address (s1, ref1);
4914
4915       if (base1)
4916         for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
4917           if (base1 == ref_base_address (s2, ref2))
4918             {
4919               res = true;
4920               goto end;
4921             }
4922     }
4923
4924  end:
4925   VEC_free (data_ref_loc, heap, refs1);
4926   VEC_free (data_ref_loc, heap, refs2);
4927   return res;
4928 }
4929
4930 /* Helper function for the hashtab.  */
4931
4932 static int
4933 have_similar_memory_accesses_1 (const void *s1, const void *s2)
4934 {
4935   return have_similar_memory_accesses ((tree) s1, (tree) s2);
4936 }
4937
4938 /* Helper function for the hashtab.  */
4939
4940 static hashval_t
4941 ref_base_address_1 (const void *s)
4942 {
4943   tree stmt = (tree) s;
4944   unsigned i;
4945   VEC (data_ref_loc, heap) *refs;
4946   data_ref_loc *ref;
4947   hashval_t res = 0;
4948
4949   get_references_in_stmt (stmt, &refs);
4950
4951   for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
4952     if (!ref->is_read)
4953       {
4954         res = htab_hash_pointer (ref_base_address (stmt, ref));
4955         break;
4956       }
4957
4958   VEC_free (data_ref_loc, heap, refs);
4959   return res;
4960 }
4961
4962 /* Try to remove duplicated write data references from STMTS.  */
4963
4964 void
4965 remove_similar_memory_refs (VEC (tree, heap) **stmts)
4966 {
4967   unsigned i;
4968   tree stmt;
4969   htab_t seen = htab_create (VEC_length (tree, *stmts), ref_base_address_1,
4970                              have_similar_memory_accesses_1, NULL);
4971
4972   for (i = 0; VEC_iterate (tree, *stmts, i, stmt); )
4973     {
4974       void **slot;
4975
4976       slot = htab_find_slot (seen, stmt, INSERT);
4977
4978       if (*slot)
4979         VEC_ordered_remove (tree, *stmts, i);
4980       else
4981         {
4982           *slot = (void *) stmt;
4983           i++;
4984         }
4985     }
4986
4987   htab_delete (seen);
4988 }
4989