1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
78 #include "coretypes.h"
83 /* These RTL headers are needed for basic-block.h. */
85 #include "basic-block.h"
86 #include "diagnostic.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
91 #include "tree-chrec.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
128 /* Returns true iff A divides B. */
131 tree_fold_divides_p (tree a, tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into FILE all the dependence relations from DDRS. */
163 dump_data_dependence_relations (FILE *file,
164 VEC (ddr_p, heap) *ddrs)
167 struct data_dependence_relation *ddr;
169 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
170 dump_data_dependence_relation (file, ddr);
173 /* Dump function for a DATA_REFERENCE structure. */
176 dump_data_reference (FILE *outf,
177 struct data_reference *dr)
181 fprintf (outf, "(Data Ref: \n stmt: ");
182 print_generic_stmt (outf, DR_STMT (dr), 0);
183 fprintf (outf, " ref: ");
184 print_generic_stmt (outf, DR_REF (dr), 0);
185 fprintf (outf, " base_object: ");
186 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
190 fprintf (outf, " Access function %d: ", i);
191 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
193 fprintf (outf, ")\n");
196 /* Dumps the affine function described by FN to the file OUTF. */
199 dump_affine_function (FILE *outf, affine_fn fn)
204 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
205 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
207 fprintf (outf, " + ");
208 print_generic_expr (outf, coef, TDF_SLIM);
209 fprintf (outf, " * x_%u", i);
213 /* Dumps the conflict function CF to the file OUTF. */
216 dump_conflict_function (FILE *outf, conflict_function *cf)
220 if (cf->n == NO_DEPENDENCE)
221 fprintf (outf, "no dependence\n");
222 else if (cf->n == NOT_KNOWN)
223 fprintf (outf, "not known\n");
226 for (i = 0; i < cf->n; i++)
229 dump_affine_function (outf, cf->fns[i]);
230 fprintf (outf, "]\n");
235 /* Dump function for a SUBSCRIPT structure. */
238 dump_subscript (FILE *outf, struct subscript *subscript)
240 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
242 fprintf (outf, "\n (subscript \n");
243 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
244 dump_conflict_function (outf, cf);
245 if (CF_NONTRIVIAL_P (cf))
247 tree last_iteration = SUB_LAST_CONFLICT (subscript);
248 fprintf (outf, " last_conflict: ");
249 print_generic_stmt (outf, last_iteration, 0);
252 cf = SUB_CONFLICTS_IN_B (subscript);
253 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
254 dump_conflict_function (outf, cf);
255 if (CF_NONTRIVIAL_P (cf))
257 tree last_iteration = SUB_LAST_CONFLICT (subscript);
258 fprintf (outf, " last_conflict: ");
259 print_generic_stmt (outf, last_iteration, 0);
262 fprintf (outf, " (Subscript distance: ");
263 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
264 fprintf (outf, " )\n");
265 fprintf (outf, " )\n");
268 /* Print the classic direction vector DIRV to OUTF. */
271 print_direction_vector (FILE *outf,
277 for (eq = 0; eq < length; eq++)
279 enum data_dependence_direction dir = dirv[eq];
284 fprintf (outf, " +");
287 fprintf (outf, " -");
290 fprintf (outf, " =");
292 case dir_positive_or_equal:
293 fprintf (outf, " +=");
295 case dir_positive_or_negative:
296 fprintf (outf, " +-");
298 case dir_negative_or_equal:
299 fprintf (outf, " -=");
302 fprintf (outf, " *");
305 fprintf (outf, "indep");
309 fprintf (outf, "\n");
312 /* Print a vector of direction vectors. */
315 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
321 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
322 print_direction_vector (outf, v, length);
325 /* Print a vector of distance vectors. */
328 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
334 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
335 print_lambda_vector (outf, v, length);
341 debug_data_dependence_relation (struct data_dependence_relation *ddr)
343 dump_data_dependence_relation (stderr, ddr);
346 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
349 dump_data_dependence_relation (FILE *outf,
350 struct data_dependence_relation *ddr)
352 struct data_reference *dra, *drb;
356 fprintf (outf, "(Data Dep: \n");
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 fprintf (outf, " (don't know)\n");
360 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
361 fprintf (outf, " (no dependence)\n");
363 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
368 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
370 fprintf (outf, " access_fn_A: ");
371 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
372 fprintf (outf, " access_fn_B: ");
373 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
374 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
377 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
378 fprintf (outf, " loop nest: (");
379 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
380 fprintf (outf, "%d ", loopi->num);
381 fprintf (outf, ")\n");
383 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
385 fprintf (outf, " distance_vector: ");
386 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
390 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
392 fprintf (outf, " direction_vector: ");
393 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
398 fprintf (outf, ")\n");
401 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
404 dump_data_dependence_direction (FILE *file,
405 enum data_dependence_direction dir)
421 case dir_positive_or_negative:
422 fprintf (file, "+-");
425 case dir_positive_or_equal:
426 fprintf (file, "+=");
429 case dir_negative_or_equal:
430 fprintf (file, "-=");
442 /* Dumps the distance and direction vectors in FILE. DDRS contains
443 the dependence relations, and VECT_SIZE is the size of the
444 dependence vectors, or in other words the number of loops in the
448 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
451 struct data_dependence_relation *ddr;
454 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
455 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
457 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
459 fprintf (file, "DISTANCE_V (");
460 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
461 fprintf (file, ")\n");
464 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
466 fprintf (file, "DIRECTION_V (");
467 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
468 fprintf (file, ")\n");
472 fprintf (file, "\n\n");
475 /* Dumps the data dependence relations DDRS in FILE. */
478 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
481 struct data_dependence_relation *ddr;
483 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
484 dump_data_dependence_relation (file, ddr);
486 fprintf (file, "\n\n");
489 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
490 will be ssizetype. */
493 split_constant_offset (tree exp, tree *var, tree *off)
495 tree type = TREE_TYPE (exp), otype;
502 otype = TREE_TYPE (exp);
503 code = TREE_CODE (exp);
508 *var = build_int_cst (type, 0);
509 *off = fold_convert (ssizetype, exp);
512 case POINTER_PLUS_EXPR:
517 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
518 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
519 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
521 *off = size_binop (code, off0, off1);
525 off1 = TREE_OPERAND (exp, 1);
526 if (TREE_CODE (off1) != INTEGER_CST)
529 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
530 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
532 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
537 tree op, base, poffset;
538 HOST_WIDE_INT pbitsize, pbitpos;
539 enum machine_mode pmode;
540 int punsignedp, pvolatilep;
542 op = TREE_OPERAND (exp, 0);
543 if (!handled_component_p (op))
546 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
547 &pmode, &punsignedp, &pvolatilep, false);
549 if (pbitpos % BITS_PER_UNIT != 0)
551 base = build_fold_addr_expr (base);
552 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
556 split_constant_offset (poffset, &poffset, &off1);
557 off0 = size_binop (PLUS_EXPR, off0, off1);
558 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base),
560 fold_convert (TREE_TYPE (base), poffset));
563 *var = fold_convert (type, base);
572 *off = ssize_int (0);
575 /* Returns the address ADDR of an object in a canonical shape (without nop
576 casts, and with type of pointer to the object). */
579 canonicalize_base_object_address (tree addr)
585 /* The base address may be obtained by casting from integer, in that case
587 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
590 if (TREE_CODE (addr) != ADDR_EXPR)
593 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
596 /* Analyzes the behavior of the memory reference DR in the innermost loop that
600 dr_analyze_innermost (struct data_reference *dr)
602 tree stmt = DR_STMT (dr);
603 struct loop *loop = loop_containing_stmt (stmt);
604 tree ref = DR_REF (dr);
605 HOST_WIDE_INT pbitsize, pbitpos;
607 enum machine_mode pmode;
608 int punsignedp, pvolatilep;
609 affine_iv base_iv, offset_iv;
610 tree init, dinit, step;
612 if (dump_file && (dump_flags & TDF_DETAILS))
613 fprintf (dump_file, "analyze_innermost: ");
615 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
616 &pmode, &punsignedp, &pvolatilep, false);
617 gcc_assert (base != NULL_TREE);
619 if (pbitpos % BITS_PER_UNIT != 0)
621 if (dump_file && (dump_flags & TDF_DETAILS))
622 fprintf (dump_file, "failed: bit offset alignment.\n");
626 base = build_fold_addr_expr (base);
627 if (!simple_iv (loop, stmt, base, &base_iv, false))
629 if (dump_file && (dump_flags & TDF_DETAILS))
630 fprintf (dump_file, "failed: evolution of base is not affine.\n");
635 offset_iv.base = ssize_int (0);
636 offset_iv.step = ssize_int (0);
638 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
640 if (dump_file && (dump_flags & TDF_DETAILS))
641 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
645 init = ssize_int (pbitpos / BITS_PER_UNIT);
646 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
647 init = size_binop (PLUS_EXPR, init, dinit);
648 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
649 init = size_binop (PLUS_EXPR, init, dinit);
651 step = size_binop (PLUS_EXPR,
652 fold_convert (ssizetype, base_iv.step),
653 fold_convert (ssizetype, offset_iv.step));
655 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
657 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
661 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
663 if (dump_file && (dump_flags & TDF_DETAILS))
664 fprintf (dump_file, "success.\n");
667 /* Determines the base object and the list of indices of memory reference
668 DR, analyzed in loop nest NEST. */
671 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
673 tree stmt = DR_STMT (dr);
674 struct loop *loop = loop_containing_stmt (stmt);
675 VEC (tree, heap) *access_fns = NULL;
676 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
677 tree base, off, access_fn;
679 while (handled_component_p (aref))
681 if (TREE_CODE (aref) == ARRAY_REF)
683 op = TREE_OPERAND (aref, 1);
684 access_fn = analyze_scalar_evolution (loop, op);
685 access_fn = resolve_mixers (nest, access_fn);
686 VEC_safe_push (tree, heap, access_fns, access_fn);
688 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
691 aref = TREE_OPERAND (aref, 0);
694 if (INDIRECT_REF_P (aref))
696 op = TREE_OPERAND (aref, 0);
697 access_fn = analyze_scalar_evolution (loop, op);
698 access_fn = resolve_mixers (nest, access_fn);
699 base = initial_condition (access_fn);
700 split_constant_offset (base, &base, &off);
701 access_fn = chrec_replace_initial_condition (access_fn,
702 fold_convert (TREE_TYPE (base), off));
704 TREE_OPERAND (aref, 0) = base;
705 VEC_safe_push (tree, heap, access_fns, access_fn);
708 DR_BASE_OBJECT (dr) = ref;
709 DR_ACCESS_FNS (dr) = access_fns;
712 /* Extracts the alias analysis information from the memory reference DR. */
715 dr_analyze_alias (struct data_reference *dr)
717 tree stmt = DR_STMT (dr);
718 tree ref = DR_REF (dr);
719 tree base = get_base_address (ref), addr, smt = NULL_TREE;
726 else if (INDIRECT_REF_P (base))
728 addr = TREE_OPERAND (base, 0);
729 if (TREE_CODE (addr) == SSA_NAME)
731 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
732 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
736 DR_SYMBOL_TAG (dr) = smt;
737 if (smt && var_can_have_subvars (smt))
738 DR_SUBVARS (dr) = get_subvars_for_var (smt);
740 vops = BITMAP_ALLOC (NULL);
741 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
743 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
749 /* Returns true if the address of DR is invariant. */
752 dr_address_invariant_p (struct data_reference *dr)
757 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
758 if (tree_contains_chrecs (idx, NULL))
764 /* Frees data reference DR. */
767 free_data_ref (data_reference_p dr)
769 BITMAP_FREE (DR_VOPS (dr));
770 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
774 /* Analyzes memory reference MEMREF accessed in STMT. The reference
775 is read if IS_READ is true, write otherwise. Returns the
776 data_reference description of MEMREF. NEST is the outermost loop of the
777 loop nest in that the reference should be analyzed. */
779 struct data_reference *
780 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
782 struct data_reference *dr;
784 if (dump_file && (dump_flags & TDF_DETAILS))
786 fprintf (dump_file, "Creating dr for ");
787 print_generic_expr (dump_file, memref, TDF_SLIM);
788 fprintf (dump_file, "\n");
791 dr = XCNEW (struct data_reference);
793 DR_REF (dr) = memref;
794 DR_IS_READ (dr) = is_read;
796 dr_analyze_innermost (dr);
797 dr_analyze_indices (dr, nest);
798 dr_analyze_alias (dr);
800 if (dump_file && (dump_flags & TDF_DETAILS))
802 fprintf (dump_file, "\tbase_address: ");
803 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
804 fprintf (dump_file, "\n\toffset from base address: ");
805 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
806 fprintf (dump_file, "\n\tconstant offset from base address: ");
807 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
808 fprintf (dump_file, "\n\tstep: ");
809 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
810 fprintf (dump_file, "\n\taligned to: ");
811 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
812 fprintf (dump_file, "\n\tbase_object: ");
813 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
814 fprintf (dump_file, "\n\tsymbol tag: ");
815 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
816 fprintf (dump_file, "\n");
822 /* Returns true if FNA == FNB. */
825 affine_function_equal_p (affine_fn fna, affine_fn fnb)
827 unsigned i, n = VEC_length (tree, fna);
829 if (n != VEC_length (tree, fnb))
832 for (i = 0; i < n; i++)
833 if (!operand_equal_p (VEC_index (tree, fna, i),
834 VEC_index (tree, fnb, i), 0))
840 /* If all the functions in CF are the same, returns one of them,
841 otherwise returns NULL. */
844 common_affine_function (conflict_function *cf)
849 if (!CF_NONTRIVIAL_P (cf))
854 for (i = 1; i < cf->n; i++)
855 if (!affine_function_equal_p (comm, cf->fns[i]))
861 /* Returns the base of the affine function FN. */
864 affine_function_base (affine_fn fn)
866 return VEC_index (tree, fn, 0);
869 /* Returns true if FN is a constant. */
872 affine_function_constant_p (affine_fn fn)
877 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
878 if (!integer_zerop (coef))
884 /* Returns true if FN is the zero constant function. */
887 affine_function_zero_p (affine_fn fn)
889 return (integer_zerop (affine_function_base (fn))
890 && affine_function_constant_p (fn));
893 /* Applies operation OP on affine functions FNA and FNB, and returns the
897 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
903 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
905 n = VEC_length (tree, fna);
906 m = VEC_length (tree, fnb);
910 n = VEC_length (tree, fnb);
911 m = VEC_length (tree, fna);
914 ret = VEC_alloc (tree, heap, m);
915 for (i = 0; i < n; i++)
916 VEC_quick_push (tree, ret,
917 fold_build2 (op, integer_type_node,
918 VEC_index (tree, fna, i),
919 VEC_index (tree, fnb, i)));
921 for (; VEC_iterate (tree, fna, i, coef); i++)
922 VEC_quick_push (tree, ret,
923 fold_build2 (op, integer_type_node,
924 coef, integer_zero_node));
925 for (; VEC_iterate (tree, fnb, i, coef); i++)
926 VEC_quick_push (tree, ret,
927 fold_build2 (op, integer_type_node,
928 integer_zero_node, coef));
933 /* Returns the sum of affine functions FNA and FNB. */
936 affine_fn_plus (affine_fn fna, affine_fn fnb)
938 return affine_fn_op (PLUS_EXPR, fna, fnb);
941 /* Returns the difference of affine functions FNA and FNB. */
944 affine_fn_minus (affine_fn fna, affine_fn fnb)
946 return affine_fn_op (MINUS_EXPR, fna, fnb);
949 /* Frees affine function FN. */
952 affine_fn_free (affine_fn fn)
954 VEC_free (tree, heap, fn);
957 /* Determine for each subscript in the data dependence relation DDR
961 compute_subscript_distance (struct data_dependence_relation *ddr)
963 conflict_function *cf_a, *cf_b;
964 affine_fn fn_a, fn_b, diff;
966 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
970 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
972 struct subscript *subscript;
974 subscript = DDR_SUBSCRIPT (ddr, i);
975 cf_a = SUB_CONFLICTS_IN_A (subscript);
976 cf_b = SUB_CONFLICTS_IN_B (subscript);
978 fn_a = common_affine_function (cf_a);
979 fn_b = common_affine_function (cf_b);
982 SUB_DISTANCE (subscript) = chrec_dont_know;
985 diff = affine_fn_minus (fn_a, fn_b);
987 if (affine_function_constant_p (diff))
988 SUB_DISTANCE (subscript) = affine_function_base (diff);
990 SUB_DISTANCE (subscript) = chrec_dont_know;
992 affine_fn_free (diff);
997 /* Returns the conflict function for "unknown". */
999 static conflict_function *
1000 conflict_fn_not_known (void)
1002 conflict_function *fn = XCNEW (conflict_function);
1008 /* Returns the conflict function for "independent". */
1010 static conflict_function *
1011 conflict_fn_no_dependence (void)
1013 conflict_function *fn = XCNEW (conflict_function);
1014 fn->n = NO_DEPENDENCE;
1019 /* Returns true if the address of OBJ is invariant in LOOP. */
1022 object_address_invariant_in_loop_p (struct loop *loop, tree obj)
1024 while (handled_component_p (obj))
1026 if (TREE_CODE (obj) == ARRAY_REF)
1028 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1029 need to check the stride and the lower bound of the reference. */
1030 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1032 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1036 else if (TREE_CODE (obj) == COMPONENT_REF)
1038 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1042 obj = TREE_OPERAND (obj, 0);
1045 if (!INDIRECT_REF_P (obj))
1048 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1052 /* Returns true if A and B are accesses to different objects, or to different
1053 fields of the same object. */
1056 disjoint_objects_p (tree a, tree b)
1058 tree base_a, base_b;
1059 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1062 base_a = get_base_address (a);
1063 base_b = get_base_address (b);
1067 && base_a != base_b)
1070 if (!operand_equal_p (base_a, base_b, 0))
1073 /* Compare the component references of A and B. We must start from the inner
1074 ones, so record them to the vector first. */
1075 while (handled_component_p (a))
1077 VEC_safe_push (tree, heap, comp_a, a);
1078 a = TREE_OPERAND (a, 0);
1080 while (handled_component_p (b))
1082 VEC_safe_push (tree, heap, comp_b, b);
1083 b = TREE_OPERAND (b, 0);
1089 if (VEC_length (tree, comp_a) == 0
1090 || VEC_length (tree, comp_b) == 0)
1093 a = VEC_pop (tree, comp_a);
1094 b = VEC_pop (tree, comp_b);
1096 /* Real and imaginary part of a variable do not alias. */
1097 if ((TREE_CODE (a) == REALPART_EXPR
1098 && TREE_CODE (b) == IMAGPART_EXPR)
1099 || (TREE_CODE (a) == IMAGPART_EXPR
1100 && TREE_CODE (b) == REALPART_EXPR))
1106 if (TREE_CODE (a) != TREE_CODE (b))
1109 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1110 DR_BASE_OBJECT are always zero. */
1111 if (TREE_CODE (a) == ARRAY_REF)
1113 else if (TREE_CODE (a) == COMPONENT_REF)
1115 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1118 /* Different fields of unions may overlap. */
1119 base_a = TREE_OPERAND (a, 0);
1120 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1123 /* Different fields of structures cannot. */
1131 VEC_free (tree, heap, comp_a);
1132 VEC_free (tree, heap, comp_b);
1137 /* Returns false if we can prove that data references A and B do not alias,
1141 dr_may_alias_p (struct data_reference *a, struct data_reference *b)
1143 tree addr_a = DR_BASE_ADDRESS (a);
1144 tree addr_b = DR_BASE_ADDRESS (b);
1145 tree type_a, type_b;
1146 tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1148 /* If the sets of virtual operands are disjoint, the memory references do not
1150 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1153 /* If the accessed objects are disjoint, the memory references do not
1155 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1158 if (!addr_a || !addr_b)
1161 /* If the references are based on different static objects, they cannot alias
1162 (PTA should be able to disambiguate such accesses, but often it fails to,
1163 since currently we cannot distinguish between pointer and offset in pointer
1165 if (TREE_CODE (addr_a) == ADDR_EXPR
1166 && TREE_CODE (addr_b) == ADDR_EXPR)
1167 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1169 /* An instruction writing through a restricted pointer is "independent" of any
1170 instruction reading or writing through a different restricted pointer,
1171 in the same block/scope. */
1173 type_a = TREE_TYPE (addr_a);
1174 type_b = TREE_TYPE (addr_b);
1175 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1177 if (TREE_CODE (addr_a) == SSA_NAME)
1178 decl_a = SSA_NAME_VAR (addr_a);
1179 if (TREE_CODE (addr_b) == SSA_NAME)
1180 decl_b = SSA_NAME_VAR (addr_b);
1182 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1183 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1184 && decl_a && DECL_P (decl_a)
1185 && decl_b && DECL_P (decl_b)
1187 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1188 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1194 /* Initialize a data dependence relation between data accesses A and
1195 B. NB_LOOPS is the number of loops surrounding the references: the
1196 size of the classic distance/direction vectors. */
1198 static struct data_dependence_relation *
1199 initialize_data_dependence_relation (struct data_reference *a,
1200 struct data_reference *b,
1201 VEC (loop_p, heap) *loop_nest)
1203 struct data_dependence_relation *res;
1206 res = XNEW (struct data_dependence_relation);
1209 DDR_LOOP_NEST (res) = NULL;
1210 DDR_REVERSED_P (res) = false;
1212 if (a == NULL || b == NULL)
1214 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1218 /* If the data references do not alias, then they are independent. */
1219 if (!dr_may_alias_p (a, b))
1221 DDR_ARE_DEPENDENT (res) = chrec_known;
1225 /* If the references do not access the same object, we do not know
1226 whether they alias or not. */
1227 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1229 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1233 /* If the base of the object is not invariant in the loop nest, we cannot
1234 analyze it. TODO -- in fact, it would suffice to record that there may
1235 be arbitrary dependences in the loops where the base object varies. */
1236 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1237 DR_BASE_OBJECT (a)))
1239 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1243 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1245 DDR_AFFINE_P (res) = true;
1246 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1247 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1248 DDR_LOOP_NEST (res) = loop_nest;
1249 DDR_INNER_LOOP (res) = 0;
1250 DDR_DIR_VECTS (res) = NULL;
1251 DDR_DIST_VECTS (res) = NULL;
1253 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1255 struct subscript *subscript;
1257 subscript = XNEW (struct subscript);
1258 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1259 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1260 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1261 SUB_DISTANCE (subscript) = chrec_dont_know;
1262 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1268 /* Frees memory used by the conflict function F. */
1271 free_conflict_function (conflict_function *f)
1275 if (CF_NONTRIVIAL_P (f))
1277 for (i = 0; i < f->n; i++)
1278 affine_fn_free (f->fns[i]);
1283 /* Frees memory used by SUBSCRIPTS. */
1286 free_subscripts (VEC (subscript_p, heap) *subscripts)
1291 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1293 free_conflict_function (s->conflicting_iterations_in_a);
1294 free_conflict_function (s->conflicting_iterations_in_b);
1296 VEC_free (subscript_p, heap, subscripts);
1299 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1303 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1306 if (dump_file && (dump_flags & TDF_DETAILS))
1308 fprintf (dump_file, "(dependence classified: ");
1309 print_generic_expr (dump_file, chrec, 0);
1310 fprintf (dump_file, ")\n");
1313 DDR_ARE_DEPENDENT (ddr) = chrec;
1314 free_subscripts (DDR_SUBSCRIPTS (ddr));
1317 /* The dependence relation DDR cannot be represented by a distance
1321 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1323 if (dump_file && (dump_flags & TDF_DETAILS))
1324 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1326 DDR_AFFINE_P (ddr) = false;
1331 /* This section contains the classic Banerjee tests. */
1333 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1334 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1337 ziv_subscript_p (tree chrec_a,
1340 return (evolution_function_is_constant_p (chrec_a)
1341 && evolution_function_is_constant_p (chrec_b));
1344 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1345 variable, i.e., if the SIV (Single Index Variable) test is true. */
1348 siv_subscript_p (tree chrec_a,
1351 if ((evolution_function_is_constant_p (chrec_a)
1352 && evolution_function_is_univariate_p (chrec_b))
1353 || (evolution_function_is_constant_p (chrec_b)
1354 && evolution_function_is_univariate_p (chrec_a)))
1357 if (evolution_function_is_univariate_p (chrec_a)
1358 && evolution_function_is_univariate_p (chrec_b))
1360 switch (TREE_CODE (chrec_a))
1362 case POLYNOMIAL_CHREC:
1363 switch (TREE_CODE (chrec_b))
1365 case POLYNOMIAL_CHREC:
1366 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1381 /* Creates a conflict function with N dimensions. The affine functions
1382 in each dimension follow. */
1384 static conflict_function *
1385 conflict_fn (unsigned n, ...)
1388 conflict_function *ret = XCNEW (conflict_function);
1391 gcc_assert (0 < n && n <= MAX_DIM);
1395 for (i = 0; i < n; i++)
1396 ret->fns[i] = va_arg (ap, affine_fn);
1402 /* Returns constant affine function with value CST. */
1405 affine_fn_cst (tree cst)
1407 affine_fn fn = VEC_alloc (tree, heap, 1);
1408 VEC_quick_push (tree, fn, cst);
1412 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1415 affine_fn_univar (tree cst, unsigned dim, tree coef)
1417 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1420 gcc_assert (dim > 0);
1421 VEC_quick_push (tree, fn, cst);
1422 for (i = 1; i < dim; i++)
1423 VEC_quick_push (tree, fn, integer_zero_node);
1424 VEC_quick_push (tree, fn, coef);
1428 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1429 *OVERLAPS_B are initialized to the functions that describe the
1430 relation between the elements accessed twice by CHREC_A and
1431 CHREC_B. For k >= 0, the following property is verified:
1433 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1436 analyze_ziv_subscript (tree chrec_a,
1438 conflict_function **overlaps_a,
1439 conflict_function **overlaps_b,
1440 tree *last_conflicts)
1443 dependence_stats.num_ziv++;
1445 if (dump_file && (dump_flags & TDF_DETAILS))
1446 fprintf (dump_file, "(analyze_ziv_subscript \n");
1448 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1449 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1450 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
1452 switch (TREE_CODE (difference))
1455 if (integer_zerop (difference))
1457 /* The difference is equal to zero: the accessed index
1458 overlaps for each iteration in the loop. */
1459 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1460 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1461 *last_conflicts = chrec_dont_know;
1462 dependence_stats.num_ziv_dependent++;
1466 /* The accesses do not overlap. */
1467 *overlaps_a = conflict_fn_no_dependence ();
1468 *overlaps_b = conflict_fn_no_dependence ();
1469 *last_conflicts = integer_zero_node;
1470 dependence_stats.num_ziv_independent++;
1475 /* We're not sure whether the indexes overlap. For the moment,
1476 conservatively answer "don't know". */
1477 if (dump_file && (dump_flags & TDF_DETAILS))
1478 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1480 *overlaps_a = conflict_fn_not_known ();
1481 *overlaps_b = conflict_fn_not_known ();
1482 *last_conflicts = chrec_dont_know;
1483 dependence_stats.num_ziv_unimplemented++;
1487 if (dump_file && (dump_flags & TDF_DETAILS))
1488 fprintf (dump_file, ")\n");
1491 /* Sets NIT to the estimated number of executions of the statements in
1492 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1493 large as the number of iterations. If we have no reliable estimate,
1494 the function returns false, otherwise returns true. */
1497 estimated_loop_iterations (struct loop *loop, bool conservative,
1500 estimate_numbers_of_iterations_loop (loop);
1503 if (!loop->any_upper_bound)
1506 *nit = loop->nb_iterations_upper_bound;
1510 if (!loop->any_estimate)
1513 *nit = loop->nb_iterations_estimate;
1519 /* Similar to estimated_loop_iterations, but returns the estimate only
1520 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1521 on the number of iterations of LOOP could not be derived, returns -1. */
1524 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1527 HOST_WIDE_INT hwi_nit;
1529 if (!estimated_loop_iterations (loop, conservative, &nit))
1532 if (!double_int_fits_in_shwi_p (nit))
1534 hwi_nit = double_int_to_shwi (nit);
1536 return hwi_nit < 0 ? -1 : hwi_nit;
1539 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1540 and only if it fits to the int type. If this is not the case, or the
1541 estimate on the number of iterations of LOOP could not be derived, returns
1545 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1550 if (!estimated_loop_iterations (loop, conservative, &nit))
1551 return chrec_dont_know;
1553 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1554 if (!double_int_fits_to_tree_p (type, nit))
1555 return chrec_dont_know;
1557 return double_int_to_tree (type, nit);
1560 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1561 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1562 *OVERLAPS_B are initialized to the functions that describe the
1563 relation between the elements accessed twice by CHREC_A and
1564 CHREC_B. For k >= 0, the following property is verified:
1566 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1569 analyze_siv_subscript_cst_affine (tree chrec_a,
1571 conflict_function **overlaps_a,
1572 conflict_function **overlaps_b,
1573 tree *last_conflicts)
1575 bool value0, value1, value2;
1576 tree difference, tmp;
1578 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1579 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1580 difference = chrec_fold_minus
1581 (integer_type_node, initial_condition (chrec_b), chrec_a);
1583 if (!chrec_is_positive (initial_condition (difference), &value0))
1585 if (dump_file && (dump_flags & TDF_DETAILS))
1586 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1588 dependence_stats.num_siv_unimplemented++;
1589 *overlaps_a = conflict_fn_not_known ();
1590 *overlaps_b = conflict_fn_not_known ();
1591 *last_conflicts = chrec_dont_know;
1596 if (value0 == false)
1598 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1600 if (dump_file && (dump_flags & TDF_DETAILS))
1601 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1603 *overlaps_a = conflict_fn_not_known ();
1604 *overlaps_b = conflict_fn_not_known ();
1605 *last_conflicts = chrec_dont_know;
1606 dependence_stats.num_siv_unimplemented++;
1615 chrec_b = {10, +, 1}
1618 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1620 HOST_WIDE_INT numiter;
1621 struct loop *loop = get_chrec_loop (chrec_b);
1623 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1624 tmp = fold_build2 (EXACT_DIV_EXPR, integer_type_node,
1625 fold_build1 (ABS_EXPR,
1628 CHREC_RIGHT (chrec_b));
1629 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1630 *last_conflicts = integer_one_node;
1633 /* Perform weak-zero siv test to see if overlap is
1634 outside the loop bounds. */
1635 numiter = estimated_loop_iterations_int (loop, false);
1638 && compare_tree_int (tmp, numiter) > 0)
1640 free_conflict_function (*overlaps_a);
1641 free_conflict_function (*overlaps_b);
1642 *overlaps_a = conflict_fn_no_dependence ();
1643 *overlaps_b = conflict_fn_no_dependence ();
1644 *last_conflicts = integer_zero_node;
1645 dependence_stats.num_siv_independent++;
1648 dependence_stats.num_siv_dependent++;
1652 /* When the step does not divide the difference, there are
1656 *overlaps_a = conflict_fn_no_dependence ();
1657 *overlaps_b = conflict_fn_no_dependence ();
1658 *last_conflicts = integer_zero_node;
1659 dependence_stats.num_siv_independent++;
1668 chrec_b = {10, +, -1}
1670 In this case, chrec_a will not overlap with chrec_b. */
1671 *overlaps_a = conflict_fn_no_dependence ();
1672 *overlaps_b = conflict_fn_no_dependence ();
1673 *last_conflicts = integer_zero_node;
1674 dependence_stats.num_siv_independent++;
1681 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1683 if (dump_file && (dump_flags & TDF_DETAILS))
1684 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1686 *overlaps_a = conflict_fn_not_known ();
1687 *overlaps_b = conflict_fn_not_known ();
1688 *last_conflicts = chrec_dont_know;
1689 dependence_stats.num_siv_unimplemented++;
1694 if (value2 == false)
1698 chrec_b = {10, +, -1}
1700 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1702 HOST_WIDE_INT numiter;
1703 struct loop *loop = get_chrec_loop (chrec_b);
1705 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1706 tmp = fold_build2 (EXACT_DIV_EXPR,
1707 integer_type_node, difference,
1708 CHREC_RIGHT (chrec_b));
1709 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1710 *last_conflicts = integer_one_node;
1712 /* Perform weak-zero siv test to see if overlap is
1713 outside the loop bounds. */
1714 numiter = estimated_loop_iterations_int (loop, false);
1717 && compare_tree_int (tmp, numiter) > 0)
1719 free_conflict_function (*overlaps_a);
1720 free_conflict_function (*overlaps_b);
1721 *overlaps_a = conflict_fn_no_dependence ();
1722 *overlaps_b = conflict_fn_no_dependence ();
1723 *last_conflicts = integer_zero_node;
1724 dependence_stats.num_siv_independent++;
1727 dependence_stats.num_siv_dependent++;
1731 /* When the step does not divide the difference, there
1735 *overlaps_a = conflict_fn_no_dependence ();
1736 *overlaps_b = conflict_fn_no_dependence ();
1737 *last_conflicts = integer_zero_node;
1738 dependence_stats.num_siv_independent++;
1748 In this case, chrec_a will not overlap with chrec_b. */
1749 *overlaps_a = conflict_fn_no_dependence ();
1750 *overlaps_b = conflict_fn_no_dependence ();
1751 *last_conflicts = integer_zero_node;
1752 dependence_stats.num_siv_independent++;
1760 /* Helper recursive function for initializing the matrix A. Returns
1761 the initial value of CHREC. */
1764 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1768 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1769 return int_cst_value (chrec);
1771 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1772 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1775 #define FLOOR_DIV(x,y) ((x) / (y))
1777 /* Solves the special case of the Diophantine equation:
1778 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1780 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1781 number of iterations that loops X and Y run. The overlaps will be
1782 constructed as evolutions in dimension DIM. */
1785 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1786 affine_fn *overlaps_a,
1787 affine_fn *overlaps_b,
1788 tree *last_conflicts, int dim)
1790 if (((step_a > 0 && step_b > 0)
1791 || (step_a < 0 && step_b < 0)))
1793 int step_overlaps_a, step_overlaps_b;
1794 int gcd_steps_a_b, last_conflict, tau2;
1796 gcd_steps_a_b = gcd (step_a, step_b);
1797 step_overlaps_a = step_b / gcd_steps_a_b;
1798 step_overlaps_b = step_a / gcd_steps_a_b;
1800 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1801 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1802 last_conflict = tau2;
1804 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1805 build_int_cst (NULL_TREE,
1807 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1808 build_int_cst (NULL_TREE,
1810 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1815 *overlaps_a = affine_fn_cst (integer_zero_node);
1816 *overlaps_b = affine_fn_cst (integer_zero_node);
1817 *last_conflicts = integer_zero_node;
1821 /* Solves the special case of a Diophantine equation where CHREC_A is
1822 an affine bivariate function, and CHREC_B is an affine univariate
1823 function. For example,
1825 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1827 has the following overlapping functions:
1829 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1830 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1831 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1833 FORNOW: This is a specialized implementation for a case occurring in
1834 a common benchmark. Implement the general algorithm. */
1837 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1838 conflict_function **overlaps_a,
1839 conflict_function **overlaps_b,
1840 tree *last_conflicts)
1842 bool xz_p, yz_p, xyz_p;
1843 int step_x, step_y, step_z;
1844 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1845 affine_fn overlaps_a_xz, overlaps_b_xz;
1846 affine_fn overlaps_a_yz, overlaps_b_yz;
1847 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1848 affine_fn ova1, ova2, ovb;
1849 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1851 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1852 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1853 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1856 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1858 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1859 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1861 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1863 if (dump_file && (dump_flags & TDF_DETAILS))
1864 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1866 *overlaps_a = conflict_fn_not_known ();
1867 *overlaps_b = conflict_fn_not_known ();
1868 *last_conflicts = chrec_dont_know;
1872 niter = MIN (niter_x, niter_z);
1873 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1876 &last_conflicts_xz, 1);
1877 niter = MIN (niter_y, niter_z);
1878 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1881 &last_conflicts_yz, 2);
1882 niter = MIN (niter_x, niter_z);
1883 niter = MIN (niter_y, niter);
1884 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1887 &last_conflicts_xyz, 3);
1889 xz_p = !integer_zerop (last_conflicts_xz);
1890 yz_p = !integer_zerop (last_conflicts_yz);
1891 xyz_p = !integer_zerop (last_conflicts_xyz);
1893 if (xz_p || yz_p || xyz_p)
1895 ova1 = affine_fn_cst (integer_zero_node);
1896 ova2 = affine_fn_cst (integer_zero_node);
1897 ovb = affine_fn_cst (integer_zero_node);
1900 affine_fn t0 = ova1;
1903 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1904 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1905 affine_fn_free (t0);
1906 affine_fn_free (t2);
1907 *last_conflicts = last_conflicts_xz;
1911 affine_fn t0 = ova2;
1914 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1915 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1916 affine_fn_free (t0);
1917 affine_fn_free (t2);
1918 *last_conflicts = last_conflicts_yz;
1922 affine_fn t0 = ova1;
1923 affine_fn t2 = ova2;
1926 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
1927 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
1928 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
1929 affine_fn_free (t0);
1930 affine_fn_free (t2);
1931 affine_fn_free (t4);
1932 *last_conflicts = last_conflicts_xyz;
1934 *overlaps_a = conflict_fn (2, ova1, ova2);
1935 *overlaps_b = conflict_fn (1, ovb);
1939 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1940 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1941 *last_conflicts = integer_zero_node;
1944 affine_fn_free (overlaps_a_xz);
1945 affine_fn_free (overlaps_b_xz);
1946 affine_fn_free (overlaps_a_yz);
1947 affine_fn_free (overlaps_b_yz);
1948 affine_fn_free (overlaps_a_xyz);
1949 affine_fn_free (overlaps_b_xyz);
1952 /* Determines the overlapping elements due to accesses CHREC_A and
1953 CHREC_B, that are affine functions. This function cannot handle
1954 symbolic evolution functions, ie. when initial conditions are
1955 parameters, because it uses lambda matrices of integers. */
1958 analyze_subscript_affine_affine (tree chrec_a,
1960 conflict_function **overlaps_a,
1961 conflict_function **overlaps_b,
1962 tree *last_conflicts)
1964 unsigned nb_vars_a, nb_vars_b, dim;
1965 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
1966 HOST_WIDE_INT tau1, tau2;
1967 lambda_matrix A, U, S;
1969 if (eq_evolutions_p (chrec_a, chrec_b))
1971 /* The accessed index overlaps for each iteration in the
1973 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1974 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1975 *last_conflicts = chrec_dont_know;
1978 if (dump_file && (dump_flags & TDF_DETAILS))
1979 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
1981 /* For determining the initial intersection, we have to solve a
1982 Diophantine equation. This is the most time consuming part.
1984 For answering to the question: "Is there a dependence?" we have
1985 to prove that there exists a solution to the Diophantine
1986 equation, and that the solution is in the iteration domain,
1987 i.e. the solution is positive or zero, and that the solution
1988 happens before the upper bound loop.nb_iterations. Otherwise
1989 there is no dependence. This function outputs a description of
1990 the iterations that hold the intersections. */
1992 nb_vars_a = nb_vars_in_chrec (chrec_a);
1993 nb_vars_b = nb_vars_in_chrec (chrec_b);
1995 dim = nb_vars_a + nb_vars_b;
1996 U = lambda_matrix_new (dim, dim);
1997 A = lambda_matrix_new (dim, 1);
1998 S = lambda_matrix_new (dim, 1);
2000 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
2001 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
2002 gamma = init_b - init_a;
2004 /* Don't do all the hard work of solving the Diophantine equation
2005 when we already know the solution: for example,
2008 | gamma = 3 - 3 = 0.
2009 Then the first overlap occurs during the first iterations:
2010 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2014 if (nb_vars_a == 1 && nb_vars_b == 1)
2016 HOST_WIDE_INT step_a, step_b;
2017 HOST_WIDE_INT niter, niter_a, niter_b;
2020 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2022 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2024 if (niter_a < 0 || niter_b < 0)
2026 if (dump_file && (dump_flags & TDF_DETAILS))
2027 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2028 *overlaps_a = conflict_fn_not_known ();
2029 *overlaps_b = conflict_fn_not_known ();
2030 *last_conflicts = chrec_dont_know;
2031 goto end_analyze_subs_aa;
2034 niter = MIN (niter_a, niter_b);
2036 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2037 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2039 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2042 *overlaps_a = conflict_fn (1, ova);
2043 *overlaps_b = conflict_fn (1, ovb);
2046 else if (nb_vars_a == 2 && nb_vars_b == 1)
2047 compute_overlap_steps_for_affine_1_2
2048 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2050 else if (nb_vars_a == 1 && nb_vars_b == 2)
2051 compute_overlap_steps_for_affine_1_2
2052 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2056 if (dump_file && (dump_flags & TDF_DETAILS))
2057 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2058 *overlaps_a = conflict_fn_not_known ();
2059 *overlaps_b = conflict_fn_not_known ();
2060 *last_conflicts = chrec_dont_know;
2062 goto end_analyze_subs_aa;
2066 lambda_matrix_right_hermite (A, dim, 1, S, U);
2071 lambda_matrix_row_negate (U, dim, 0);
2073 gcd_alpha_beta = S[0][0];
2075 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2076 but that is a quite strange case. Instead of ICEing, answer
2078 if (gcd_alpha_beta == 0)
2080 *overlaps_a = conflict_fn_not_known ();
2081 *overlaps_b = conflict_fn_not_known ();
2082 *last_conflicts = chrec_dont_know;
2083 goto end_analyze_subs_aa;
2086 /* The classic "gcd-test". */
2087 if (!int_divides_p (gcd_alpha_beta, gamma))
2089 /* The "gcd-test" has determined that there is no integer
2090 solution, i.e. there is no dependence. */
2091 *overlaps_a = conflict_fn_no_dependence ();
2092 *overlaps_b = conflict_fn_no_dependence ();
2093 *last_conflicts = integer_zero_node;
2096 /* Both access functions are univariate. This includes SIV and MIV cases. */
2097 else if (nb_vars_a == 1 && nb_vars_b == 1)
2099 /* Both functions should have the same evolution sign. */
2100 if (((A[0][0] > 0 && -A[1][0] > 0)
2101 || (A[0][0] < 0 && -A[1][0] < 0)))
2103 /* The solutions are given by:
2105 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2108 For a given integer t. Using the following variables,
2110 | i0 = u11 * gamma / gcd_alpha_beta
2111 | j0 = u12 * gamma / gcd_alpha_beta
2118 | y0 = j0 + j1 * t. */
2120 HOST_WIDE_INT i0, j0, i1, j1;
2122 /* X0 and Y0 are the first iterations for which there is a
2123 dependence. X0, Y0 are two solutions of the Diophantine
2124 equation: chrec_a (X0) = chrec_b (Y0). */
2125 HOST_WIDE_INT x0, y0;
2126 HOST_WIDE_INT niter, niter_a, niter_b;
2128 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2130 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2133 if (niter_a < 0 || niter_b < 0)
2135 if (dump_file && (dump_flags & TDF_DETAILS))
2136 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2137 *overlaps_a = conflict_fn_not_known ();
2138 *overlaps_b = conflict_fn_not_known ();
2139 *last_conflicts = chrec_dont_know;
2140 goto end_analyze_subs_aa;
2143 niter = MIN (niter_a, niter_b);
2145 i0 = U[0][0] * gamma / gcd_alpha_beta;
2146 j0 = U[0][1] * gamma / gcd_alpha_beta;
2150 if ((i1 == 0 && i0 < 0)
2151 || (j1 == 0 && j0 < 0))
2153 /* There is no solution.
2154 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2155 falls in here, but for the moment we don't look at the
2156 upper bound of the iteration domain. */
2157 *overlaps_a = conflict_fn_no_dependence ();
2158 *overlaps_b = conflict_fn_no_dependence ();
2159 *last_conflicts = integer_zero_node;
2166 tau1 = CEIL (-i0, i1);
2167 tau2 = FLOOR_DIV (niter - i0, i1);
2171 int last_conflict, min_multiple;
2172 tau1 = MAX (tau1, CEIL (-j0, j1));
2173 tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1));
2175 x0 = i1 * tau1 + i0;
2176 y0 = j1 * tau1 + j0;
2178 /* At this point (x0, y0) is one of the
2179 solutions to the Diophantine equation. The
2180 next step has to compute the smallest
2181 positive solution: the first conflicts. */
2182 min_multiple = MIN (x0 / i1, y0 / j1);
2183 x0 -= i1 * min_multiple;
2184 y0 -= j1 * min_multiple;
2186 tau1 = (x0 - i0)/i1;
2187 last_conflict = tau2 - tau1;
2189 /* If the overlap occurs outside of the bounds of the
2190 loop, there is no dependence. */
2191 if (x0 > niter || y0 > niter)
2193 *overlaps_a = conflict_fn_no_dependence ();
2194 *overlaps_b = conflict_fn_no_dependence ();
2195 *last_conflicts = integer_zero_node;
2201 affine_fn_univar (build_int_cst (NULL_TREE, x0),
2203 build_int_cst (NULL_TREE, i1)));
2206 affine_fn_univar (build_int_cst (NULL_TREE, y0),
2208 build_int_cst (NULL_TREE, j1)));
2209 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2214 /* FIXME: For the moment, the upper bound of the
2215 iteration domain for j is not checked. */
2216 if (dump_file && (dump_flags & TDF_DETAILS))
2217 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2218 *overlaps_a = conflict_fn_not_known ();
2219 *overlaps_b = conflict_fn_not_known ();
2220 *last_conflicts = chrec_dont_know;
2226 /* FIXME: For the moment, the upper bound of the
2227 iteration domain for i is not checked. */
2228 if (dump_file && (dump_flags & TDF_DETAILS))
2229 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2230 *overlaps_a = conflict_fn_not_known ();
2231 *overlaps_b = conflict_fn_not_known ();
2232 *last_conflicts = chrec_dont_know;
2238 if (dump_file && (dump_flags & TDF_DETAILS))
2239 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2240 *overlaps_a = conflict_fn_not_known ();
2241 *overlaps_b = conflict_fn_not_known ();
2242 *last_conflicts = chrec_dont_know;
2248 if (dump_file && (dump_flags & TDF_DETAILS))
2249 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2250 *overlaps_a = conflict_fn_not_known ();
2251 *overlaps_b = conflict_fn_not_known ();
2252 *last_conflicts = chrec_dont_know;
2255 end_analyze_subs_aa:
2256 if (dump_file && (dump_flags & TDF_DETAILS))
2258 fprintf (dump_file, " (overlaps_a = ");
2259 dump_conflict_function (dump_file, *overlaps_a);
2260 fprintf (dump_file, ")\n (overlaps_b = ");
2261 dump_conflict_function (dump_file, *overlaps_b);
2262 fprintf (dump_file, ")\n");
2263 fprintf (dump_file, ")\n");
2267 /* Returns true when analyze_subscript_affine_affine can be used for
2268 determining the dependence relation between chrec_a and chrec_b,
2269 that contain symbols. This function modifies chrec_a and chrec_b
2270 such that the analysis result is the same, and such that they don't
2271 contain symbols, and then can safely be passed to the analyzer.
2273 Example: The analysis of the following tuples of evolutions produce
2274 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2277 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2278 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2282 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2284 tree diff, type, left_a, left_b, right_b;
2286 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2287 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2288 /* FIXME: For the moment not handled. Might be refined later. */
2291 type = chrec_type (*chrec_a);
2292 left_a = CHREC_LEFT (*chrec_a);
2293 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2294 diff = chrec_fold_minus (type, left_a, left_b);
2296 if (!evolution_function_is_constant_p (diff))
2299 if (dump_file && (dump_flags & TDF_DETAILS))
2300 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2302 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2303 diff, CHREC_RIGHT (*chrec_a));
2304 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2305 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2306 build_int_cst (type, 0),
2311 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2312 *OVERLAPS_B are initialized to the functions that describe the
2313 relation between the elements accessed twice by CHREC_A and
2314 CHREC_B. For k >= 0, the following property is verified:
2316 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2319 analyze_siv_subscript (tree chrec_a,
2321 conflict_function **overlaps_a,
2322 conflict_function **overlaps_b,
2323 tree *last_conflicts)
2325 dependence_stats.num_siv++;
2327 if (dump_file && (dump_flags & TDF_DETAILS))
2328 fprintf (dump_file, "(analyze_siv_subscript \n");
2330 if (evolution_function_is_constant_p (chrec_a)
2331 && evolution_function_is_affine_p (chrec_b))
2332 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2333 overlaps_a, overlaps_b, last_conflicts);
2335 else if (evolution_function_is_affine_p (chrec_a)
2336 && evolution_function_is_constant_p (chrec_b))
2337 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2338 overlaps_b, overlaps_a, last_conflicts);
2340 else if (evolution_function_is_affine_p (chrec_a)
2341 && evolution_function_is_affine_p (chrec_b))
2343 if (!chrec_contains_symbols (chrec_a)
2344 && !chrec_contains_symbols (chrec_b))
2346 analyze_subscript_affine_affine (chrec_a, chrec_b,
2347 overlaps_a, overlaps_b,
2350 if (CF_NOT_KNOWN_P (*overlaps_a)
2351 || CF_NOT_KNOWN_P (*overlaps_b))
2352 dependence_stats.num_siv_unimplemented++;
2353 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2354 || CF_NO_DEPENDENCE_P (*overlaps_b))
2355 dependence_stats.num_siv_independent++;
2357 dependence_stats.num_siv_dependent++;
2359 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2362 analyze_subscript_affine_affine (chrec_a, chrec_b,
2363 overlaps_a, overlaps_b,
2366 if (CF_NOT_KNOWN_P (*overlaps_a)
2367 || CF_NOT_KNOWN_P (*overlaps_b))
2368 dependence_stats.num_siv_unimplemented++;
2369 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2370 || CF_NO_DEPENDENCE_P (*overlaps_b))
2371 dependence_stats.num_siv_independent++;
2373 dependence_stats.num_siv_dependent++;
2376 goto siv_subscript_dontknow;
2381 siv_subscript_dontknow:;
2382 if (dump_file && (dump_flags & TDF_DETAILS))
2383 fprintf (dump_file, "siv test failed: unimplemented.\n");
2384 *overlaps_a = conflict_fn_not_known ();
2385 *overlaps_b = conflict_fn_not_known ();
2386 *last_conflicts = chrec_dont_know;
2387 dependence_stats.num_siv_unimplemented++;
2390 if (dump_file && (dump_flags & TDF_DETAILS))
2391 fprintf (dump_file, ")\n");
2394 /* Returns false if we can prove that the greatest common divisor of the steps
2395 of CHREC does not divide CST, false otherwise. */
2398 gcd_of_steps_may_divide_p (tree chrec, tree cst)
2400 HOST_WIDE_INT cd = 0, val;
2403 if (!host_integerp (cst, 0))
2405 val = tree_low_cst (cst, 0);
2407 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2409 step = CHREC_RIGHT (chrec);
2410 if (!host_integerp (step, 0))
2412 cd = gcd (cd, tree_low_cst (step, 0));
2413 chrec = CHREC_LEFT (chrec);
2416 return val % cd == 0;
2419 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2420 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2421 functions that describe the relation between the elements accessed
2422 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2425 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2428 analyze_miv_subscript (tree chrec_a,
2430 conflict_function **overlaps_a,
2431 conflict_function **overlaps_b,
2432 tree *last_conflicts,
2433 struct loop *loop_nest)
2435 /* FIXME: This is a MIV subscript, not yet handled.
2436 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2439 In the SIV test we had to solve a Diophantine equation with two
2440 variables. In the MIV case we have to solve a Diophantine
2441 equation with 2*n variables (if the subscript uses n IVs).
2444 dependence_stats.num_miv++;
2445 if (dump_file && (dump_flags & TDF_DETAILS))
2446 fprintf (dump_file, "(analyze_miv_subscript \n");
2448 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
2449 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
2450 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
2452 if (eq_evolutions_p (chrec_a, chrec_b))
2454 /* Access functions are the same: all the elements are accessed
2455 in the same order. */
2456 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2457 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2458 *last_conflicts = estimated_loop_iterations_tree
2459 (get_chrec_loop (chrec_a), true);
2460 dependence_stats.num_miv_dependent++;
2463 else if (evolution_function_is_constant_p (difference)
2464 /* For the moment, the following is verified:
2465 evolution_function_is_affine_multivariate_p (chrec_a,
2467 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2469 /* testsuite/.../ssa-chrec-33.c
2470 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2472 The difference is 1, and all the evolution steps are multiples
2473 of 2, consequently there are no overlapping elements. */
2474 *overlaps_a = conflict_fn_no_dependence ();
2475 *overlaps_b = conflict_fn_no_dependence ();
2476 *last_conflicts = integer_zero_node;
2477 dependence_stats.num_miv_independent++;
2480 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2481 && !chrec_contains_symbols (chrec_a)
2482 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2483 && !chrec_contains_symbols (chrec_b))
2485 /* testsuite/.../ssa-chrec-35.c
2486 {0, +, 1}_2 vs. {0, +, 1}_3
2487 the overlapping elements are respectively located at iterations:
2488 {0, +, 1}_x and {0, +, 1}_x,
2489 in other words, we have the equality:
2490 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2493 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2494 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2496 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2497 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2499 analyze_subscript_affine_affine (chrec_a, chrec_b,
2500 overlaps_a, overlaps_b, last_conflicts);
2502 if (CF_NOT_KNOWN_P (*overlaps_a)
2503 || CF_NOT_KNOWN_P (*overlaps_b))
2504 dependence_stats.num_miv_unimplemented++;
2505 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2506 || CF_NO_DEPENDENCE_P (*overlaps_b))
2507 dependence_stats.num_miv_independent++;
2509 dependence_stats.num_miv_dependent++;
2514 /* When the analysis is too difficult, answer "don't know". */
2515 if (dump_file && (dump_flags & TDF_DETAILS))
2516 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2518 *overlaps_a = conflict_fn_not_known ();
2519 *overlaps_b = conflict_fn_not_known ();
2520 *last_conflicts = chrec_dont_know;
2521 dependence_stats.num_miv_unimplemented++;
2524 if (dump_file && (dump_flags & TDF_DETAILS))
2525 fprintf (dump_file, ")\n");
2528 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2529 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2530 OVERLAP_ITERATIONS_B are initialized with two functions that
2531 describe the iterations that contain conflicting elements.
2533 Remark: For an integer k >= 0, the following equality is true:
2535 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2539 analyze_overlapping_iterations (tree chrec_a,
2541 conflict_function **overlap_iterations_a,
2542 conflict_function **overlap_iterations_b,
2543 tree *last_conflicts, struct loop *loop_nest)
2545 unsigned int lnn = loop_nest->num;
2547 dependence_stats.num_subscript_tests++;
2549 if (dump_file && (dump_flags & TDF_DETAILS))
2551 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2552 fprintf (dump_file, " (chrec_a = ");
2553 print_generic_expr (dump_file, chrec_a, 0);
2554 fprintf (dump_file, ")\n (chrec_b = ");
2555 print_generic_expr (dump_file, chrec_b, 0);
2556 fprintf (dump_file, ")\n");
2559 if (chrec_a == NULL_TREE
2560 || chrec_b == NULL_TREE
2561 || chrec_contains_undetermined (chrec_a)
2562 || chrec_contains_undetermined (chrec_b))
2564 dependence_stats.num_subscript_undetermined++;
2566 *overlap_iterations_a = conflict_fn_not_known ();
2567 *overlap_iterations_b = conflict_fn_not_known ();
2570 /* If they are the same chrec, and are affine, they overlap
2571 on every iteration. */
2572 else if (eq_evolutions_p (chrec_a, chrec_b)
2573 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2575 dependence_stats.num_same_subscript_function++;
2576 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2577 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2578 *last_conflicts = chrec_dont_know;
2581 /* If they aren't the same, and aren't affine, we can't do anything
2583 else if ((chrec_contains_symbols (chrec_a)
2584 || chrec_contains_symbols (chrec_b))
2585 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2586 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2588 dependence_stats.num_subscript_undetermined++;
2589 *overlap_iterations_a = conflict_fn_not_known ();
2590 *overlap_iterations_b = conflict_fn_not_known ();
2593 else if (ziv_subscript_p (chrec_a, chrec_b))
2594 analyze_ziv_subscript (chrec_a, chrec_b,
2595 overlap_iterations_a, overlap_iterations_b,
2598 else if (siv_subscript_p (chrec_a, chrec_b))
2599 analyze_siv_subscript (chrec_a, chrec_b,
2600 overlap_iterations_a, overlap_iterations_b,
2604 analyze_miv_subscript (chrec_a, chrec_b,
2605 overlap_iterations_a, overlap_iterations_b,
2606 last_conflicts, loop_nest);
2608 if (dump_file && (dump_flags & TDF_DETAILS))
2610 fprintf (dump_file, " (overlap_iterations_a = ");
2611 dump_conflict_function (dump_file, *overlap_iterations_a);
2612 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2613 dump_conflict_function (dump_file, *overlap_iterations_b);
2614 fprintf (dump_file, ")\n");
2615 fprintf (dump_file, ")\n");
2619 /* Helper function for uniquely inserting distance vectors. */
2622 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2627 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2628 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2631 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2634 /* Helper function for uniquely inserting direction vectors. */
2637 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2642 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2643 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2646 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2649 /* Add a distance of 1 on all the loops outer than INDEX. If we
2650 haven't yet determined a distance for this outer loop, push a new
2651 distance vector composed of the previous distance, and a distance
2652 of 1 for this outer loop. Example:
2660 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2661 save (0, 1), then we have to save (1, 0). */
2664 add_outer_distances (struct data_dependence_relation *ddr,
2665 lambda_vector dist_v, int index)
2667 /* For each outer loop where init_v is not set, the accesses are
2668 in dependence of distance 1 in the loop. */
2669 while (--index >= 0)
2671 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2672 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2674 save_dist_v (ddr, save_v);
2678 /* Return false when fail to represent the data dependence as a
2679 distance vector. INIT_B is set to true when a component has been
2680 added to the distance vector DIST_V. INDEX_CARRY is then set to
2681 the index in DIST_V that carries the dependence. */
2684 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2685 struct data_reference *ddr_a,
2686 struct data_reference *ddr_b,
2687 lambda_vector dist_v, bool *init_b,
2691 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2693 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2695 tree access_fn_a, access_fn_b;
2696 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2698 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2700 non_affine_dependence_relation (ddr);
2704 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2705 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2707 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2708 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2711 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2712 DDR_LOOP_NEST (ddr));
2713 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2714 DDR_LOOP_NEST (ddr));
2716 /* The dependence is carried by the outermost loop. Example:
2723 In this case, the dependence is carried by loop_1. */
2724 index = index_a < index_b ? index_a : index_b;
2725 *index_carry = MIN (index, *index_carry);
2727 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2729 non_affine_dependence_relation (ddr);
2733 dist = int_cst_value (SUB_DISTANCE (subscript));
2735 /* This is the subscript coupling test. If we have already
2736 recorded a distance for this loop (a distance coming from
2737 another subscript), it should be the same. For example,
2738 in the following code, there is no dependence:
2745 if (init_v[index] != 0 && dist_v[index] != dist)
2747 finalize_ddr_dependent (ddr, chrec_known);
2751 dist_v[index] = dist;
2755 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2757 /* This can be for example an affine vs. constant dependence
2758 (T[i] vs. T[3]) that is not an affine dependence and is
2759 not representable as a distance vector. */
2760 non_affine_dependence_relation (ddr);
2768 /* Return true when the DDR contains two data references that have the
2769 same access functions. */
2772 same_access_functions (struct data_dependence_relation *ddr)
2776 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2777 if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
2778 DR_ACCESS_FN (DDR_B (ddr), i)))
2784 /* Return true when the DDR contains only constant access functions. */
2787 constant_access_functions (struct data_dependence_relation *ddr)
2791 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2792 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2793 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2800 /* Helper function for the case where DDR_A and DDR_B are the same
2801 multivariate access function. */
2804 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2807 tree c_1 = CHREC_LEFT (c_2);
2808 tree c_0 = CHREC_LEFT (c_1);
2809 lambda_vector dist_v;
2812 /* Polynomials with more than 2 variables are not handled yet. When
2813 the evolution steps are parameters, it is not possible to
2814 represent the dependence using classical distance vectors. */
2815 if (TREE_CODE (c_0) != INTEGER_CST
2816 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2817 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2819 DDR_AFFINE_P (ddr) = false;
2823 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2824 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2826 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2827 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2828 v1 = int_cst_value (CHREC_RIGHT (c_1));
2829 v2 = int_cst_value (CHREC_RIGHT (c_2));
2842 save_dist_v (ddr, dist_v);
2844 add_outer_distances (ddr, dist_v, x_1);
2847 /* Helper function for the case where DDR_A and DDR_B are the same
2848 access functions. */
2851 add_other_self_distances (struct data_dependence_relation *ddr)
2853 lambda_vector dist_v;
2855 int index_carry = DDR_NB_LOOPS (ddr);
2857 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2859 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2861 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2863 if (!evolution_function_is_univariate_p (access_fun))
2865 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2867 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2871 add_multivariate_self_dist (ddr, DR_ACCESS_FN (DDR_A (ddr), 0));
2875 index_carry = MIN (index_carry,
2876 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2877 DDR_LOOP_NEST (ddr)));
2881 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2882 add_outer_distances (ddr, dist_v, index_carry);
2886 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2888 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2890 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2891 save_dist_v (ddr, dist_v);
2894 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2895 is the case for example when access functions are the same and
2896 equal to a constant, as in:
2903 in which case the distance vectors are (0) and (1). */
2906 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2910 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2912 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2913 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2914 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2916 for (j = 0; j < ca->n; j++)
2917 if (affine_function_zero_p (ca->fns[j]))
2919 insert_innermost_unit_dist_vector (ddr);
2923 for (j = 0; j < cb->n; j++)
2924 if (affine_function_zero_p (cb->fns[j]))
2926 insert_innermost_unit_dist_vector (ddr);
2932 /* Compute the classic per loop distance vector. DDR is the data
2933 dependence relation to build a vector from. Return false when fail
2934 to represent the data dependence as a distance vector. */
2937 build_classic_dist_vector (struct data_dependence_relation *ddr,
2938 struct loop *loop_nest)
2940 bool init_b = false;
2941 int index_carry = DDR_NB_LOOPS (ddr);
2942 lambda_vector dist_v;
2944 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2947 if (same_access_functions (ddr))
2949 /* Save the 0 vector. */
2950 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2951 save_dist_v (ddr, dist_v);
2953 if (constant_access_functions (ddr))
2954 add_distance_for_zero_overlaps (ddr);
2956 if (DDR_NB_LOOPS (ddr) > 1)
2957 add_other_self_distances (ddr);
2962 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2963 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2964 dist_v, &init_b, &index_carry))
2967 /* Save the distance vector if we initialized one. */
2970 /* Verify a basic constraint: classic distance vectors should
2971 always be lexicographically positive.
2973 Data references are collected in the order of execution of
2974 the program, thus for the following loop
2976 | for (i = 1; i < 100; i++)
2977 | for (j = 1; j < 100; j++)
2979 | t = T[j+1][i-1]; // A
2980 | T[j][i] = t + 2; // B
2983 references are collected following the direction of the wind:
2984 A then B. The data dependence tests are performed also
2985 following this order, such that we're looking at the distance
2986 separating the elements accessed by A from the elements later
2987 accessed by B. But in this example, the distance returned by
2988 test_dep (A, B) is lexicographically negative (-1, 1), that
2989 means that the access A occurs later than B with respect to
2990 the outer loop, ie. we're actually looking upwind. In this
2991 case we solve test_dep (B, A) looking downwind to the
2992 lexicographically positive solution, that returns the
2993 distance vector (1, -1). */
2994 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
2996 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2997 subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
2999 compute_subscript_distance (ddr);
3000 build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3001 save_v, &init_b, &index_carry);
3002 save_dist_v (ddr, save_v);
3003 DDR_REVERSED_P (ddr) = true;
3005 /* In this case there is a dependence forward for all the
3008 | for (k = 1; k < 100; k++)
3009 | for (i = 1; i < 100; i++)
3010 | for (j = 1; j < 100; j++)
3012 | t = T[j+1][i-1]; // A
3013 | T[j][i] = t + 2; // B
3021 if (DDR_NB_LOOPS (ddr) > 1)
3023 add_outer_distances (ddr, save_v, index_carry);
3024 add_outer_distances (ddr, dist_v, index_carry);
3029 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3030 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3031 save_dist_v (ddr, save_v);
3033 if (DDR_NB_LOOPS (ddr) > 1)
3035 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3037 subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3039 compute_subscript_distance (ddr);
3040 build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3041 opposite_v, &init_b, &index_carry);
3043 add_outer_distances (ddr, dist_v, index_carry);
3044 add_outer_distances (ddr, opposite_v, index_carry);
3050 /* There is a distance of 1 on all the outer loops: Example:
3051 there is a dependence of distance 1 on loop_1 for the array A.
3057 add_outer_distances (ddr, dist_v,
3058 lambda_vector_first_nz (dist_v,
3059 DDR_NB_LOOPS (ddr), 0));
3062 if (dump_file && (dump_flags & TDF_DETAILS))
3066 fprintf (dump_file, "(build_classic_dist_vector\n");
3067 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3069 fprintf (dump_file, " dist_vector = (");
3070 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3071 DDR_NB_LOOPS (ddr));
3072 fprintf (dump_file, " )\n");
3074 fprintf (dump_file, ")\n");
3080 /* Return the direction for a given distance.
3081 FIXME: Computing dir this way is suboptimal, since dir can catch
3082 cases that dist is unable to represent. */
3084 static inline enum data_dependence_direction
3085 dir_from_dist (int dist)
3088 return dir_positive;
3090 return dir_negative;
3095 /* Compute the classic per loop direction vector. DDR is the data
3096 dependence relation to build a vector from. */
3099 build_classic_dir_vector (struct data_dependence_relation *ddr)
3102 lambda_vector dist_v;
3104 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3106 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3108 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3109 dir_v[j] = dir_from_dist (dist_v[j]);
3111 save_dir_v (ddr, dir_v);
3115 /* Helper function. Returns true when there is a dependence between
3116 data references DRA and DRB. */
3119 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3120 struct data_reference *dra,
3121 struct data_reference *drb,
3122 struct loop *loop_nest)
3125 tree last_conflicts;
3126 struct subscript *subscript;
3128 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3131 conflict_function *overlaps_a, *overlaps_b;
3133 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3134 DR_ACCESS_FN (drb, i),
3135 &overlaps_a, &overlaps_b,
3136 &last_conflicts, loop_nest);
3138 if (CF_NOT_KNOWN_P (overlaps_a)
3139 || CF_NOT_KNOWN_P (overlaps_b))
3141 finalize_ddr_dependent (ddr, chrec_dont_know);
3142 dependence_stats.num_dependence_undetermined++;
3143 free_conflict_function (overlaps_a);
3144 free_conflict_function (overlaps_b);
3148 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3149 || CF_NO_DEPENDENCE_P (overlaps_b))
3151 finalize_ddr_dependent (ddr, chrec_known);
3152 dependence_stats.num_dependence_independent++;
3153 free_conflict_function (overlaps_a);
3154 free_conflict_function (overlaps_b);
3160 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3161 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3162 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3169 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3172 subscript_dependence_tester (struct data_dependence_relation *ddr,
3173 struct loop *loop_nest)
3176 if (dump_file && (dump_flags & TDF_DETAILS))
3177 fprintf (dump_file, "(subscript_dependence_tester \n");
3179 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3180 dependence_stats.num_dependence_dependent++;
3182 compute_subscript_distance (ddr);
3183 if (build_classic_dist_vector (ddr, loop_nest))
3184 build_classic_dir_vector (ddr);
3186 if (dump_file && (dump_flags & TDF_DETAILS))
3187 fprintf (dump_file, ")\n");
3190 /* Returns true when all the access functions of A are affine or
3191 constant with respect to LOOP_NEST. */
3194 access_functions_are_affine_or_constant_p (struct data_reference *a,
3195 struct loop *loop_nest)
3198 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3201 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3202 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3203 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3209 /* Initializes an equation for an OMEGA problem using the information
3210 contained in the ACCESS_FUN. Returns true when the operation
3213 PB is the omega constraint system.
3214 EQ is the number of the equation to be initialized.
3215 OFFSET is used for shifting the variables names in the constraints:
3216 a constrain is composed of 2 * the number of variables surrounding
3217 dependence accesses. OFFSET is set either to 0 for the first n variables,
3218 then it is set to n.
3219 ACCESS_FUN is expected to be an affine chrec. */
3222 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3223 unsigned int offset, tree access_fun,
3224 struct data_dependence_relation *ddr)
3226 switch (TREE_CODE (access_fun))
3228 case POLYNOMIAL_CHREC:
3230 tree left = CHREC_LEFT (access_fun);
3231 tree right = CHREC_RIGHT (access_fun);
3232 int var = CHREC_VARIABLE (access_fun);
3235 if (TREE_CODE (right) != INTEGER_CST)
3238 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3239 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3241 /* Compute the innermost loop index. */
3242 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3245 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3246 += int_cst_value (right);
3248 switch (TREE_CODE (left))
3250 case POLYNOMIAL_CHREC:
3251 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3254 pb->eqs[eq].coef[0] += int_cst_value (left);
3263 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3271 /* As explained in the comments preceding init_omega_for_ddr, we have
3272 to set up a system for each loop level, setting outer loops
3273 variation to zero, and current loop variation to positive or zero.
3274 Save each lexico positive distance vector. */
3277 omega_extract_distance_vectors (omega_pb pb,
3278 struct data_dependence_relation *ddr)
3282 struct loop *loopi, *loopj;
3283 enum omega_result res;
3285 /* Set a new problem for each loop in the nest. The basis is the
3286 problem that we have initialized until now. On top of this we
3287 add new constraints. */
3288 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3289 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3292 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3293 DDR_NB_LOOPS (ddr));
3295 omega_copy_problem (copy, pb);
3297 /* For all the outer loops "loop_j", add "dj = 0". */
3299 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3301 eq = omega_add_zero_eq (copy, omega_black);
3302 copy->eqs[eq].coef[j + 1] = 1;
3305 /* For "loop_i", add "0 <= di". */
3306 geq = omega_add_zero_geq (copy, omega_black);
3307 copy->geqs[geq].coef[i + 1] = 1;
3309 /* Reduce the constraint system, and test that the current
3310 problem is feasible. */
3311 res = omega_simplify_problem (copy);
3312 if (res == omega_false
3313 || res == omega_unknown
3314 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3317 for (eq = 0; eq < copy->num_subs; eq++)
3318 if (copy->subs[eq].key == (int) i + 1)
3320 dist = copy->subs[eq].coef[0];
3326 /* Reinitialize problem... */
3327 omega_copy_problem (copy, pb);
3329 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3331 eq = omega_add_zero_eq (copy, omega_black);
3332 copy->eqs[eq].coef[j + 1] = 1;
3335 /* ..., but this time "di = 1". */
3336 eq = omega_add_zero_eq (copy, omega_black);
3337 copy->eqs[eq].coef[i + 1] = 1;
3338 copy->eqs[eq].coef[0] = -1;
3340 res = omega_simplify_problem (copy);
3341 if (res == omega_false
3342 || res == omega_unknown
3343 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3346 for (eq = 0; eq < copy->num_subs; eq++)
3347 if (copy->subs[eq].key == (int) i + 1)
3349 dist = copy->subs[eq].coef[0];
3355 /* Save the lexicographically positive distance vector. */
3358 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3359 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3363 for (eq = 0; eq < copy->num_subs; eq++)
3364 if (copy->subs[eq].key > 0)
3366 dist = copy->subs[eq].coef[0];
3367 dist_v[copy->subs[eq].key - 1] = dist;
3370 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3371 dir_v[j] = dir_from_dist (dist_v[j]);
3373 save_dist_v (ddr, dist_v);
3374 save_dir_v (ddr, dir_v);
3378 omega_free_problem (copy);
3382 /* This is called for each subscript of a tuple of data references:
3383 insert an equality for representing the conflicts. */
3386 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3387 struct data_dependence_relation *ddr,
3388 omega_pb pb, bool *maybe_dependent)
3391 tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE);
3392 tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE);
3393 tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b);
3395 /* When the fun_a - fun_b is not constant, the dependence is not
3396 captured by the classic distance vector representation. */
3397 if (TREE_CODE (difference) != INTEGER_CST)
3401 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3403 /* There is no dependence. */
3404 *maybe_dependent = false;
3408 fun_b = chrec_fold_multiply (integer_type_node, fun_b,
3409 integer_minus_one_node);
3411 eq = omega_add_zero_eq (pb, omega_black);
3412 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3413 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3414 /* There is probably a dependence, but the system of
3415 constraints cannot be built: answer "don't know". */
3419 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3420 && !int_divides_p (lambda_vector_gcd
3421 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3422 2 * DDR_NB_LOOPS (ddr)),
3423 pb->eqs[eq].coef[0]))
3425 /* There is no dependence. */
3426 *maybe_dependent = false;
3433 /* Helper function, same as init_omega_for_ddr but specialized for
3434 data references A and B. */
3437 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3438 struct data_dependence_relation *ddr,
3439 omega_pb pb, bool *maybe_dependent)
3444 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3446 /* Insert an equality per subscript. */
3447 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3449 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3450 ddr, pb, maybe_dependent))
3452 else if (*maybe_dependent == false)
3454 /* There is no dependence. */
3455 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3460 /* Insert inequalities: constraints corresponding to the iteration
3461 domain, i.e. the loops surrounding the references "loop_x" and
3462 the distance variables "dx". The layout of the OMEGA
3463 representation is as follows:
3464 - coef[0] is the constant
3465 - coef[1..nb_loops] are the protected variables that will not be
3466 removed by the solver: the "dx"
3467 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3469 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3470 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3472 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3475 ineq = omega_add_zero_geq (pb, omega_black);
3476 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3478 /* 0 <= loop_x + dx */
3479 ineq = omega_add_zero_geq (pb, omega_black);
3480 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3481 pb->geqs[ineq].coef[i + 1] = 1;
3485 /* loop_x <= nb_iters */
3486 ineq = omega_add_zero_geq (pb, omega_black);
3487 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3488 pb->geqs[ineq].coef[0] = nbi;
3490 /* loop_x + dx <= nb_iters */
3491 ineq = omega_add_zero_geq (pb, omega_black);
3492 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3493 pb->geqs[ineq].coef[i + 1] = -1;
3494 pb->geqs[ineq].coef[0] = nbi;
3496 /* A step "dx" bigger than nb_iters is not feasible, so
3497 add "0 <= nb_iters + dx", */
3498 ineq = omega_add_zero_geq (pb, omega_black);
3499 pb->geqs[ineq].coef[i + 1] = 1;
3500 pb->geqs[ineq].coef[0] = nbi;
3501 /* and "dx <= nb_iters". */
3502 ineq = omega_add_zero_geq (pb, omega_black);
3503 pb->geqs[ineq].coef[i + 1] = -1;
3504 pb->geqs[ineq].coef[0] = nbi;
3508 omega_extract_distance_vectors (pb, ddr);
3513 /* Sets up the Omega dependence problem for the data dependence
3514 relation DDR. Returns false when the constraint system cannot be
3515 built, ie. when the test answers "don't know". Returns true
3516 otherwise, and when independence has been proved (using one of the
3517 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3518 set MAYBE_DEPENDENT to true.
3520 Example: for setting up the dependence system corresponding to the
3521 conflicting accesses
3526 | ... A[2*j, 2*(i + j)]
3530 the following constraints come from the iteration domain:
3537 where di, dj are the distance variables. The constraints
3538 representing the conflicting elements are:
3541 i + 1 = 2 * (i + di + j + dj)
3543 For asking that the resulting distance vector (di, dj) be
3544 lexicographically positive, we insert the constraint "di >= 0". If
3545 "di = 0" in the solution, we fix that component to zero, and we
3546 look at the inner loops: we set a new problem where all the outer
3547 loop distances are zero, and fix this inner component to be
3548 positive. When one of the components is positive, we save that
3549 distance, and set a new problem where the distance on this loop is
3550 zero, searching for other distances in the inner loops. Here is
3551 the classic example that illustrates that we have to set for each
3552 inner loop a new problem:
3560 we have to save two distances (1, 0) and (0, 1).
3562 Given two array references, refA and refB, we have to set the
3563 dependence problem twice, refA vs. refB and refB vs. refA, and we
3564 cannot do a single test, as refB might occur before refA in the
3565 inner loops, and the contrary when considering outer loops: ex.
3570 | T[{1,+,1}_2][{1,+,1}_1] // refA
3571 | T[{2,+,1}_2][{0,+,1}_1] // refB
3576 refB touches the elements in T before refA, and thus for the same
3577 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3578 but for successive loop_0 iterations, we have (1, -1, 1)
3580 The Omega solver expects the distance variables ("di" in the
3581 previous example) to come first in the constraint system (as
3582 variables to be protected, or "safe" variables), the constraint
3583 system is built using the following layout:
3585 "cst | distance vars | index vars".
3589 init_omega_for_ddr (struct data_dependence_relation *ddr,
3590 bool *maybe_dependent)
3595 *maybe_dependent = true;
3597 if (same_access_functions (ddr))
3600 lambda_vector dir_v;
3602 /* Save the 0 vector. */
3603 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3604 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3605 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3606 dir_v[j] = dir_equal;
3607 save_dir_v (ddr, dir_v);
3609 /* Save the dependences carried by outer loops. */
3610 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3611 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3613 omega_free_problem (pb);
3617 /* Omega expects the protected variables (those that have to be kept
3618 after elimination) to appear first in the constraint system.
3619 These variables are the distance variables. In the following
3620 initialization we declare NB_LOOPS safe variables, and the total
3621 number of variables for the constraint system is 2*NB_LOOPS. */
3622 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3623 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3625 omega_free_problem (pb);
3627 /* Stop computation if not decidable, or no dependence. */
3628 if (res == false || *maybe_dependent == false)
3631 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3632 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3634 omega_free_problem (pb);
3639 /* Return true when DDR contains the same information as that stored
3640 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3643 ddr_consistent_p (FILE *file,
3644 struct data_dependence_relation *ddr,
3645 VEC (lambda_vector, heap) *dist_vects,
3646 VEC (lambda_vector, heap) *dir_vects)
3650 /* If dump_file is set, output there. */
3651 if (dump_file && (dump_flags & TDF_DETAILS))
3654 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3656 lambda_vector b_dist_v;
3657 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3658 VEC_length (lambda_vector, dist_vects),
3659 DDR_NUM_DIST_VECTS (ddr));
3661 fprintf (file, "Banerjee dist vectors:\n");
3662 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3663 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3665 fprintf (file, "Omega dist vectors:\n");
3666 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3667 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3669 fprintf (file, "data dependence relation:\n");
3670 dump_data_dependence_relation (file, ddr);
3672 fprintf (file, ")\n");
3676 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3678 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3679 VEC_length (lambda_vector, dir_vects),
3680 DDR_NUM_DIR_VECTS (ddr));
3684 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3686 lambda_vector a_dist_v;
3687 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3689 /* Distance vectors are not ordered in the same way in the DDR
3690 and in the DIST_VECTS: search for a matching vector. */
3691 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3692 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3695 if (j == VEC_length (lambda_vector, dist_vects))
3697 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3698 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3699 fprintf (file, "not found in Omega dist vectors:\n");
3700 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3701 fprintf (file, "data dependence relation:\n");
3702 dump_data_dependence_relation (file, ddr);
3703 fprintf (file, ")\n");
3707 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3709 lambda_vector a_dir_v;
3710 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3712 /* Direction vectors are not ordered in the same way in the DDR
3713 and in the DIR_VECTS: search for a matching vector. */
3714 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3715 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3718 if (j == VEC_length (lambda_vector, dist_vects))
3720 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3721 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3722 fprintf (file, "not found in Omega dir vectors:\n");
3723 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3724 fprintf (file, "data dependence relation:\n");
3725 dump_data_dependence_relation (file, ddr);
3726 fprintf (file, ")\n");
3733 /* This computes the affine dependence relation between A and B with
3734 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3735 independence between two accesses, while CHREC_DONT_KNOW is used
3736 for representing the unknown relation.
3738 Note that it is possible to stop the computation of the dependence
3739 relation the first time we detect a CHREC_KNOWN element for a given
3743 compute_affine_dependence (struct data_dependence_relation *ddr,
3744 struct loop *loop_nest)
3746 struct data_reference *dra = DDR_A (ddr);
3747 struct data_reference *drb = DDR_B (ddr);
3749 if (dump_file && (dump_flags & TDF_DETAILS))
3751 fprintf (dump_file, "(compute_affine_dependence\n");
3752 fprintf (dump_file, " (stmt_a = \n");
3753 print_generic_expr (dump_file, DR_STMT (dra), 0);
3754 fprintf (dump_file, ")\n (stmt_b = \n");
3755 print_generic_expr (dump_file, DR_STMT (drb), 0);
3756 fprintf (dump_file, ")\n");
3759 /* Analyze only when the dependence relation is not yet known. */
3760 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3762 dependence_stats.num_dependence_tests++;
3764 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3765 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3767 if (flag_check_data_deps)
3769 /* Compute the dependences using the first algorithm. */
3770 subscript_dependence_tester (ddr, loop_nest);
3772 if (dump_file && (dump_flags & TDF_DETAILS))
3774 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3775 dump_data_dependence_relation (dump_file, ddr);
3778 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3780 bool maybe_dependent;
3781 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3783 /* Save the result of the first DD analyzer. */
3784 dist_vects = DDR_DIST_VECTS (ddr);
3785 dir_vects = DDR_DIR_VECTS (ddr);
3787 /* Reset the information. */
3788 DDR_DIST_VECTS (ddr) = NULL;
3789 DDR_DIR_VECTS (ddr) = NULL;
3791 /* Compute the same information using Omega. */
3792 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3793 goto csys_dont_know;
3795 if (dump_file && (dump_flags & TDF_DETAILS))
3797 fprintf (dump_file, "Omega Analyzer\n");
3798 dump_data_dependence_relation (dump_file, ddr);
3801 /* Check that we get the same information. */
3802 if (maybe_dependent)
3803 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3808 subscript_dependence_tester (ddr, loop_nest);
3811 /* As a last case, if the dependence cannot be determined, or if
3812 the dependence is considered too difficult to determine, answer
3817 dependence_stats.num_dependence_undetermined++;
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3821 fprintf (dump_file, "Data ref a:\n");
3822 dump_data_reference (dump_file, dra);
3823 fprintf (dump_file, "Data ref b:\n");
3824 dump_data_reference (dump_file, drb);
3825 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3827 finalize_ddr_dependent (ddr, chrec_dont_know);
3831 if (dump_file && (dump_flags & TDF_DETAILS))
3832 fprintf (dump_file, ")\n");
3835 /* This computes the dependence relation for the same data
3836 reference into DDR. */
3839 compute_self_dependence (struct data_dependence_relation *ddr)
3842 struct subscript *subscript;
3844 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3847 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3850 /* The accessed index overlaps for each iteration. */
3851 SUB_CONFLICTS_IN_A (subscript)
3852 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3853 SUB_CONFLICTS_IN_B (subscript)
3854 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3855 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3858 /* The distance vector is the zero vector. */
3859 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3860 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3863 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3864 the data references in DATAREFS, in the LOOP_NEST. When
3865 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3869 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3870 VEC (ddr_p, heap) **dependence_relations,
3871 VEC (loop_p, heap) *loop_nest,
3872 bool compute_self_and_rr)
3874 struct data_dependence_relation *ddr;
3875 struct data_reference *a, *b;
3878 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3879 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3880 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3882 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3883 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3884 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3887 if (compute_self_and_rr)
3888 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3890 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3891 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3892 compute_self_dependence (ddr);
3896 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3897 true if STMT clobbers memory, false otherwise. */
3900 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3902 bool clobbers_memory = false;
3904 tree *op0, *op1, call;
3908 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3909 Calls have side-effects, except those to const or pure
3911 call = get_call_expr_in (stmt);
3913 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3914 || (TREE_CODE (stmt) == ASM_EXPR
3915 && ASM_VOLATILE_P (stmt)))
3916 clobbers_memory = true;
3918 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3919 return clobbers_memory;
3921 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3923 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3924 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3927 || REFERENCE_CLASS_P (*op1))
3929 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3931 ref->is_read = true;
3935 || REFERENCE_CLASS_P (*op0))
3937 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3939 ref->is_read = false;
3945 unsigned i, n = call_expr_nargs (call);
3947 for (i = 0; i < n; i++)
3949 op0 = &CALL_EXPR_ARG (call, i);
3952 || REFERENCE_CLASS_P (*op0))
3954 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3956 ref->is_read = true;
3961 return clobbers_memory;
3964 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3965 reference, returns false, otherwise returns true. NEST is the outermost
3966 loop of the loop nest in that the references should be analyzed. */
3969 find_data_references_in_stmt (struct loop *nest, tree stmt,
3970 VEC (data_reference_p, heap) **datarefs)
3973 VEC (data_ref_loc, heap) *references;
3976 data_reference_p dr;
3978 if (get_references_in_stmt (stmt, &references))
3980 VEC_free (data_ref_loc, heap, references);
3984 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
3986 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
3987 gcc_assert (dr != NULL);
3989 /* FIXME -- data dependence analysis does not work correctly for objects with
3990 invariant addresses. Let us fail here until the problem is fixed. */
3991 if (dr_address_invariant_p (dr))
3994 if (dump_file && (dump_flags & TDF_DETAILS))
3995 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4000 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4002 VEC_free (data_ref_loc, heap, references);
4006 /* Search the data references in LOOP, and record the information into
4007 DATAREFS. Returns chrec_dont_know when failing to analyze a
4008 difficult case, returns NULL_TREE otherwise.
4010 TODO: This function should be made smarter so that it can handle address
4011 arithmetic as if they were array accesses, etc. */
4014 find_data_references_in_loop (struct loop *loop,
4015 VEC (data_reference_p, heap) **datarefs)
4017 basic_block bb, *bbs;
4019 block_stmt_iterator bsi;
4021 bbs = get_loop_body_in_dom_order (loop);
4023 for (i = 0; i < loop->num_nodes; i++)
4027 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4029 tree stmt = bsi_stmt (bsi);
4031 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4033 struct data_reference *res;
4034 res = XCNEW (struct data_reference);
4035 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4038 return chrec_dont_know;
4047 /* Recursive helper function. */
4050 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4052 /* Inner loops of the nest should not contain siblings. Example:
4053 when there are two consecutive loops,
4064 the dependence relation cannot be captured by the distance
4069 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4071 return find_loop_nest_1 (loop->inner, loop_nest);
4075 /* Return false when the LOOP is not well nested. Otherwise return
4076 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4077 contain the loops from the outermost to the innermost, as they will
4078 appear in the classic distance vector. */
4081 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4083 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4085 return find_loop_nest_1 (loop->inner, loop_nest);
4089 /* Given a loop nest LOOP, the following vectors are returned:
4090 DATAREFS is initialized to all the array elements contained in this loop,
4091 DEPENDENCE_RELATIONS contains the relations between the data references.
4092 Compute read-read and self relations if
4093 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4096 compute_data_dependences_for_loop (struct loop *loop,
4097 bool compute_self_and_read_read_dependences,
4098 VEC (data_reference_p, heap) **datarefs,
4099 VEC (ddr_p, heap) **dependence_relations)
4101 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4103 memset (&dependence_stats, 0, sizeof (dependence_stats));
4105 /* If the loop nest is not well formed, or one of the data references
4106 is not computable, give up without spending time to compute other
4109 || !find_loop_nest (loop, &vloops)
4110 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4112 struct data_dependence_relation *ddr;
4114 /* Insert a single relation into dependence_relations:
4116 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4117 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4120 compute_all_dependences (*datarefs, dependence_relations, vloops,
4121 compute_self_and_read_read_dependences);
4123 if (dump_file && (dump_flags & TDF_STATS))
4125 fprintf (dump_file, "Dependence tester statistics:\n");
4127 fprintf (dump_file, "Number of dependence tests: %d\n",
4128 dependence_stats.num_dependence_tests);
4129 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4130 dependence_stats.num_dependence_dependent);
4131 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4132 dependence_stats.num_dependence_independent);
4133 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4134 dependence_stats.num_dependence_undetermined);
4136 fprintf (dump_file, "Number of subscript tests: %d\n",
4137 dependence_stats.num_subscript_tests);
4138 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4139 dependence_stats.num_subscript_undetermined);
4140 fprintf (dump_file, "Number of same subscript function: %d\n",
4141 dependence_stats.num_same_subscript_function);
4143 fprintf (dump_file, "Number of ziv tests: %d\n",
4144 dependence_stats.num_ziv);
4145 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4146 dependence_stats.num_ziv_dependent);
4147 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4148 dependence_stats.num_ziv_independent);
4149 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4150 dependence_stats.num_ziv_unimplemented);
4152 fprintf (dump_file, "Number of siv tests: %d\n",
4153 dependence_stats.num_siv);
4154 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4155 dependence_stats.num_siv_dependent);
4156 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4157 dependence_stats.num_siv_independent);
4158 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4159 dependence_stats.num_siv_unimplemented);
4161 fprintf (dump_file, "Number of miv tests: %d\n",
4162 dependence_stats.num_miv);
4163 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4164 dependence_stats.num_miv_dependent);
4165 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4166 dependence_stats.num_miv_independent);
4167 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4168 dependence_stats.num_miv_unimplemented);
4172 /* Entry point (for testing only). Analyze all the data references
4173 and the dependence relations in LOOP.
4175 The data references are computed first.
4177 A relation on these nodes is represented by a complete graph. Some
4178 of the relations could be of no interest, thus the relations can be
4181 In the following function we compute all the relations. This is
4182 just a first implementation that is here for:
4183 - for showing how to ask for the dependence relations,
4184 - for the debugging the whole dependence graph,
4185 - for the dejagnu testcases and maintenance.
4187 It is possible to ask only for a part of the graph, avoiding to
4188 compute the whole dependence graph. The computed dependences are
4189 stored in a knowledge base (KB) such that later queries don't
4190 recompute the same information. The implementation of this KB is
4191 transparent to the optimizer, and thus the KB can be changed with a
4192 more efficient implementation, or the KB could be disabled. */
4194 analyze_all_data_dependences (struct loop *loop)
4197 int nb_data_refs = 10;
4198 VEC (data_reference_p, heap) *datarefs =
4199 VEC_alloc (data_reference_p, heap, nb_data_refs);
4200 VEC (ddr_p, heap) *dependence_relations =
4201 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4203 /* Compute DDs on the whole function. */
4204 compute_data_dependences_for_loop (loop, false, &datarefs,
4205 &dependence_relations);
4209 dump_data_dependence_relations (dump_file, dependence_relations);
4210 fprintf (dump_file, "\n\n");
4212 if (dump_flags & TDF_DETAILS)
4213 dump_dist_dir_vectors (dump_file, dependence_relations);
4215 if (dump_flags & TDF_STATS)
4217 unsigned nb_top_relations = 0;
4218 unsigned nb_bot_relations = 0;
4219 unsigned nb_basename_differ = 0;
4220 unsigned nb_chrec_relations = 0;
4221 struct data_dependence_relation *ddr;
4223 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4225 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4228 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4230 struct data_reference *a = DDR_A (ddr);
4231 struct data_reference *b = DDR_B (ddr);
4233 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4234 nb_basename_differ++;
4240 nb_chrec_relations++;
4243 gather_stats_on_scev_database ();
4247 free_dependence_relations (dependence_relations);
4248 free_data_refs (datarefs);
4251 /* Computes all the data dependences and check that the results of
4252 several analyzers are the same. */
4255 tree_check_data_deps (void)
4258 struct loop *loop_nest;
4260 FOR_EACH_LOOP (li, loop_nest, 0)
4261 analyze_all_data_dependences (loop_nest);
4264 /* Free the memory used by a data dependence relation DDR. */
4267 free_dependence_relation (struct data_dependence_relation *ddr)
4272 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_SUBSCRIPTS (ddr))
4273 free_subscripts (DDR_SUBSCRIPTS (ddr));
4278 /* Free the memory used by the data dependence relations from
4279 DEPENDENCE_RELATIONS. */
4282 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4285 struct data_dependence_relation *ddr;
4286 VEC (loop_p, heap) *loop_nest = NULL;
4288 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4292 if (loop_nest == NULL)
4293 loop_nest = DDR_LOOP_NEST (ddr);
4295 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4296 || DDR_LOOP_NEST (ddr) == loop_nest);
4297 free_dependence_relation (ddr);
4301 VEC_free (loop_p, heap, loop_nest);
4302 VEC_free (ddr_p, heap, dependence_relations);
4305 /* Free the memory used by the data references from DATAREFS. */
4308 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4311 struct data_reference *dr;
4313 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4315 VEC_free (data_reference_p, heap, datarefs);
4320 /* Returns the index of STMT in RDG. */
4323 find_vertex_for_stmt (struct graph *rdg, tree stmt)
4327 for (i = 0; i < rdg->n_vertices; i++)
4328 if (RDGV_STMT (&(rdg->vertices[i])) == stmt)
4335 /* Creates an edge in RDG for each distance vector from DDR. */
4338 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4341 data_reference_p dra;
4342 data_reference_p drb;
4343 struct graph_edge *e;
4345 if (DDR_REVERSED_P (ddr))
4356 va = find_vertex_for_stmt (rdg, DR_STMT (dra));
4357 vb = find_vertex_for_stmt (rdg, DR_STMT (drb));
4359 e = add_edge (rdg, va, vb);
4360 e->data = XNEW (struct rdg_edge);
4362 /* Determines the type of the data dependence. */
4363 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4364 RDGE_TYPE (e) = input_dd;
4365 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4366 RDGE_TYPE (e) = output_dd;
4367 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4368 RDGE_TYPE (e) = flow_dd;
4369 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4370 RDGE_TYPE (e) = anti_dd;
4373 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4374 the index of DEF in RDG. */
4377 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4379 use_operand_p imm_use_p;
4380 imm_use_iterator iterator;
4382 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4384 int use = find_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4385 struct graph_edge *e = add_edge (rdg, idef, use);
4387 e->data = XNEW (struct rdg_edge);
4388 RDGE_TYPE (e) = flow_dd;
4392 /* Creates the edges of the reduced dependence graph RDG. */
4395 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4398 struct data_dependence_relation *ddr;
4399 def_operand_p def_p;
4402 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4403 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4404 create_rdg_edge_for_ddr (rdg, ddr);
4406 for (i = 0; i < rdg->n_vertices; i++)
4407 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDGV_STMT (&(rdg->vertices[i])),
4408 iter, SSA_OP_ALL_DEFS)
4409 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4412 /* Build the vertices of the reduced dependence graph RDG. */
4415 create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
4420 for (i = 0; VEC_iterate (tree, stmts, i, s); i++)
4422 struct vertex *v = &(rdg->vertices[i]);
4424 v->data = XNEW (struct rdg_vertex);
4429 /* Initialize STMTS with all the statements and PHI nodes of LOOP. */
4432 stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4435 basic_block *bbs = get_loop_body_in_dom_order (loop);
4437 for (i = 0; i < loop->num_nodes; i++)
4440 basic_block bb = bbs[i];
4441 block_stmt_iterator bsi;
4443 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4444 VEC_safe_push (tree, heap, *stmts, phi);
4446 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4447 VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
4453 /* Returns true when all the dependences are computable. */
4456 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4461 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4462 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4468 /* Build a Reduced Dependence Graph with one vertex per statement of the
4469 loop nest and one edge per data dependence or scalar dependence. */
4472 build_rdg (struct loop *loop)
4474 int nb_data_refs = 10;
4475 struct graph *rdg = NULL;
4476 VEC (ddr_p, heap) *dependence_relations;
4477 VEC (data_reference_p, heap) *datarefs;
4478 VEC (tree, heap) *stmts = VEC_alloc (tree, heap, 10);
4480 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4481 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4482 compute_data_dependences_for_loop (loop,
4485 &dependence_relations);
4487 if (!known_dependences_p (dependence_relations))
4490 stmts_from_loop (loop, &stmts);
4491 rdg = new_graph (VEC_length (tree, stmts));
4492 create_rdg_vertices (rdg, stmts);
4493 create_rdg_edges (rdg, dependence_relations);
4496 free_dependence_relations (dependence_relations);
4497 free_data_refs (datarefs);
4498 VEC_free (tree, heap, stmts);