/* Loop distribution.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
55 /* If bit I is not set, it means that this node represents an
56 operation that has already been performed, and that should not be
57 performed again. This is the subgraph of remaining important
58 computations that is passed to the DFS algorithm for avoiding to
59 include several times the same stores in different loops. */
60 static bitmap remaining_stmts;
62 /* A node of the RDG is marked in this bitmap when it has as a
63 predecessor a node that writes to memory. */
64 static bitmap upstream_mem_writes;
66 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
70 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
72 imm_use_iterator imm_iter;
75 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
76 if (loop != loop_containing_stmt (USE_STMT (use_p)))
82 /* Returns true when STMT defines a scalar variable used after the
86 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
91 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
92 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
98 /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
102 update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
105 gimple_stmt_iterator si_new, si_orig;
106 edge orig_loop_latch = loop_latch_edge (orig_loop);
107 edge orig_entry_e = loop_preheader_edge (orig_loop);
108 edge new_loop_entry_e = loop_preheader_edge (new_loop);
110 /* Scan the phis in the headers of the old and new loops
111 (they are organized in exactly the same order). */
112 for (si_new = gsi_start_phis (new_loop->header),
113 si_orig = gsi_start_phis (orig_loop->header);
114 !gsi_end_p (si_new) && !gsi_end_p (si_orig);
115 gsi_next (&si_new), gsi_next (&si_orig))
118 source_location locus;
119 gimple phi_new = gsi_stmt (si_new);
120 gimple phi_orig = gsi_stmt (si_orig);
122 /* Add the first phi argument for the phi in NEW_LOOP (the one
123 associated with the entry of NEW_LOOP) */
124 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
125 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
126 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
128 /* Add the second phi argument for the phi in NEW_LOOP (the one
129 associated with the latch of NEW_LOOP) */
130 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
131 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
133 if (TREE_CODE (def) == SSA_NAME)
135 new_ssa_name = get_current_def (def);
138 /* This only happens if there are no definitions inside the
139 loop. Use the the invariant in the new loop as is. */
143 /* Could be an integer. */
146 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
150 /* Return a copy of LOOP placed before LOOP. */
153 copy_loop_before (struct loop *loop)
156 edge preheader = loop_preheader_edge (loop);
158 if (!single_exit (loop))
161 initialize_original_copy_tables ();
162 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
163 free_original_copy_tables ();
168 update_phis_for_loop_copy (loop, res);
169 rename_variables_in_loop (res);
174 /* Creates an empty basic block after LOOP. */
177 create_bb_after_loop (struct loop *loop)
179 edge exit = single_exit (loop);
187 /* Generate code for PARTITION from the code in LOOP. The loop is
188 copied when COPY_P is true. All the statements not flagged in the
189 PARTITION bitmap are removed from the loop or from its copy. The
190 statements are indexed in sequence inside a basic block, and the
191 basic blocks of a loop are taken in dom order. Returns true when
192 the code gen succeeded. */
195 generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
198 gimple_stmt_iterator bsi;
203 loop = copy_loop_before (loop);
204 create_preheader (loop, CP_SIMPLE_PREHEADERS);
205 create_bb_after_loop (loop);
211 /* Remove stmts not in the PARTITION bitmap. The order in which we
212 visit the phi nodes and the statements is exactly as in
214 bbs = get_loop_body_in_dom_order (loop);
216 if (MAY_HAVE_DEBUG_STMTS)
217 for (x = 0, i = 0; i < loop->num_nodes; i++)
219 basic_block bb = bbs[i];
221 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
222 if (!bitmap_bit_p (partition, x++))
223 reset_debug_uses (gsi_stmt (bsi));
225 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
227 gimple stmt = gsi_stmt (bsi);
228 if (gimple_code (stmt) != GIMPLE_LABEL
229 && !is_gimple_debug (stmt)
230 && !bitmap_bit_p (partition, x++))
231 reset_debug_uses (stmt);
235 for (x = 0, i = 0; i < loop->num_nodes; i++)
237 basic_block bb = bbs[i];
239 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
240 if (!bitmap_bit_p (partition, x++))
242 gimple phi = gsi_stmt (bsi);
243 if (!is_gimple_reg (gimple_phi_result (phi)))
244 mark_virtual_phi_result_for_renaming (phi);
245 remove_phi_node (&bsi, true);
250 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
252 gimple stmt = gsi_stmt (bsi);
253 if (gimple_code (stmt) != GIMPLE_LABEL
254 && !is_gimple_debug (stmt)
255 && !bitmap_bit_p (partition, x++))
257 unlink_stmt_vdef (stmt);
258 gsi_remove (&bsi, true);
270 /* Build the size argument for a memset call. */
273 build_size_arg_loc (location_t loc, tree nb_iter, tree op,
274 gimple_seq *stmt_list)
277 tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
278 fold_convert_loc (loc, size_type_node, nb_iter),
279 fold_convert_loc (loc, size_type_node,
280 TYPE_SIZE_UNIT (TREE_TYPE (op))));
281 x = force_gimple_operand (x, &stmts, true, NULL);
282 gimple_seq_add_seq (stmt_list, stmts);
287 /* Generate a call to memset. Return true when the operation succeeded. */
290 generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
291 gimple_stmt_iterator bsi)
293 tree addr_base, nb_bytes;
295 gimple_seq stmt_list = NULL, stmts;
298 struct data_reference *dr = XCNEW (struct data_reference);
299 location_t loc = gimple_location (stmt);
303 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
304 gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
306 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
307 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
308 addr_base = fold_convert_loc (loc, sizetype, addr_base);
310 /* Test for a negative stride, iterating over every element. */
311 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
313 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
314 fold_convert_loc (loc, sizetype, nb_bytes));
315 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
316 TYPE_SIZE_UNIT (TREE_TYPE (op0)));
319 addr_base = fold_build_pointer_plus_loc (loc,
320 DR_BASE_ADDRESS (dr), addr_base);
321 mem = force_gimple_operand (addr_base, &stmts, true, NULL);
322 gimple_seq_add_seq (&stmt_list, stmts);
324 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
325 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
326 gimple_seq_add_stmt (&stmt_list, fn_call);
327 gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
329 if (dump_file && (dump_flags & TDF_DETAILS))
330 fprintf (dump_file, "generated memset zero\n");
335 /* Tries to generate a builtin function for the instructions of LOOP
336 pointed to by the bits set in PARTITION. Returns true when the
337 operation succeeded. */
340 generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
346 gimple_stmt_iterator bsi;
347 tree nb_iter = number_of_exit_cond_executions (loop);
349 if (!nb_iter || nb_iter == chrec_dont_know)
352 bbs = get_loop_body_in_dom_order (loop);
354 for (i = 0; i < loop->num_nodes; i++)
356 basic_block bb = bbs[i];
358 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
361 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
363 gimple stmt = gsi_stmt (bsi);
365 if (gimple_code (stmt) == GIMPLE_LABEL
366 || is_gimple_debug (stmt))
369 if (!bitmap_bit_p (partition, x++))
372 /* If the stmt has uses outside of the loop fail.
373 ??? If the stmt is generated in another partition that
374 is not created as builtin we can ignore this. */
375 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
377 if (dump_file && (dump_flags & TDF_DETAILS))
378 fprintf (dump_file, "not generating builtin, partition has "
379 "scalar uses outside of the loop\n");
383 if (is_gimple_assign (stmt)
384 && !is_gimple_reg (gimple_assign_lhs (stmt)))
386 /* Don't generate the builtins when there are more than
392 if (bb == loop->latch)
393 nb_iter = number_of_latch_executions (loop);
398 if (!stmt_with_adjacent_zero_store_dr_p (write))
401 /* The new statements will be placed before LOOP. */
402 bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
403 generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
406 /* If this is the last partition for which we generate code, we have
407 to destroy the loop. */
410 unsigned nbbs = loop->num_nodes;
411 edge exit = single_exit (loop);
412 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
413 redirect_edge_pred (exit, src);
414 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
415 exit->flags |= EDGE_FALLTHRU;
416 cancel_loop_tree (loop);
417 rescan_loop_exit (exit, false, true);
419 for (i = 0; i < nbbs; i++)
420 delete_basic_block (bbs[i]);
422 set_immediate_dominator (CDI_DOMINATORS, dest,
423 recompute_dominator (CDI_DOMINATORS, dest));
431 /* Generates code for PARTITION. For simple loops, this function can
432 generate a built-in. */
435 generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
437 if (generate_builtin (loop, partition, copy_p))
440 return generate_loops_for_partition (loop, partition, copy_p);
444 /* Returns true if the node V of RDG cannot be recomputed. */
447 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
449 if (RDG_MEM_WRITE_STMT (rdg, v))
455 /* Returns true when the vertex V has already been generated in the
456 current partition (V is in PROCESSED), or when V belongs to another
457 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
460 already_processed_vertex_p (bitmap processed, int v)
462 return (bitmap_bit_p (processed, v)
463 || !bitmap_bit_p (remaining_stmts, v));
466 /* Returns NULL when there is no anti-dependence among the successors
467 of vertex V, otherwise returns the edge with the anti-dep. */
469 static struct graph_edge *
470 has_anti_dependence (struct vertex *v)
472 struct graph_edge *e;
475 for (e = v->succ; e; e = e->succ_next)
476 if (RDGE_TYPE (e) == anti_dd)
482 /* Returns true when V has an anti-dependence edge among its successors. */
485 predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
487 struct graph_edge *e;
490 for (e = v->pred; e; e = e->pred_next)
491 if (bitmap_bit_p (upstream_mem_writes, e->src)
492 /* Don't consider flow channels: a write to memory followed
493 by a read from memory. These channels allow the split of
494 the RDG in different partitions. */
495 && !RDG_MEM_WRITE_STMT (rdg, e->src))
501 /* Initializes the upstream_mem_writes bitmap following the
502 information from RDG. */
505 mark_nodes_having_upstream_mem_writes (struct graph *rdg)
508 bitmap seen = BITMAP_ALLOC (NULL);
510 for (v = rdg->n_vertices - 1; v >= 0; v--)
511 if (!bitmap_bit_p (seen, v))
514 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
516 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
518 FOR_EACH_VEC_ELT (int, nodes, i, x)
520 if (!bitmap_set_bit (seen, x))
523 if (RDG_MEM_WRITE_STMT (rdg, x)
524 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
525 /* In anti dependences the read should occur before
526 the write, this is why both the read and the write
527 should be placed in the same partition. */
528 || has_anti_dependence (&(rdg->vertices[x])))
530 bitmap_set_bit (upstream_mem_writes, x);
534 VEC_free (int, heap, nodes);
538 /* Returns true when vertex u has a memory write node as a predecessor
542 has_upstream_mem_writes (int u)
544 return bitmap_bit_p (upstream_mem_writes, u);
547 static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
550 /* Flag the uses of U stopping following the information from
551 upstream_mem_writes. */
554 rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
555 bitmap processed, bool *part_has_writes)
558 struct vertex *x = &(rdg->vertices[u]);
559 gimple stmt = RDGV_STMT (x);
560 struct graph_edge *anti_dep = has_anti_dependence (x);
562 /* Keep in the same partition the destination of an antidependence,
563 because this is a store to the exact same location. Putting this
564 in another partition is bad for cache locality. */
567 int v = anti_dep->dest;
569 if (!already_processed_vertex_p (processed, v))
570 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
571 processed, part_has_writes);
574 if (gimple_code (stmt) != GIMPLE_PHI)
576 if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
578 tree use = USE_FROM_PTR (use_p);
580 if (TREE_CODE (use) == SSA_NAME)
582 gimple def_stmt = SSA_NAME_DEF_STMT (use);
583 int v = rdg_vertex_for_stmt (rdg, def_stmt);
586 && !already_processed_vertex_p (processed, v))
587 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
588 processed, part_has_writes);
593 if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
595 tree op0 = gimple_assign_lhs (stmt);
597 /* Scalar channels don't have enough space for transmitting data
598 between tasks, unless we add more storage by privatizing. */
599 if (is_gimple_reg (op0))
602 imm_use_iterator iter;
604 FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
606 int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
608 if (!already_processed_vertex_p (processed, v))
609 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
610 processed, part_has_writes);
616 /* Flag V from RDG as part of PARTITION, and also flag its loop number
620 rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
621 bool *part_has_writes)
625 if (!bitmap_set_bit (partition, v))
628 loop = loop_containing_stmt (RDG_STMT (rdg, v));
629 bitmap_set_bit (loops, loop->num);
631 if (rdg_cannot_recompute_vertex_p (rdg, v))
633 *part_has_writes = true;
634 bitmap_clear_bit (remaining_stmts, v);
638 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
639 Also flag their loop number in LOOPS. */
642 rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition,
643 bitmap loops, bitmap processed,
644 bool *part_has_writes)
647 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
650 bitmap_set_bit (processed, v);
651 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
652 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
653 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
655 FOR_EACH_VEC_ELT (int, nodes, i, x)
656 if (!already_processed_vertex_p (processed, x))
657 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
660 VEC_free (int, heap, nodes);
663 /* Initialize CONDS with all the condition statements from the basic
667 collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
671 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
673 FOR_EACH_VEC_ELT (edge, exits, i, e)
675 gimple cond = last_stmt (e->src);
678 VEC_safe_push (gimple, heap, *conds, cond);
681 VEC_free (edge, heap, exits);
684 /* Add to PARTITION all the exit condition statements for LOOPS
685 together with all their dependent statements determined from
689 rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
690 bitmap processed, bool *part_has_writes)
694 VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);
696 EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
697 collect_condition_stmts (get_loop (i), &conds);
699 while (!VEC_empty (gimple, conds))
701 gimple cond = VEC_pop (gimple, conds);
702 int v = rdg_vertex_for_stmt (rdg, cond);
703 bitmap new_loops = BITMAP_ALLOC (NULL);
705 if (!already_processed_vertex_p (processed, v))
706 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
709 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
710 if (bitmap_set_bit (loops, i))
711 collect_condition_stmts (get_loop (i), &conds);
713 BITMAP_FREE (new_loops);
716 VEC_free (gimple, heap, conds);
719 /* Returns a bitmap in which all the statements needed for computing
720 the strongly connected component C of the RDG are flagged, also
721 including the loop exit conditions. */
724 build_rdg_partition_for_component (struct graph *rdg, rdgc c,
725 bool *part_has_writes)
728 bitmap partition = BITMAP_ALLOC (NULL);
729 bitmap loops = BITMAP_ALLOC (NULL);
730 bitmap processed = BITMAP_ALLOC (NULL);
732 FOR_EACH_VEC_ELT (int, c->vertices, i, v)
733 if (!already_processed_vertex_p (processed, v))
734 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
737 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
739 BITMAP_FREE (processed);
744 /* Free memory for COMPONENTS. */
747 free_rdg_components (VEC (rdgc, heap) *components)
752 FOR_EACH_VEC_ELT (rdgc, components, i, x)
754 VEC_free (int, heap, x->vertices);
758 VEC_free (rdgc, heap, components);
761 /* Build the COMPONENTS vector with the strongly connected components
762 of RDG in which the STARTING_VERTICES occur. */
765 rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
766 VEC (rdgc, heap) **components)
769 bitmap saved_components = BITMAP_ALLOC (NULL);
770 int n_components = graphds_scc (rdg, NULL);
771 VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);
773 for (i = 0; i < n_components; i++)
774 all_components[i] = VEC_alloc (int, heap, 3);
776 for (i = 0; i < rdg->n_vertices; i++)
777 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
779 FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
781 int c = rdg->vertices[v].component;
783 if (bitmap_set_bit (saved_components, c))
785 rdgc x = XCNEW (struct rdg_component);
787 x->vertices = all_components[c];
789 VEC_safe_push (rdgc, heap, *components, x);
793 for (i = 0; i < n_components; i++)
794 if (!bitmap_bit_p (saved_components, i))
795 VEC_free (int, heap, all_components[i]);
797 free (all_components);
798 BITMAP_FREE (saved_components);
801 /* Returns true when it is possible to generate a builtin pattern for
802 the PARTITION of RDG. For the moment we detect only the memset
806 can_generate_builtin (struct graph *rdg, bitmap partition)
814 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
815 if (RDG_MEM_READS_STMT (rdg, i))
817 else if (RDG_MEM_WRITE_STMT (rdg, i))
819 gimple stmt = RDG_STMT (rdg, i);
821 if (!gimple_has_volatile_ops (stmt)
822 && stmt_with_adjacent_zero_store_dr_p (stmt))
826 return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
829 /* Returns true when PARTITION1 and PARTITION2 have similar memory
833 similar_memory_accesses (struct graph *rdg, bitmap partition1,
837 bitmap_iterator bi, bj;
839 EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
840 if (RDG_MEM_WRITE_STMT (rdg, i)
841 || RDG_MEM_READS_STMT (rdg, i))
842 EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
843 if (RDG_MEM_WRITE_STMT (rdg, j)
844 || RDG_MEM_READS_STMT (rdg, j))
845 if (rdg_has_similar_memory_accesses (rdg, i, j))
851 /* Fuse all the partitions from PARTITIONS that contain similar memory
852 references, i.e., we're taking care of cache locality. This
853 function does not fuse those partitions that contain patterns that
854 can be code generated with builtins. */
857 fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
858 VEC (bitmap, heap) **partitions)
861 bitmap partition1, partition2;
863 FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
864 if (!can_generate_builtin (rdg, partition1))
865 FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
867 && !can_generate_builtin (rdg, partition2)
868 && similar_memory_accesses (rdg, partition1, partition2))
870 bitmap_ior_into (partition1, partition2);
871 VEC_ordered_remove (bitmap, *partitions, p2);
876 /* Aggregate several components into a useful partition that is
877 registered in the PARTITIONS vector. Partitions will be
878 distributed in different loops. */
881 rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
882 VEC (int, heap) **other_stores,
883 VEC (bitmap, heap) **partitions, bitmap processed)
887 bitmap partition = BITMAP_ALLOC (NULL);
889 FOR_EACH_VEC_ELT (rdgc, components, i, x)
892 bool part_has_writes = false;
893 int v = VEC_index (int, x->vertices, 0);
895 if (bitmap_bit_p (processed, v))
898 np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
899 bitmap_ior_into (partition, np);
900 bitmap_ior_into (processed, np);
905 if (dump_file && (dump_flags & TDF_DETAILS))
907 fprintf (dump_file, "ldist useful partition:\n");
908 dump_bitmap (dump_file, partition);
911 VEC_safe_push (bitmap, heap, *partitions, partition);
912 partition = BITMAP_ALLOC (NULL);
916 /* Add the nodes from the RDG that were not marked as processed, and
917 that are used outside the current loop. These are scalar
918 computations that are not yet part of previous partitions. */
919 for (i = 0; i < rdg->n_vertices; i++)
920 if (!bitmap_bit_p (processed, i)
921 && rdg_defs_used_in_other_loops_p (rdg, i))
922 VEC_safe_push (int, heap, *other_stores, i);
924 /* If there are still statements left in the OTHER_STORES array,
925 create other components and partitions with these stores and
926 their dependences. */
927 if (VEC_length (int, *other_stores) > 0)
929 VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
930 VEC (int, heap) *foo = VEC_alloc (int, heap, 3);
932 rdg_build_components (rdg, *other_stores, &comps);
933 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
935 VEC_free (int, heap, foo);
936 free_rdg_components (comps);
939 /* If there is something left in the last partition, save it. */
940 if (bitmap_count_bits (partition) > 0)
941 VEC_safe_push (bitmap, heap, *partitions, partition);
943 BITMAP_FREE (partition);
945 fuse_partitions_with_similar_memory_accesses (rdg, partitions);
948 /* Dump to FILE the PARTITIONS. */
951 dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
956 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
957 debug_bitmap_file (file, partition);
960 /* Debug PARTITIONS. */
961 extern void debug_rdg_partitions (VEC (bitmap, heap) *);
964 debug_rdg_partitions (VEC (bitmap, heap) *partitions)
966 dump_rdg_partitions (stderr, partitions);
969 /* Returns the number of read and write operations in the RDG. */
972 number_of_rw_in_rdg (struct graph *rdg)
976 for (i = 0; i < rdg->n_vertices; i++)
978 if (RDG_MEM_WRITE_STMT (rdg, i))
981 if (RDG_MEM_READS_STMT (rdg, i))
988 /* Returns the number of read and write operations in a PARTITION of
992 number_of_rw_in_partition (struct graph *rdg, bitmap partition)
998 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
1000 if (RDG_MEM_WRITE_STMT (rdg, i))
1003 if (RDG_MEM_READS_STMT (rdg, i))
1010 /* Returns true when one of the PARTITIONS contains all the read or
1011 write operations of RDG. */
1014 partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions)
1018 int nrw = number_of_rw_in_rdg (rdg);
1020 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1021 if (nrw == number_of_rw_in_partition (rdg, partition))
1027 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1028 distributed loops. */
1031 ldist_gen (struct loop *loop, struct graph *rdg,
1032 VEC (int, heap) *starting_vertices)
1035 VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
1036 VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3);
1037 VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
1038 bitmap partition, processed = BITMAP_ALLOC (NULL);
1040 remaining_stmts = BITMAP_ALLOC (NULL);
1041 upstream_mem_writes = BITMAP_ALLOC (NULL);
1043 for (i = 0; i < rdg->n_vertices; i++)
1045 bitmap_set_bit (remaining_stmts, i);
1047 /* Save in OTHER_STORES all the memory writes that are not in
1048 STARTING_VERTICES. */
1049 if (RDG_MEM_WRITE_STMT (rdg, i))
1055 FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
1063 VEC_safe_push (int, heap, other_stores, i);
1067 mark_nodes_having_upstream_mem_writes (rdg);
1068 rdg_build_components (rdg, starting_vertices, &components);
1069 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1071 BITMAP_FREE (processed);
1072 nbp = VEC_length (bitmap, partitions);
1076 && !can_generate_builtin (rdg, VEC_index (bitmap, partitions, 0)))
1078 && partition_contains_all_rw (rdg, partitions)))
1081 if (dump_file && (dump_flags & TDF_DETAILS))
1082 dump_rdg_partitions (dump_file, partitions);
1084 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1085 if (!generate_code_for_partition (loop, partition, i < nbp - 1))
1088 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1089 mark_sym_for_renaming (gimple_vop (cfun));
1090 update_ssa (TODO_update_ssa_only_virtuals);
1094 BITMAP_FREE (remaining_stmts);
1095 BITMAP_FREE (upstream_mem_writes);
1097 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1098 BITMAP_FREE (partition);
1100 VEC_free (int, heap, other_stores);
1101 VEC_free (bitmap, heap, partitions);
1102 free_rdg_components (components);
1106 /* Distributes the code from LOOP in such a way that producer
1107 statements are placed before consumer statements. When STMTS is
1108 NULL, performs the maximal distribution, if STMTS is not NULL,
1109 tries to separate only these statements from the LOOP's body.
1110 Returns the number of distributed loops. */
1113 distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
1119 VEC (int, heap) *vertices;
1120 VEC (ddr_p, heap) *dependence_relations;
1121 VEC (data_reference_p, heap) *datarefs;
1122 VEC (loop_p, heap) *loop_nest;
1124 if (loop->num_nodes > 2)
1126 if (dump_file && (dump_flags & TDF_DETAILS))
1128 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
1134 datarefs = VEC_alloc (data_reference_p, heap, 10);
1135 dependence_relations = VEC_alloc (ddr_p, heap, 100);
1136 loop_nest = VEC_alloc (loop_p, heap, 3);
1137 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
1141 if (dump_file && (dump_flags & TDF_DETAILS))
1143 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1146 free_dependence_relations (dependence_relations);
1147 free_data_refs (datarefs);
1148 VEC_free (loop_p, heap, loop_nest);
1152 vertices = VEC_alloc (int, heap, 3);
1154 if (dump_file && (dump_flags & TDF_DETAILS))
1155 dump_rdg (dump_file, rdg);
1157 FOR_EACH_VEC_ELT (gimple, stmts, i, s)
1159 int v = rdg_vertex_for_stmt (rdg, s);
1163 VEC_safe_push (int, heap, vertices, v);
1165 if (dump_file && (dump_flags & TDF_DETAILS))
1167 "ldist asked to generate code for vertex %d\n", v);
1171 res = ldist_gen (loop, rdg, vertices);
1172 VEC_free (int, heap, vertices);
1174 free_dependence_relations (dependence_relations);
1175 free_data_refs (datarefs);
1176 VEC_free (loop_p, heap, loop_nest);
1180 /* Distribute all loops in the current function. */
1183 tree_loop_distribution (void)
1187 int nb_generated_loops = 0;
1189 FOR_EACH_LOOP (li, loop, 0)
1191 VEC (gimple, heap) *work_list = NULL;
1192 int num = loop->num;
1194 /* If the loop doesn't have a single exit we will fail anyway,
1195 so do that early. */
1196 if (!single_exit (loop))
1199 /* If both flag_tree_loop_distribute_patterns and
1200 flag_tree_loop_distribution are set, then only
1201 distribute_patterns is executed. */
1202 if (flag_tree_loop_distribute_patterns)
1204 /* With the following working list, we're asking
1205 distribute_loop to separate from the rest of the loop the
1206 stores of the form "A[i] = 0". */
1207 stores_zero_from_loop (loop, &work_list);
1209 /* Do nothing if there are no patterns to be distributed. */
1210 if (VEC_length (gimple, work_list) > 0)
1211 nb_generated_loops = distribute_loop (loop, work_list);
1213 else if (flag_tree_loop_distribution)
1215 /* With the following working list, we're asking
1216 distribute_loop to separate the stores of the loop: when
1217 dependences allow, it will end on having one store per
1219 stores_from_loop (loop, &work_list);
1221 /* A simple heuristic for cache locality is to not split
1222 stores to the same array. Without this call, an unrolled
1223 loop would be split into as many loops as unroll factor,
1224 each loop storing in the same array. */
1225 remove_similar_memory_refs (&work_list);
1227 nb_generated_loops = distribute_loop (loop, work_list);
1230 if (dump_file && (dump_flags & TDF_DETAILS))
1232 if (nb_generated_loops > 1)
1233 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1234 num, nb_generated_loops);
1236 fprintf (dump_file, "Loop %d is the same.\n", num);
1239 #ifdef ENABLE_CHECKING
1240 verify_loop_structure ();
1243 VEC_free (gimple, heap, work_list);
1250 gate_tree_loop_distribution (void)
1252 return flag_tree_loop_distribution
1253 || flag_tree_loop_distribute_patterns;
1256 struct gimple_opt_pass pass_loop_distribution =
1261 gate_tree_loop_distribution, /* gate */
1262 tree_loop_distribution, /* execute */
1265 0, /* static_pass_number */
1266 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1267 PROP_cfg | PROP_ssa, /* properties_required */
1268 0, /* properties_provided */
1269 0, /* properties_destroyed */
1270 0, /* todo_flags_start */
1272 | TODO_verify_ssa /* todo_flags_finish */