2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5 and Sebastian Pop <sebastian.pop@amd.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* This pass performs loop distribution: for example, the loop
40 This pass uses an RDG, Reduced Dependence Graph built on top of the
41 data dependence relations. The RDG is then topologically sorted to
42 obtain a map of information producers/consumers based on which it
43 generates the new loops. */
47 #include "coretypes.h"
48 #include "tree-flow.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
55 enum partition_kind { PKIND_NORMAL, PKIND_MEMSET };
/* A partition is a set of RDG statement indices (a bitmap over RDG
   vertex numbers) that will be code generated together, either as a
   separate loop (PKIND_NORMAL) or as a builtin call (PKIND_MEMSET).
   NOTE(review): the listing is elided here — several struct members
   visible elsewhere in this file (e.g. `stmts', `main_stmt') are
   declared in lines missing from this excerpt.  */
57 typedef struct partition_s
60 enum partition_kind kind;
61 /* Main statement a kind != PKIND_NORMAL partition is about. */
/* VEC support so partitions can be collected in heap vectors
   (pre-C++ GCC VEC API).  */
65 DEF_VEC_P (partition_t);
66 DEF_VEC_ALLOC_P (partition_t, heap);
68 /* Allocate and initialize a partition from BITMAP.  STMTS may be
   NULL, in which case a fresh empty bitmap is allocated; when non-NULL
   the partition takes ownership of it (it is freed by
   partition_free).  */
71 partition_alloc (bitmap stmts)
73 partition_t partition = XCNEW (struct partition_s);
74 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
75 partition->kind = PKIND_NORMAL;
/* Release PARTITION: frees the owned statement bitmap (and, in the
   elided continuation, the partition structure itself).  */
82 partition_free (partition_t partition)
84 BITMAP_FREE (partition->stmts);
88 /* Returns true if the partition can be generated as a builtin
   (i.e. its kind is anything other than PKIND_NORMAL).  */
91 partition_builtin_p (partition_t partition)
93 return partition->kind != PKIND_NORMAL;
96 /* If bit I is not set, it means that this node represents an
97 operation that has already been performed, and that should not be
98 performed again. This is the subgraph of remaining important
99 computations that is passed to the DFS algorithm for avoiding to
100 include several times the same stores in different loops. */
/* File-scope state shared by the partition-building routines below;
   allocated and freed in ldist_gen.  */
101 static bitmap remaining_stmts;
103 /* A node of the RDG is marked in this bitmap when it has as a
104 predecessor a node that writes to memory. */
105 static bitmap upstream_mem_writes;
107 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the loop, i.e. when some immediate use of DEF sits in a statement
   whose containing loop is not LOOP.  */
111 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
113 imm_use_iterator imm_iter;
/* Walk all immediate uses of DEF; any use outside LOOP decides.  */
116 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
117 if (loop != loop_containing_stmt (USE_STMT (use_p)))
123 /* Returns true when STMT defines a scalar variable used after the
   loop LOOP: checks every SSA definition of STMT against
   ssa_name_has_uses_outside_loop_p.  */
127 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
132 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
133 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
139 /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
   ORIG_LOOP: for each PHI in the copied header, fill in the two
   incoming arguments (preheader and latch) from the corresponding
   original PHI.  */
143 update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
146 gimple_stmt_iterator si_new, si_orig;
147 edge orig_loop_latch = loop_latch_edge (orig_loop);
148 edge orig_entry_e = loop_preheader_edge (orig_loop);
149 edge new_loop_entry_e = loop_preheader_edge (new_loop);
151 /* Scan the phis in the headers of the old and new loops
152 (they are organized in exactly the same order). */
153 for (si_new = gsi_start_phis (new_loop->header),
154 si_orig = gsi_start_phis (orig_loop->header);
155 !gsi_end_p (si_new) && !gsi_end_p (si_orig);
156 gsi_next (&si_new), gsi_next (&si_orig))
159 source_location locus;
160 gimple phi_new = gsi_stmt (si_new);
161 gimple phi_orig = gsi_stmt (si_orig);
163 /* Add the first phi argument for the phi in NEW_LOOP (the one
164 associated with the entry of NEW_LOOP) */
165 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
166 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
167 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
169 /* Add the second phi argument for the phi in NEW_LOOP (the one
170 associated with the latch of NEW_LOOP) */
171 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
172 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
174 if (TREE_CODE (def) == SSA_NAME)
/* Map the original latch value to its copy in the new loop
   through the current-def table set up by loop duplication.  */
176 new_ssa_name = get_current_def (def);
179 /* This only happens if there are no definitions inside the
180 loop. Use the invariant in the new loop as is. */
184 /* Could be an integer. */
187 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
191 /* Return a copy of LOOP placed before LOOP.  The copy is inserted on
   LOOP's preheader edge; PHI arguments and SSA names in the copy are
   then fixed up.  */
194 copy_loop_before (struct loop *loop)
197 edge preheader = loop_preheader_edge (loop);
/* The copy tables are required by slpeel_* and by get_current_def
   in update_phis_for_loop_copy.  */
199 initialize_original_copy_tables ();
200 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
201 gcc_assert (res != NULL);
202 free_original_copy_tables ();
204 update_phis_for_loop_copy (loop, res);
205 rename_variables_in_loop (res);
210 /* Creates an empty basic block after LOOP by splitting its single
   exit edge.  */
213 create_bb_after_loop (struct loop *loop)
215 edge exit = single_exit (loop);
223 /* Generate code for PARTITION from the code in LOOP. The loop is
224 copied when COPY_P is true. All the statements not flagged in the
225 PARTITION bitmap are removed from the loop or from its copy. The
226 statements are indexed in sequence inside a basic block, and the
227 basic blocks of a loop are taken in dom order. */
230 generate_loops_for_partition (struct loop *loop, partition_t partition,
234 gimple_stmt_iterator bsi;
/* Work on a fresh copy placed before LOOP so the original survives
   for the remaining partitions.  */
239 loop = copy_loop_before (loop);
240 gcc_assert (loop != NULL);
241 create_preheader (loop, CP_SIMPLE_PREHEADERS);
242 create_bb_after_loop (loop);
245 /* Remove stmts not in the PARTITION bitmap. The order in which we
246 visit the phi nodes and the statements is exactly as in
   the bitmap indexing: PHIs first, then statements, bbs in dom order. */
248 bbs = get_loop_body_in_dom_order (loop);
/* First pass: with debug stmts enabled, reset debug uses of the
   statements about to be removed so debug info stays consistent.  */
250 if (MAY_HAVE_DEBUG_STMTS)
251 for (x = 0, i = 0; i < loop->num_nodes; i++)
253 basic_block bb = bbs[i];
255 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
256 if (!bitmap_bit_p (partition->stmts, x++))
257 reset_debug_uses (gsi_stmt (bsi));
259 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
261 gimple stmt = gsi_stmt (bsi);
/* Labels and debug stmts are not indexed in the bitmap.  */
262 if (gimple_code (stmt) != GIMPLE_LABEL
263 && !is_gimple_debug (stmt)
264 && !bitmap_bit_p (partition->stmts, x++))
265 reset_debug_uses (stmt);
/* Second pass: actually delete the statements that are not part of
   this partition, using the same traversal order.  */
269 for (x = 0, i = 0; i < loop->num_nodes; i++)
271 basic_block bb = bbs[i];
273 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
274 if (!bitmap_bit_p (partition->stmts, x++))
276 gimple phi = gsi_stmt (bsi);
/* Virtual PHIs need their result marked for renaming before
   removal to keep the virtual SSA web consistent.  */
277 if (!is_gimple_reg (gimple_phi_result (phi)))
278 mark_virtual_phi_result_for_renaming (phi);
279 remove_phi_node (&bsi, true);
284 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
286 gimple stmt = gsi_stmt (bsi);
287 if (gimple_code (stmt) != GIMPLE_LABEL
288 && !is_gimple_debug (stmt)
289 && !bitmap_bit_p (partition->stmts, x++))
291 unlink_stmt_vdef (stmt);
292 gsi_remove (&bsi, true);
303 /* Build the size argument for a memset call: NB_ITER iterations times
   the unit size of OP's type, computed in size_type_node.  The
   gimplified statements are appended to STMT_LIST; returns the
   resulting operand (return is in the elided continuation).  */
306 build_size_arg_loc (location_t loc, tree nb_iter, tree op,
307 gimple_seq *stmt_list)
310 tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
311 fold_convert_loc (loc, size_type_node, nb_iter),
312 fold_convert_loc (loc, size_type_node,
313 TYPE_SIZE_UNIT (TREE_TYPE (op))));
314 x = force_gimple_operand (x, &stmts, true, NULL);
315 gimple_seq_add_seq (stmt_list, stmts);
320 /* Generate a call to memset for PARTITION in LOOP: replaces a
   "A[i] = 0" style store loop by a single memset of the covered
   region, emitted in the preheader.  Assumes classify_partition has
   set partition->main_stmt to the zero store.  */
323 generate_memset_builtin (struct loop *loop, partition_t partition)
325 gimple_stmt_iterator gsi;
326 gimple stmt, fn_call;
327 tree op0, nb_iter, mem, fn, addr_base, nb_bytes;
328 gimple_seq stmt_list = NULL, stmts;
329 struct data_reference *dr = XCNEW (struct data_reference);
333 stmt = partition->main_stmt;
334 loc = gimple_location (stmt);
335 op0 = gimple_assign_lhs (stmt);
/* A store in the latch runs once per latch execution; otherwise use
   the exit-condition execution count.  */
336 if (gimple_bb (stmt) == loop->latch)
337 nb_iter = number_of_latch_executions (loop);
339 nb_iter = number_of_exit_cond_executions (loop);
341 /* The new statements will be placed before LOOP. */
342 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
/* Recover base/offset/step of the store's data reference; the step
   must equal the element size (unit stride).  */
346 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
347 gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
349 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
350 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
351 addr_base = fold_convert_loc (loc, sizetype, addr_base);
353 /* Test for a negative stride, iterating over every element. */
354 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
/* For a descending access the analyzed base is the highest touched
   element; rewind by the full byte count, then step forward one
   element to get the lowest address written.  */
356 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
357 fold_convert_loc (loc, sizetype, nb_bytes));
358 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
359 TYPE_SIZE_UNIT (TREE_TYPE (op0)));
362 addr_base = fold_build_pointer_plus_loc (loc,
363 DR_BASE_ADDRESS (dr), addr_base);
364 mem = force_gimple_operand (addr_base, &stmts, true, NULL);
365 gimple_seq_add_seq (&stmt_list, stmts);
367 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
368 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
369 gimple_seq_add_stmt (&stmt_list, fn_call);
370 gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING);
372 if (dump_file && (dump_flags & TDF_DETAILS))
373 fprintf (dump_file, "generated memset zero\n");
378 /* Remove and destroy the loop LOOP: reroute the exit edge around the
   loop body, delete all its basic blocks and fix dominators.  */
381 destroy_loop (struct loop *loop)
383 unsigned nbbs = loop->num_nodes;
384 edge exit = single_exit (loop);
385 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
/* Snapshot the body first; the CFG is edited below.  */
389 bbs = get_loop_body_in_dom_order (loop);
/* Make the preheader's predecessor fall through directly to the
   exit destination.  */
391 redirect_edge_pred (exit, src);
392 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
393 exit->flags |= EDGE_FALLTHRU;
394 cancel_loop_tree (loop);
395 rescan_loop_exit (exit, false, true);
397 for (i = 0; i < nbbs; i++)
398 delete_basic_block (bbs[i]);
401 set_immediate_dominator (CDI_DOMINATORS, dest,
402 recompute_dominator (CDI_DOMINATORS, dest));
405 /* Generates code for PARTITION: dispatches on the partition kind,
   either emitting a builtin (memset) or a distributed copy of LOOP
   (copied when COPY_P, i.e. when more partitions follow).  */
408 generate_code_for_partition (struct loop *loop, partition_t partition,
411 switch (partition->kind)
414 generate_memset_builtin (loop, partition);
415 /* If this is the last partition for which we generate code, we have
416 to destroy the loop. */
422 generate_loops_for_partition (loop, partition, copy_p);
431 /* Returns true if the node V of RDG cannot be recomputed: memory
   writes must not be duplicated across partitions.  */
434 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
436 if (RDG_MEM_WRITE_STMT (rdg, v))
442 /* Returns true when the vertex V has already been generated in the
443 current partition (V is in PROCESSED), or when V belongs to another
444 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
447 already_processed_vertex_p (bitmap processed, int v)
449 return (bitmap_bit_p (processed, v)
450 || !bitmap_bit_p (remaining_stmts, v));
453 /* Returns NULL when there is no anti-dependence among the successors
454 of vertex V, otherwise returns the edge with the anti-dep. */
456 static struct graph_edge *
457 has_anti_dependence (struct vertex *v)
459 struct graph_edge *e;
/* Scan the successor edge list for the first anti-dependence.  */
462 for (e = v->succ; e; e = e->succ_next)
463 if (RDGE_TYPE (e) == anti_dd)
469 /* Returns true when some predecessor of V is (transitively) fed by a
   memory write, excluding direct write-then-read flow channels which
   are legitimate partition split points.  */
472 predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
474 struct graph_edge *e;
477 for (e = v->pred; e; e = e->pred_next)
478 if (bitmap_bit_p (upstream_mem_writes, e->src)
479 /* Don't consider flow channels: a write to memory followed
480 by a read from memory. These channels allow the split of
481 the RDG in different partitions. */
482 && !RDG_MEM_WRITE_STMT (rdg, e->src))
488 /* Initializes the upstream_mem_writes bitmap following the
489 information from RDG: a vertex is marked when it writes memory,
   has a marked predecessor, or has an anti-dependence successor.  */
492 mark_nodes_having_upstream_mem_writes (struct graph *rdg)
495 bitmap seen = BITMAP_ALLOC (NULL);
/* Iterate vertices from last to first; DFS from each unvisited one
   so every vertex is classified exactly once.  */
497 for (v = rdg->n_vertices - 1; v >= 0; v--)
498 if (!bitmap_bit_p (seen, v))
501 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
503 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
505 FOR_EACH_VEC_ELT (int, nodes, i, x)
/* bitmap_set_bit returns false if the bit was already set,
   i.e. the vertex was handled by an earlier DFS.  */
507 if (!bitmap_set_bit (seen, x))
510 if (RDG_MEM_WRITE_STMT (rdg, x)
511 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
512 /* In anti dependences the read should occur before
513 the write, this is why both the read and the write
514 should be placed in the same partition. */
515 || has_anti_dependence (&(rdg->vertices[x])))
517 bitmap_set_bit (upstream_mem_writes, x);
521 VEC_free (int, heap, nodes);
525 /* Returns true when vertex u has a memory write node as a predecessor
   (per the upstream_mem_writes bitmap computed above).  */
529 has_upstream_mem_writes (int u)
531 return bitmap_bit_p (upstream_mem_writes, u);
/* Forward declaration: rdg_flag_uses and rdg_flag_vertex_and_dependent
   are mutually recursive.  */
534 static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
535 bitmap, bitmap, bool *);
537 /* Flag the uses of U stopping following the information from
538 upstream_mem_writes: pull into PARTITION the anti-dependence
   destination, the virtual use definition, and (for scalars that sit
   downstream of memory writes) all statements using the result.  */
541 rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
542 bitmap processed, bool *part_has_writes)
545 struct vertex *x = &(rdg->vertices[u]);
546 gimple stmt = RDGV_STMT (x);
547 struct graph_edge *anti_dep = has_anti_dependence (x);
549 /* Keep in the same partition the destination of an antidependence,
550 because this is a store to the exact same location. Putting this
551 in another partition is bad for cache locality. */
554 int v = anti_dep->dest;
556 if (!already_processed_vertex_p (processed, v))
557 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
558 processed, part_has_writes);
561 if (gimple_code (stmt) != GIMPLE_PHI)
/* Follow the virtual use back to its defining statement and keep
   that definition in the same partition.  */
563 if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
565 tree use = USE_FROM_PTR (use_p);
567 if (TREE_CODE (use) == SSA_NAME)
569 gimple def_stmt = SSA_NAME_DEF_STMT (use);
570 int v = rdg_vertex_for_stmt (rdg, def_stmt);
573 && !already_processed_vertex_p (processed, v))
574 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
575 processed, part_has_writes);
580 if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
582 tree op0 = gimple_assign_lhs (stmt);
584 /* Scalar channels don't have enough space for transmitting data
585 between tasks, unless we add more storage by privatizing. */
586 if (is_gimple_reg (op0))
589 imm_use_iterator iter;
591 FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
593 int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
595 if (!already_processed_vertex_p (processed, v))
596 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
597 processed, part_has_writes);
603 /* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  Sets *PART_HAS_WRITES and removes V from remaining_stmts
   when V cannot be recomputed in another partition.  */
607 rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops,
608 bool *part_has_writes)
/* bitmap_set_bit returning false means V was already flagged.  */
612 if (!bitmap_set_bit (partition->stmts, v))
615 loop = loop_containing_stmt (RDG_STMT (rdg, v));
616 bitmap_set_bit (loops, loop->num);
618 if (rdg_cannot_recompute_vertex_p (rdg, v))
620 *part_has_writes = true;
621 bitmap_clear_bit (remaining_stmts, v);
625 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
626 Also flag their loop number in LOOPS. */
629 rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
630 bitmap loops, bitmap processed,
631 bool *part_has_writes)
634 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
637 bitmap_set_bit (processed, v);
638 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
/* DFS restricted to remaining_stmts collects the dependences of V
   that still need to be generated.  */
639 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
640 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
642 FOR_EACH_VEC_ELT (int, nodes, i, x)
643 if (!already_processed_vertex_p (processed, x))
644 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
647 VEC_free (int, heap, nodes);
650 /* Initialize CONDS with all the condition statements from the basic
   blocks that end at LOOP's exit edges.  */
654 collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
658 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
660 FOR_EACH_VEC_ELT (edge, exits, i, e)
662 gimple cond = last_stmt (e->src);
665 VEC_safe_push (gimple, heap, *conds, cond);
668 VEC_free (edge, heap, exits);
671 /* Add to PARTITION all the exit condition statements for LOOPS
672 together with all their dependent statements determined from
   the RDG.  Newly reached loops are added to LOOPS and their exit
   conditions processed in turn (worklist).  */
676 rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
677 bitmap processed, bool *part_has_writes)
681 VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);
/* Seed the worklist with the exit conditions of all loops in LOOPS.  */
683 EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
684 collect_condition_stmts (get_loop (i), &conds);
686 while (!VEC_empty (gimple, conds))
688 gimple cond = VEC_pop (gimple, conds);
689 int v = rdg_vertex_for_stmt (rdg, cond);
690 bitmap new_loops = BITMAP_ALLOC (NULL);
692 if (!already_processed_vertex_p (processed, v))
693 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
/* Any loop reached for the first time contributes its own exit
   conditions to the worklist.  */
696 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
697 if (bitmap_set_bit (loops, i))
698 collect_condition_stmts (get_loop (i), &conds);
700 BITMAP_FREE (new_loops);
703 VEC_free (gimple, heap, conds);
706 /* Returns a bitmap in which all the statements needed for computing
707 the strongly connected component C of the RDG are flagged, also
708 including the loop exit conditions. */
711 build_rdg_partition_for_component (struct graph *rdg, rdgc c,
712 bool *part_has_writes)
715 partition_t partition = partition_alloc (NULL);
716 bitmap loops = BITMAP_ALLOC (NULL);
717 bitmap processed = BITMAP_ALLOC (NULL);
/* Flag every vertex of the component together with its dependences.  */
719 FOR_EACH_VEC_ELT (int, c->vertices, i, v)
720 if (!already_processed_vertex_p (processed, v))
721 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
724 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
726 BITMAP_FREE (processed);
731 /* Free memory for COMPONENTS: each component's vertex vector, the
   component itself (elided), and the vector.  */
734 free_rdg_components (VEC (rdgc, heap) *components)
739 FOR_EACH_VEC_ELT (rdgc, components, i, x)
741 VEC_free (int, heap, x->vertices);
745 VEC_free (rdgc, heap, components);
748 /* Build the COMPONENTS vector with the strongly connected components
749 of RDG in which the STARTING_VERTICES occur. */
752 rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
753 VEC (rdgc, heap) **components)
756 bitmap saved_components = BITMAP_ALLOC (NULL);
/* graphds_scc assigns a component number to every vertex.  */
757 int n_components = graphds_scc (rdg, NULL);
758 VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);
760 for (i = 0; i < n_components; i++)
761 all_components[i] = VEC_alloc (int, heap, 3);
/* Bucket every vertex into its component's vector.  */
763 for (i = 0; i < rdg->n_vertices; i++)
764 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
/* Keep only components that contain a starting vertex, each once.  */
766 FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
768 int c = rdg->vertices[v].component;
770 if (bitmap_set_bit (saved_components, c))
772 rdgc x = XCNEW (struct rdg_component);
774 x->vertices = all_components[c];
776 VEC_safe_push (rdgc, heap, *components, x);
/* Vertex vectors of unsaved components are not referenced by any
   rdg_component, so free them here.  */
780 for (i = 0; i < n_components; i++)
781 if (!bitmap_bit_p (saved_components, i))
782 VEC_free (int, heap, all_components[i]);
784 free (all_components);
785 BITMAP_FREE (saved_components);
788 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
789 For the moment we detect only the memset zero pattern. */
792 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
798 partition->kind = PKIND_NORMAL;
799 partition->main_stmt = NULL;
801 /* Perform general partition disqualification for builtins. */
/* The iteration count must be known and computable.  */
802 nb_iter = number_of_exit_cond_executions (loop);
803 if (!nb_iter || nb_iter == chrec_dont_know)
806 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
808 gimple stmt = RDG_STMT (rdg, i);
810 if (gimple_has_volatile_ops (stmt))
813 /* If the stmt has uses outside of the loop fail.
814 ??? If the stmt is generated in another partition that
815 is not created as builtin we can ignore this. */
816 if (gimple_code (stmt) != GIMPLE_PHI
817 && stmt_has_scalar_dependences_outside_loop (loop, stmt))
819 if (dump_file && (dump_flags & TDF_DETAILS))
820 fprintf (dump_file, "not generating builtin, partition has "
821 "scalar uses outside of the loop\n");
/* Second scan: the partition must contain exactly one memory store
   (recorded in main_stmt) and otherwise only scalar statements.  */
827 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
829 gimple stmt = RDG_STMT (rdg, i);
831 if (gimple_code (stmt) == GIMPLE_PHI)
834 /* Any scalar stmts are ok. */
835 if (!gimple_vuse (stmt))
838 /* Exactly one store. */
839 if (gimple_assign_single_p (stmt)
840 && !is_gimple_reg (gimple_assign_lhs (stmt)))
/* A second store disqualifies the partition (elided branch).  */
842 if (partition->main_stmt != NULL)
844 partition->main_stmt = stmt;
850 if (partition->main_stmt != NULL
851 && stmt_with_adjacent_zero_store_dr_p (partition->main_stmt))
852 partition->kind = PKIND_MEMSET;
855 /* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses, i.e. some memory statement of one accesses data similarly
   to some memory statement of the other (quadratic scan).  */
859 similar_memory_accesses (struct graph *rdg, partition_t partition1,
860 partition_t partition2)
863 bitmap_iterator bi, bj;
865 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
866 if (RDG_MEM_WRITE_STMT (rdg, i)
867 || RDG_MEM_READS_STMT (rdg, i))
868 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
869 if (RDG_MEM_WRITE_STMT (rdg, j)
870 || RDG_MEM_READS_STMT (rdg, j))
871 if (rdg_has_similar_memory_accesses (rdg, i, j))
877 /* Fuse all the partitions from PARTITIONS that contain similar memory
878 references, i.e., we're taking care of cache locality. This
879 function does not fuse those partitions that contain patterns that
880 can be code generated with builtins. */
883 fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
884 VEC (partition_t, heap) **partitions)
887 partition_t partition1, partition2;
889 FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
890 if (!partition_builtin_p (partition1))
891 FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
893 && !partition_builtin_p (partition2)
894 && similar_memory_accesses (rdg, partition1, partition2)
/* Merge PARTITION2 into PARTITION1 and drop it from the vector
   (preserving the order of the remaining partitions).  */
896 bitmap_ior_into (partition1->stmts, partition2->stmts);
897 VEC_ordered_remove (partition_t, *partitions, p2);
902 /* Aggregate several components into a useful partition that is
903 registered in the PARTITIONS vector. Partitions will be
904 distributed in different loops. */
907 rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
908 VEC (int, heap) **other_stores,
909 VEC (partition_t, heap) **partitions, bitmap processed)
913 partition_t partition = partition_alloc (NULL);
915 FOR_EACH_VEC_ELT (rdgc, components, i, x)
918 bool part_has_writes = false;
919 int v = VEC_index (int, x->vertices, 0);
/* Skip a component whose representative vertex was already
   swallowed by a previous partition.  */
921 if (bitmap_bit_p (processed, v))
924 np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
/* Accumulate the component's statements into the partition under
   construction; a partition is closed once it contains writes
   (elided control flow).  */
925 bitmap_ior_into (partition->stmts, np->stmts);
926 bitmap_ior_into (processed, np->stmts);
931 if (dump_file && (dump_flags & TDF_DETAILS))
933 fprintf (dump_file, "ldist useful partition:\n");
934 dump_bitmap (dump_file, partition->stmts);
937 VEC_safe_push (partition_t, heap, *partitions, partition);
938 partition = partition_alloc (NULL);
942 /* Add the nodes from the RDG that were not marked as processed, and
943 that are used outside the current loop. These are scalar
944 computations that are not yet part of previous partitions. */
945 for (i = 0; i < rdg->n_vertices; i++)
946 if (!bitmap_bit_p (processed, i)
947 && rdg_defs_used_in_other_loops_p (rdg, i))
948 VEC_safe_push (int, heap, *other_stores, i);
950 /* If there are still statements left in the OTHER_STORES array,
951 create other components and partitions with these stores and
952 their dependences. */
953 if (VEC_length (int, *other_stores) > 0)
955 VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
956 VEC (int, heap) *foo = VEC_alloc (int, heap, 3);
/* Recurse once: the leftover stores become fresh starting vertices.  */
958 rdg_build_components (rdg, *other_stores, &comps);
959 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
961 VEC_free (int, heap, foo);
962 free_rdg_components (comps);
965 /* If there is something left in the last partition, save it. */
966 if (bitmap_count_bits (partition->stmts) > 0)
967 VEC_safe_push (partition_t, heap, *partitions, partition);
/* Otherwise free the empty trailing partition (elided else).  */
969 partition_free (partition);
972 /* Dump to FILE the PARTITIONS, one statement bitmap per partition.  */
975 dump_rdg_partitions (FILE *file, VEC (partition_t, heap) *partitions)
978 partition_t partition;
980 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
981 debug_bitmap_file (file, partition->stmts);
984 /* Debug PARTITIONS: dump them to stderr (callable from a debugger,
   hence the extern declaration).  */
985 extern void debug_rdg_partitions (VEC (partition_t, heap) *);
988 debug_rdg_partitions (VEC (partition_t, heap) *partitions)
990 dump_rdg_partitions (stderr, partitions);
993 /* Returns the number of read and write operations in the RDG
   (counted per vertex via the RDG statement flags).  */
996 number_of_rw_in_rdg (struct graph *rdg)
1000 for (i = 0; i < rdg->n_vertices; i++)
1002 if (RDG_MEM_WRITE_STMT (rdg, i))
1005 if (RDG_MEM_READS_STMT (rdg, i))
1012 /* Returns the number of read and write operations in a PARTITION of
   the RDG (same counting as number_of_rw_in_rdg, restricted to the
   partition's statement bitmap).  */
1016 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1022 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1024 if (RDG_MEM_WRITE_STMT (rdg, i))
1027 if (RDG_MEM_READS_STMT (rdg, i))
1034 /* Returns true when one of the PARTITIONS contains all the read or
1035 write operations of RDG — distribution would then be pointless.  */
1038 partition_contains_all_rw (struct graph *rdg, VEC (partition_t, heap) *partitions)
1041 partition_t partition;
1042 int nrw = number_of_rw_in_rdg (rdg);
1044 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
1045 if (nrw == number_of_rw_in_partition (rdg, partition))
1051 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1052 distributed loops. */
1055 ldist_gen (struct loop *loop, struct graph *rdg,
1056 VEC (int, heap) *starting_vertices)
1059 VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
1060 VEC (partition_t, heap) *partitions = VEC_alloc (partition_t, heap, 3);
1061 VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
1062 partition_t partition;
1063 bitmap processed = BITMAP_ALLOC (NULL);
/* Set up the file-scope state used by the flagging routines.  */
1065 remaining_stmts = BITMAP_ALLOC (NULL);
1066 upstream_mem_writes = BITMAP_ALLOC (NULL);
1068 for (i = 0; i < rdg->n_vertices; i++)
1070 bitmap_set_bit (remaining_stmts, i);
1072 /* Save in OTHER_STORES all the memory writes that are not in
1073 STARTING_VERTICES. */
1074 if (RDG_MEM_WRITE_STMT (rdg, i))
1080 FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
1088 VEC_safe_push (int, heap, other_stores, i);
1092 mark_nodes_having_upstream_mem_writes (rdg);
1093 rdg_build_components (rdg, starting_vertices, &components);
1094 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1096 BITMAP_FREE (processed);
/* Detect builtin patterns before fusing, since builtin partitions
   are kept separate.  */
1098 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
1099 classify_partition (loop, rdg, partition);
1101 fuse_partitions_with_similar_memory_accesses (rdg, &partitions);
/* Bail out when distribution cannot help: a single non-builtin
   partition, or one partition holding all memory operations.  */
1103 nbp = VEC_length (partition_t, partitions);
1106 && !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
1108 && partition_contains_all_rw (rdg, partitions)))
1111 if (dump_file && (dump_flags & TDF_DETAILS))
1112 dump_rdg_partitions (dump_file, partitions);
/* Copy the loop for every partition but the last.  */
1114 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
1115 generate_code_for_partition (loop, partition, i < nbp - 1);
/* The distributed copies invalidated loop-closed SSA and the
   virtual operands; rebuild them.  */
1117 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1118 mark_sym_for_renaming (gimple_vop (cfun));
1119 update_ssa (TODO_update_ssa_only_virtuals);
1123 BITMAP_FREE (remaining_stmts);
1124 BITMAP_FREE (upstream_mem_writes);
1126 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
1127 partition_free (partition);
1129 VEC_free (int, heap, other_stores);
1130 VEC_free (partition_t, heap, partitions);
1131 free_rdg_components (components);
1135 /* Distributes the code from LOOP in such a way that producer
1136 statements are placed before consumer statements. When STMTS is
1137 NULL, performs the maximal distribution, if STMTS is not NULL,
1138 tries to separate only these statements from the LOOP's body.
1139 Returns the number of distributed loops. */
1142 distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
1148 VEC (int, heap) *vertices;
1149 VEC (ddr_p, heap) *dependence_relations;
1150 VEC (data_reference_p, heap) *datarefs;
1151 VEC (loop_p, heap) *loop_nest;
/* Only single-BB-body loops (header + latch) are handled.  */
1153 if (loop->num_nodes > 2)
1155 if (dump_file && (dump_flags & TDF_DETAILS))
1157 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
1163 datarefs = VEC_alloc (data_reference_p, heap, 10);
1164 dependence_relations = VEC_alloc (ddr_p, heap, 100);
1165 loop_nest = VEC_alloc (loop_p, heap, 3);
1166 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
/* build_rdg may fail (e.g. unanalyzable dependences); clean up and
   give up on this loop.  */
1170 if (dump_file && (dump_flags & TDF_DETAILS))
1172 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1175 free_dependence_relations (dependence_relations);
1176 free_data_refs (datarefs);
1177 VEC_free (loop_p, heap, loop_nest);
1181 vertices = VEC_alloc (int, heap, 3);
1183 if (dump_file && (dump_flags & TDF_DETAILS))
1184 dump_rdg (dump_file, rdg);
/* Translate the requested statements into RDG vertex numbers.  */
1186 FOR_EACH_VEC_ELT (gimple, stmts, i, s)
1188 int v = rdg_vertex_for_stmt (rdg, s);
1192 VEC_safe_push (int, heap, vertices, v);
1194 if (dump_file && (dump_flags & TDF_DETAILS))
1196 "ldist asked to generate code for vertex %d\n", v);
1200 res = ldist_gen (loop, rdg, vertices);
1201 VEC_free (int, heap, vertices);
1203 free_dependence_relations (dependence_relations);
1204 free_data_refs (datarefs);
1205 VEC_free (loop_p, heap, loop_nest);
1209 /* Distribute all loops in the current function.  Pass entry point;
   returns a TODO flag set (elided).  */
1212 tree_loop_distribution (void)
1216 int nb_generated_loops = 0;
1218 FOR_EACH_LOOP (li, loop, 0)
1220 VEC (gimple, heap) *work_list = NULL;
/* Save the loop number: the loop structure may be destroyed by
   distribution before the dump below.  */
1221 int num = loop->num;
1223 /* If the loop doesn't have a single exit we will fail anyway,
1224 so do that early. */
1225 if (!single_exit (loop))
1228 /* If both flag_tree_loop_distribute_patterns and
1229 flag_tree_loop_distribution are set, then only
1230 distribute_patterns is executed. */
1231 if (flag_tree_loop_distribute_patterns)
1233 /* With the following working list, we're asking
1234 distribute_loop to separate from the rest of the loop the
1235 stores of the form "A[i] = 0". */
1236 stores_zero_from_loop (loop, &work_list);
1238 /* Do nothing if there are no patterns to be distributed. */
1239 if (VEC_length (gimple, work_list) > 0)
1240 nb_generated_loops = distribute_loop (loop, work_list);
1242 else if (flag_tree_loop_distribution)
1244 /* With the following working list, we're asking
1245 distribute_loop to separate the stores of the loop: when
1246 dependences allow, it will end on having one store per
   distributed loop. */
1248 stores_from_loop (loop, &work_list);
1250 /* A simple heuristic for cache locality is to not split
1251 stores to the same array. Without this call, an unrolled
1252 loop would be split into as many loops as unroll factor,
1253 each loop storing in the same array. */
1254 remove_similar_memory_refs (&work_list);
1256 nb_generated_loops = distribute_loop (loop, work_list);
1259 if (dump_file && (dump_flags & TDF_DETAILS))
1261 if (nb_generated_loops > 1)
1262 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1263 num, nb_generated_loops);
1265 fprintf (dump_file, "Loop %d is the same.\n", num);
1268 #ifdef ENABLE_CHECKING
1269 verify_loop_structure ();
1272 VEC_free (gimple, heap, work_list);
/* Pass gate: run when either loop-distribution flag is enabled
   (-ftree-loop-distribution or -ftree-loop-distribute-patterns).  */
1279 gate_tree_loop_distribution (void)
1281 return flag_tree_loop_distribution
1282 || flag_tree_loop_distribute_patterns;
/* Pass descriptor for the loop distribution pass; fields follow the
   gimple_opt_pass layout (several members elided in this listing).  */
1285 struct gimple_opt_pass pass_loop_distribution =
1290 gate_tree_loop_distribution, /* gate */
1291 tree_loop_distribution, /* execute */
1294 0, /* static_pass_number */
1295 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1296 PROP_cfg | PROP_ssa, /* properties_required */
1297 0, /* properties_provided */
1298 0, /* properties_destroyed */
1299 0, /* todo_flags_start */
1301 | TODO_verify_ssa /* todo_flags_finish */