1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
43 # i_23 = PHI <0(0), i_18(10)>;
46 if (j_15 > 41) goto <L1>; else goto <L17>;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
67 # i_23 = PHI <0(0), i_18(10)>;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
85 #include "coretypes.h"
90 #include "double-int.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
101 #include "hard-reg-set.h"
103 #include "function.h"
104 #include "dominance.h"
106 #include "basic-block.h"
107 #include "gimple-pretty-print.h"
108 #include "tree-ssa-alias.h"
109 #include "internal-fn.h"
110 #include "gimple-fold.h"
111 #include "gimple-expr.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "gimple-ssa.h"
118 #include "tree-cfg.h"
119 #include "tree-phinodes.h"
120 #include "ssa-iterators.h"
121 #include "stringpool.h"
122 #include "tree-ssanames.h"
123 #include "tree-into-ssa.h"
124 #include "tree-ssa.h"
126 #include "tree-chrec.h"
127 #include "tree-data-ref.h"
128 #include "tree-scalar-evolution.h"
129 #include "tree-ssa-loop-ivopts.h"
130 #include "tree-ssa-address.h"
131 #include "tree-pass.h"
134 #include "insn-codes.h"
137 /* List of basic blocks in if-conversion-suitable order. */
138 static basic_block *ifc_bbs;
140 /* Structure used to predicate basic blocks. This is attached to the
141 ->aux field of the BBs in the loop to be if-converted. */
142 typedef struct bb_predicate_s {
144 /* The condition under which this basic block is executed. */
147 /* PREDICATE is gimplified, and the sequence of statements is
148 recorded here, in order to avoid the duplication of computations
149 that occur in previous conditions. See PR44483. */
150 gimple_seq predicate_gimplified_stmts;
153 /* Returns true when the basic block BB has a predicate. */
156 bb_has_predicate (basic_block bb)
158 return bb->aux != NULL;
161 /* Returns the gimplified predicate for basic block BB. */
164 bb_predicate (basic_block bb)
166 return ((bb_predicate_p) bb->aux)->predicate;
169 /* Sets the gimplified predicate COND for basic block BB. */
172 set_bb_predicate (basic_block bb, tree cond)
174 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
175 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
176 || is_gimple_condexpr (cond));
177 ((bb_predicate_p) bb->aux)->predicate = cond;
180 /* Returns the sequence of statements of the gimplification of the
181 predicate for basic block BB. */
183 static inline gimple_seq
184 bb_predicate_gimplified_stmts (basic_block bb)
186 return ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts;
189 /* Sets the sequence of statements STMTS of the gimplification of the
190 predicate for basic block BB. */
193 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
195 ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts = stmts;
198 /* Adds the sequence of statements STMTS to the sequence of statements
199 of the predicate for basic block BB. */
202 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
205 (&(((bb_predicate_p) bb->aux)->predicate_gimplified_stmts), stmts);
208 /* Initializes to TRUE the predicate of basic block BB. */
211 init_bb_predicate (basic_block bb)
213 bb->aux = XNEW (struct bb_predicate_s);
214 set_bb_predicate_gimplified_stmts (bb, NULL);
215 set_bb_predicate (bb, boolean_true_node);
218 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
219 but don't actually free it. */
222 release_bb_predicate (basic_block bb)
224 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
227 gimple_stmt_iterator i;
229 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
230 free_stmt_operands (cfun, gsi_stmt (i));
231 set_bb_predicate_gimplified_stmts (bb, NULL);
235 /* Free the predicate of basic block BB. */
238 free_bb_predicate (basic_block bb)
240 if (!bb_has_predicate (bb))
243 release_bb_predicate (bb);
248 /* Reinitialize predicate of BB with the true predicate. */
251 reset_bb_predicate (basic_block bb)
253 if (!bb_has_predicate (bb))
254 init_bb_predicate (bb);
257 release_bb_predicate (bb);
258 set_bb_predicate (bb, boolean_true_node);
262 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
263 the expression EXPR. Inserts the statement created for this
264 computation before GSI and leaves the iterator GSI at the same
268 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
270 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
271 gimple stmt = gimple_build_assign (new_name, expr);
272 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
276 /* Return true when COND is a true predicate. */
279 is_true_predicate (tree cond)
281 return (cond == NULL_TREE
282 || cond == boolean_true_node
283 || integer_onep (cond));
286 /* Returns true when BB has a predicate that is not trivial: true or
290 is_predicated (basic_block bb)
292 return !is_true_predicate (bb_predicate (bb));
295 /* Parses the predicate COND and returns its comparison code and
296 operands OP0 and OP1. */
298 static enum tree_code
299 parse_predicate (tree cond, tree *op0, tree *op1)
303 if (TREE_CODE (cond) == SSA_NAME
304 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
306 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
308 *op0 = gimple_assign_rhs1 (s);
309 *op1 = gimple_assign_rhs2 (s);
310 return gimple_assign_rhs_code (s);
313 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
315 tree op = gimple_assign_rhs1 (s);
316 tree type = TREE_TYPE (op);
317 enum tree_code code = parse_predicate (op, op0, op1);
319 return code == ERROR_MARK ? ERROR_MARK
320 : invert_tree_comparison (code, HONOR_NANS (type));
326 if (TREE_CODE_CLASS (TREE_CODE (cond)) == tcc_comparison)
328 *op0 = TREE_OPERAND (cond, 0);
329 *op1 = TREE_OPERAND (cond, 1);
330 return TREE_CODE (cond);
336 /* Returns the fold of predicate C1 OR C2 at location LOC. */
339 fold_or_predicates (location_t loc, tree c1, tree c2)
341 tree op1a, op1b, op2a, op2b;
342 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
343 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
345 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
347 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
353 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
356 /* Returns true if N is either a constant or a SSA_NAME. */
359 constant_or_ssa_name (tree n)
361 switch (TREE_CODE (n))
374 /* Returns either a COND_EXPR or the folded expression if the folded
375 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
376 a constant or a SSA_NAME. */
379 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
381 tree rhs1, lhs1, cond_expr;
382 cond_expr = fold_ternary (COND_EXPR, type, cond,
385 if (cond_expr == NULL_TREE)
386 return build3 (COND_EXPR, type, cond, rhs, lhs);
388 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
390 if (constant_or_ssa_name (cond_expr))
393 if (TREE_CODE (cond_expr) == ABS_EXPR)
395 rhs1 = TREE_OPERAND (cond_expr, 1);
396 STRIP_USELESS_TYPE_CONVERSION (rhs1);
397 if (constant_or_ssa_name (rhs1))
398 return build1 (ABS_EXPR, type, rhs1);
401 if (TREE_CODE (cond_expr) == MIN_EXPR
402 || TREE_CODE (cond_expr) == MAX_EXPR)
404 lhs1 = TREE_OPERAND (cond_expr, 0);
405 STRIP_USELESS_TYPE_CONVERSION (lhs1);
406 rhs1 = TREE_OPERAND (cond_expr, 1);
407 STRIP_USELESS_TYPE_CONVERSION (rhs1);
408 if (constant_or_ssa_name (rhs1)
409 && constant_or_ssa_name (lhs1))
410 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
412 return build3 (COND_EXPR, type, cond, rhs, lhs);
415 /* Add condition NC to the predicate list of basic block BB. LOOP is
416 the loop to be if-converted. Use predicate of cd-equivalent block
417 for join bb if it exists: we call basic blocks bb1 and bb2
418 cd-equivalent if they are executed under the same condition. */
421 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
426 if (is_true_predicate (nc))
429 /* If dominance tells us this basic block is always executed,
430 don't record any predicates for it. */
431 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
434 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
435 /* We use notion of cd equivalence to get simpler predicate for
436 join block, e.g. if join block has 2 predecessors with predicates
437 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
438 p1 & p2 | p1 & !p2. */
439 if (dom_bb != loop->header
440 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
442 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
443 bc = bb_predicate (dom_bb);
444 if (!is_true_predicate (bc))
445 set_bb_predicate (bb, bc);
447 gcc_assert (is_true_predicate (bb_predicate (bb)));
448 if (dump_file && (dump_flags & TDF_DETAILS))
449 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
450 dom_bb->index, bb->index);
454 if (!is_predicated (bb))
458 bc = bb_predicate (bb);
459 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
460 if (is_true_predicate (bc))
462 reset_bb_predicate (bb);
467 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
468 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
469 tp = &TREE_OPERAND (bc, 0);
472 if (!is_gimple_condexpr (*tp))
475 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
476 add_bb_predicate_gimplified_stmts (bb, stmts);
478 set_bb_predicate (bb, bc);
481 /* Add the condition COND to the previous condition PREV_COND, and add
482 this to the predicate list of the destination of edge E. LOOP is
483 the loop to be if-converted. */
486 add_to_dst_predicate_list (struct loop *loop, edge e,
487 tree prev_cond, tree cond)
489 if (!flow_bb_inside_loop_p (loop, e->dest))
492 if (!is_true_predicate (prev_cond))
493 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
496 add_to_predicate_list (loop, e->dest, cond);
499 /* Return true if one of the successor edges of BB exits LOOP. */
502 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
507 FOR_EACH_EDGE (e, ei, bb->succs)
508 if (loop_exit_edge_p (loop, e))
514 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
515 and it belongs to basic block BB.
517 PHI is not if-convertible if:
518 - it has more than 2 arguments.
520 When the flag_tree_loop_if_convert_stores is not set, PHI is not
522 - a virtual PHI is immediately used in another PHI node,
523 - there is a virtual PHI in a BB other than the loop->header. */
526 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
527 bool any_mask_load_store)
529 if (dump_file && (dump_flags & TDF_DETAILS))
531 fprintf (dump_file, "-------------------------\n");
532 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
535 if (bb != loop->header && gimple_phi_num_args (phi) != 2)
537 if (dump_file && (dump_flags & TDF_DETAILS))
538 fprintf (dump_file, "More than two phi node args.\n");
542 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
545 /* When the flag_tree_loop_if_convert_stores is not set, check
546 that there are no memory writes in the branches of the loop to be
548 if (virtual_operand_p (gimple_phi_result (phi)))
550 imm_use_iterator imm_iter;
553 if (bb != loop->header)
555 if (dump_file && (dump_flags & TDF_DETAILS))
556 fprintf (dump_file, "Virtual phi not on loop->header.\n");
560 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
562 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
564 if (dump_file && (dump_flags & TDF_DETAILS))
565 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
574 /* Records the status of a data reference. This struct is attached to
575 each DR->aux field. */
578 /* -1 when not initialized, 0 when false, 1 when true. */
579 int written_at_least_once;
581 /* -1 when not initialized, 0 when false, 1 when true. */
582 int rw_unconditionally;
585 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
586 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
587 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
589 /* Returns true when the memory references of STMT are read or written
590 unconditionally. In other words, this function returns true when
591 for every data reference A in STMT there exist other accesses to
592 a data reference with the same base with predicates that add up (OR-up) to
593 the true predicate: this ensures that the data reference A is touched
594 (read or written) on every iteration of the if-converted loop. */
597 memrefs_read_or_written_unconditionally (gimple stmt,
598 vec<data_reference_p> drs)
601 data_reference_p a, b;
602 tree ca = bb_predicate (gimple_bb (stmt));
604 for (i = 0; drs.iterate (i, &a); i++)
605 if (DR_STMT (a) == stmt)
608 int x = DR_RW_UNCONDITIONALLY (a);
616 for (j = 0; drs.iterate (j, &b); j++)
618 tree ref_base_a = DR_REF (a);
619 tree ref_base_b = DR_REF (b);
621 if (DR_STMT (b) == stmt)
624 while (TREE_CODE (ref_base_a) == COMPONENT_REF
625 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
626 || TREE_CODE (ref_base_a) == REALPART_EXPR)
627 ref_base_a = TREE_OPERAND (ref_base_a, 0);
629 while (TREE_CODE (ref_base_b) == COMPONENT_REF
630 || TREE_CODE (ref_base_b) == IMAGPART_EXPR
631 || TREE_CODE (ref_base_b) == REALPART_EXPR)
632 ref_base_b = TREE_OPERAND (ref_base_b, 0);
634 if (!operand_equal_p (ref_base_a, ref_base_b, 0))
636 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
638 if (DR_RW_UNCONDITIONALLY (b) == 1
639 || is_true_predicate (cb)
640 || is_true_predicate (ca
641 = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
643 DR_RW_UNCONDITIONALLY (a) = 1;
644 DR_RW_UNCONDITIONALLY (b) = 1;
653 DR_RW_UNCONDITIONALLY (a) = 0;
661 /* Returns true when the memory references of STMT are unconditionally
662 written. In other words, this function returns true when for every
663 data reference A written in STMT, there exist other writes to the
664 same data reference with predicates that add up (OR-up) to the true
665 predicate: this ensures that the data reference A is written on
666 every iteration of the if-converted loop. */
669 write_memrefs_written_at_least_once (gimple stmt,
670 vec<data_reference_p> drs)
673 data_reference_p a, b;
674 tree ca = bb_predicate (gimple_bb (stmt));
676 for (i = 0; drs.iterate (i, &a); i++)
677 if (DR_STMT (a) == stmt
681 int x = DR_WRITTEN_AT_LEAST_ONCE (a);
689 for (j = 0; drs.iterate (j, &b); j++)
690 if (DR_STMT (b) != stmt
692 && same_data_refs_base_objects (a, b))
694 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
696 if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
697 || is_true_predicate (cb)
698 || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
701 DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
702 DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
710 DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
718 /* Return true when the memory references of STMT won't trap in the
719 if-converted code. There are two things that we have to check for:
721 - writes to memory occur to writable memory: if-conversion of
722 memory writes transforms the conditional memory writes into
723 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
724 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
725 be executed at all in the original code, it may be a readonly
726 memory. To check that A is not const-qualified, we check that
727 there exists at least an unconditional write to A in the current
730 - reads or writes to memory are valid memory accesses for every
731 iteration. To check that the memory accesses are correctly formed
732 and that we are allowed to read and write in these locations, we
733 check that the memory accesses to be if-converted occur at every
734 iteration unconditionally. */
737 ifcvt_memrefs_wont_trap (gimple stmt, vec<data_reference_p> refs)
739 return write_memrefs_written_at_least_once (stmt, refs)
740 && memrefs_read_or_written_unconditionally (stmt, refs);
743 /* Wrapper around gimple_could_trap_p refined for the needs of the
744 if-conversion. Try to prove that the memory accesses of STMT could
745 not trap in the innermost loop containing STMT. */
748 ifcvt_could_trap_p (gimple stmt, vec<data_reference_p> refs)
750 if (gimple_vuse (stmt)
751 && !gimple_could_trap_p_1 (stmt, false, false)
752 && ifcvt_memrefs_wont_trap (stmt, refs))
755 return gimple_could_trap_p (stmt);
758 /* Return true if STMT could be converted into a masked load or store
759 (conditional load or store based on a mask computed from bb predicate). */
762 ifcvt_can_use_mask_load_store (gimple stmt)
766 basic_block bb = gimple_bb (stmt);
769 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
770 || bb->loop_father->dont_vectorize
771 || !gimple_assign_single_p (stmt)
772 || gimple_has_volatile_ops (stmt))
775 /* Check whether this is a load or store. */
776 lhs = gimple_assign_lhs (stmt);
777 if (gimple_store_p (stmt))
779 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
784 else if (gimple_assign_load_p (stmt))
787 ref = gimple_assign_rhs1 (stmt);
792 if (may_be_nonaddressable_p (ref))
795 /* Mask should be integer mode of the same size as the load/store
797 mode = TYPE_MODE (TREE_TYPE (lhs));
798 if (int_mode_for_mode (mode) == BLKmode
799 || VECTOR_MODE_P (mode))
802 if (can_vec_mask_load_store_p (mode, is_load))
808 /* Return true when STMT is if-convertible.
810 GIMPLE_ASSIGN statement is not if-convertible if,
813 - LHS is not var decl. */
816 if_convertible_gimple_assign_stmt_p (gimple stmt,
817 vec<data_reference_p> refs,
818 bool *any_mask_load_store)
820 tree lhs = gimple_assign_lhs (stmt);
823 if (dump_file && (dump_flags & TDF_DETAILS))
825 fprintf (dump_file, "-------------------------\n");
826 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
829 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
832 /* Some of these constrains might be too conservative. */
833 if (stmt_ends_bb_p (stmt)
834 || gimple_has_volatile_ops (stmt)
835 || (TREE_CODE (lhs) == SSA_NAME
836 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
837 || gimple_has_side_effects (stmt))
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "stmt not suitable for ifcvt\n");
844 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
845 in between if_convertible_loop_p and combine_blocks
846 we can perform loop versioning. */
847 gimple_set_plf (stmt, GF_PLF_2, false);
849 if (flag_tree_loop_if_convert_stores)
851 if (ifcvt_could_trap_p (stmt, refs))
853 if (ifcvt_can_use_mask_load_store (stmt))
855 gimple_set_plf (stmt, GF_PLF_2, true);
856 *any_mask_load_store = true;
859 if (dump_file && (dump_flags & TDF_DETAILS))
860 fprintf (dump_file, "tree could trap...\n");
866 if (gimple_assign_rhs_could_trap_p (stmt))
868 if (ifcvt_can_use_mask_load_store (stmt))
870 gimple_set_plf (stmt, GF_PLF_2, true);
871 *any_mask_load_store = true;
874 if (dump_file && (dump_flags & TDF_DETAILS))
875 fprintf (dump_file, "tree could trap...\n");
879 bb = gimple_bb (stmt);
881 if (TREE_CODE (lhs) != SSA_NAME
882 && bb != bb->loop_father->header
883 && !bb_with_exit_edge_p (bb->loop_father, bb))
885 if (ifcvt_can_use_mask_load_store (stmt))
887 gimple_set_plf (stmt, GF_PLF_2, true);
888 *any_mask_load_store = true;
891 if (dump_file && (dump_flags & TDF_DETAILS))
893 fprintf (dump_file, "LHS is not var\n");
894 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
902 /* Return true when STMT is if-convertible.
904 A statement is if-convertible if:
905 - it is an if-convertible GIMPLE_ASSIGN,
906 - it is a GIMPLE_LABEL or a GIMPLE_COND. */
909 if_convertible_stmt_p (gimple stmt, vec<data_reference_p> refs,
910 bool *any_mask_load_store)
912 switch (gimple_code (stmt))
920 return if_convertible_gimple_assign_stmt_p (stmt, refs,
921 any_mask_load_store);
925 tree fndecl = gimple_call_fndecl (stmt);
928 int flags = gimple_call_flags (stmt);
929 if ((flags & ECF_CONST)
930 && !(flags & ECF_LOOPING_CONST_OR_PURE)
931 /* We can only vectorize some builtins at the moment,
932 so restrict if-conversion to those. */
933 && DECL_BUILT_IN (fndecl))
940 /* Don't know what to do with 'em so don't do anything. */
941 if (dump_file && (dump_flags & TDF_DETAILS))
943 fprintf (dump_file, "don't know what to do\n");
944 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
953 /* Return true when BB is if-convertible. This routine does not check
954 basic block's statements and phis.
956 A basic block is not if-convertible if:
957 - it is non-empty and it is after the exit block (in BFS order),
958 - it is after the exit block but before the latch,
959 - its edges are not normal.
961 EXIT_BB is the basic block containing the exit of the LOOP. BB is
965 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
970 if (dump_file && (dump_flags & TDF_DETAILS))
971 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
973 if (EDGE_COUNT (bb->preds) > 2
974 || EDGE_COUNT (bb->succs) > 2)
979 if (bb != loop->latch)
981 if (dump_file && (dump_flags & TDF_DETAILS))
982 fprintf (dump_file, "basic block after exit bb but before latch\n");
985 else if (!empty_block_p (bb))
987 if (dump_file && (dump_flags & TDF_DETAILS))
988 fprintf (dump_file, "non empty basic block after exit bb\n");
991 else if (bb == loop->latch
993 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
995 if (dump_file && (dump_flags & TDF_DETAILS))
996 fprintf (dump_file, "latch is not dominated by exit_block\n");
1001 /* Be less adventurous and handle only normal edges. */
1002 FOR_EACH_EDGE (e, ei, bb->succs)
1003 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1005 if (dump_file && (dump_flags & TDF_DETAILS))
1006 fprintf (dump_file, "Difficult to handle edges\n");
1010 /* At least one incoming edge has to be non-critical as otherwise edge
1011 predicates are not equal to basic-block predicates of the edge
1013 if (EDGE_COUNT (bb->preds) > 1
1014 && bb != loop->header)
1017 FOR_EACH_EDGE (e, ei, bb->preds)
1018 if (EDGE_COUNT (e->src->succs) == 1)
1022 if (dump_file && (dump_flags & TDF_DETAILS))
1023 fprintf (dump_file, "only critical predecessors\n");
1031 /* Return true when all predecessor blocks of BB are visited. The
1032 VISITED bitmap keeps track of the visited blocks. */
1035 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1039 FOR_EACH_EDGE (e, ei, bb->preds)
1040 if (!bitmap_bit_p (*visited, e->src->index))
1046 /* Get body of a LOOP in suitable order for if-conversion. It is
1047 caller's responsibility to deallocate basic block list.
1048 If-conversion suitable order is, breadth first sort (BFS) order
1049 with an additional constraint: select a block only if all its
1050 predecessors are already selected. */
1052 static basic_block *
1053 get_loop_body_in_if_conv_order (const struct loop *loop)
1055 basic_block *blocks, *blocks_in_bfs_order;
1058 unsigned int index = 0;
1059 unsigned int visited_count = 0;
1061 gcc_assert (loop->num_nodes);
1062 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1064 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1065 visited = BITMAP_ALLOC (NULL);
1067 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1070 while (index < loop->num_nodes)
1072 bb = blocks_in_bfs_order [index];
1074 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1076 free (blocks_in_bfs_order);
1077 BITMAP_FREE (visited);
1082 if (!bitmap_bit_p (visited, bb->index))
1084 if (pred_blocks_visited_p (bb, &visited)
1085 || bb == loop->header)
1087 /* This block is now visited. */
1088 bitmap_set_bit (visited, bb->index);
1089 blocks[visited_count++] = bb;
1095 if (index == loop->num_nodes
1096 && visited_count != loop->num_nodes)
1100 free (blocks_in_bfs_order);
1101 BITMAP_FREE (visited);
1105 /* Returns true when the analysis of the predicates for all the basic
1106 blocks in LOOP succeeded.
1108 predicate_bbs first allocates the predicates of the basic blocks.
1109 These fields are then initialized with the tree expressions
1110 representing the predicates under which a basic block is executed
1111 in the LOOP. As the loop->header is executed at each iteration, it
1112 has the "true" predicate. Other statements executed under a
1113 condition are predicated with that condition, for example
1120 S1 will be predicated with "x", and
1121 S2 will be predicated with "!x". */
1124 predicate_bbs (loop_p loop)
1128 for (i = 0; i < loop->num_nodes; i++)
1129 init_bb_predicate (ifc_bbs[i]);
1131 for (i = 0; i < loop->num_nodes; i++)
1133 basic_block bb = ifc_bbs[i];
1137 /* The loop latch is always executed and has no extra conditions
1138 to be processed: skip it. */
1139 if (bb == loop->latch)
1141 reset_bb_predicate (loop->latch);
1145 cond = bb_predicate (bb);
1146 stmt = last_stmt (bb);
1147 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1150 edge true_edge, false_edge;
1151 location_t loc = gimple_location (stmt);
1152 tree c = fold_build2_loc (loc, gimple_cond_code (stmt),
1154 gimple_cond_lhs (stmt),
1155 gimple_cond_rhs (stmt));
1157 /* Add new condition into destination's predicate list. */
1158 extract_true_false_edges_from_block (gimple_bb (stmt),
1159 &true_edge, &false_edge);
1161 /* If C is true, then TRUE_EDGE is taken. */
1162 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1165 /* If C is false, then FALSE_EDGE is taken. */
1166 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1168 add_to_dst_predicate_list (loop, false_edge,
1169 unshare_expr (cond), c2);
1174 /* If current bb has only one successor, then consider it as an
1175 unconditional goto. */
1176 if (single_succ_p (bb))
1178 basic_block bb_n = single_succ (bb);
1180 /* The successor bb inherits the predicate of its
1181 predecessor. If there is no predicate in the predecessor
1182 bb, then consider the successor bb as always executed. */
1183 if (cond == NULL_TREE)
1184 cond = boolean_true_node;
1186 add_to_predicate_list (loop, bb_n, cond);
1190 /* The loop header is always executed. */
1191 reset_bb_predicate (loop->header);
1192 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1193 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1196 /* Return true when LOOP is if-convertible. This is a helper function
1197 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1198 in if_convertible_loop_p. */
1201 if_convertible_loop_p_1 (struct loop *loop,
1202 vec<loop_p> *loop_nest,
1203 vec<data_reference_p> *refs,
1204 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1208 basic_block exit_bb = NULL;
1210 /* Don't if-convert the loop when the data dependences cannot be
1211 computed: the loop won't be vectorized in that case. */
1212 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1216 calculate_dominance_info (CDI_DOMINATORS);
1217 calculate_dominance_info (CDI_POST_DOMINATORS);
1219 /* Allow statements that can be handled during if-conversion. */
1220 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1223 if (dump_file && (dump_flags & TDF_DETAILS))
1224 fprintf (dump_file, "Irreducible loop\n");
1228 for (i = 0; i < loop->num_nodes; i++)
1230 basic_block bb = ifc_bbs[i];
1232 if (!if_convertible_bb_p (loop, bb, exit_bb))
1235 if (bb_with_exit_edge_p (loop, bb))
1239 for (i = 0; i < loop->num_nodes; i++)
1241 basic_block bb = ifc_bbs[i];
1242 gimple_stmt_iterator gsi;
1244 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1245 switch (gimple_code (gsi_stmt (gsi)))
1258 if (flag_tree_loop_if_convert_stores)
1260 data_reference_p dr;
1262 for (i = 0; refs->iterate (i, &dr); i++)
1264 dr->aux = XNEW (struct ifc_dr);
1265 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1266 DR_RW_UNCONDITIONALLY (dr) = -1;
1268 predicate_bbs (loop);
1271 for (i = 0; i < loop->num_nodes; i++)
1273 basic_block bb = ifc_bbs[i];
1274 gimple_stmt_iterator itr;
1276 /* Check the if-convertibility of statements in predicated BBs. */
1277 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1278 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1279 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1280 any_mask_load_store))
1284 if (flag_tree_loop_if_convert_stores)
1285 for (i = 0; i < loop->num_nodes; i++)
1286 free_bb_predicate (ifc_bbs[i]);
1288 /* Checking PHIs needs to be done after stmts, as the fact whether there
1289 are any masked loads or stores affects the tests. */
1290 for (i = 0; i < loop->num_nodes; i++)
1292 basic_block bb = ifc_bbs[i];
1295 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1296 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1297 *any_mask_load_store))
1302 fprintf (dump_file, "Applying if-conversion\n");
1307 /* Return true when LOOP is if-convertible.
1308 LOOP is if-convertible if:
1310 - it has two or more basic blocks,
1311 - it has only one exit,
1312 - loop header is not the exit edge,
1313 - if its basic blocks and phi nodes are if convertible. */
1316 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1321 vec<data_reference_p> refs;
1324 /* Handle only innermost loop. */
1325 if (!loop || loop->inner)
1327 if (dump_file && (dump_flags & TDF_DETAILS))
1328 fprintf (dump_file, "not innermost loop\n");
1332 /* If only one block, no need for if-conversion. */
1333 if (loop->num_nodes <= 2)
1335 if (dump_file && (dump_flags & TDF_DETAILS))
1336 fprintf (dump_file, "less than 2 basic blocks\n");
1340 /* More than one loop exit is too much to handle. */
1341 if (!single_exit (loop))
1343 if (dump_file && (dump_flags & TDF_DETAILS))
1344 fprintf (dump_file, "multiple exits\n");
1348 /* If one of the loop header's edge is an exit edge then do not
1349 apply if-conversion. */
1350 FOR_EACH_EDGE (e, ei, loop->header->succs)
1351 if (loop_exit_edge_p (loop, e))
1356 auto_vec<loop_p, 3> loop_nest;
1357 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1358 any_mask_load_store);
1360 if (flag_tree_loop_if_convert_stores)
1362 data_reference_p dr;
1365 for (i = 0; refs.iterate (i, &dr); i++)
1369 free_data_refs (refs);
1370 free_dependence_relations (ddrs);
1374 /* Basic block BB has two predecessors. Using predecessor's bb
1375 predicate, set an appropriate condition COND for the PHI node
1376 replacement. Return the true block whose phi arguments are
1377 selected when cond is true. LOOP is the loop containing the
1378 if-converted region, GSI is the place to insert the code for the
1382 find_phi_replacement_condition (basic_block bb, tree *cond,
1383 gimple_stmt_iterator *gsi)
1385 edge first_edge, second_edge;
1388 gcc_assert (EDGE_COUNT (bb->preds) == 2);
1389 first_edge = EDGE_PRED (bb, 0);
1390 second_edge = EDGE_PRED (bb, 1);
1392 /* Prefer an edge with a not negated predicate.
1393 ??? That's a very weak cost model. */
1394 tmp_cond = bb_predicate (first_edge->src);
1395 gcc_assert (tmp_cond);
1396 if (TREE_CODE (tmp_cond) == TRUTH_NOT_EXPR)
1400 tmp_edge = first_edge;
1401 first_edge = second_edge;
1402 second_edge = tmp_edge;
1405 /* Check if the edge we take the condition from is not critical.
1406 We know that at least one non-critical edge exists. */
1407 if (EDGE_COUNT (first_edge->src->succs) > 1)
1409 *cond = bb_predicate (second_edge->src);
1411 if (TREE_CODE (*cond) == TRUTH_NOT_EXPR)
1412 *cond = TREE_OPERAND (*cond, 0);
1414 /* Select non loop header bb. */
1415 first_edge = second_edge;
1418 *cond = bb_predicate (first_edge->src);
1420 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1421 *cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (*cond),
1422 is_gimple_condexpr, NULL_TREE,
1423 true, GSI_SAME_STMT);
1425 return first_edge->src;
1428 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1429 which is in predicated basic block.
1430 In fact, the following PHI pattern is searching:
1432 reduc_1 = PHI <..., reduc_2>
1436 reduc_2 = PHI <reduc_1, reduc_3>
1438 REDUC, OP0 and OP1 contain reduction stmt and its operands. */
1441 is_cond_scalar_reduction (gimple phi, gimple *reduc,
1442 tree *op0, tree *op1)
1444 tree lhs, r_op1, r_op2;
1447 gimple header_phi = NULL;
1448 enum tree_code reduction_op;
1449 basic_block bb = gimple_bb (phi);
1450 struct loop *loop = bb->loop_father;
1451 edge latch_e = loop_latch_edge (loop);
1452 imm_use_iterator imm_iter;
1453 use_operand_p use_p;
1455 arg_0 = PHI_ARG_DEF (phi, 0);
1456 arg_1 = PHI_ARG_DEF (phi, 1);
1457 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1460 if (gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1463 header_phi = SSA_NAME_DEF_STMT (arg_0);
1464 stmt = SSA_NAME_DEF_STMT (arg_1);
1466 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1469 header_phi = SSA_NAME_DEF_STMT (arg_1);
1470 stmt = SSA_NAME_DEF_STMT (arg_0);
1474 if (gimple_bb (header_phi) != loop->header)
1477 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1480 if (gimple_code (stmt) != GIMPLE_ASSIGN
1481 || gimple_has_volatile_ops (stmt))
1484 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1487 if (!is_predicated (gimple_bb (stmt)))
1490 /* Check that stmt-block is predecessor of phi-block. */
1491 if (EDGE_PRED (bb, 0)->src != gimple_bb (stmt)
1492 && EDGE_PRED (bb, 1)->src != gimple_bb (stmt))
1495 if (!has_single_use (lhs))
1498 reduction_op = gimple_assign_rhs_code (stmt);
1499 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1501 r_op1 = gimple_assign_rhs1 (stmt);
1502 r_op2 = gimple_assign_rhs2 (stmt);
1504 /* Make R_OP1 to hold reduction variable. */
1505 if (r_op2 == PHI_RESULT (header_phi)
1506 && reduction_op == PLUS_EXPR)
1512 else if (r_op1 != PHI_RESULT (header_phi))
1515 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1516 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1518 gimple use_stmt = USE_STMT (use_p);
1519 if (is_gimple_debug (use_stmt))
1521 if (use_stmt == stmt)
1523 if (gimple_code (use_stmt) != GIMPLE_PHI)
1527 *op0 = r_op1; *op1 = r_op2;
1532 /* Converts conditional scalar reduction into unconditional form, e.g.
1534 if (_5 != 0) goto bb_5 else goto bb_6
1540 # res_2 = PHI <res_13(4), res_6(5)>
1543 will be converted into sequence
1544 _ifc__1 = _5 != 0 ? 1 : 0;
1545 res_2 = res_13 + _ifc__1;
1546 Argument SWAP tells that arguments of conditional expression should be
1548 Returns rhs of resulting PHI assignment. */
1551 convert_scalar_cond_reduction (gimple reduc, gimple_stmt_iterator *gsi,
1552 tree cond, tree op0, tree op1, bool swap)
1554 gimple_stmt_iterator stmt_it;
1557 tree rhs1 = gimple_assign_rhs1 (reduc);
1558 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1560 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1562 if (dump_file && (dump_flags & TDF_DETAILS))
1564 fprintf (dump_file, "Found cond scalar reduction.\n");
1565 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1568 /* Build cond expression using COND and constant operand
1569 of reduction rhs. */
1570 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1571 unshare_expr (cond),
1575 /* Create assignment stmt and insert it at GSI. */
1576 new_assign = gimple_build_assign (tmp, c);
1577 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1578 /* Build rhs for unconditional increment/decrement. */
1579 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1580 TREE_TYPE (rhs1), op0, tmp);
1582 /* Delete original reduction stmt. */
1583 stmt_it = gsi_for_stmt (reduc);
1584 gsi_remove (&stmt_it, true);
1585 release_defs (reduc);
1589 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1590 This routine does not handle PHI nodes with more than two
1594 S1: A = PHI <x1(1), x2(5)>
1596 S2: A = cond ? x1 : x2;
1598 The generated code is inserted at GSI that points to the top of
1599 basic block's statement list. When COND is true, phi arg from
1600 TRUE_BB is selected. */
1603 predicate_scalar_phi (gphi *phi, tree cond,
1604 basic_block true_bb,
1605 gimple_stmt_iterator *gsi)
1609 tree rhs, res, arg, scev;
1611 gcc_assert (gimple_code (phi) == GIMPLE_PHI
1612 && gimple_phi_num_args (phi) == 2);
1614 res = gimple_phi_result (phi);
1615 /* Do not handle virtual phi nodes. */
1616 if (virtual_operand_p (res))
1619 bb = gimple_bb (phi);
1621 if ((arg = degenerate_phi_result (phi))
1622 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1624 && !chrec_contains_undetermined (scev)
1626 && (arg = gimple_phi_arg_def (phi, 0))))
1634 /* Use condition that is not TRUTH_NOT_EXPR in conditional modify expr. */
1635 if (EDGE_PRED (bb, 1)->src == true_bb)
1637 arg_0 = gimple_phi_arg_def (phi, 1);
1638 arg_1 = gimple_phi_arg_def (phi, 0);
1642 arg_0 = gimple_phi_arg_def (phi, 0);
1643 arg_1 = gimple_phi_arg_def (phi, 1);
1645 if (is_cond_scalar_reduction (phi, &reduc, &op0, &op1))
1646 /* Convert reduction stmt into vectorizable form. */
1647 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1648 true_bb != gimple_bb (reduc));
1650 /* Build new RHS using selected condition and arguments. */
1651 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1655 new_stmt = gimple_build_assign (res, rhs);
1656 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1657 update_stmt (new_stmt);
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1661 fprintf (dump_file, "new phi replacement stmt\n");
1662 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1666 /* Replaces in LOOP all the scalar phi nodes other than those in the
1667 LOOP->header block with conditional modify expressions. */
1670 predicate_all_scalar_phis (struct loop *loop)
1673 unsigned int orig_loop_num_nodes = loop->num_nodes;
1676 for (i = 1; i < orig_loop_num_nodes; i++)
1679 tree cond = NULL_TREE;
1680 gimple_stmt_iterator gsi;
1681 gphi_iterator phi_gsi;
1682 basic_block true_bb = NULL;
1685 if (bb == loop->header)
1688 phi_gsi = gsi_start_phis (bb);
1689 if (gsi_end_p (phi_gsi))
1692 /* BB has two predecessors. Using predecessor's aux field, set
1693 appropriate condition for the PHI node replacement. */
1694 gsi = gsi_after_labels (bb);
1695 true_bb = find_phi_replacement_condition (bb, &cond, &gsi);
1697 while (!gsi_end_p (phi_gsi))
1699 phi = phi_gsi.phi ();
1700 predicate_scalar_phi (phi, cond, true_bb, &gsi);
1701 release_phi_node (phi);
1702 gsi_next (&phi_gsi);
1705 set_phi_nodes (bb, NULL);
1709 /* Insert in each basic block of LOOP the statements produced by the
1710 gimplification of the predicates. */
1713 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1717 for (i = 0; i < loop->num_nodes; i++)
1719 basic_block bb = ifc_bbs[i];
1722 if (!is_predicated (bb))
1724 /* Do not insert statements for a basic block that is not
1725 predicated. Also make sure that the predicate of the
1726 basic block is set to true. */
1727 reset_bb_predicate (bb);
1731 stmts = bb_predicate_gimplified_stmts (bb);
1734 if (flag_tree_loop_if_convert_stores
1735 || any_mask_load_store)
1737 /* Insert the predicate of the BB just after the label,
1738 as the if-conversion of memory writes will use this
1740 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1741 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1745 /* Insert the predicate of the BB at the end of the BB
1746 as this would reduce the register pressure: the only
1747 use of this predicate will be in successor BBs. */
1748 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1751 || stmt_ends_bb_p (gsi_stmt (gsi)))
1752 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1754 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1757 /* Once the sequence is code generated, set it to NULL. */
1758 set_bb_predicate_gimplified_stmts (bb, NULL);
1763 /* Predicate each write to memory in LOOP.
1765 This function transforms control flow constructs containing memory
1768 | for (i = 0; i < N; i++)
1772 into the following form that does not contain control flow:
1774 | for (i = 0; i < N; i++)
1775 | A[i] = cond ? expr : A[i];
1777 The original CFG looks like this:
1784 | if (i < N) goto bb_5 else goto bb_2
1788 | cond = some_computation;
1789 | if (cond) goto bb_3 else goto bb_4
1801 insert_gimplified_predicates inserts the computation of the COND
1802 expression at the beginning of the destination basic block:
1809 | if (i < N) goto bb_5 else goto bb_2
1813 | cond = some_computation;
1814 | if (cond) goto bb_3 else goto bb_4
1818 | cond = some_computation;
1827 predicate_mem_writes is then predicating the memory write as follows:
1834 | if (i < N) goto bb_5 else goto bb_2
1838 | if (cond) goto bb_3 else goto bb_4
1842 | cond = some_computation;
1843 | A[i] = cond ? expr : A[i];
1851 and finally combine_blocks removes the basic block boundaries making
1852 the loop vectorizable:
1856 | if (i < N) goto bb_5 else goto bb_1
1860 | cond = some_computation;
1861 | A[i] = cond ? expr : A[i];
1862 | if (i < N) goto bb_5 else goto bb_4
1871 predicate_mem_writes (loop_p loop)
1873 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
1875 for (i = 1; i < orig_loop_num_nodes; i++)
1877 gimple_stmt_iterator gsi;
1878 basic_block bb = ifc_bbs[i];
1879 tree cond = bb_predicate (bb);
1883 if (is_true_predicate (cond))
1887 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1890 cond = TREE_OPERAND (cond, 0);
1893 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1894 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
1896 else if (gimple_plf (stmt, GF_PLF_2))
1898 tree lhs = gimple_assign_lhs (stmt);
1899 tree rhs = gimple_assign_rhs1 (stmt);
1900 tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
1902 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
1904 masktype = build_nonstandard_integer_type (bitsize, 1);
1905 mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
1906 mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
1907 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
1908 mark_addressable (ref);
1909 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
1910 true, NULL_TREE, true,
1912 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
1913 is_gimple_condexpr, NULL_TREE,
1914 true, GSI_SAME_STMT);
1915 mask = fold_build_cond_expr (masktype, unshare_expr (cond),
1916 mask_op0, mask_op1);
1917 mask = ifc_temp_var (masktype, mask, &gsi);
1918 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
1919 /* Copy points-to info if possible. */
1920 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
1921 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
1923 if (TREE_CODE (lhs) == SSA_NAME)
1926 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
1928 gimple_call_set_lhs (new_stmt, lhs);
1932 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
1934 gsi_replace (&gsi, new_stmt, true);
1936 else if (gimple_vdef (stmt))
1938 tree lhs = gimple_assign_lhs (stmt);
1939 tree rhs = gimple_assign_rhs1 (stmt);
1940 tree type = TREE_TYPE (lhs);
1942 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
1943 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
1950 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
1951 is_gimple_condexpr, NULL_TREE,
1952 true, GSI_SAME_STMT);
1953 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
1954 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
1960 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
1961 other than the exit and latch of the LOOP. Also resets the
1962 GIMPLE_DEBUG information. */
1965 remove_conditions_and_labels (loop_p loop)
1967 gimple_stmt_iterator gsi;
1970 for (i = 0; i < loop->num_nodes; i++)
1972 basic_block bb = ifc_bbs[i];
1974 if (bb_with_exit_edge_p (loop, bb)
1975 || bb == loop->latch)
1978 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
1979 switch (gimple_code (gsi_stmt (gsi)))
1983 gsi_remove (&gsi, true);
1987 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
1988 if (gimple_debug_bind_p (gsi_stmt (gsi)))
1990 gimple_debug_bind_reset_value (gsi_stmt (gsi));
1991 update_stmt (gsi_stmt (gsi));
2002 /* Combine all the basic blocks from LOOP into one or two super basic
2003 blocks. Replace PHI nodes with conditional modify expressions. */
2006 combine_blocks (struct loop *loop, bool any_mask_load_store)
2008 basic_block bb, exit_bb, merge_target_bb;
2009 unsigned int orig_loop_num_nodes = loop->num_nodes;
2014 predicate_bbs (loop);
2015 remove_conditions_and_labels (loop);
2016 insert_gimplified_predicates (loop, any_mask_load_store);
2017 predicate_all_scalar_phis (loop);
2019 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2020 predicate_mem_writes (loop);
2022 /* Merge basic blocks: first remove all the edges in the loop,
2023 except for those from the exit block. */
2025 for (i = 0; i < orig_loop_num_nodes; i++)
2028 free_bb_predicate (bb);
2029 if (bb_with_exit_edge_p (loop, bb))
2031 gcc_assert (exit_bb == NULL);
2035 gcc_assert (exit_bb != loop->latch);
2037 for (i = 1; i < orig_loop_num_nodes; i++)
2041 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2043 if (e->src == exit_bb)
2050 if (exit_bb != NULL)
2052 if (exit_bb != loop->header)
2054 /* Connect this node to loop header. */
2055 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2056 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2059 /* Redirect non-exit edges to loop->latch. */
2060 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2062 if (!loop_exit_edge_p (loop, e))
2063 redirect_edge_and_branch (e, loop->latch);
2065 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2069 /* If the loop does not have an exit, reconnect header and latch. */
2070 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2071 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2074 merge_target_bb = loop->header;
2075 for (i = 1; i < orig_loop_num_nodes; i++)
2077 gimple_stmt_iterator gsi;
2078 gimple_stmt_iterator last;
2082 if (bb == exit_bb || bb == loop->latch)
2085 /* Make stmts member of loop->header. */
2086 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2087 gimple_set_bb (gsi_stmt (gsi), merge_target_bb);
2089 /* Update stmt list. */
2090 last = gsi_last_bb (merge_target_bb);
2091 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2092 set_bb_seq (bb, NULL);
2094 delete_basic_block (bb);
2097 /* If possible, merge loop header to the block with the exit edge.
2098 This reduces the number of basic blocks to two, to please the
2099 vectorizer that handles only loops with two nodes. */
2101 && exit_bb != loop->header
2102 && can_merge_blocks_p (loop->header, exit_bb))
2103 merge_blocks (loop->header, exit_bb);
2109 /* Version LOOP before if-converting it, the original loop
2110 will be then if-converted, the new copy of the loop will not,
2111 and the LOOP_VECTORIZED internal call will be guarding which
2112 loop to execute. The vectorizer pass will fold this
2113 internal call into either true or false. */
2116 version_loop_for_if_conversion (struct loop *loop)
2118 basic_block cond_bb;
2119 tree cond = make_ssa_name (boolean_type_node);
2120 struct loop *new_loop;
2122 gimple_stmt_iterator gsi;
2124 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2125 build_int_cst (integer_type_node, loop->num),
2127 gimple_call_set_lhs (g, cond);
2129 initialize_original_copy_tables ();
2130 new_loop = loop_version (loop, cond, &cond_bb,
2131 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2132 REG_BR_PROB_BASE, true);
2133 free_original_copy_tables ();
2134 if (new_loop == NULL)
2136 new_loop->dont_vectorize = true;
2137 new_loop->force_vectorize = false;
2138 gsi = gsi_last_bb (cond_bb);
2139 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2140 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2141 update_ssa (TODO_update_ssa);
2145 /* If-convert LOOP when it is legal. For the moment this pass has no
2146 profitability analysis. Returns non-zero todo flags when something
2150 tree_if_conversion (struct loop *loop)
2152 unsigned int todo = 0;
2154 bool any_mask_load_store = false;
2156 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2157 || !dbg_cnt (if_conversion_tree))
2160 if (any_mask_load_store
2161 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2162 || loop->dont_vectorize))
2165 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2168 /* Now all statements are if-convertible. Combine all the basic
2169 blocks into one huge basic block doing the if-conversion
2171 combine_blocks (loop, any_mask_load_store);
2173 todo |= TODO_cleanup_cfg;
2174 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2176 mark_virtual_operands_for_renaming (cfun);
2177 todo |= TODO_update_ssa_only_virtuals;
2185 for (i = 0; i < loop->num_nodes; i++)
2186 free_bb_predicate (ifc_bbs[i]);
2191 free_dominance_info (CDI_POST_DOMINATORS);
2196 /* Tree if-conversion pass management. */
2200 const pass_data pass_data_if_conversion =
2202 GIMPLE_PASS, /* type */
2204 OPTGROUP_NONE, /* optinfo_flags */
2205 TV_NONE, /* tv_id */
2206 ( PROP_cfg | PROP_ssa ), /* properties_required */
2207 0, /* properties_provided */
2208 0, /* properties_destroyed */
2209 0, /* todo_flags_start */
2210 0, /* todo_flags_finish */
2213 class pass_if_conversion : public gimple_opt_pass
2216 pass_if_conversion (gcc::context *ctxt)
2217 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2220 /* opt_pass methods: */
2221 virtual bool gate (function *);
2222 virtual unsigned int execute (function *);
2224 }; // class pass_if_conversion
2227 pass_if_conversion::gate (function *fun)
2229 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2230 && flag_tree_loop_if_convert != 0)
2231 || flag_tree_loop_if_convert == 1
2232 || flag_tree_loop_if_convert_stores == 1);
2236 pass_if_conversion::execute (function *fun)
2241 if (number_of_loops (fun) <= 1)
2244 FOR_EACH_LOOP (loop, 0)
2245 if (flag_tree_loop_if_convert == 1
2246 || flag_tree_loop_if_convert_stores == 1
2247 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2248 && !loop->dont_vectorize))
2249 todo |= tree_if_conversion (loop);
2251 #ifdef ENABLE_CHECKING
2254 FOR_EACH_BB_FN (bb, fun)
2255 gcc_assert (!bb->aux);
2265 make_pass_if_conversion (gcc::context *ctxt)
2267 return new pass_if_conversion (ctxt);