/* Reassociation for trees.
   Copyright (C) 2005-2013 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "vec.h"
#include "langhooks.h"
#include "pointer-set.h"
#include "cfgloop.h"
#include "flags.h"
#include "target.h"
#include "params.h"
#include "diagnostic-core.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t.

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    (A schematic example of these five steps follows this comment.)

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e

    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
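/* A schematic end-to-end illustration of the five steps (GIMPLE-like
   pseudocode; the names here are made up for this sketch, not taken
   from the pass):

     input:    w = a - b;  x = w + c;  y = x - d;

     step 1:   nb = -b;  nd = -d;  y = a + nb + c + nd;
     step 2:   the adds are linearized and their operands collected
               into the vector {a, nb, c, nd}
     step 3:   nothing cancels in this list; had it contained both b
               and nb, that pair would have been dropped
     step 4:   the vector is re-emitted in rank order as a left-linear
               chain:  t1 = a + nb;  t2 = t1 + c;  y = t2 + nd;
     step 5:   surviving negates are folded back, e.g. t1 = a - b.  */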
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static struct pointer_map_t *operand_rank;

static long get_rank (tree);

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
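/* Illustration (schematic): in an innermost loop

     sum_1 = PHI <sum_0 (preheader), sum_2 (latch)>
     ...
     sum_2 = sum_1 + a_5;

   sum_1 is an accumulator phi with a single in-loop use, so it gets
   rank bb_rank[latch->index] + PHI_LOOP_BIAS rather than its header's
   block rank; the large rank makes the rewriting step add it last.  */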
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */

static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  void **slot = pointer_map_contains (operand_rank, e);
  return slot ? (long) (intptr_t) *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static void
insert_operand_rank (tree e, long rank)
{
  void **slot;
  gcc_assert (rank > 0);
  slot = pointer_map_insert (operand_rank, e);
  gcc_assert (!*slot);
  *slot = (void *) (intptr_t) rank;
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      int i, n;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt)
          || gimple_vdef (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      rank = 0;
      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          n = TREE_OPERAND_LENGTH (rhs);
          if (n == 0)
            rank = propagate_rank (rank, rhs);
          else
            {
              for (i = 0; i < n; i++)
                {
                  op = TREE_OPERAND (rhs, i);

                  if (op != NULL_TREE)
                    rank = propagate_rank (rank, op);
                }
            }
        }
      else
        {
          n = gimple_num_ops (stmt);
          for (i = 1; i < n; i++)
            {
              op = gimple_op (stmt, i);
              gcc_assert (op);
              rank = propagate_rank (rank, op);
            }
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0.  */
  return 0;
}
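/* Illustration (ranks shown schematically, not the exact values the
   pass computes): in straight-line code

     t1 = x + y;    rank (t1) == MAX (rank (x), rank (y)) + 1
     t2 = t1 + z;   rank (t2) == rank (t1) + 1

   deeper results get strictly larger ranks, so ordering operands by
   rank approximates ordering them by the point at which they become
   available.  */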
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oeb->op) - constant_type (oea->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  We use SSA_NAME_VERSION because it's stable.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
        return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
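/* Example: an ops vector {b_7 (rank 2), a_3 (rank 1), 5, 7} is already
   in the order this comparator produces: SSA names by decreasing rank
   first, then the rank-0 constants grouped at the end with same-type
   constants adjacent, which lets optimize_ops_list fold 5 and 7 into a
   single constant.  */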
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry_t> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry_t curr,
                          operand_entry_t last)
{
  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op, 0);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated ++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              ops->truncate (0);
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}
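/* Example: with opcode BIT_AND_EXPR the ops list {a, a, b} shrinks to
   {a, b} (a & a == a); with BIT_XOR_EXPR both copies are dropped, so
   {a, a, b} becomes {b}, and a lone {a, a} is replaced by a zero
   constant (a ^ a == 0).  */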
static vec<tree> plus_negates;
/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           vec<operand_entry_t> *ops,
                           unsigned int currindex,
                           operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (oe->op == negateop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop, 0);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> 0\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
      else if (oe->op == notop)
        {
          tree op_type = TREE_TYPE (oe->op);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              fprintf (dump_file, " + ~");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> -1\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);

  return false;
}
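/* Example: for t = x + (-y) + y the ops list {x, -y, y} becomes
   {x, 0}: y cancels against the negate and a zero constant is
   appended (to be folded away later).  Likewise x + ~y + y becomes
   x + -1, since ~y + y is all ones.  */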
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false. */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     vec<operand_entry_t> *ops,
                     unsigned int currindex,
                     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = build_zero_cst (TREE_TYPE (oe->op));
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
                                          TYPE_PRECISION (TREE_TYPE (oe->op)));

          reassociate_stats.ops_eliminated += ops->length () - 1;
          ops->truncate (0);
          ops->quick_push (oe);
          return true;
        }
    }

  return false;
}
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
                           vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (TYPE_MODE (type))
                  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
                  && real_zerop (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;
                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (TYPE_MODE (type))
                       && real_onep (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}
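/* Example: for MULT_EXPR the ops list {a, b, 0} collapses to {0}
   (a * b * 0 == 0), while {a, b, 1} merely drops the 1.  Similarly
   {a, 0} under BIT_AND_EXPR becomes {0}, and under BIT_IOR_EXPR the
   0 is simply removed.  */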
static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;

/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
/* Hash function for oecount.  */

static hashval_t
oecount_hash (const void *p)
{
  const oecount *c = &cvec[(size_t)p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

static int
oecount_eq (const void *p1, const void *p2)
{
  const oecount *c1 = &cvec[(size_t)p1 - 42];
  const oecount *c2 = &cvec[(size_t)p2 - 42];
  return (c1->oecode == c2->oecode
          && c1->op == c2->op);
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple stmt, tree op)
{
  tree fndecl;

  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
    CASE_FLT_FN (BUILT_IN_POWI):
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, 0, 0);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  gsi_remove (&gsi, true);
  release_defs (stmt);
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      if (opcode == MULT_EXPR
          && stmt_is_power_of_op (stmt, op))
        {
          if (decrement_power (stmt) == 1)
            propagate_op_to_single_use (op, stmt, def);
          return;
        }

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          propagate_op_to_single_use (name, stmt, def);
          return;
        }

      /* We might have a multiply of two __builtin_pow* calls, and
         the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
          && gimple_assign_rhs_code (stmt) == opcode
          && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
          && has_single_use (gimple_assign_rhs2 (stmt)))
        {
          gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (stmt_is_power_of_op (stmt2, op))
            {
              if (decrement_power (stmt2) == 1)
                propagate_op_to_single_use (op, stmt2, def);
              return;
            }
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
/* Builds one statement performing OP1 OPCODE OP2 using a new SSA name
   of TYPE for the result.  Places the statement after the definition of
   either OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gimple sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type, NULL);
  sum = gimple_build_assign_with_ops (opcode, op, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else if ((!op1def || gimple_nop_p (op1def))
           || (op2def && !gimple_nop_p (op2def)
               && stmt_dominates_stmt_p (op1def, op2def)))
    {
      if (gimple_code (op2def) == GIMPLE_PHI)
        {
          gsi = gsi_after_labels (gimple_bb (op2def));
          gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
        }
      else
        {
          if (!stmt_ends_bb_p (op2def))
            {
              gsi = gsi_for_stmt (op2def);
              gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
            }
          else
            {
              edge e;
              edge_iterator ei;

              FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
                if (e->flags & EDGE_FALLTHRU)
                  gsi_insert_on_edge_immediate (e, sum);
            }
        }
    }
  else
    {
      if (gimple_code (op1def) == GIMPLE_PHI)
        {
          gsi = gsi_after_labels (gimple_bb (op1def));
          gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
        }
      else
        {
          if (!stmt_ends_bb_p (op1def))
            {
              gsi = gsi_for_stmt (op1def);
              gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
            }
          else
            {
              edge e;
              edge_iterator ei;

              FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
                if (e->flags & EDGE_FALLTHRU)
                  gsi_insert_on_edge_immediate (e, sum);
            }
        }
    }

  update_stmt (sum);

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
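/* Illustration of the net effect (schematic; the SSA names are made
   up for this sketch): for the addition chain

     t1 = a * x;  t2 = b * x;  t3 = t1 + t2 + c;

   both summands' multiplication chains contain the pair
   (x, MULT_EXPR), so it is undistributed:

     t4 = a + b;  t5 = t4 * x;  t3 = t5 + c;

   saving one multiplication and exposing a + b to further
   reassociation.  */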
static bool
undistribute_ops_list (enum tree_code opcode,
                       vec<operand_entry_t> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry_t> *subops;
  htab_t ctable;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
           && dcode != RDIV_EXPR)
          || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
        (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry_t> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
                           associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
        {
          oecount c;
          void **slot;
          size_t idx;
          c.oecode = oecode;
          c.cnt = 1;
          c.id = next_oecount_id++;
          c.op = oe1->op;
          cvec.safe_push (c);
          idx = cvec.length () + 41;
          slot = htab_find_slot (ctable, (void *)idx, INSERT);
          if (!*slot)
            {
              *slot = (void *)idx;
            }
          else
            {
              cvec.pop ();
              cvec[(size_t)*slot - 42].cnt++;
            }
        }
    }
  htab_delete (ctable);

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
        {
          fprintf (dump_file, "  %u %s: ", c->cnt,
                   c->oecode == MULT_EXPR
                   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
          print_generic_expr (dump_file, c->op, 0);
          fprintf (dump_file, "\n");
        }
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  candidates2 = sbitmap_alloc (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
        break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
        {
          gimple oedef;
          enum tree_code oecode;
          unsigned j;
          tree op = (*ops)[i]->op;

          /* If we undistributed in this chain already this may be
             a constant.  */
          if (TREE_CODE (op) != SSA_NAME)
            continue;

          oedef = SSA_NAME_DEF_STMT (op);
          oecode = gimple_assign_rhs_code (oedef);
          if (oecode != c->oecode)
            continue;

          FOR_EACH_VEC_ELT (subops[i], j, oe1)
            {
              if (oe1->op == c->op)
                {
                  bitmap_set_bit (candidates2, i);
                  ++nr_candidates2;
                  break;
                }
            }
        }

      if (nr_candidates2 >= 2)
        {
          operand_entry_t oe1, oe2;
          gimple prod;
          int first = bitmap_first_set_bit (candidates2);

          /* Build the new addition chain.  */
          oe1 = (*ops)[first];
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Building (");
              print_generic_expr (dump_file, oe1->op, 0);
            }
          zero_one_operation (&oe1->op, c->oecode, c->op);
          EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
            {
              gimple sum;
              oe2 = (*ops)[i];
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " + ");
                  print_generic_expr (dump_file, oe2->op, 0);
                }
              zero_one_operation (&oe2->op, c->oecode, c->op);
              sum = build_and_add_sum (TREE_TYPE (oe1->op),
                                       oe1->op, oe2->op, opcode);
              oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
              oe2->rank = 0;
              oe1->op = gimple_get_lhs (sum);
            }

          /* Apply the multiplication/division.  */
          prod = build_and_add_sum (TREE_TYPE (oe1->op),
                                    oe1->op, c->op, c->oecode);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
              print_generic_expr (dump_file, c->op, 0);
              fprintf (dump_file, "\n");
            }

          /* Record it in the addition chain and disable further
             undistribution with this op.  */
          oe1->op = gimple_assign_lhs (prod);
          oe1->rank = get_rank (oe1->op);
          subops[first].release ();

          changed = true;
        }

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
                                vec<operand_entry_t> *ops,
                                unsigned int currindex,
                                operand_entry_t curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple def1, def2;
  int i;
  operand_entry_t oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
        continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
        continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
        continue;

      /* If we got here, we have a match.  See if we can combine the
         two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
        t = maybe_fold_or_comparisons (lcode, op1, op2,
                                       rcode, gimple_assign_rhs1 (def2),
                                       gimple_assign_rhs2 (def2));
      else
        t = maybe_fold_and_comparisons (lcode, op1, op2,
                                        rcode, gimple_assign_rhs1 (def2),
                                        gimple_assign_rhs2 (def2));
      if (!t)
        continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
         always give us a boolean_type_node value back.  If the original
         BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
         we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
        t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
          && !operand_equal_p (t, curr->op, 0))
        {
          enum tree_code subcode;
          tree newop1, newop2;
          if (!COMPARISON_CLASS_P (t))
            continue;
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
            continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Equivalence: ");
          print_generic_expr (dump_file, curr->op, 0);
          fprintf (dump_file, " %s ", op_symbol_code (opcode));
          print_generic_expr (dump_file, oe->op, 0);
          fprintf (dump_file, " -> ");
          print_generic_expr (dump_file, t, 0);
          fprintf (dump_file, "\n");
        }

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
         replace curr->op with t.  Special case is if we got a constant
         back, in which case we add it to the end instead of in place of
         the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
        {
          ops->ordered_remove (currindex);
          add_to_ops_vec (ops, t);
        }
      else if (!operand_equal_p (t, curr->op, 0))
        {
          gimple sum;
          enum tree_code subcode;
          tree newop1;
          tree newop2;
          gcc_assert (COMPARISON_CLASS_P (t));
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          gcc_checking_assert (is_gimple_val (newop1)
                               && is_gimple_val (newop2));
          sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
          curr->op = gimple_get_lhs (sum);
        }
      return true;
    }

  return false;
}
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
                   vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
          && is_gimple_min_invariant (oelm1->op)
          && useless_type_conversion_p (TREE_TYPE (oelm1->op),
                                        TREE_TYPE (oelast->op)))
        {
          tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
                                     oelm1->op, oelast->op);

          if (folded && is_gimple_min_invariant (folded))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Merging constants\n");

              ops->pop ();
              ops->pop ();

              add_to_ops_vec (ops, folded);
              reassociate_stats.constants_eliminated++;

              optimize_ops_list (opcode, ops);
              return;
            }
        }
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
        return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
          || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
          || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
        {
          if (done)
            return;
          iterate = true;
          oelast = NULL;
          continue;
        }
      oelast = oe;
      i++;
    }

  length = ops->length ();
  oelast = ops->last ();

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
        X == 2 || X == 5 || X == 3 || X == 4
   and
        X >= 2 && X <= 5
   are converted to
        (unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */
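/* Why the unsigned trick above is valid (informal sketch): for X in
   [2, 5], (unsigned) (X - 2) lies in [0, 3]; for X < 2 the subtraction
   wraps around to a very large unsigned value, and for X > 5 the
   result stays above 3.  So the single unsigned comparison is
   equivalent to the chain of tests it replaces.  */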
struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};

/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */
static void
init_range_entry (struct range_entry *r, tree exp, gimple stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
        is_bool = true;
      else
        return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (exp != NULL_TREE)
        {
          if (TREE_CODE (exp) != SSA_NAME)
            break;

          stmt = SSA_NAME_DEF_STMT (exp);
          if (!is_gimple_assign (stmt))
            break;

          code = gimple_assign_rhs_code (stmt);
          arg0 = gimple_assign_rhs1 (stmt);
          arg1 = gimple_assign_rhs2 (stmt);
          exp_type = TREE_TYPE (exp);
        }
      else
        {
          code = gimple_cond_code (stmt);
          arg0 = gimple_cond_lhs (stmt);
          arg1 = gimple_cond_rhs (stmt);
          exp_type = boolean_type_node;
        }

      if (TREE_CODE (arg0) != SSA_NAME)
        break;
      loc = gimple_location (stmt);
      switch (code)
        {
        case BIT_NOT_EXPR:
          if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
              /* Ensure the range is either +[-,0], +[0,0],
                 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
                 -[1,1].  If it is e.g. +[-,-] or -[-,-]
                 or similar expression of unconditional true or
                 false, it should not be negated.  */
              && ((high && integer_zerop (high))
                  || (low && integer_onep (low))))
            {
              in_p = !in_p;
              exp = arg0;
              continue;
            }
          break;
        case SSA_NAME:
          exp = arg0;
          continue;
        CASE_CONVERT:
          if (is_bool)
            goto do_default;
          if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
            {
              if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
                is_bool = true;
              else
                return;
            }
          else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
            is_bool = true;
          goto do_default;
        case EQ_EXPR:
        case NE_EXPR:
        case LT_EXPR:
        case LE_EXPR:
        case GE_EXPR:
        case GT_EXPR:
          is_bool = true;
          /* FALLTHRU */
        default:
          if (!is_bool)
            return;
        do_default:
          nexp = make_range_step (loc, code, arg0, arg1, exp_type,
                                  &low, &high, &in_p,
                                  &strict_overflow_p);
          if (nexp != NULL_TREE)
            {
              exp = nexp;
              gcc_assert (TREE_CODE (exp) == SSA_NAME);
              continue;
            }
          break;
        }
      break;
    }

  r->exp = exp;
  r->in_p = in_p;
  r->low = low;
  r->high = high;
  r->strict_overflow_p = strict_overflow_p;
}
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   maximum.  */

static int
range_entry_cmp (const void *a, const void *b)
{
  const struct range_entry *p = (const struct range_entry *) a;
  const struct range_entry *q = (const struct range_entry *) b;

  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
    {
      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
        {
          /* Group range_entries for the same SSA_NAME together.  */
          if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
            return -1;
          else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
            return 1;
          /* If ->low is different, NULL low goes first, then by
             ascending low.  */
          if (p->low != NULL_TREE)
            {
              if (q->low != NULL_TREE)
                {
                  tree tem = fold_binary (LT_EXPR, boolean_type_node,
                                          p->low, q->low);
                  if (tem && integer_onep (tem))
                    return -1;
                  tem = fold_binary (GT_EXPR, boolean_type_node,
                                     p->low, q->low);
                  if (tem && integer_onep (tem))
                    return 1;
                }
              else
                return 1;
            }
          else if (q->low != NULL_TREE)
            return -1;
          /* If ->high is different, NULL high goes last, before that by
             ascending high.  */
          if (p->high != NULL_TREE)
            {
              if (q->high != NULL_TREE)
                {
                  tree tem = fold_binary (LT_EXPR, boolean_type_node,
                                          p->high, q->high);
                  if (tem && integer_onep (tem))
                    return -1;
                  tem = fold_binary (GT_EXPR, boolean_type_node,
                                     p->high, q->high);
                  if (tem && integer_onep (tem))
                    return 1;
                }
              else
                return -1;
            }
          else if (q->high != NULL_TREE)
            return 1;
          /* If both ranges are the same, sort below by ascending idx.  */
        }
      else
        return 1;
    }
  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
    return -1;

  if (p->idx < q->idx)
    return -1;
  else
    {
      gcc_checking_assert (p->idx > q->idx);
      return 1;
    }
}
/* Helper routine of optimize_range_test.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   Changes should be then performed right away, and whether an op is
   BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank.  */
static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
                   unsigned int count, enum tree_code opcode,
                   vec<operand_entry_t> *ops, tree exp, bool in_p,
                   tree low, tree high, bool strict_overflow_p)
{
  operand_entry_t oe = (*ops)[range->idx];
  tree op = oe->op;
  gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  tree tem = build_range_check (loc, optype, exp, in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;

  if (tem == NULL_TREE)
    return false;

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
                "assuming signed overflow does not occur "
                "when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp, 0);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low, 0);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high, 0);
      fprintf (dump_file, "]");
      for (r = otherrange; r < otherrange + count; r++)
        {
          fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
          print_generic_expr (dump_file, r->low, 0);
          fprintf (dump_file, ", ");
          print_generic_expr (dump_file, r->high, 0);
          fprintf (dump_file, "]");
        }
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem, 0);
      fprintf (dump_file, "\n");
    }

  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  gsi = gsi_for_stmt (stmt);
  tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
                                  GSI_SAME_STMT);

  /* If doing inter-bb range test optimization, update the
     stmts immediately.  Start with changing the first range test
     immediate use to the new value (TEM), or, if the first range
     test is a GIMPLE_COND stmt, change that condition.  */
  if (opcode == ERROR_MARK)
    {
      if (op)
        {
          imm_use_iterator iter;
          use_operand_p use_p;
          gimple use_stmt;

          FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
            {
              if (is_gimple_debug (use_stmt))
                continue;
              FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                SET_USE (use_p, tem);
              update_stmt (use_stmt);
            }
        }
      else
        {
          gimple_cond_set_code (stmt, NE_EXPR);
          gimple_cond_set_lhs (stmt, tem);
          gimple_cond_set_rhs (stmt, boolean_false_node);
          update_stmt (stmt);
        }
    }
  oe->op = tem;
  range->exp = exp;
  range->in_p = in_p;
  range->low = low;
  range->high = high;
  range->strict_overflow_p = false;

  for (range = otherrange; range < otherrange + count; range++)
    {
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
         those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
        {
          if (oe->op)
            {
              imm_use_iterator iter;
              use_operand_p use_p;
              gimple use_stmt;

              FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
                {
                  if (is_gimple_debug (use_stmt))
                    continue;
                  /* If imm use of _8 is a statement like _7 = _8 | _9;,
                     adjust it into _7 = _9;.  */
                  if (is_gimple_assign (use_stmt)
                      && gimple_assign_rhs_code (use_stmt) == oe->rank)
                    {
                      tree expr = NULL_TREE;
                      if (oe->op == gimple_assign_rhs1 (use_stmt))
                        expr = gimple_assign_rhs2 (use_stmt);
                      else if (oe->op == gimple_assign_rhs2 (use_stmt))
                        expr = gimple_assign_rhs1 (use_stmt);

                      if (expr
                          && TREE_CODE (expr) == SSA_NAME)
                        {
                          gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
                          gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
                                                          expr, NULL_TREE);
                          update_stmt (use_stmt);
                          continue;
                        }
                    }
                  /* If imm use of _8 is a statement like _7 = (int) _8;,
                     adjust it into _7 = 0; or _7 = 1;.  */
                  if (gimple_assign_cast_p (use_stmt)
                      && oe->op == gimple_assign_rhs1 (use_stmt))
                    {
                      tree lhs = gimple_assign_lhs (use_stmt);
                      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
                        {
                          gimple_stmt_iterator gsi2
                            = gsi_for_stmt (use_stmt);
                          tree expr = build_int_cst (TREE_TYPE (lhs),
                                                     oe->rank == BIT_IOR_EXPR
                                                     ? 0 : 1);
                          gimple_assign_set_rhs_with_ops (&gsi2,
                                                          INTEGER_CST,
                                                          expr, NULL_TREE);
                          update_stmt (use_stmt);
                          continue;
                        }
                    }
                  /* Otherwise replace the use with 0 or 1.  */
                  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                    SET_USE (use_p,
                             build_int_cst (TREE_TYPE (oe->op),
                                            oe->rank == BIT_IOR_EXPR
                                            ? 0 : 1));
                  update_stmt (use_stmt);
                }
            }
          else
            {
              /* If range test was a GIMPLE_COND, simply change it
                 into an always false or always true condition.  */
              stmt = last_stmt (BASIC_BLOCK (oe->id));
              if (oe->rank == BIT_IOR_EXPR)
                gimple_cond_make_false (stmt);
              else
                gimple_cond_make_true (stmt);
              update_stmt (stmt);
            }
        }
      oe->op = error_mark_node;
      range->exp = NULL_TREE;
    }
  return true;
}
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case, if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */
static bool
optimize_range_tests (enum tree_code opcode,
                      vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length (), i, j, first;
  operand_entry_t oe;
  struct range_entry *ranges;
  bool any_changes = false;

  if (length == 1)
    return false;

  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
    {
      oe = (*ops)[i];
      ranges[i].idx = i;
      init_range_entry (ranges + i, oe->op,
                        oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
      /* For | invert it now, we will invert it again before emitting
         the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
          || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
        ranges[i].in_p = !ranges[i].in_p;
    }

  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
      break;

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
    {
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      for (j = i + 1; j < length; j++)
        {
          if (ranges[i].exp != ranges[j].exp)
            break;
          if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
                             ranges[j].in_p, ranges[j].low, ranges[j].high))
            break;
          strict_overflow_p |= ranges[j].strict_overflow_p;
        }

      if (j == i + 1)
        continue;

      if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
                             ops, ranges[i].exp, in_p, low, high,
                             strict_overflow_p))
        {
          i = j - 1;
          any_changes = true;
        }
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
         while update_range_test would fail.  */
      else if (update_fail_count == 64)
        i = j - 1;
      else
        ++update_fail_count;
    }

  /* Optimize X == CST1 || X == CST2
     if popcount (CST1 ^ CST2) == 1 into
     (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
     Similarly for ranges.  E.g.
     X != 2 && X != 3 && X != 10 && X != 11
     will be transformed by the above loop into
     (X - 2U) <= 1U && (X - 10U) <= 1U
     and this loop can transform that into
     ((X & ~8) - 2U) <= 1U.  */
  for (i = first; i < length; i++)
    {
      tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
        continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
        continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
        lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
        continue;
      for (j = i + 1; j < length && j < i + 64; j++)
        {
          if (ranges[j].exp == NULL_TREE)
            continue;
          if (ranges[i].exp != ranges[j].exp)
            break;
          if (ranges[j].in_p)
            continue;
          lowj = ranges[j].low;
          if (lowj == NULL_TREE)
            continue;
          highj = ranges[j].high;
          if (highj == NULL_TREE)
            highj = TYPE_MAX_VALUE (type);
          tem = fold_binary (GT_EXPR, boolean_type_node,
                             lowj, highi);
          if (tem == NULL_TREE || !integer_onep (tem))
            continue;
          lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
          if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
            continue;
          gcc_checking_assert (!integer_zerop (lowxor));
          tem = fold_binary (MINUS_EXPR, type, lowxor,
                             build_int_cst (type, 1));
          if (tem == NULL_TREE)
            continue;
          tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
          if (tem == NULL_TREE || !integer_zerop (tem))
            continue;
          highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
          if (!tree_int_cst_equal (lowxor, highxor))
            continue;
          tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
          exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
          lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
          highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
          if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
                                 ranges[i].in_p, lowj, highj,
                                 ranges[i].strict_overflow_p
                                 || ranges[j].strict_overflow_p))
            {
              any_changes = true;
              break;
            }
        }
    }

  if (any_changes && opcode != ERROR_MARK)
    {
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
        {
          if (oe->op == error_mark_node)
            continue;
          else if (i != j)
            (*ops)[j] = oe;
          j++;
        }
      ops->truncate (j);
    }

  XDELETEVEC (ranges);
  return any_changes;
}
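/* Worked instance of the bit-mask merge in the second loop above
   (values from the comment there): 2, 3, 10 and 11 are 0010, 0011,
   1010 and 1011 in binary, so the ranges [2,3] and [10,11] differ
   only in bit 3 (lowxor == highxor == 8, a single set bit).  Masking
   that bit off with X & ~8 folds both ranges onto [2,3], letting one
   ((X & ~8) - 2U) <= 1U test replace the two range tests.  */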
/* Return true if STMT is a cast like:

   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>

   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
static bool
final_range_test_p (gimple stmt)
{
  basic_block bb, rhs_bb;
  edge e;
  tree lhs, rhs;
  use_operand_p use_p;
  gimple use_stmt;

  if (!gimple_assign_cast_p (stmt))
    return false;
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
    return false;
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)
    return false;

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (rhs) != SSA_NAME
      || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
    return false;

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return false;

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)
    return false;

  /* And that the rhs is defined in the same loop.  */
  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
  if (rhs_bb == NULL
      || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
    return false;

  return true;
}
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch to if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */
2333 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2336 edge_iterator ei, ei2;
2339 gimple_stmt_iterator gsi;
2340 bool other_edge_seen = false;
2345 /* Check last stmt first. */
2346 stmt = last_stmt (bb);
2348 || (gimple_code (stmt) != GIMPLE_COND
2349 && (backward || !final_range_test_p (stmt)))
2350 || gimple_visited_p (stmt)
2351 || stmt_could_throw_p (stmt)
2354 is_cond = gimple_code (stmt) == GIMPLE_COND;
2357 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2358 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2359 to *OTHER_BB (if not set yet, try to find it out). */
2360 if (EDGE_COUNT (bb->succs) != 2)
2362 FOR_EACH_EDGE (e, ei, bb->succs)
2364 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2366 if (e->dest == test_bb)
2375 if (*other_bb == NULL)
2377 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2378 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2380 else if (e->dest == e2->dest)
2381 *other_bb = e->dest;
2382 if (*other_bb == NULL)
2385 if (e->dest == *other_bb)
2386 other_edge_seen = true;
2390 if (*other_bb == NULL || !other_edge_seen)
2393 else if (single_succ (bb) != *other_bb)
2396 /* Now check all PHIs of *OTHER_BB. */
2397 e = find_edge (bb, *other_bb);
2398 e2 = find_edge (test_bb, *other_bb);
2399 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2401 gimple phi = gsi_stmt (gsi);
2402 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2403 corresponding to the BB and TEST_BB predecessors must be the same. */
2404 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2405 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2407 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2408 one of the PHIs should have the lhs of the last stmt in
2409 that block as PHI arg and that PHI should have 0 or 1
2410 corresponding to it in all other range test basic blocks
2414 if (gimple_phi_arg_def (phi, e->dest_idx)
2415 == gimple_assign_lhs (stmt)
2416 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2417 || integer_onep (gimple_phi_arg_def (phi,
2423 gimple test_last = last_stmt (test_bb);
2424 if (gimple_code (test_last) != GIMPLE_COND
2425 && gimple_phi_arg_def (phi, e2->dest_idx)
2426 == gimple_assign_lhs (test_last)
2427 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2428 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2438 /* Return true if BB doesn't have side-effects that would disallow
2439 range test optimization, all SSA_NAMEs set in the bb are consumed
2440 in the bb and there are no PHIs. */
2443 no_side_effect_bb (basic_block bb)
2445 gimple_stmt_iterator gsi;
2448 if (!gimple_seq_empty_p (phi_nodes (bb)))
2450 last = last_stmt (bb);
2451 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2453 gimple stmt = gsi_stmt (gsi);
2455 imm_use_iterator imm_iter;
2456 use_operand_p use_p;
2458 if (is_gimple_debug (stmt))
2460 if (gimple_has_side_effects (stmt))
2464 if (!is_gimple_assign (stmt))
2466 lhs = gimple_assign_lhs (stmt);
2467 if (TREE_CODE (lhs) != SSA_NAME)
2469 if (gimple_assign_rhs_could_trap_p (stmt))
2471 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2473 gimple use_stmt = USE_STMT (use_p);
2474 if (is_gimple_debug (use_stmt))
2476 if (gimple_bb (use_stmt) != bb)
2483 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2484 return true and fill in *OPS recursively. */
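/* For instance (names illustrative), given

     _6 = _2 | _3;
     _7 = _6 | _4;

   where both defs are reassociable and single-use, get_ops (_7,
   BIT_IOR_EXPR, ...) pushes the leaves _2, _3 and _4 into *OPS,
   assuming those names are not themselves defined by further
   reassociable BIT_IOR_EXPRs. */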
2487 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2490 gimple stmt = SSA_NAME_DEF_STMT (var);
2494 if (!is_reassociable_op (stmt, code, loop))
2497 rhs[0] = gimple_assign_rhs1 (stmt);
2498 rhs[1] = gimple_assign_rhs2 (stmt);
2499 gimple_set_visited (stmt, true);
2500 for (i = 0; i < 2; i++)
2501 if (TREE_CODE (rhs[i]) == SSA_NAME
2502 && !get_ops (rhs[i], code, ops, loop)
2503 && has_single_use (rhs[i]))
2505 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2511 ops->safe_push (oe);
2516 /* Inter-bb range test optimization. */
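/* E.g. a test like a_1 < 0 || a_1 > 9 spread over two basic blocks can
   be gathered into OPS and merged by optimize_range_tests into the
   single comparison (unsigned) a_1 > 9 (names illustrative). */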
2519 maybe_optimize_range_tests (gimple stmt)
2521 basic_block first_bb = gimple_bb (stmt);
2522 basic_block last_bb = first_bb;
2523 basic_block other_bb = NULL;
2527 vec<operand_entry_t> ops = vNULL;
2529 /* Consider only basic blocks that end with GIMPLE_COND or
2530 a cast statement satisfying final_range_test_p. All
2531 but the last bb in the first_bb .. last_bb range
2532 should end with GIMPLE_COND. */
2533 if (gimple_code (stmt) == GIMPLE_COND)
2535 if (EDGE_COUNT (first_bb->succs) != 2)
2538 else if (final_range_test_p (stmt))
2539 other_bb = single_succ (first_bb);
2543 if (stmt_could_throw_p (stmt))
2546 /* As relative ordering of post-dominator sons isn't fixed,
2547 maybe_optimize_range_tests can be called first on any
2548 bb in the range we want to optimize. So, start by searching
2549 backwards to see if first_bb can be set to a predecessor. */
2550 while (single_pred_p (first_bb))
2552 basic_block pred_bb = single_pred (first_bb);
2553 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2555 if (!no_side_effect_bb (first_bb))
2559 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2560 Before starting forward search in last_bb successors, find
2561 out the other_bb. */
2562 if (first_bb == last_bb)
2565 /* As a non-GIMPLE_COND last stmt always terminates the range,
2566 the forward search wouldn't discover anything, so just give up. */
2567 if (gimple_code (stmt) != GIMPLE_COND)
2569 /* Look at both successors. Either one ends with a GIMPLE_COND
2570 and satisfies suitable_cond_bb, or it ends with a cast and
2571 other_bb is that cast bb's single successor. */
2572 FOR_EACH_EDGE (e, ei, first_bb->succs)
2573 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2574 || e->dest == first_bb)
2576 else if (single_pred_p (e->dest))
2578 stmt = last_stmt (e->dest);
2580 && gimple_code (stmt) == GIMPLE_COND
2581 && EDGE_COUNT (e->dest->succs) == 2)
2583 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2589 && final_range_test_p (stmt)
2590 && find_edge (first_bb, single_succ (e->dest)))
2592 other_bb = single_succ (e->dest);
2593 if (other_bb == first_bb)
2597 if (other_bb == NULL)
2600 /* Now do the forward search, moving last_bb to successor bbs
2601 that aren't other_bb. */
2602 while (EDGE_COUNT (last_bb->succs) == 2)
2604 FOR_EACH_EDGE (e, ei, last_bb->succs)
2605 if (e->dest != other_bb)
2609 if (!single_pred_p (e->dest))
2611 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2613 if (!no_side_effect_bb (e->dest))
2617 if (first_bb == last_bb)
2619 /* Here the basic blocks first_bb through last_bb's predecessor
2620 all end with GIMPLE_COND; each of them has one edge to
2621 other_bb and another to the next block in the range;
2622 no block except first_bb has side-effects; and
2623 last_bb ends with either GIMPLE_COND, or a cast satisfying
2624 final_range_test_p. */
2625 for (bb = last_bb; ; bb = single_pred (bb))
2627 enum tree_code code;
2630 e = find_edge (bb, other_bb);
2631 stmt = last_stmt (bb);
2632 gimple_set_visited (stmt, true);
2633 if (gimple_code (stmt) != GIMPLE_COND)
2635 use_operand_p use_p;
2640 lhs = gimple_assign_lhs (stmt);
2641 rhs = gimple_assign_rhs1 (stmt);
2642 gcc_assert (bb == last_bb);
2649 # _345 = PHI <_123(N), 1(...), 1(...)>
2651 or 0 instead of 1. If it is 0, the _234
2652 range test is anded together with all the
2653 other range tests; if it is 1, it is ored with
2655 single_imm_use (lhs, &use_p, &phi);
2656 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2657 e2 = find_edge (first_bb, other_bb);
2659 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2660 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2661 code = BIT_AND_EXPR;
2664 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2665 code = BIT_IOR_EXPR;
2668 /* If _234's SSA_NAME_DEF_STMT is
2669 _234 = _567 | _789;
2670 (or &, corresponding to 1/0 in the phi arguments),
2671 push into ops the individual range test arguments
2672 of the bitwise or resp. and, recursively. */
2673 if (!get_ops (rhs, code, &ops,
2674 loop_containing_stmt (stmt))
2675 && has_single_use (rhs))
2677 /* Otherwise, push the _234 range test itself. */
2679 = (operand_entry_t) pool_alloc (operand_entry_pool);
2689 /* Otherwise stmt is GIMPLE_COND. */
2690 code = gimple_cond_code (stmt);
2691 lhs = gimple_cond_lhs (stmt);
2692 rhs = gimple_cond_rhs (stmt);
2693 if (TREE_CODE (lhs) == SSA_NAME
2694 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2695 && ((code != EQ_EXPR && code != NE_EXPR)
2696 || rhs != boolean_false_node
2697 /* Either push into ops the individual bitwise
2698 or resp. and operands, depending on which
2699 edge is other_bb. */
2700 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2701 ^ (code == EQ_EXPR))
2702 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2703 loop_containing_stmt (stmt))))
2705 /* Or push the GIMPLE_COND stmt itself. */
2707 = (operand_entry_t) pool_alloc (operand_entry_pool);
2710 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2711 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2712 /* oe->op = NULL signals that there is no SSA_NAME
2713 for the range test, and oe->id instead is the
2714 basic block number at whose end the GIMPLE_COND
2723 if (ops.length () > 1)
2724 optimize_range_tests (ERROR_MARK, &ops);
2728 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2729 of STMT in its operands. This is also known as a "destructive
2730 update" operation. */
2733 is_phi_for_stmt (gimple stmt, tree operand)
2737 use_operand_p arg_p;
2740 if (TREE_CODE (operand) != SSA_NAME)
2743 lhs = gimple_assign_lhs (stmt);
2745 def_stmt = SSA_NAME_DEF_STMT (operand);
2746 if (gimple_code (def_stmt) != GIMPLE_PHI)
2749 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2750 if (lhs == USE_FROM_PTR (arg_p))
2755 /* Remove def stmt of VAR if VAR has zero uses and recurse
2756 on rhs1 operand if so. */
2759 remove_visited_stmt_chain (tree var)
2762 gimple_stmt_iterator gsi;
2766 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2768 stmt = SSA_NAME_DEF_STMT (var);
2769 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2771 var = gimple_assign_rhs1 (stmt);
2772 gsi = gsi_for_stmt (stmt);
2773 gsi_remove (&gsi, true);
2774 release_defs (stmt);
2781 /* This function checks three consecutive operands in
2782 the passed operands vector OPS starting from OPINDEX and
2783 swaps two operands if it is profitable for the binary operation
2784 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2786 We pair ops with the same rank if possible.
2788 The alternative we try is to see if STMT is a destructive
2789 update style statement, which is like:
2790 b = phi (a, ...)
2791 a = b + something
2792 In that case, we want to use the destructive update form to
2793 expose the possible vectorizer sum reduction opportunity.
2794 The third operand will then be the phi node. This
2795 check is not performed if STMT is null.
2797 We could, of course, try to be better as noted above, and do a
2798 lot of work to try to find these opportunities in >3 operand
2799 cases, but it is unlikely to be worth it. */
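/* E.g. with ranks in parentheses, ops { a (1), b (1), c (2) } become
   { c (2), b (1), a (1) }: the binary op consuming the last two
   operands then pairs the equal-rank a and b (names illustrative). */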
2802 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2803 unsigned int opindex, gimple stmt)
2805 operand_entry_t oe1, oe2, oe3;
2808 oe2 = ops[opindex + 1];
2809 oe3 = ops[opindex + 2];
2811 if ((oe1->rank == oe2->rank
2812 && oe2->rank != oe3->rank)
2813 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2814 && !is_phi_for_stmt (stmt, oe1->op)
2815 && !is_phi_for_stmt (stmt, oe2->op)))
2817 struct operand_entry temp = *oe3;
2819 oe3->rank = oe1->rank;
2821 oe1->rank = temp.rank;
2823 else if ((oe1->rank == oe3->rank
2824 && oe2->rank != oe3->rank)
2825 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2826 && !is_phi_for_stmt (stmt, oe1->op)
2827 && !is_phi_for_stmt (stmt, oe3->op)))
2829 struct operand_entry temp = *oe2;
2831 oe2->rank = oe1->rank;
2833 oe1->rank = temp.rank;
2837 /* Recursively rewrite our linearized statements so that the operators
2838 match those in OPS[OPINDEX], putting the computation in rank
2839 order. */
2842 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2843 vec<operand_entry_t> ops, bool moved)
2845 tree rhs1 = gimple_assign_rhs1 (stmt);
2846 tree rhs2 = gimple_assign_rhs2 (stmt);
2849 /* If we have three operands left, then we want to make sure the ones
2850 that get the double binary op are chosen wisely. */
2851 if (opindex + 3 == ops.length ())
2852 swap_ops_for_binary_stmt (ops, opindex, stmt);
2854 /* The final recursion case for this function is that you have
2855 exactly two operations left.
2856 If we had exactly one op in the entire list to start with, we
2857 would never have called this function, and the tail recursion
2858 rewrites them one at a time. */
2859 if (opindex + 2 == ops.length ())
2861 operand_entry_t oe1, oe2;
2864 oe2 = ops[opindex + 1];
2866 if (rhs1 != oe1->op || rhs2 != oe2->op)
2868 if (dump_file && (dump_flags & TDF_DETAILS))
2870 fprintf (dump_file, "Transforming ");
2871 print_gimple_stmt (dump_file, stmt, 0, 0);
2874 gimple_assign_set_rhs1 (stmt, oe1->op);
2875 gimple_assign_set_rhs2 (stmt, oe2->op);
2877 if (rhs1 != oe1->op && rhs1 != oe2->op)
2878 remove_visited_stmt_chain (rhs1);
2880 if (dump_file && (dump_flags & TDF_DETAILS))
2882 fprintf (dump_file, " into ");
2883 print_gimple_stmt (dump_file, stmt, 0, 0);
2889 /* If we hit here, we should have 3 or more ops left. */
2890 gcc_assert (opindex + 2 < ops.length ());
2892 /* Rewrite the next operator. */
2899 gimple_stmt_iterator gsinow, gsirhs1;
2900 gimple stmt1 = stmt, stmt2;
2903 gsinow = gsi_for_stmt (stmt);
2904 count = ops.length () - opindex - 2;
2905 while (count-- != 0)
2907 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2908 gsirhs1 = gsi_for_stmt (stmt2);
2909 gsi_move_before (&gsirhs1, &gsinow);
2916 if (dump_file && (dump_flags & TDF_DETAILS))
2918 fprintf (dump_file, "Transforming ");
2919 print_gimple_stmt (dump_file, stmt, 0, 0);
2922 gimple_assign_set_rhs2 (stmt, oe->op);
2925 if (dump_file && (dump_flags & TDF_DETAILS))
2927 fprintf (dump_file, " into ");
2928 print_gimple_stmt (dump_file, stmt, 0, 0);
2931 /* Recurse on the LHS of the binary operator, which is guaranteed to
2932 be the non-leaf side. */
2933 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2936 /* Find out how many cycles we need to compute the statement chain.
2937 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
2938 maximum number of independent statements we may execute per cycle. */
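/* A worked example (illustrative numbers): ops_num = 10 and cpu_width = 2
   give res = 10 / (2 * 2) = 2 cycles of width-bound reduction, leaving
   rest = 10 - 2 * 2 = 6 operands, which are then halved each cycle:
   floor_log2 (6) + 1 = 3 more cycles, 5 in total. */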
2941 get_required_cycles (int ops_num, int cpu_width)
2947 /* While we have more than 2 * cpu_width operands
2948 we may reduce the number of operands by cpu_width
2949 per cycle. */
2950 res = ops_num / (2 * cpu_width);
2952 /* The remaining operand count may then be halved each cycle
2953 until we have only one operand left. */
2954 rest = (unsigned)(ops_num - res * cpu_width);
2955 elog = exact_log2 (rest);
2959 res += floor_log2 (rest) + 1;
2964 /* Returns an optimal number of registers to use for computation of
2965 the given statements. */
2968 get_reassociation_width (int ops_num, enum tree_code opc,
2969 enum machine_mode mode)
2971 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2976 if (param_width > 0)
2977 width = param_width;
2979 width = targetm.sched.reassociation_width (opc, mode);
2984 /* Get the minimal time required for sequence computation. */
2985 cycles_best = get_required_cycles (ops_num, width);
2987 /* Check if we may use less width and still compute the sequence in
2988 the same number of cycles. That allows us to reduce register usage.
2989 get_required_cycles never decreases as the width shrinks,
2990 so we can perform a binary search for the minimal width that still
2991 results in the optimal cycle count. */
2993 while (width > width_min)
2995 int width_mid = (width + width_min) / 2;
2997 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2999 else if (width_min < width_mid)
3000 width_min = width_mid;
3008 /* Recursively rewrite our linearized statements so that the operators
3009 match those in OPS[OPINDEX], putting the computation in rank
3010 order and trying to allow operations to be executed in
3011 parallel. */
3014 rewrite_expr_tree_parallel (gimple stmt, int width,
3015 vec<operand_entry_t> ops)
3017 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3018 int op_num = ops.length ();
3019 int stmt_num = op_num - 1;
3020 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3021 int op_index = op_num - 1;
3023 int ready_stmts_end = 0;
3025 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3027 /* We start expression rewriting from the top statements.
3028 So, in this loop we create a full list of statements
3029 we will work with. */
3030 stmts[stmt_num - 1] = stmt;
3031 for (i = stmt_num - 2; i >= 0; i--)
3032 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3034 for (i = 0; i < stmt_num; i++)
3038 /* Determine whether we should use results of
3039 already handled statements or not. */
3040 if (ready_stmts_end == 0
3041 && (i - stmt_index >= width || op_index < 1))
3042 ready_stmts_end = i;
3044 /* Now we choose operands for the next statement. A non-zero
3045 value of ready_stmts_end here means that we should use
3046 the results of already generated statements as new operands. */
3047 if (ready_stmts_end > 0)
3049 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3050 if (ready_stmts_end > stmt_index)
3051 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3052 else if (op_index >= 0)
3053 op2 = ops[op_index--]->op;
3056 gcc_assert (stmt_index < i);
3057 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3060 if (stmt_index >= ready_stmts_end)
3061 ready_stmts_end = 0;
3066 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3067 op2 = ops[op_index--]->op;
3068 op1 = ops[op_index--]->op;
3071 /* If we emit the last statement then we should put
3072 operands into the last statement. It will also
3074 if (op_index < 0 && stmt_index == i)
3077 if (dump_file && (dump_flags & TDF_DETAILS))
3079 fprintf (dump_file, "Transforming ");
3080 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3083 /* We keep the original statement only for the last one. All
3084 others are recreated. */
3085 if (i == stmt_num - 1)
3087 gimple_assign_set_rhs1 (stmts[i], op1);
3088 gimple_assign_set_rhs2 (stmts[i], op2);
3089 update_stmt (stmts[i]);
3092 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3094 if (dump_file && (dump_flags & TDF_DETAILS))
3096 fprintf (dump_file, " into ");
3097 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3101 remove_visited_stmt_chain (last_rhs1);
3104 /* Transform STMT, which is really (A + B) + (C + D) into the left
3105 linear form, ((A + B) + C) + D.
3106 Recurse on D if necessary. */
3109 linearize_expr (gimple stmt)
3111 gimple_stmt_iterator gsinow, gsirhs;
3112 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3113 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3114 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3115 gimple newbinrhs = NULL;
3116 struct loop *loop = loop_containing_stmt (stmt);
3118 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3119 && is_reassociable_op (binrhs, rhscode, loop));
3121 gsinow = gsi_for_stmt (stmt);
3122 gsirhs = gsi_for_stmt (binrhs);
3123 gsi_move_before (&gsirhs, &gsinow);
3125 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3126 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3127 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3129 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3130 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3132 if (dump_file && (dump_flags & TDF_DETAILS))
3134 fprintf (dump_file, "Linearized: ");
3135 print_gimple_stmt (dump_file, stmt, 0, 0);
3138 reassociate_stats.linearized++;
3139 update_stmt (binrhs);
3140 update_stmt (binlhs);
3143 gimple_set_visited (stmt, true);
3144 gimple_set_visited (binlhs, true);
3145 gimple_set_visited (binrhs, true);
3147 /* Tail recurse on the new rhs if it still needs reassociation. */
3148 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3149 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3150 want to change the algorithm while converting to tuples. */
3151 linearize_expr (stmt);
3154 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3155 it. Otherwise, return NULL. */
3158 get_single_immediate_use (tree lhs)
3160 use_operand_p immuse;
3163 if (TREE_CODE (lhs) == SSA_NAME
3164 && single_imm_use (lhs, &immuse, &immusestmt)
3165 && is_gimple_assign (immusestmt))
3171 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3172 representing the negated value. Insertions of any necessary
3173 instructions go before GSI.
3174 This function is recursive in that, if you hand it "a_5" as the
3175 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3176 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3179 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3181 gimple negatedefstmt = NULL;
3182 tree resultofnegate;
3184 /* If we are trying to negate a name, defined by an add, negate the
3185 add operands instead. */
3186 if (TREE_CODE (tonegate) == SSA_NAME)
3187 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3188 if (TREE_CODE (tonegate) == SSA_NAME
3189 && is_gimple_assign (negatedefstmt)
3190 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3191 && has_single_use (gimple_assign_lhs (negatedefstmt))
3192 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3194 gimple_stmt_iterator gsi;
3195 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3196 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3198 gsi = gsi_for_stmt (negatedefstmt);
3199 rhs1 = negate_value (rhs1, &gsi);
3200 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3202 gsi = gsi_for_stmt (negatedefstmt);
3203 rhs2 = negate_value (rhs2, &gsi);
3204 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3206 update_stmt (negatedefstmt);
3207 return gimple_assign_lhs (negatedefstmt);
3210 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3211 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3212 NULL_TREE, true, GSI_SAME_STMT);
3213 return resultofnegate;
3216 /* Return true if we should break up the subtract in STMT into an add
3217 with negate. This is true when the subtract operands are really
3218 adds, or the subtract itself is used in an add expression. In
3219 either case, breaking up the subtract into an add with negate
3220 exposes the adds to reassociation. */
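/* For instance (names illustrative),

     a_5 = b_3 - c_4;
     d_6 = a_5 + e_2;

   qualifies: rewriting the subtract as a_5 = b_3 + (-c_4) turns the
   whole computation into a single PLUS_EXPR chain ending in d_6. */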
3223 should_break_up_subtract (gimple stmt)
3225 tree lhs = gimple_assign_lhs (stmt);
3226 tree binlhs = gimple_assign_rhs1 (stmt);
3227 tree binrhs = gimple_assign_rhs2 (stmt);
3229 struct loop *loop = loop_containing_stmt (stmt);
3231 if (TREE_CODE (binlhs) == SSA_NAME
3232 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3235 if (TREE_CODE (binrhs) == SSA_NAME
3236 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3239 if (TREE_CODE (lhs) == SSA_NAME
3240 && (immusestmt = get_single_immediate_use (lhs))
3241 && is_gimple_assign (immusestmt)
3242 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3243 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3248 /* Transform STMT from A - B into A + -B. */
3251 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3253 tree rhs1 = gimple_assign_rhs1 (stmt);
3254 tree rhs2 = gimple_assign_rhs2 (stmt);
3256 if (dump_file && (dump_flags & TDF_DETAILS))
3258 fprintf (dump_file, "Breaking up subtract ");
3259 print_gimple_stmt (dump_file, stmt, 0, 0);
3262 rhs2 = negate_value (rhs2, gsip);
3263 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3267 /* Determine whether STMT is a builtin call that raises an SSA name
3268 to an integer power and has only one use. If so, and this is early
3269 reassociation and unsafe math optimizations are permitted, place
3270 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3271 If any of these conditions does not hold, return FALSE. */
3274 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3277 REAL_VALUE_TYPE c, cint;
3279 if (!first_pass_instance
3280 || !flag_unsafe_math_optimizations
3281 || !is_gimple_call (stmt)
3282 || !has_single_use (gimple_call_lhs (stmt)))
3285 fndecl = gimple_call_fndecl (stmt);
3288 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3291 switch (DECL_FUNCTION_CODE (fndecl))
3293 CASE_FLT_FN (BUILT_IN_POW):
3294 *base = gimple_call_arg (stmt, 0);
3295 arg1 = gimple_call_arg (stmt, 1);
3297 if (TREE_CODE (arg1) != REAL_CST)
3300 c = TREE_REAL_CST (arg1);
3302 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3305 *exponent = real_to_integer (&c);
3306 real_from_integer (&cint, VOIDmode, *exponent,
3307 *exponent < 0 ? -1 : 0, 0);
3308 if (!real_identical (&c, &cint))
3313 CASE_FLT_FN (BUILT_IN_POWI):
3314 *base = gimple_call_arg (stmt, 0);
3315 arg1 = gimple_call_arg (stmt, 1);
3317 if (!host_integerp (arg1, 0))
3320 *exponent = TREE_INT_CST_LOW (arg1);
3327 /* Expanding negative exponents is generally unproductive, so we don't
3328 complicate matters with those. Exponents of zero and one should
3329 have been handled by expression folding. */
3330 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3336 /* Recursively linearize a binary expression that is the RHS of STMT.
3337 Place the operands of the expression tree in the vector named OPS. */
3340 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3341 bool is_associative, bool set_visited)
3343 tree binlhs = gimple_assign_rhs1 (stmt);
3344 tree binrhs = gimple_assign_rhs2 (stmt);
3345 gimple binlhsdef = NULL, binrhsdef = NULL;
3346 bool binlhsisreassoc = false;
3347 bool binrhsisreassoc = false;
3348 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3349 struct loop *loop = loop_containing_stmt (stmt);
3350 tree base = NULL_TREE;
3351 HOST_WIDE_INT exponent = 0;
3354 gimple_set_visited (stmt, true);
3356 if (TREE_CODE (binlhs) == SSA_NAME)
3358 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3359 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3360 && !stmt_could_throw_p (binlhsdef));
3363 if (TREE_CODE (binrhs) == SSA_NAME)
3365 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3366 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3367 && !stmt_could_throw_p (binrhsdef));
3370 /* If the LHS is not reassociable, but the RHS is, we need to swap
3371 them. If neither is reassociable, there is nothing we can do, so
3372 just put them in the ops vector. If the LHS is reassociable,
3373 linearize it. If both are reassociable, then linearize the RHS
3376 if (!binlhsisreassoc)
3380 /* If this is not an associative operation like division, give up. */
3381 if (!is_associative)
3383 add_to_ops_vec (ops, binrhs);
3387 if (!binrhsisreassoc)
3389 if (rhscode == MULT_EXPR
3390 && TREE_CODE (binrhs) == SSA_NAME
3391 && acceptable_pow_call (binrhsdef, &base, &exponent))
3393 add_repeat_to_ops_vec (ops, base, exponent);
3394 gimple_set_visited (binrhsdef, true);
3397 add_to_ops_vec (ops, binrhs);
3399 if (rhscode == MULT_EXPR
3400 && TREE_CODE (binlhs) == SSA_NAME
3401 && acceptable_pow_call (binlhsdef, &base, &exponent))
3403 add_repeat_to_ops_vec (ops, base, exponent);
3404 gimple_set_visited (binlhsdef, true);
3407 add_to_ops_vec (ops, binlhs);
3412 if (dump_file && (dump_flags & TDF_DETAILS))
3414 fprintf (dump_file, "swapping operands of ");
3415 print_gimple_stmt (dump_file, stmt, 0, 0);
3418 swap_tree_operands (stmt,
3419 gimple_assign_rhs1_ptr (stmt),
3420 gimple_assign_rhs2_ptr (stmt));
3423 if (dump_file && (dump_flags & TDF_DETAILS))
3425 fprintf (dump_file, " is now ");
3426 print_gimple_stmt (dump_file, stmt, 0, 0);
3429 /* We want to make it so the lhs is always the reassociative op,
3435 else if (binrhsisreassoc)
3437 linearize_expr (stmt);
3438 binlhs = gimple_assign_rhs1 (stmt);
3439 binrhs = gimple_assign_rhs2 (stmt);
3442 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3443 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3445 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3446 is_associative, set_visited);
3448 if (rhscode == MULT_EXPR
3449 && TREE_CODE (binrhs) == SSA_NAME
3450 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3452 add_repeat_to_ops_vec (ops, base, exponent);
3453 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3456 add_to_ops_vec (ops, binrhs);
3459 /* Repropagate the negates back into subtracts, since no other pass
3460 currently does it. */
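/* E.g. (names illustrative) the negate in

     x_3 = -a_2;
     y_4 = b_1 + x_3;

   is folded back to give y_4 = b_1 - a_2; the MINUS_EXPR cases below
   handle a negate feeding either operand of a subtraction. */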
3463 repropagate_negates (void)
3468 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3470 gimple user = get_single_immediate_use (negate);
3472 if (!user || !is_gimple_assign (user))
3475 /* The negate operand can be either operand of a PLUS_EXPR
3476 (it can be the LHS if the RHS is a constant for example).
3478 Force the negate operand to the RHS of the PLUS_EXPR, then
3479 transform the PLUS_EXPR into a MINUS_EXPR. */
3480 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3482 /* If the negated operand appears on the LHS of the
3483 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3484 to force the negated operand to the RHS of the PLUS_EXPR. */
3485 if (gimple_assign_rhs1 (user) == negate)
3487 swap_tree_operands (user,
3488 gimple_assign_rhs1_ptr (user),
3489 gimple_assign_rhs2_ptr (user));
3492 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3493 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3494 if (gimple_assign_rhs2 (user) == negate)
3496 tree rhs1 = gimple_assign_rhs1 (user);
3497 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3498 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3499 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3503 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3505 if (gimple_assign_rhs1 (user) == negate)
3507 /* We have
3508 x = -a
3509 y = x - b
3510 which we transform into
3511 x = a + b
3512 y = -x.
3513 This pushes down the negate which we possibly can merge
3514 into some other operation, hence insert it into the
3515 plus_negates vector. */
3516 gimple feed = SSA_NAME_DEF_STMT (negate);
3517 tree a = gimple_assign_rhs1 (feed);
3518 tree rhs2 = gimple_assign_rhs2 (user);
3519 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3520 gimple_replace_lhs (feed, negate);
3521 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3522 update_stmt (gsi_stmt (gsi));
3523 gsi2 = gsi_for_stmt (user);
3524 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3525 update_stmt (gsi_stmt (gsi2));
3526 gsi_move_before (&gsi, &gsi2);
3527 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3531 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3532 rid of one operation. */
3533 gimple feed = SSA_NAME_DEF_STMT (negate);
3534 tree a = gimple_assign_rhs1 (feed);
3535 tree rhs1 = gimple_assign_rhs1 (user);
3536 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3537 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3538 update_stmt (gsi_stmt (gsi));
3544 /* Returns true if OP is of a type for which we can do reassociation.
3545 That is for integral or non-saturating fixed-point types, and for
3546 floating-point types when associative-math is enabled. */
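/* E.g. unsigned int always qualifies, signed int qualifies only with
   -fwrapv (wrapping overflow), and double qualifies only when
   -fassociative-math is in effect. */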
3549 can_reassociate_p (tree op)
3551 tree type = TREE_TYPE (op);
3552 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3553 || NON_SAT_FIXED_POINT_TYPE_P (type)
3554 || (flag_associative_math && FLOAT_TYPE_P (type)))
3559 /* Break up subtract operations in block BB.
3561 We do this top down because we don't know whether the subtract is
3562 part of a possible chain of reassociation except at the top.
3564 IE given
3565 d = f + g
3566 c = a + e
3567 b = c - d
3568 q = b - r
3569 k = t - q
3571 we want to break up k = t - q, but we won't until we've transformed q
3572 = b - r, which won't be broken up until we transform b = c - d.
3574 En passant, clear the GIMPLE visited flag on every statement. */
3577 break_up_subtract_bb (basic_block bb)
3579 gimple_stmt_iterator gsi;
3582 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3584 gimple stmt = gsi_stmt (gsi);
3585 gimple_set_visited (stmt, false);
3587 if (!is_gimple_assign (stmt)
3588 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3591 /* Look for simple gimple subtract operations. */
3592 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3594 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3595 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3598 /* Check for a subtract used only in an addition. If this
3599 is the case, transform it into add of a negate for better
3600 reassociation. IE transform C = A-B into C = A + -B if C
3601 is only used in an addition. */
3602 if (should_break_up_subtract (stmt))
3603 break_up_subtract (stmt, &gsi);
3605 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3606 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3607 plus_negates.safe_push (gimple_assign_lhs (stmt));
3609 for (son = first_dom_son (CDI_DOMINATORS, bb);
3611 son = next_dom_son (CDI_DOMINATORS, son))
3612 break_up_subtract_bb (son);
3615 /* Used for repeated factor analysis. */
3616 struct repeat_factor_d
3618 /* An SSA name that occurs in a multiply chain. */
3621 /* Cached rank of the factor. */
3624 /* Number of occurrences of the factor in the chain. */
3625 HOST_WIDE_INT count;
3627 /* An SSA name representing the product of this factor and
3628 all factors appearing later in the repeated factor vector. */
3632 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3633 typedef const struct repeat_factor_d *const_repeat_factor_t;
3636 static vec<repeat_factor> repeat_factor_vec;
3638 /* Used for sorting the repeat factor vector. Sort primarily by
3639 ascending occurrence count, secondarily by descending rank. */
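/* E.g. factors x (count 2, rank 9), y (count 3, rank 4) and
   z (count 2, rank 1) sort as x, z, y: counts ascend, and within
   equal counts the higher-ranked factor comes first
   (names illustrative). */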
3642 compare_repeat_factors (const void *x1, const void *x2)
3644 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3645 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3647 if (rf1->count != rf2->count)
3648 return rf1->count - rf2->count;
3650 return rf2->rank - rf1->rank;
3653 /* Look for repeated operands in OPS in the multiply tree rooted at
3654 STMT. Replace them with an optimal sequence of multiplies and powi
3655 builtin calls, and remove the used operands from OPS. Return an
3656 SSA name representing the value of the replacement sequence. */
3659 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3661 unsigned i, j, vec_len;
3664 repeat_factor_t rf1, rf2;
3665 repeat_factor rfnew;
3666 tree result = NULL_TREE;
3667 tree target_ssa, iter_result;
3668 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3669 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3670 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3671 gimple mul_stmt, pow_stmt;
3673 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3678 /* Allocate the repeated factor vector. */
3679 repeat_factor_vec.create (10);
3681 /* Scan the OPS vector for all SSA names in the product and build
3682 up a vector of occurrence counts for each factor. */
3683 FOR_EACH_VEC_ELT (*ops, i, oe)
3685 if (TREE_CODE (oe->op) == SSA_NAME)
3687 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3689 if (rf1->factor == oe->op)
3691 rf1->count += oe->count;
3696 if (j >= repeat_factor_vec.length ())
3698 rfnew.factor = oe->op;
3699 rfnew.rank = oe->rank;
3700 rfnew.count = oe->count;
3701 rfnew.repr = NULL_TREE;
3702 repeat_factor_vec.safe_push (rfnew);
3707 /* Sort the repeated factor vector by (a) increasing occurrence count,
3708 and (b) decreasing rank. */
3709 repeat_factor_vec.qsort (compare_repeat_factors);
3711 /* It is generally best to combine as many base factors as possible
3712 into a product before applying __builtin_powi to the result.
3713 However, the sort order chosen for the repeated factor vector
3714 allows us to cache partial results for the product of the base
3715 factors for subsequent use. When we already have a cached partial
3716 result from a previous iteration, it is best to make use of it
3717 before looking for another __builtin_pow opportunity.
3719 As an example, consider x * x * y * y * y * z * z * z * z.
3720 We want to first compose the product x * y * z, raise it to the
3721 second power, then multiply this by y * z, and finally multiply
3722 by z. This can be done in 5 multiplies provided we cache y * z
3723 for use in both expressions:
3731 If we instead ignored the cached y * z and first multiplied by
3732 the __builtin_pow opportunity z * z, we would get the inferior:
3741 vec_len = repeat_factor_vec.length ();
3743 /* Repeatedly look for opportunities to create a builtin_powi call. */
3746 HOST_WIDE_INT power;
3748 /* First look for the largest cached product of factors from
3749 preceding iterations. If found, create a builtin_powi for
3750 it if the minimum occurrence count for its factors is at
3751 least 2, or just use this cached product as our next
3752 multiplicand if the minimum occurrence count is 1. */
3753 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3755 if (rf1->repr && rf1->count > 0)
3765 iter_result = rf1->repr;
3767 if (dump_file && (dump_flags & TDF_DETAILS))
3771 fputs ("Multiplying by cached product ", dump_file);
3772 for (elt = j; elt < vec_len; elt++)
3774 rf = &repeat_factor_vec[elt];
3775 print_generic_expr (dump_file, rf->factor, 0);
3776 if (elt < vec_len - 1)
3777 fputs (" * ", dump_file);
3779 fputs ("\n", dump_file);
3784 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3785 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3786 build_int_cst (integer_type_node,
3788 gimple_call_set_lhs (pow_stmt, iter_result);
3789 gimple_set_location (pow_stmt, gimple_location (stmt));
3790 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3792 if (dump_file && (dump_flags & TDF_DETAILS))
3796 fputs ("Building __builtin_pow call for cached product (",
3798 for (elt = j; elt < vec_len; elt++)
3800 rf = &repeat_factor_vec[elt];
3801 print_generic_expr (dump_file, rf->factor, 0);
3802 if (elt < vec_len - 1)
3803 fputs (" * ", dump_file);
3805 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3812 /* Otherwise, find the first factor in the repeated factor
3813 vector whose occurrence count is at least 2. If no such
3814 factor exists, there are no builtin_powi opportunities
3816 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3818 if (rf1->count >= 2)
3827 if (dump_file && (dump_flags & TDF_DETAILS))
3831 fputs ("Building __builtin_pow call for (", dump_file);
3832 for (elt = j; elt < vec_len; elt++)
3834 rf = &repeat_factor_vec[elt];
3835 print_generic_expr (dump_file, rf->factor, 0);
3836 if (elt < vec_len - 1)
3837 fputs (" * ", dump_file);
3839 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3842 reassociate_stats.pows_created++;
3844 /* Visit each element of the vector in reverse order (so that
3845 high-occurrence elements are visited first, and within the
3846 same occurrence count, lower-ranked elements are visited
3847 first). Form a linear product of all elements in this order
3848 whose occurrence count is at least that of element J.
3849 Record the SSA name representing the product of each element
3850 with all subsequent elements in the vector. */
3851 if (j == vec_len - 1)
3852 rf1->repr = rf1->factor;
3855 for (ii = vec_len - 2; ii >= (int)j; ii--)
3859 rf1 = &repeat_factor_vec[ii];
3860 rf2 = &repeat_factor_vec[ii + 1];
3862 /* Init the last factor's representative to be itself. */
3864 rf2->repr = rf2->factor;
3869 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3870 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3873 gimple_set_location (mul_stmt, gimple_location (stmt));
3874 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3875 rf1->repr = target_ssa;
3877 /* Don't reprocess the multiply we just introduced. */
3878 gimple_set_visited (mul_stmt, true);
3882 /* Form a call to __builtin_powi for the maximum product
3883 just formed, raised to the power obtained earlier. */
3884 rf1 = &repeat_factor_vec[j];
3885 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3886 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3887 build_int_cst (integer_type_node,
3889 gimple_call_set_lhs (pow_stmt, iter_result);
3890 gimple_set_location (pow_stmt, gimple_location (stmt));
3891 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3894 /* If we previously formed at least one other builtin_powi call,
3895 form the product of this one and those others. */
3898 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3899 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3900 result, iter_result);
3901 gimple_set_location (mul_stmt, gimple_location (stmt));
3902 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3903 gimple_set_visited (mul_stmt, true);
3904 result = new_result;
3907 result = iter_result;
3909 /* Decrement the occurrence count of each element in the product
3910 by the count found above, and remove this many copies of each
3912 for (i = j; i < vec_len; i++)
3917 rf1 = &repeat_factor_vec[i];
3918 rf1->count -= power;
3920 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3922 if (oe->op == rf1->factor)
3926 ops->ordered_remove (n);
3942 /* At this point all elements in the repeated factor vector have a
3943 remaining occurrence count of 0 or 1, and those with a count of 1
3944 don't have cached representatives. Re-sort the ops vector and
3946 ops->qsort (sort_by_operand_rank);
3947 repeat_factor_vec.release ();
3949 /* Return the final product computed herein. Note that there may
3950 still be some elements with single occurrence count left in OPS;
3951 those will be handled by the normal reassociation logic. */
3955 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3958 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3962 if (dump_file && (dump_flags & TDF_DETAILS))
3964 fprintf (dump_file, "Transforming ");
3965 print_gimple_stmt (dump_file, stmt, 0, 0);
3968 rhs1 = gimple_assign_rhs1 (stmt);
3969 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3971 remove_visited_stmt_chain (rhs1);
3973 if (dump_file && (dump_flags & TDF_DETAILS))
3975 fprintf (dump_file, " into ");
3976 print_gimple_stmt (dump_file, stmt, 0, 0);
3980 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3983 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3984 tree rhs1, tree rhs2)
3986 if (dump_file && (dump_flags & TDF_DETAILS))
3988 fprintf (dump_file, "Transforming ");
3989 print_gimple_stmt (dump_file, stmt, 0, 0);
3992 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3993 update_stmt (gsi_stmt (*gsi));
3994 remove_visited_stmt_chain (rhs1);
3996 if (dump_file && (dump_flags & TDF_DETAILS))
3998 fprintf (dump_file, " into ");
3999 print_gimple_stmt (dump_file, stmt, 0, 0);
4003 /* Reassociate expressions in basic block BB and its post-dominator as
4007 reassociate_bb (basic_block bb)
4009 gimple_stmt_iterator gsi;
4011 gimple stmt = last_stmt (bb);
4013 if (stmt && !gimple_visited_p (stmt))
4014 maybe_optimize_range_tests (stmt);
4016 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4018 stmt = gsi_stmt (gsi);
4020 if (is_gimple_assign (stmt)
4021 && !stmt_could_throw_p (stmt))
4023 tree lhs, rhs1, rhs2;
4024 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4026 /* If this is not a gimple binary expression, there is
4027 nothing for us to do with it. */
4028 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4031 /* If this was part of an already processed statement,
4032 we don't need to touch it again. */
4033 if (gimple_visited_p (stmt))
4035 /* This statement might have become dead because of previous
4037 if (has_zero_uses (gimple_get_lhs (stmt)))
4039 gsi_remove (&gsi, true);
4040 release_defs (stmt);
4041 /* We might end up removing the last stmt above, which
4042 places the iterator at the end of the sequence.
4043 Reset it to the last stmt in that case, which might
4044 itself be the end of the sequence if we removed
4045 the last statement of the sequence, in which case
4046 we need to bail out. */
4047 if (gsi_end_p (gsi))
4049 gsi = gsi_last_bb (bb);
4050 if (gsi_end_p (gsi))
4057 lhs = gimple_assign_lhs (stmt);
4058 rhs1 = gimple_assign_rhs1 (stmt);
4059 rhs2 = gimple_assign_rhs2 (stmt);
4061 /* For operations other than the bitwise and min/max codes we can't
4062 associate all types. Verify that here. */
4063 if (rhs_code != BIT_IOR_EXPR
4064 && rhs_code != BIT_AND_EXPR
4065 && rhs_code != BIT_XOR_EXPR
4066 && rhs_code != MIN_EXPR
4067 && rhs_code != MAX_EXPR
4068 && (!can_reassociate_p (lhs)
4069 || !can_reassociate_p (rhs1)
4070 || !can_reassociate_p (rhs2)))
4073 if (associative_tree_code (rhs_code))
4075 vec<operand_entry_t> ops = vNULL;
4076 tree powi_result = NULL_TREE;
4078 /* There may be no immediate uses left by the time we
4079 get here because we may have eliminated them all. */
4080 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4083 gimple_set_visited (stmt, true);
4084 linearize_expr_tree (&ops, stmt, true, true);
4085 ops.qsort (sort_by_operand_rank);
4086 optimize_ops_list (rhs_code, &ops);
4087 if (undistribute_ops_list (rhs_code, &ops,
4088 loop_containing_stmt (stmt)))
4090 ops.qsort (sort_by_operand_rank);
4091 optimize_ops_list (rhs_code, &ops);
4094 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4095 optimize_range_tests (rhs_code, &ops);
4097 if (first_pass_instance
4098 && rhs_code == MULT_EXPR
4099 && flag_unsafe_math_optimizations)
4100 powi_result = attempt_builtin_powi (stmt, &ops);
4102 /* If the operand vector is now empty, all operands were
4103 consumed by the __builtin_powi optimization. */
4104 if (ops.length () == 0)
4105 transform_stmt_to_copy (&gsi, stmt, powi_result);
4106 else if (ops.length () == 1)
4108 tree last_op = ops.last ()->op;
4111 transform_stmt_to_multiply (&gsi, stmt, last_op,
4114 transform_stmt_to_copy (&gsi, stmt, last_op);
4118 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4119 int ops_num = ops.length ();
4120 int width = get_reassociation_width (ops_num, rhs_code, mode);
4122 if (dump_file && (dump_flags & TDF_DETAILS))
4124 "Width = %d was chosen for reassociation\n", width);
4127 && ops.length () > 3)
4128 rewrite_expr_tree_parallel (stmt, width, ops);
4130 rewrite_expr_tree (stmt, 0, ops, false);
4132 /* If we combined some repeated factors into a
4133 __builtin_powi call, multiply that result by the
4134 reassociated operands. */
4138 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4139 tree target_ssa = make_temp_ssa_name (type, NULL,
4141 gimple_set_lhs (stmt, target_ssa);
4143 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4146 gimple_set_location (mul_stmt, gimple_location (stmt));
4147 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4155 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4157 son = next_dom_son (CDI_POST_DOMINATORS, son))
4158 reassociate_bb (son);
4161 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4162 void debug_ops_vector (vec<operand_entry_t> ops);
4164 /* Dump the operand entry vector OPS to FILE. */
4167 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4172 FOR_EACH_VEC_ELT (ops, i, oe)
4174 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4175 print_generic_expr (file, oe->op, 0);
4179 /* Dump the operand entry vector OPS to STDERR. */
4182 debug_ops_vector (vec<operand_entry_t> ops)
4184 dump_ops_vector (stderr, ops);
4190 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4191 reassociate_bb (EXIT_BLOCK_PTR);
4194 /* Initialize the reassociation pass. */
4201 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4203 /* Find the loops, so that we can prevent moving calculations in
4205 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4207 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4209 operand_entry_pool = create_alloc_pool ("operand entry pool",
4210 sizeof (struct operand_entry), 30);
4211 next_operand_entry_id = 0;
4213 /* Reverse RPO (Reverse Post Order) will give us something where
4214 deeper loops come later. */
4215 pre_and_rev_post_order_compute (NULL, bbs, false);
4216 bb_rank = XCNEWVEC (long, last_basic_block);
4217 operand_rank = pointer_map_create ();
4219 /* Give each default definition a distinct rank. This includes
4220 parameters and the static chain. Walk backwards over all
4221 SSA names so that we get proper rank ordering according
4222 to tree_swap_operands_p. */
4223 for (i = num_ssa_names - 1; i > 0; --i)
4225 tree name = ssa_name (i);
4226 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4227 insert_operand_rank (name, ++rank);
4230 /* Set up rank for each BB. */
4231 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4232 bb_rank[bbs[i]] = ++rank << 16;
4235 calculate_dominance_info (CDI_POST_DOMINATORS);
4236 plus_negates = vNULL;
4239 /* Cleanup after the reassociation pass, and print stats if
4245 statistics_counter_event (cfun, "Linearized",
4246 reassociate_stats.linearized);
4247 statistics_counter_event (cfun, "Constants eliminated",
4248 reassociate_stats.constants_eliminated);
4249 statistics_counter_event (cfun, "Ops eliminated",
4250 reassociate_stats.ops_eliminated);
4251 statistics_counter_event (cfun, "Statements rewritten",
4252 reassociate_stats.rewritten);
4253 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4254 reassociate_stats.pows_encountered);
4255 statistics_counter_event (cfun, "Built-in powi calls created",
4256 reassociate_stats.pows_created);
4258 pointer_map_destroy (operand_rank);
4259 free_alloc_pool (operand_entry_pool);
4261 plus_negates.release ();
4262 free_dominance_info (CDI_POST_DOMINATORS);
4263 loop_optimizer_finalize ();
4266 /* Gate and execute functions for Reassociation. */
4269 execute_reassoc (void)
4274 repropagate_negates ();
4281 gate_tree_ssa_reassoc (void)
4283 return flag_tree_reassoc != 0;
4286 struct gimple_opt_pass pass_reassoc =
4290 "reassoc", /* name */
4291 OPTGROUP_NONE, /* optinfo_flags */
4292 gate_tree_ssa_reassoc, /* gate */
4293 execute_reassoc, /* execute */
4296 0, /* static_pass_number */
4297 TV_TREE_REASSOC, /* tv_id */
4298 PROP_cfg | PROP_ssa, /* properties_required */
4299 0, /* properties_provided */
4300 0, /* properties_destroyed */
4301 0, /* todo_flags_start */
4303 | TODO_update_ssa_only_virtuals
4305 | TODO_ggc_collect /* todo_flags_finish */