gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005-2013 Free Software Foundation, Inc.
   3    Contributed by Daniel Berlin <dan@dberlin.org>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "tree.h"
  26 #include "basic-block.h"
  27 #include "gimple-pretty-print.h"
  28 #include "tree-inline.h"
  29 #include "tree-flow.h"
  30 #include "gimple.h"
  31 #include "tree-iterator.h"
  32 #include "tree-pass.h"
  33 #include "alloc-pool.h"
  34 #include "vec.h"
  35 #include "langhooks.h"
  36 #include "pointer-set.h"
  37 #include "cfgloop.h"
  38 #include "flags.h"
  39 #include "target.h"
  40 #include "params.h"
  41 #include "diagnostic-core.h"
  42
  43 /*  This is a simple global reassociation pass.  It is, in part, based
  44     on the LLVM pass of the same name (They do some things more/less
  45     than we do, in different orders, etc).
  46
  47     It consists of five steps:
  48
  49     1. Breaking up subtract operations into addition + negate, where
  50     it would promote the reassociation of adds.
  51
  52     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  53     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  54     During linearization, we place the operands of the binary
  55     expressions into a vector of operand_entry_t
  56
  57     3. Optimization of the operand lists, eliminating things like a +
  58     -a, a & a, etc.
  59
  60     3a. Combine repeated factors with the same occurrence counts
  61     into a __builtin_powi call that will later be optimized into
  62     an optimal number of multiplies.
  63
  64     4. Rewrite the expression trees we linearized and optimized so
  65     they are in proper rank order.
  66
  67     5. Repropagate negates, as nothing else will clean it up ATM.
  68
  69     A bit of theory on #4, since nobody seems to write anything down
  70     about why it makes sense to do it the way they do it:
  71
  72     We could do this much nicer theoretically, but don't (for reasons
  73     explained after how to do it theoretically nice :P).
  74
  75     In order to promote the most redundancy elimination, you want
  76     binary expressions whose operands are the same rank (or
  77     preferably, the same value) exposed to the redundancy eliminator,
  78     for possible elimination.
  79
  80     So the way to do this if we really cared, is to build the new op
  81     tree from the leaves to the roots, merging as you go, and putting the
  82     new op on the end of the worklist, until you are left with one
  83     thing on the worklist.
  84
  85     IE if you have to rewrite the following set of operands (listed with
  86     rank in parentheses), with opcode PLUS_EXPR:
  87
  88     a (1),  b (1),  c (1),  d (2), e (2)
  89
  90
  91     We start with our merge worklist empty, and the ops list with all of
  92     those on it.
  93
  94     You want to first merge all leaves of the same rank, as much as
  95     possible.
  96
  97     So first build a binary op of
  98
  99     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 100
 101     Because there is no three operand form of PLUS_EXPR, c is not going to
 102     be exposed to redundancy elimination as a rank 1 operand.
 103
 104     So you might as well throw it on the merge worklist (you could also
 105     consider it to now be a rank two operand, and merge it with d and e,
 106     but in this case, you then have evicted e from a binary op. So at
 107     least in this situation, you can't win.)
 108
 109     Then build a binary op of d + e
 110     mergetmp2 = d + e
 111
 112     and put mergetmp2 on the merge worklist.
 113
 114     so merge worklist = {mergetmp, c, mergetmp2}
 115
 116     Continue building binary ops of these operations until you have only
 117     one operation left on the worklist.
 118
 119     So we have
 120
 121     build binary op
 122     mergetmp3 = mergetmp + c
 123
 124     worklist = {mergetmp2, mergetmp3}
 125
 126     mergetmp4 = mergetmp2 + mergetmp3
 127
 128     worklist = {mergetmp4}
 129
 130     because we have one operation left, we can now just set the original
 131     statement equal to the result of that operation.
 132
 133     This will at least expose a + b  and d + e to redundancy elimination
 134     as binary operations.
 135
 136     For extra points, you can reuse the old statements to build the
 137     mergetmps, since you shouldn't run out.
 138
 139     So why don't we do this?
 140
 141     Because it's expensive, and rarely will help.  Most trees we are
 142     reassociating have 3 or less ops.  If they have 2 ops, they already
 143     will be written into a nice single binary op.  If you have 3 ops, a
 144     single simple check suffices to tell you whether the first two are of the
 145     same rank.  If so, you know to order it
 146
 147     mergetmp = op1 + op2
 148     newstmt = mergetmp + op3
 149
 150     instead of
 151     mergetmp = op2 + op3
 152     newstmt = mergetmp + op1
 153
 154     If all three are of the same rank, you can't expose them all in a
 155     single binary operator anyway, so the above is *still* the best you
 156     can do.
 157
 158     Thus, this is what we do.  When we have three ops left, we check to see
 159     what order to put them in, and call it a day.  As a nod to vector sum
 160     reduction, we check if any of the ops are really a phi node that is a
 161     destructive update for the associating op, and keep the destructive
 162     update together for vector sum reduction recognition.  */
 163
 164
 165 /* Statistics */
 166 static struct
 167 {
 168   int linearized;
 169   int constants_eliminated;
 170   int ops_eliminated;
 171   int rewritten;
 172   int pows_encountered;
 173   int pows_created;
 174 } reassociate_stats;
 175
 176 /* Operator, rank pair.  */
 177 typedef struct operand_entry
 178 {
 179   unsigned int rank;
 180   int id;
 181   tree op;
 182   unsigned int count;
 183 } *operand_entry_t;
 184
 185 static alloc_pool operand_entry_pool;
 186
 187 /* This is used to assign a unique ID to each struct operand_entry
 188    so that qsort results are identical on different hosts.  */
 189 static int next_operand_entry_id;
 190
 191 /* Starting rank number for a given basic block, so that we can rank
 192    operations using unmovable instructions in that BB based on the bb
 193    depth.  */
 194 static long *bb_rank;
 195
 196 /* Operand->rank hashtable.  */
 197 static struct pointer_map_t *operand_rank;
 198
 199 /* Forward decls.  */
 200 static long get_rank (tree);
 201
 202
 203 /* Bias amount for loop-carried phis.  We want this to be larger than
 204    the depth of any reassociation tree we can see, but not larger than
 205    the rank difference between two blocks.  */
 206 #define PHI_LOOP_BIAS (1 << 15)
 207
 208 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 209    an innermost loop, and the phi has only a single use which is inside
 210    the loop, then the rank is the block rank of the loop latch plus an
 211    extra bias for the loop-carried dependence.  This causes expressions
 212    calculated into an accumulator variable to be independent for each
 213    iteration of the loop.  If STMT is some other phi, the rank is the
 214    block rank of its containing block.  */
 215 static long
 216 phi_rank (gimple stmt)
 217 {
 218   basic_block bb = gimple_bb (stmt);
 219   struct loop *father = bb->loop_father;
 220   tree res;
 221   unsigned i;
 222   use_operand_p use;
 223   gimple use_stmt;
 224
 225   /* We only care about real loops (those with a latch).  */
 226   if (!father->latch)
 227     return bb_rank[bb->index];
 228
 229   /* Interesting phis must be in headers of innermost loops.  */
 230   if (bb != father->header
 231       || father->inner)
 232     return bb_rank[bb->index];
 233
 234   /* Ignore virtual SSA_NAMEs.  */
 235   res = gimple_phi_result (stmt);
 236   if (virtual_operand_p (res))
 237     return bb_rank[bb->index];
 238
 239   /* The phi definition must have a single use, and that use must be
 240      within the loop.  Otherwise this isn't an accumulator pattern.  */
 241   if (!single_imm_use (res, &use, &use_stmt)
 242       || gimple_bb (use_stmt)->loop_father != father)
 243     return bb_rank[bb->index];
 244
 245   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 246   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 247     {
 248       tree arg = gimple_phi_arg_def (stmt, i);
 249       if (TREE_CODE (arg) == SSA_NAME
 250           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 251         {
 252           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 253           if (gimple_bb (def_stmt)->loop_father == father)
 254             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 255         }
 256     }
 257
 258   /* Must be an uninteresting phi.  */
 259   return bb_rank[bb->index];
 260 }
 261
 262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 263    loop-carried dependence of an innermost loop, return TRUE; else
 264    return FALSE.  */
 265 static bool
 266 loop_carried_phi (tree exp)
 267 {
 268   gimple phi_stmt;
 269   long block_rank;
 270
 271   if (TREE_CODE (exp) != SSA_NAME
 272       || SSA_NAME_IS_DEFAULT_DEF (exp))
 273     return false;
 274
 275   phi_stmt = SSA_NAME_DEF_STMT (exp);
 276
 277   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 278     return false;
 279
 280   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 281      an additional bias added in.  If this phi doesn't have block rank,
 282      it's biased and should not be propagated.  */
 283   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 284
 285   if (phi_rank (phi_stmt) != block_rank)
 286     return true;
 287
 288   return false;
 289 }
 290
 291 /* Return the maximum of RANK and the rank that should be propagated
 292    from expression OP.  For most operands, this is just the rank of OP.
 293    For loop-carried phis, the value is zero to avoid undoing the bias
 294    in favor of the phi.  */
 295 static long
 296 propagate_rank (long rank, tree op)
 297 {
 298   long op_rank;
 299
 300   if (loop_carried_phi (op))
 301     return rank;
 302
 303   op_rank = get_rank (op);
 304
 305   return MAX (rank, op_rank);
 306 }
 307
 308 /* Look up the operand rank structure for expression E.  */
 309
 310 static inline long
 311 find_operand_rank (tree e)
 312 {
 313   void **slot = pointer_map_contains (operand_rank, e);
 314   return slot ? (long) (intptr_t) *slot : -1;
 315 }
 316
 317 /* Insert {E,RANK} into the operand rank hashtable.  */
 318
 319 static inline void
 320 insert_operand_rank (tree e, long rank)
 321 {
 322   void **slot;
 323   gcc_assert (rank > 0);
 324   slot = pointer_map_insert (operand_rank, e);
 325   gcc_assert (!*slot);
 326   *slot = (void *) (intptr_t) rank;
 327 }
 328
 329 /* Given an expression E, return the rank of the expression.  */
 330
 331 static long
 332 get_rank (tree e)
 333 {
 334   /* Constants have rank 0.  */
 335   if (is_gimple_min_invariant (e))
 336     return 0;
 337
 338   /* SSA_NAME's have the rank of the expression they are the result
 339      of.
 340      For globals and uninitialized values, the rank is 0.
 341      For function arguments, use the pre-setup rank.
 342      For PHI nodes, stores, asm statements, etc, we use the rank of
 343      the BB.
 344      For simple operations, the rank is the maximum rank of any of
 345      its operands, or the bb_rank, whichever is less.
 346      I make no claims that this is optimal, however, it gives good
 347      results.  */
 348
 349   /* We make an exception to the normal ranking system to break
 350      dependences of accumulator variables in loops.  Suppose we
 351      have a simple one-block loop containing:
 352
 353        x_1 = phi(x_0, x_2)
 354        b = a + x_1
 355        c = b + d
 356        x_2 = c + e
 357
 358      As shown, each iteration of the calculation into x is fully
 359      dependent upon the iteration before it.  We would prefer to
 360      see this in the form:
 361
 362        x_1 = phi(x_0, x_2)
 363        b = a + d
 364        c = b + e
 365        x_2 = c + x_1
 366
 367      If the loop is unrolled, the calculations of b and c from
 368      different iterations can be interleaved.
 369
 370      To obtain this result during reassociation, we bias the rank
 371      of the phi definition x_1 upward, when it is recognized as an
 372      accumulator pattern.  The artificial rank causes it to be
 373      added last, providing the desired independence.  */
 374
 375   if (TREE_CODE (e) == SSA_NAME)
 376     {
 377       gimple stmt;
 378       long rank;
 379       int i, n;
 380       tree op;
 381
 382       if (SSA_NAME_IS_DEFAULT_DEF (e))
 383         return find_operand_rank (e);
 384
 385       stmt = SSA_NAME_DEF_STMT (e);
 386       if (gimple_code (stmt) == GIMPLE_PHI)
 387         return phi_rank (stmt);
 388
 389       if (!is_gimple_assign (stmt)
 390           || gimple_vdef (stmt))
 391         return bb_rank[gimple_bb (stmt)->index];
 392
 393       /* If we already have a rank for this expression, use that.  */
 394       rank = find_operand_rank (e);
 395       if (rank != -1)
 396         return rank;
 397
 398       /* Otherwise, find the maximum rank for the operands.  As an
 399          exception, remove the bias from loop-carried phis when propagating
 400          the rank so that dependent operations are not also biased.  */
 401       rank = 0;
 402       if (gimple_assign_single_p (stmt))
 403         {
 404           tree rhs = gimple_assign_rhs1 (stmt);
 405           n = TREE_OPERAND_LENGTH (rhs);
 406           if (n == 0)
 407             rank = propagate_rank (rank, rhs);
 408           else
 409             {
 410               for (i = 0; i < n; i++)
 411                 {
 412                   op = TREE_OPERAND (rhs, i);
 413
 414                   if (op != NULL_TREE)
 415                     rank = propagate_rank (rank, op);
 416                 }
 417             }
 418         }
 419       else
 420         {
 421           n = gimple_num_ops (stmt);
 422           for (i = 1; i < n; i++)
 423             {
 424               op = gimple_op (stmt, i);
 425               gcc_assert (op);
 426               rank = propagate_rank (rank, op);
 427             }
 428         }
 429
 430       if (dump_file && (dump_flags & TDF_DETAILS))
 431         {
 432           fprintf (dump_file, "Rank for ");
 433           print_generic_expr (dump_file, e, 0);
 434           fprintf (dump_file, " is %ld\n", (rank + 1));
 435         }
 436
 437       /* Note the rank in the hashtable so we don't recompute it.  */
 438       insert_operand_rank (e, (rank + 1));
 439       return (rank + 1);
 440     }
 441
 442   /* Globals, etc,  are rank 0 */
 443   return 0;
 444 }
 445
 446
 447 /* We want integer ones to end up last no matter what, since they are
 448    the ones we can do the most with.  */
 449 #define INTEGER_CONST_TYPE 1 << 3
 450 #define FLOAT_CONST_TYPE 1 << 2
 451 #define OTHER_CONST_TYPE 1 << 1
 452
 453 /* Classify an invariant tree into integer, float, or other, so that
 454    we can sort them to be near other constants of the same type.  */
 455 static inline int
 456 constant_type (tree t)
 457 {
 458   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 459     return INTEGER_CONST_TYPE;
 460   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 461     return FLOAT_CONST_TYPE;
 462   else
 463     return OTHER_CONST_TYPE;
 464 }
 465
 466 /* qsort comparison function to sort operand entries PA and PB by rank
 467    so that the sorted array is ordered by rank in decreasing order.  */
 468 static int
 469 sort_by_operand_rank (const void *pa, const void *pb)
 470 {
 471   const operand_entry_t oea = *(const operand_entry_t *)pa;
 472   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 473
 474   /* It's nicer for optimize_expression if constants that are likely
 475      to fold when added/multiplied//whatever are put next to each
 476      other.  Since all constants have rank 0, order them by type.  */
 477   if (oeb->rank == 0 && oea->rank == 0)
 478     {
 479       if (constant_type (oeb->op) != constant_type (oea->op))
 480         return constant_type (oeb->op) - constant_type (oea->op);
 481       else
 482         /* To make sorting result stable, we use unique IDs to determine
 483            order.  */
 484         return oeb->id - oea->id;
 485     }
 486
 487   /* Lastly, make sure the versions that are the same go next to each
 488      other.  We use SSA_NAME_VERSION because it's stable.  */
 489   if ((oeb->rank - oea->rank == 0)
 490       && TREE_CODE (oea->op) == SSA_NAME
 491       && TREE_CODE (oeb->op) == SSA_NAME)
 492     {
 493       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 494         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 495       else
 496         return oeb->id - oea->id;
 497     }
 498
 499   if (oeb->rank != oea->rank)
 500     return oeb->rank - oea->rank;
 501   else
 502     return oeb->id - oea->id;
 503 }
 504
 505 /* Add an operand entry to *OPS for the tree operand OP.  */
 506
 507 static void
 508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
 509 {
 510   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 511
 512   oe->op = op;
 513   oe->rank = get_rank (op);
 514   oe->id = next_operand_entry_id++;
 515   oe->count = 1;
 516   ops->safe_push (oe);
 517 }
 518
 519 /* Add an operand entry to *OPS for the tree operand OP with repeat
 520    count REPEAT.  */
 521
 522 static void
 523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
 524                        HOST_WIDE_INT repeat)
 525 {
 526   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 527
 528   oe->op = op;
 529   oe->rank = get_rank (op);
 530   oe->id = next_operand_entry_id++;
 531   oe->count = repeat;
 532   ops->safe_push (oe);
 533
 534   reassociate_stats.pows_encountered++;
 535 }
 536
 537 /* Return true if STMT is reassociable operation containing a binary
 538    operation with tree code CODE, and is inside LOOP.  */
 539
 540 static bool
 541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 542 {
 543   basic_block bb = gimple_bb (stmt);
 544
 545   if (gimple_bb (stmt) == NULL)
 546     return false;
 547
 548   if (!flow_bb_inside_loop_p (loop, bb))
 549     return false;
 550
 551   if (is_gimple_assign (stmt)
 552       && gimple_assign_rhs_code (stmt) == code
 553       && has_single_use (gimple_assign_lhs (stmt)))
 554     return true;
 555
 556   return false;
 557 }
 558
 559
 560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 561    operand of the negate operation.  Otherwise, return NULL.  */
 562
 563 static tree
 564 get_unary_op (tree name, enum tree_code opcode)
 565 {
 566   gimple stmt = SSA_NAME_DEF_STMT (name);
 567
 568   if (!is_gimple_assign (stmt))
 569     return NULL_TREE;
 570
 571   if (gimple_assign_rhs_code (stmt) == opcode)
 572     return gimple_assign_rhs1 (stmt);
 573   return NULL_TREE;
 574 }
 575
 576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 577    eliminate through equivalences, do so, remove them from OPS, and
 578    return true.  Otherwise, return false.  */
 579
 580 static bool
 581 eliminate_duplicate_pair (enum tree_code opcode,
 582                           vec<operand_entry_t> *ops,
 583                           bool *all_done,
 584                           unsigned int i,
 585                           operand_entry_t curr,
 586                           operand_entry_t last)
 587 {
 588
 589   /* If we have two of the same op, and the opcode is & |, min, or max,
 590      we can eliminate one of them.
 591      If we have two of the same op, and the opcode is ^, we can
 592      eliminate both of them.  */
 593
 594   if (last && last->op == curr->op)
 595     {
 596       switch (opcode)
 597         {
 598         case MAX_EXPR:
 599         case MIN_EXPR:
 600         case BIT_IOR_EXPR:
 601         case BIT_AND_EXPR:
 602           if (dump_file && (dump_flags & TDF_DETAILS))
 603             {
 604               fprintf (dump_file, "Equivalence: ");
 605               print_generic_expr (dump_file, curr->op, 0);
 606               fprintf (dump_file, " [&|minmax] ");
 607               print_generic_expr (dump_file, last->op, 0);
 608               fprintf (dump_file, " -> ");
 609               print_generic_stmt (dump_file, last->op, 0);
 610             }
 611
 612           ops->ordered_remove (i);
 613           reassociate_stats.ops_eliminated ++;
 614
 615           return true;
 616
 617         case BIT_XOR_EXPR:
 618           if (dump_file && (dump_flags & TDF_DETAILS))
 619             {
 620               fprintf (dump_file, "Equivalence: ");
 621               print_generic_expr (dump_file, curr->op, 0);
 622               fprintf (dump_file, " ^ ");
 623               print_generic_expr (dump_file, last->op, 0);
 624               fprintf (dump_file, " -> nothing\n");
 625             }
 626
 627           reassociate_stats.ops_eliminated += 2;
 628
 629           if (ops->length () == 2)
 630             {
 631               ops->create (0);
 632               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 633               *all_done = true;
 634             }
 635           else
 636             {
 637               ops->ordered_remove (i-1);
 638               ops->ordered_remove (i-1);
 639             }
 640
 641           return true;
 642
 643         default:
 644           break;
 645         }
 646     }
 647   return false;
 648 }
 649
 650 static vec<tree> plus_negates;
 651
 652 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 653    expression, look in OPS for a corresponding positive operation to cancel
 654    it out.  If we find one, remove the other from OPS, replace
 655    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 656    return false. */
 657
 658 static bool
 659 eliminate_plus_minus_pair (enum tree_code opcode,
 660                            vec<operand_entry_t> *ops,
 661                            unsigned int currindex,
 662                            operand_entry_t curr)
 663 {
 664   tree negateop;
 665   tree notop;
 666   unsigned int i;
 667   operand_entry_t oe;
 668
 669   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 670     return false;
 671
 672   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 673   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 674   if (negateop == NULL_TREE && notop == NULL_TREE)
 675     return false;
 676
 677   /* Any non-negated version will have a rank that is one less than
 678      the current rank.  So once we hit those ranks, if we don't find
 679      one, we can stop.  */
 680
 681   for (i = currindex + 1;
 682        ops->iterate (i, &oe)
 683        && oe->rank >= curr->rank - 1 ;
 684        i++)
 685     {
 686       if (oe->op == negateop)
 687         {
 688
 689           if (dump_file && (dump_flags & TDF_DETAILS))
 690             {
 691               fprintf (dump_file, "Equivalence: ");
 692               print_generic_expr (dump_file, negateop, 0);
 693               fprintf (dump_file, " + -");
 694               print_generic_expr (dump_file, oe->op, 0);
 695               fprintf (dump_file, " -> 0\n");
 696             }
 697
 698           ops->ordered_remove (i);
 699           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 700           ops->ordered_remove (currindex);
 701           reassociate_stats.ops_eliminated ++;
 702
 703           return true;
 704         }
 705       else if (oe->op == notop)
 706         {
 707           tree op_type = TREE_TYPE (oe->op);
 708
 709           if (dump_file && (dump_flags & TDF_DETAILS))
 710             {
 711               fprintf (dump_file, "Equivalence: ");
 712               print_generic_expr (dump_file, notop, 0);
 713               fprintf (dump_file, " + ~");
 714               print_generic_expr (dump_file, oe->op, 0);
 715               fprintf (dump_file, " -> -1\n");
 716             }
 717
 718           ops->ordered_remove (i);
 719           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 720           ops->ordered_remove (currindex);
 721           reassociate_stats.ops_eliminated ++;
 722
 723           return true;
 724         }
 725     }
 726
 727   /* CURR->OP is a negate expr in a plus expr: save it for later
 728      inspection in repropagate_negates().  */
 729   if (negateop != NULL_TREE)
 730     plus_negates.safe_push (curr->op);
 731
 732   return false;
 733 }
 734
 735 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 736    bitwise not expression, look in OPS for a corresponding operand to
 737    cancel it out.  If we find one, remove the other from OPS, replace
 738    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 739    false. */
 740
 741 static bool
 742 eliminate_not_pairs (enum tree_code opcode,
 743                      vec<operand_entry_t> *ops,
 744                      unsigned int currindex,
 745                      operand_entry_t curr)
 746 {
 747   tree notop;
 748   unsigned int i;
 749   operand_entry_t oe;
 750
 751   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 752       || TREE_CODE (curr->op) != SSA_NAME)
 753     return false;
 754
 755   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 756   if (notop == NULL_TREE)
 757     return false;
 758
 759   /* Any non-not version will have a rank that is one less than
 760      the current rank.  So once we hit those ranks, if we don't find
 761      one, we can stop.  */
 762
 763   for (i = currindex + 1;
 764        ops->iterate (i, &oe)
 765        && oe->rank >= curr->rank - 1;
 766        i++)
 767     {
 768       if (oe->op == notop)
 769         {
 770           if (dump_file && (dump_flags & TDF_DETAILS))
 771             {
 772               fprintf (dump_file, "Equivalence: ");
 773               print_generic_expr (dump_file, notop, 0);
 774               if (opcode == BIT_AND_EXPR)
 775                 fprintf (dump_file, " & ~");
 776               else if (opcode == BIT_IOR_EXPR)
 777                 fprintf (dump_file, " | ~");
 778               print_generic_expr (dump_file, oe->op, 0);
 779               if (opcode == BIT_AND_EXPR)
 780                 fprintf (dump_file, " -> 0\n");
 781               else if (opcode == BIT_IOR_EXPR)
 782                 fprintf (dump_file, " -> -1\n");
 783             }
 784
 785           if (opcode == BIT_AND_EXPR)
 786             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 787           else if (opcode == BIT_IOR_EXPR)
 788             oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
 789                                           TYPE_PRECISION (TREE_TYPE (oe->op)));
 790
 791           reassociate_stats.ops_eliminated += ops->length () - 1;
 792           ops->truncate (0);
 793           ops->quick_push (oe);
 794           return true;
 795         }
 796     }
 797
 798   return false;
 799 }
 800
 801 /* Use constant value that may be present in OPS to try to eliminate
 802    operands.  Note that this function is only really used when we've
 803    eliminated ops for other reasons, or merged constants.  Across
 804    single statements, fold already does all of this, plus more.  There
 805    is little point in duplicating logic, so I've only included the
 806    identities that I could ever construct testcases to trigger.  */
 807
 808 static void
 809 eliminate_using_constants (enum tree_code opcode,
 810                            vec<operand_entry_t> *ops)
 811 {
 812   operand_entry_t oelast = ops->last ();
 813   tree type = TREE_TYPE (oelast->op);
 814
 815   if (oelast->rank == 0
 816       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 817     {
 818       switch (opcode)
 819         {
 820         case BIT_AND_EXPR:
 821           if (integer_zerop (oelast->op))
 822             {
 823               if (ops->length () != 1)
 824                 {
 825                   if (dump_file && (dump_flags & TDF_DETAILS))
 826                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 827
 828                   reassociate_stats.ops_eliminated += ops->length () - 1;
 829
 830                   ops->truncate (0);
 831                   ops->quick_push (oelast);
 832                   return;
 833                 }
 834             }
 835           else if (integer_all_onesp (oelast->op))
 836             {
 837               if (ops->length () != 1)
 838                 {
 839                   if (dump_file && (dump_flags & TDF_DETAILS))
 840                     fprintf (dump_file, "Found & -1, removing\n");
 841                   ops->pop ();
 842                   reassociate_stats.ops_eliminated++;
 843                 }
 844             }
 845           break;
 846         case BIT_IOR_EXPR:
 847           if (integer_all_onesp (oelast->op))
 848             {
 849               if (ops->length () != 1)
 850                 {
 851                   if (dump_file && (dump_flags & TDF_DETAILS))
 852                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 853
 854                   reassociate_stats.ops_eliminated += ops->length () - 1;
 855
 856                   ops->truncate (0);
 857                   ops->quick_push (oelast);
 858                   return;
 859                 }
 860             }
 861           else if (integer_zerop (oelast->op))
 862             {
 863               if (ops->length () != 1)
 864                 {
 865                   if (dump_file && (dump_flags & TDF_DETAILS))
 866                     fprintf (dump_file, "Found | 0, removing\n");
 867                   ops->pop ();
 868                   reassociate_stats.ops_eliminated++;
 869                 }
 870             }
 871           break;
 872         case MULT_EXPR:
 873           if (integer_zerop (oelast->op)
 874               || (FLOAT_TYPE_P (type)
 875                   && !HONOR_NANS (TYPE_MODE (type))
 876                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 877                   && real_zerop (oelast->op)))
 878             {
 879               if (ops->length () != 1)
 880                 {
 881                   if (dump_file && (dump_flags & TDF_DETAILS))
 882                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 883
 884                   reassociate_stats.ops_eliminated += ops->length () - 1;
 885                   ops->truncate (1);
 886                   ops->quick_push (oelast);
 887                   return;
 888                 }
 889             }
 890           else if (integer_onep (oelast->op)
 891                    || (FLOAT_TYPE_P (type)
 892                        && !HONOR_SNANS (TYPE_MODE (type))
 893                        && real_onep (oelast->op)))
 894             {
 895               if (ops->length () != 1)
 896                 {
 897                   if (dump_file && (dump_flags & TDF_DETAILS))
 898                     fprintf (dump_file, "Found * 1, removing\n");
 899                   ops->pop ();
 900                   reassociate_stats.ops_eliminated++;
 901                   return;
 902                 }
 903             }
 904           break;
 905         case BIT_XOR_EXPR:
 906         case PLUS_EXPR:
 907         case MINUS_EXPR:
 908           if (integer_zerop (oelast->op)
 909               || (FLOAT_TYPE_P (type)
 910                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 911                   && fold_real_zero_addition_p (type, oelast->op,
 912                                                 opcode == MINUS_EXPR)))
 913             {
 914               if (ops->length () != 1)
 915                 {
 916                   if (dump_file && (dump_flags & TDF_DETAILS))
 917                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 918                   ops->pop ();
 919                   reassociate_stats.ops_eliminated++;
 920                   return;
 921                 }
 922             }
 923           break;
 924         default:
 925           break;
 926         }
 927     }
 928 }
 929
 930
 931 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
 932                                  bool, bool);
 933
 934 /* Structure for tracking and counting operands.  */
 935 typedef struct oecount_s {
 936   int cnt;
 937   int id;
 938   enum tree_code oecode;
 939   tree op;
 940 } oecount;
 941
 942
 943 /* The heap for the oecount hashtable and the sorted list of operands.  */
 944 static vec<oecount> cvec;
 945
 946 /* Hash function for oecount.  */
 947
 948 static hashval_t
 949 oecount_hash (const void *p)
 950 {
 951   const oecount *c = &cvec[(size_t)p - 42];
 952   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
 953 }
 954
 955 /* Comparison function for oecount.  */
 956
 957 static int
 958 oecount_eq (const void *p1, const void *p2)
 959 {
 960   const oecount *c1 = &cvec[(size_t)p1 - 42];
 961   const oecount *c2 = &cvec[(size_t)p2 - 42];
 962   return (c1->oecode == c2->oecode
 963           && c1->op == c2->op);
 964 }
 965
 966 /* Comparison function for qsort sorting oecount elements by count.  */
 967
 968 static int
 969 oecount_cmp (const void *p1, const void *p2)
 970 {
 971   const oecount *c1 = (const oecount *)p1;
 972   const oecount *c2 = (const oecount *)p2;
 973   if (c1->cnt != c2->cnt)
 974     return c1->cnt - c2->cnt;
 975   else
 976     /* If counts are identical, use unique IDs to stabilize qsort.  */
 977     return c1->id - c2->id;
 978 }
 979
 980 /* Return TRUE iff STMT represents a builtin call that raises OP
 981    to some exponent.  */
 982
 983 static bool
 984 stmt_is_power_of_op (gimple stmt, tree op)
 985 {
 986   tree fndecl;
 987
 988   if (!is_gimple_call (stmt))
 989     return false;
 990
 991   fndecl = gimple_call_fndecl (stmt);
 992
 993   if (!fndecl
 994       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
 995     return false;
 996
 997   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
 998     {
 999     CASE_FLT_FN (BUILT_IN_POW):
1000     CASE_FLT_FN (BUILT_IN_POWI):
1001       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1002
1003     default:
1004       return false;
1005     }
1006 }
1007
1008 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1009    in place and return the result.  Assumes that stmt_is_power_of_op
1010    was previously called for STMT and returned TRUE.  */
1011
1012 static HOST_WIDE_INT
1013 decrement_power (gimple stmt)
1014 {
1015   REAL_VALUE_TYPE c, cint;
1016   HOST_WIDE_INT power;
1017   tree arg1;
1018
1019   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1020     {
1021     CASE_FLT_FN (BUILT_IN_POW):
1022       arg1 = gimple_call_arg (stmt, 1);
1023       c = TREE_REAL_CST (arg1);
1024       power = real_to_integer (&c) - 1;
1025       real_from_integer (&cint, VOIDmode, power, 0, 0);
1026       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1027       return power;
1028
1029     CASE_FLT_FN (BUILT_IN_POWI):
1030       arg1 = gimple_call_arg (stmt, 1);
1031       power = TREE_INT_CST_LOW (arg1) - 1;
1032       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1033       return power;
1034
1035     default:
1036       gcc_unreachable ();
1037     }
1038 }
1039
1040 /* Find the single immediate use of STMT's LHS, and replace it
1041    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1042    replace *DEF with OP as well.  */
1043
1044 static void
1045 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1046 {
1047   tree lhs;
1048   gimple use_stmt;
1049   use_operand_p use;
1050   gimple_stmt_iterator gsi;
1051
1052   if (is_gimple_call (stmt))
1053     lhs = gimple_call_lhs (stmt);
1054   else
1055     lhs = gimple_assign_lhs (stmt);
1056
1057   gcc_assert (has_single_use (lhs));
1058   single_imm_use (lhs, &use, &use_stmt);
1059   if (lhs == *def)
1060     *def = op;
1061   SET_USE (use, op);
1062   if (TREE_CODE (op) != SSA_NAME)
1063     update_stmt (use_stmt);
1064   gsi = gsi_for_stmt (stmt);
1065   unlink_stmt_vdef (stmt);
1066   gsi_remove (&gsi, true);
1067   release_defs (stmt);
1068 }
1069
1070 /* Walks the linear chain with result *DEF searching for an operation
1071    with operand OP and code OPCODE removing that from the chain.  *DEF
1072    is updated if there is only one operand but no operation left.  */
1073
1074 static void
1075 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1076 {
1077   gimple stmt = SSA_NAME_DEF_STMT (*def);
1078
1079   do
1080     {
1081       tree name;
1082
1083       if (opcode == MULT_EXPR
1084           && stmt_is_power_of_op (stmt, op))
1085         {
1086           if (decrement_power (stmt) == 1)
1087             propagate_op_to_single_use (op, stmt, def);
1088           return;
1089         }
1090
1091       name = gimple_assign_rhs1 (stmt);
1092
1093       /* If this is the operation we look for and one of the operands
1094          is ours simply propagate the other operand into the stmts
1095          single use.  */
1096       if (gimple_assign_rhs_code (stmt) == opcode
1097           && (name == op
1098               || gimple_assign_rhs2 (stmt) == op))
1099         {
1100           if (name == op)
1101             name = gimple_assign_rhs2 (stmt);
1102           propagate_op_to_single_use (name, stmt, def);
1103           return;
1104         }
1105
1106       /* We might have a multiply of two __builtin_pow* calls, and
1107          the operand might be hiding in the rightmost one.  */
1108       if (opcode == MULT_EXPR
1109           && gimple_assign_rhs_code (stmt) == opcode
1110           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1111           && has_single_use (gimple_assign_rhs2 (stmt)))
1112         {
1113           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1114           if (stmt_is_power_of_op (stmt2, op))
1115             {
1116               if (decrement_power (stmt2) == 1)
1117                 propagate_op_to_single_use (op, stmt2, def);
1118               return;
1119             }
1120         }
1121
1122       /* Continue walking the chain.  */
1123       gcc_assert (name != op
1124                   && TREE_CODE (name) == SSA_NAME);
1125       stmt = SSA_NAME_DEF_STMT (name);
1126     }
1127   while (1);
1128 }
1129
1130 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1131    the result.  Places the statement after the definition of either
1132    OP1 or OP2.  Returns the new statement.  */
1133
1134 static gimple
1135 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1136 {
1137   gimple op1def = NULL, op2def = NULL;
1138   gimple_stmt_iterator gsi;
1139   tree op;
1140   gimple sum;
1141
1142   /* Create the addition statement.  */
1143   op = make_ssa_name (type, NULL);
1144   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1145
1146   /* Find an insertion place and insert.  */
1147   if (TREE_CODE (op1) == SSA_NAME)
1148     op1def = SSA_NAME_DEF_STMT (op1);
1149   if (TREE_CODE (op2) == SSA_NAME)
1150     op2def = SSA_NAME_DEF_STMT (op2);
1151   if ((!op1def || gimple_nop_p (op1def))
1152       && (!op2def || gimple_nop_p (op2def)))
1153     {
1154       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1155       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1156     }
1157   else if ((!op1def || gimple_nop_p (op1def))
1158            || (op2def && !gimple_nop_p (op2def)
1159                && stmt_dominates_stmt_p (op1def, op2def)))
1160     {
1161       if (gimple_code (op2def) == GIMPLE_PHI)
1162         {
1163           gsi = gsi_after_labels (gimple_bb (op2def));
1164           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1165         }
1166       else
1167         {
1168           if (!stmt_ends_bb_p (op2def))
1169             {
1170               gsi = gsi_for_stmt (op2def);
1171               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1172             }
1173           else
1174             {
1175               edge e;
1176               edge_iterator ei;
1177
1178               FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1179                 if (e->flags & EDGE_FALLTHRU)
1180                   gsi_insert_on_edge_immediate (e, sum);
1181             }
1182         }
1183     }
1184   else
1185     {
1186       if (gimple_code (op1def) == GIMPLE_PHI)
1187         {
1188           gsi = gsi_after_labels (gimple_bb (op1def));
1189           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1190         }
1191       else
1192         {
1193           if (!stmt_ends_bb_p (op1def))
1194             {
1195               gsi = gsi_for_stmt (op1def);
1196               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1197             }
1198           else
1199             {
1200               edge e;
1201               edge_iterator ei;
1202
1203               FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1204                 if (e->flags & EDGE_FALLTHRU)
1205                   gsi_insert_on_edge_immediate (e, sum);
1206             }
1207         }
1208     }
1209   update_stmt (sum);
1210
1211   return sum;
1212 }
1213
1214 /* Perform un-distribution of divisions and multiplications.
1215    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1216    to (A + B) / X for real X.
1217
1218    The algorithm is organized as follows.
1219
1220     - First we walk the addition chain *OPS looking for summands that
1221       are defined by a multiplication or a real division.  This results
1222       in the candidates bitmap with relevant indices into *OPS.
1223
1224     - Second we build the chains of multiplications or divisions for
1225       these candidates, counting the number of occurrences of (operand, code)
1226       pairs in all of the candidates chains.
1227
1228     - Third we sort the (operand, code) pairs by number of occurrence and
1229       process them starting with the pair with the most uses.
1230
1231       * For each such pair we walk the candidates again to build a
1232         second candidate bitmap noting all multiplication/division chains
1233         that have at least one occurrence of (operand, code).
1234
1235       * We build an alternate addition chain only covering these
1236         candidates with one (operand, code) operation removed from their
1237         multiplication/division chain.
1238
1239       * The first candidate gets replaced by the alternate addition chain
1240         multiplied/divided by the operand.
1241
1242       * All candidate chains get disabled for further processing and
1243         processing of (operand, code) pairs continues.
1244
1245   The alternate addition chains built are re-processed by the main
1246   reassociation algorithm which allows optimizing a * x * y + b * y * x
1247   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1248
1249 static bool
1250 undistribute_ops_list (enum tree_code opcode,
1251                        vec<operand_entry_t> *ops, struct loop *loop)
1252 {
1253   unsigned int length = ops->length ();
1254   operand_entry_t oe1;
1255   unsigned i, j;
1256   sbitmap candidates, candidates2;
1257   unsigned nr_candidates, nr_candidates2;
1258   sbitmap_iterator sbi0;
1259   vec<operand_entry_t> *subops;
1260   htab_t ctable;
1261   bool changed = false;
1262   int next_oecount_id = 0;
1263
1264   if (length <= 1
1265       || opcode != PLUS_EXPR)
1266     return false;
1267
1268   /* Build a list of candidates to process.  */
1269   candidates = sbitmap_alloc (length);
1270   bitmap_clear (candidates);
1271   nr_candidates = 0;
1272   FOR_EACH_VEC_ELT (*ops, i, oe1)
1273     {
1274       enum tree_code dcode;
1275       gimple oe1def;
1276
1277       if (TREE_CODE (oe1->op) != SSA_NAME)
1278         continue;
1279       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1280       if (!is_gimple_assign (oe1def))
1281         continue;
1282       dcode = gimple_assign_rhs_code (oe1def);
1283       if ((dcode != MULT_EXPR
1284            && dcode != RDIV_EXPR)
1285           || !is_reassociable_op (oe1def, dcode, loop))
1286         continue;
1287
1288       bitmap_set_bit (candidates, i);
1289       nr_candidates++;
1290     }
1291
1292   if (nr_candidates < 2)
1293     {
1294       sbitmap_free (candidates);
1295       return false;
1296     }
1297
1298   if (dump_file && (dump_flags & TDF_DETAILS))
1299     {
1300       fprintf (dump_file, "searching for un-distribute opportunities ");
1301       print_generic_expr (dump_file,
1302         (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1303       fprintf (dump_file, " %d\n", nr_candidates);
1304     }
1305
1306   /* Build linearized sub-operand lists and the counting table.  */
1307   cvec.create (0);
1308   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1309   /* ??? Macro arguments cannot have multi-argument template types in
1310      them.  This typedef is needed to workaround that limitation.  */
1311   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1312   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1313   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1314     {
1315       gimple oedef;
1316       enum tree_code oecode;
1317       unsigned j;
1318
1319       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1320       oecode = gimple_assign_rhs_code (oedef);
1321       linearize_expr_tree (&subops[i], oedef,
1322                            associative_tree_code (oecode), false);
1323
1324       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1325         {
1326           oecount c;
1327           void **slot;
1328           size_t idx;
1329           c.oecode = oecode;
1330           c.cnt = 1;
1331           c.id = next_oecount_id++;
1332           c.op = oe1->op;
1333           cvec.safe_push (c);
1334           idx = cvec.length () + 41;
1335           slot = htab_find_slot (ctable, (void *)idx, INSERT);
1336           if (!*slot)
1337             {
1338               *slot = (void *)idx;
1339             }
1340           else
1341             {
1342               cvec.pop ();
1343               cvec[(size_t)*slot - 42].cnt++;
1344             }
1345         }
1346     }
1347   htab_delete (ctable);
1348
1349   /* Sort the counting table.  */
1350   cvec.qsort (oecount_cmp);
1351
1352   if (dump_file && (dump_flags & TDF_DETAILS))
1353     {
1354       oecount *c;
1355       fprintf (dump_file, "Candidates:\n");
1356       FOR_EACH_VEC_ELT (cvec, j, c)
1357         {
1358           fprintf (dump_file, "  %u %s: ", c->cnt,
1359                    c->oecode == MULT_EXPR
1360                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1361           print_generic_expr (dump_file, c->op, 0);
1362           fprintf (dump_file, "\n");
1363         }
1364     }
1365
1366   /* Process the (operand, code) pairs in order of most occurence.  */
1367   candidates2 = sbitmap_alloc (length);
1368   while (!cvec.is_empty ())
1369     {
1370       oecount *c = &cvec.last ();
1371       if (c->cnt < 2)
1372         break;
1373
1374       /* Now collect the operands in the outer chain that contain
1375          the common operand in their inner chain.  */
1376       bitmap_clear (candidates2);
1377       nr_candidates2 = 0;
1378       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1379         {
1380           gimple oedef;
1381           enum tree_code oecode;
1382           unsigned j;
1383           tree op = (*ops)[i]->op;
1384
1385           /* If we undistributed in this chain already this may be
1386              a constant.  */
1387           if (TREE_CODE (op) != SSA_NAME)
1388             continue;
1389
1390           oedef = SSA_NAME_DEF_STMT (op);
1391           oecode = gimple_assign_rhs_code (oedef);
1392           if (oecode != c->oecode)
1393             continue;
1394
1395           FOR_EACH_VEC_ELT (subops[i], j, oe1)
1396             {
1397               if (oe1->op == c->op)
1398                 {
1399                   bitmap_set_bit (candidates2, i);
1400                   ++nr_candidates2;
1401                   break;
1402                 }
1403             }
1404         }
1405
1406       if (nr_candidates2 >= 2)
1407         {
1408           operand_entry_t oe1, oe2;
1409           gimple prod;
1410           int first = bitmap_first_set_bit (candidates2);
1411
1412           /* Build the new addition chain.  */
1413           oe1 = (*ops)[first];
1414           if (dump_file && (dump_flags & TDF_DETAILS))
1415             {
1416               fprintf (dump_file, "Building (");
1417               print_generic_expr (dump_file, oe1->op, 0);
1418             }
1419           zero_one_operation (&oe1->op, c->oecode, c->op);
1420           EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1421             {
1422               gimple sum;
1423               oe2 = (*ops)[i];
1424               if (dump_file && (dump_flags & TDF_DETAILS))
1425                 {
1426                   fprintf (dump_file, " + ");
1427                   print_generic_expr (dump_file, oe2->op, 0);
1428                 }
1429               zero_one_operation (&oe2->op, c->oecode, c->op);
1430               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1431                                        oe1->op, oe2->op, opcode);
1432               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1433               oe2->rank = 0;
1434               oe1->op = gimple_get_lhs (sum);
1435             }
1436
1437           /* Apply the multiplication/division.  */
1438           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1439                                     oe1->op, c->op, c->oecode);
1440           if (dump_file && (dump_flags & TDF_DETAILS))
1441             {
1442               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1443               print_generic_expr (dump_file, c->op, 0);
1444               fprintf (dump_file, "\n");
1445             }
1446
1447           /* Record it in the addition chain and disable further
1448              undistribution with this op.  */
1449           oe1->op = gimple_assign_lhs (prod);
1450           oe1->rank = get_rank (oe1->op);
1451           subops[first].release ();
1452
1453           changed = true;
1454         }
1455
1456       cvec.pop ();
1457     }
1458
1459   for (i = 0; i < ops->length (); ++i)
1460     subops[i].release ();
1461   free (subops);
1462   cvec.release ();
1463   sbitmap_free (candidates);
1464   sbitmap_free (candidates2);
1465
1466   return changed;
1467 }
1468
1469 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1470    expression, examine the other OPS to see if any of them are comparisons
1471    of the same values, which we may be able to combine or eliminate.
1472    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1473
1474 static bool
1475 eliminate_redundant_comparison (enum tree_code opcode,
1476                                 vec<operand_entry_t> *ops,
1477                                 unsigned int currindex,
1478                                 operand_entry_t curr)
1479 {
1480   tree op1, op2;
1481   enum tree_code lcode, rcode;
1482   gimple def1, def2;
1483   int i;
1484   operand_entry_t oe;
1485
1486   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1487     return false;
1488
1489   /* Check that CURR is a comparison.  */
1490   if (TREE_CODE (curr->op) != SSA_NAME)
1491     return false;
1492   def1 = SSA_NAME_DEF_STMT (curr->op);
1493   if (!is_gimple_assign (def1))
1494     return false;
1495   lcode = gimple_assign_rhs_code (def1);
1496   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1497     return false;
1498   op1 = gimple_assign_rhs1 (def1);
1499   op2 = gimple_assign_rhs2 (def1);
1500
1501   /* Now look for a similar comparison in the remaining OPS.  */
1502   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1503     {
1504       tree t;
1505
1506       if (TREE_CODE (oe->op) != SSA_NAME)
1507         continue;
1508       def2 = SSA_NAME_DEF_STMT (oe->op);
1509       if (!is_gimple_assign (def2))
1510         continue;
1511       rcode = gimple_assign_rhs_code (def2);
1512       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1513         continue;
1514
1515       /* If we got here, we have a match.  See if we can combine the
1516          two comparisons.  */
1517       if (opcode == BIT_IOR_EXPR)
1518         t = maybe_fold_or_comparisons (lcode, op1, op2,
1519                                        rcode, gimple_assign_rhs1 (def2),
1520                                        gimple_assign_rhs2 (def2));
1521       else
1522         t = maybe_fold_and_comparisons (lcode, op1, op2,
1523                                         rcode, gimple_assign_rhs1 (def2),
1524                                         gimple_assign_rhs2 (def2));
1525       if (!t)
1526         continue;
1527
1528       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1529          always give us a boolean_type_node value back.  If the original
1530          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1531          we need to convert.  */
1532       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1533         t = fold_convert (TREE_TYPE (curr->op), t);
1534
1535       if (TREE_CODE (t) != INTEGER_CST
1536           && !operand_equal_p (t, curr->op, 0))
1537         {
1538           enum tree_code subcode;
1539           tree newop1, newop2;
1540           if (!COMPARISON_CLASS_P (t))
1541             continue;
1542           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1543           STRIP_USELESS_TYPE_CONVERSION (newop1);
1544           STRIP_USELESS_TYPE_CONVERSION (newop2);
1545           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1546             continue;
1547         }
1548
1549       if (dump_file && (dump_flags & TDF_DETAILS))
1550         {
1551           fprintf (dump_file, "Equivalence: ");
1552           print_generic_expr (dump_file, curr->op, 0);
1553           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1554           print_generic_expr (dump_file, oe->op, 0);
1555           fprintf (dump_file, " -> ");
1556           print_generic_expr (dump_file, t, 0);
1557           fprintf (dump_file, "\n");
1558         }
1559
1560       /* Now we can delete oe, as it has been subsumed by the new combined
1561          expression t.  */
1562       ops->ordered_remove (i);
1563       reassociate_stats.ops_eliminated ++;
1564
1565       /* If t is the same as curr->op, we're done.  Otherwise we must
1566          replace curr->op with t.  Special case is if we got a constant
1567          back, in which case we add it to the end instead of in place of
1568          the current entry.  */
1569       if (TREE_CODE (t) == INTEGER_CST)
1570         {
1571           ops->ordered_remove (currindex);
1572           add_to_ops_vec (ops, t);
1573         }
1574       else if (!operand_equal_p (t, curr->op, 0))
1575         {
1576           gimple sum;
1577           enum tree_code subcode;
1578           tree newop1;
1579           tree newop2;
1580           gcc_assert (COMPARISON_CLASS_P (t));
1581           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1582           STRIP_USELESS_TYPE_CONVERSION (newop1);
1583           STRIP_USELESS_TYPE_CONVERSION (newop2);
1584           gcc_checking_assert (is_gimple_val (newop1)
1585                                && is_gimple_val (newop2));
1586           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1587           curr->op = gimple_get_lhs (sum);
1588         }
1589       return true;
1590     }
1591
1592   return false;
1593 }
1594
1595 /* Perform various identities and other optimizations on the list of
1596    operand entries, stored in OPS.  The tree code for the binary
1597    operation between all the operands is OPCODE.  */
1598
1599 static void
1600 optimize_ops_list (enum tree_code opcode,
1601                    vec<operand_entry_t> *ops)
1602 {
1603   unsigned int length = ops->length ();
1604   unsigned int i;
1605   operand_entry_t oe;
1606   operand_entry_t oelast = NULL;
1607   bool iterate = false;
1608
1609   if (length == 1)
1610     return;
1611
1612   oelast = ops->last ();
1613
1614   /* If the last two are constants, pop the constants off, merge them
1615      and try the next two.  */
1616   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1617     {
1618       operand_entry_t oelm1 = (*ops)[length - 2];
1619
1620       if (oelm1->rank == 0
1621           && is_gimple_min_invariant (oelm1->op)
1622           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1623                                        TREE_TYPE (oelast->op)))
1624         {
1625           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1626                                      oelm1->op, oelast->op);
1627
1628           if (folded && is_gimple_min_invariant (folded))
1629             {
1630               if (dump_file && (dump_flags & TDF_DETAILS))
1631                 fprintf (dump_file, "Merging constants\n");
1632
1633               ops->pop ();
1634               ops->pop ();
1635
1636               add_to_ops_vec (ops, folded);
1637               reassociate_stats.constants_eliminated++;
1638
1639               optimize_ops_list (opcode, ops);
1640               return;
1641             }
1642         }
1643     }
1644
1645   eliminate_using_constants (opcode, ops);
1646   oelast = NULL;
1647
1648   for (i = 0; ops->iterate (i, &oe);)
1649     {
1650       bool done = false;
1651
1652       if (eliminate_not_pairs (opcode, ops, i, oe))
1653         return;
1654       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1655           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1656           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1657         {
1658           if (done)
1659             return;
1660           iterate = true;
1661           oelast = NULL;
1662           continue;
1663         }
1664       oelast = oe;
1665       i++;
1666     }
1667
1668   length = ops->length ();
1669   oelast = ops->last ();
1670
1671   if (iterate)
1672     optimize_ops_list (opcode, ops);
1673 }
1674
1675 /* The following functions are subroutines to optimize_range_tests and allow
1676    it to try to change a logical combination of comparisons into a range
1677    test.
1678
1679    For example, both
1680         X == 2 || X == 5 || X == 3 || X == 4
1681    and
1682         X >= 2 && X <= 5
1683    are converted to
1684         (unsigned) (X - 2) <= 3
1685
1686    For more information see comments above fold_test_range in fold-const.c,
1687    this implementation is for GIMPLE.  */
1688
1689 struct range_entry
1690 {
1691   tree exp;
1692   tree low;
1693   tree high;
1694   bool in_p;
1695   bool strict_overflow_p;
1696   unsigned int idx, next;
1697 };
1698
1699 /* This is similar to make_range in fold-const.c, but on top of
1700    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1701    an SSA_NAME and STMT argument is ignored, otherwise STMT
1702    argument should be a GIMPLE_COND.  */
1703
1704 static void
1705 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1706 {
1707   int in_p;
1708   tree low, high;
1709   bool is_bool, strict_overflow_p;
1710
1711   r->exp = NULL_TREE;
1712   r->in_p = false;
1713   r->strict_overflow_p = false;
1714   r->low = NULL_TREE;
1715   r->high = NULL_TREE;
1716   if (exp != NULL_TREE
1717       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1718     return;
1719
1720   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1721      and see if we can refine the range.  Some of the cases below may not
1722      happen, but it doesn't seem worth worrying about this.  We "continue"
1723      the outer loop when we've changed something; otherwise we "break"
1724      the switch, which will "break" the while.  */
1725   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1726   high = low;
1727   in_p = 0;
1728   strict_overflow_p = false;
1729   is_bool = false;
1730   if (exp == NULL_TREE)
1731     is_bool = true;
1732   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1733     {
1734       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1735         is_bool = true;
1736       else
1737         return;
1738     }
1739   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1740     is_bool = true;
1741
1742   while (1)
1743     {
1744       enum tree_code code;
1745       tree arg0, arg1, exp_type;
1746       tree nexp;
1747       location_t loc;
1748
1749       if (exp != NULL_TREE)
1750         {
1751           if (TREE_CODE (exp) != SSA_NAME)
1752             break;
1753
1754           stmt = SSA_NAME_DEF_STMT (exp);
1755           if (!is_gimple_assign (stmt))
1756             break;
1757
1758           code = gimple_assign_rhs_code (stmt);
1759           arg0 = gimple_assign_rhs1 (stmt);
1760           arg1 = gimple_assign_rhs2 (stmt);
1761           exp_type = TREE_TYPE (exp);
1762         }
1763       else
1764         {
1765           code = gimple_cond_code (stmt);
1766           arg0 = gimple_cond_lhs (stmt);
1767           arg1 = gimple_cond_rhs (stmt);
1768           exp_type = boolean_type_node;
1769         }
1770
1771       if (TREE_CODE (arg0) != SSA_NAME)
1772         break;
1773       loc = gimple_location (stmt);
1774       switch (code)
1775         {
1776         case BIT_NOT_EXPR:
1777           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1778               /* Ensure the range is either +[-,0], +[0,0],
1779                  -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1780                  -[1,1].  If it is e.g. +[-,-] or -[-,-]
1781                  or similar expression of unconditional true or
1782                  false, it should not be negated.  */
1783               && ((high && integer_zerop (high))
1784                   || (low && integer_onep (low))))
1785             {
1786               in_p = !in_p;
1787               exp = arg0;
1788               continue;
1789             }
1790           break;
1791         case SSA_NAME:
1792           exp = arg0;
1793           continue;
1794         CASE_CONVERT:
1795           if (is_bool)
1796             goto do_default;
1797           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1798             {
1799               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1800                 is_bool = true;
1801               else
1802                 return;
1803             }
1804           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1805             is_bool = true;
1806           goto do_default;
1807         case EQ_EXPR:
1808         case NE_EXPR:
1809         case LT_EXPR:
1810         case LE_EXPR:
1811         case GE_EXPR:
1812         case GT_EXPR:
1813           is_bool = true;
1814           /* FALLTHRU */
1815         default:
1816           if (!is_bool)
1817             return;
1818         do_default:
1819           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1820                                   &low, &high, &in_p,
1821                                   &strict_overflow_p);
1822           if (nexp != NULL_TREE)
1823             {
1824               exp = nexp;
1825               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1826               continue;
1827             }
1828           break;
1829         }
1830       break;
1831     }
1832   if (is_bool)
1833     {
1834       r->exp = exp;
1835       r->in_p = in_p;
1836       r->low = low;
1837       r->high = high;
1838       r->strict_overflow_p = strict_overflow_p;
1839     }
1840 }
1841
1842 /* Comparison function for qsort.  Sort entries
1843    without SSA_NAME exp first, then with SSA_NAMEs sorted
1844    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1845    by increasing ->low and if ->low is the same, by increasing
1846    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1847    maximum.  */
1848
1849 static int
1850 range_entry_cmp (const void *a, const void *b)
1851 {
1852   const struct range_entry *p = (const struct range_entry *) a;
1853   const struct range_entry *q = (const struct range_entry *) b;
1854
1855   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1856     {
1857       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1858         {
1859           /* Group range_entries for the same SSA_NAME together.  */
1860           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1861             return -1;
1862           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1863             return 1;
1864           /* If ->low is different, NULL low goes first, then by
1865              ascending low.  */
1866           if (p->low != NULL_TREE)
1867             {
1868               if (q->low != NULL_TREE)
1869                 {
1870                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1871                                           p->low, q->low);
1872                   if (tem && integer_onep (tem))
1873                     return -1;
1874                   tem = fold_binary (GT_EXPR, boolean_type_node,
1875                                      p->low, q->low);
1876                   if (tem && integer_onep (tem))
1877                     return 1;
1878                 }
1879               else
1880                 return 1;
1881             }
1882           else if (q->low != NULL_TREE)
1883             return -1;
1884           /* If ->high is different, NULL high goes last, before that by
1885              ascending high.  */
1886           if (p->high != NULL_TREE)
1887             {
1888               if (q->high != NULL_TREE)
1889                 {
1890                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1891                                           p->high, q->high);
1892                   if (tem && integer_onep (tem))
1893                     return -1;
1894                   tem = fold_binary (GT_EXPR, boolean_type_node,
1895                                      p->high, q->high);
1896                   if (tem && integer_onep (tem))
1897                     return 1;
1898                 }
1899               else
1900                 return -1;
1901             }
1902           else if (p->high != NULL_TREE)
1903             return 1;
1904           /* If both ranges are the same, sort below by ascending idx.  */
1905         }
1906       else
1907         return 1;
1908     }
1909   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1910     return -1;
1911
1912   if (p->idx < q->idx)
1913     return -1;
1914   else
1915     {
1916       gcc_checking_assert (p->idx > q->idx);
1917       return 1;
1918     }
1919 }
1920
1921 /* Helper routine of optimize_range_test.
1922    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1923    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1924    OPCODE and OPS are arguments of optimize_range_tests.  Return
1925    true if the range merge has been successful.
1926    If OPCODE is ERROR_MARK, this is called from within
1927    maybe_optimize_range_tests and is performing inter-bb range optimization.
1928    Changes should be then performed right away, and whether an op is
1929    BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank.  */
1930
1931 static bool
1932 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1933                    unsigned int count, enum tree_code opcode,
1934                    vec<operand_entry_t> *ops, tree exp, bool in_p,
1935                    tree low, tree high, bool strict_overflow_p)
1936 {
1937   operand_entry_t oe = (*ops)[range->idx];
1938   tree op = oe->op;
1939   gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1940   location_t loc = gimple_location (stmt);
1941   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1942   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1943   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1944   gimple_stmt_iterator gsi;
1945
1946   if (tem == NULL_TREE)
1947     return false;
1948
1949   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1950     warning_at (loc, OPT_Wstrict_overflow,
1951                 "assuming signed overflow does not occur "
1952                 "when simplifying range test");
1953
1954   if (dump_file && (dump_flags & TDF_DETAILS))
1955     {
1956       struct range_entry *r;
1957       fprintf (dump_file, "Optimizing range tests ");
1958       print_generic_expr (dump_file, range->exp, 0);
1959       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1960       print_generic_expr (dump_file, range->low, 0);
1961       fprintf (dump_file, ", ");
1962       print_generic_expr (dump_file, range->high, 0);
1963       fprintf (dump_file, "]");
1964       for (r = otherrange; r < otherrange + count; r++)
1965         {
1966           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1967           print_generic_expr (dump_file, r->low, 0);
1968           fprintf (dump_file, ", ");
1969           print_generic_expr (dump_file, r->high, 0);
1970           fprintf (dump_file, "]");
1971         }
1972       fprintf (dump_file, "\n into ");
1973       print_generic_expr (dump_file, tem, 0);
1974       fprintf (dump_file, "\n");
1975     }
1976
1977   if (opcode == BIT_IOR_EXPR
1978       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1979     tem = invert_truthvalue_loc (loc, tem);
1980
1981   tem = fold_convert_loc (loc, optype, tem);
1982   gsi = gsi_for_stmt (stmt);
1983   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1984                                   GSI_SAME_STMT);
1985
1986   /* If doing inter-bb range test optimization, update the
1987      stmts immediately.  Start with changing the first range test
1988      immediate use to the new value (TEM), or, if the first range
1989      test is a GIMPLE_COND stmt, change that condition.  */
1990   if (opcode == ERROR_MARK)
1991     {
1992       if (op)
1993         {
1994           imm_use_iterator iter;
1995           use_operand_p use_p;
1996           gimple use_stmt;
1997
1998           FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1999             {
2000               if (is_gimple_debug (use_stmt))
2001                 continue;
2002               FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2003                 SET_USE (use_p, tem);
2004               update_stmt (use_stmt);
2005             }
2006         }
2007       else
2008         {
2009           gimple_cond_set_code (stmt, NE_EXPR);
2010           gimple_cond_set_lhs (stmt, tem);
2011           gimple_cond_set_rhs (stmt, boolean_false_node);
2012           update_stmt (stmt);
2013         }
2014     }
2015   oe->op = tem;
2016   range->exp = exp;
2017   range->low = low;
2018   range->high = high;
2019   range->in_p = in_p;
2020   range->strict_overflow_p = false;
2021
2022   for (range = otherrange; range < otherrange + count; range++)
2023     {
2024       oe = (*ops)[range->idx];
2025       /* Now change all the other range test immediate uses, so that
2026          those tests will be optimized away.  */
2027       if (opcode == ERROR_MARK)
2028         {
2029           if (oe->op)
2030             {
2031               imm_use_iterator iter;
2032               use_operand_p use_p;
2033               gimple use_stmt;
2034
2035               FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2036                 {
2037                   if (is_gimple_debug (use_stmt))
2038                     continue;
2039                   /* If imm use of _8 is a statement like _7 = _8 | _9;,
2040                      adjust it into _7 = _9;.  */
2041                   if (is_gimple_assign (use_stmt)
2042                       && gimple_assign_rhs_code (use_stmt) == oe->rank)
2043                     {
2044                       tree expr = NULL_TREE;
2045                       if (oe->op == gimple_assign_rhs1 (use_stmt))
2046                         expr = gimple_assign_rhs2 (use_stmt);
2047                       else if (oe->op == gimple_assign_rhs2 (use_stmt))
2048                         expr = gimple_assign_rhs1 (use_stmt);
2049                       if (expr
2050                           && expr != oe->op
2051                           && TREE_CODE (expr) == SSA_NAME)
2052                         {
2053                           gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2054                           gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2055                                                           expr, NULL_TREE);
2056                           update_stmt (use_stmt);
2057                           continue;
2058                         }
2059                     }
2060                   /* If imm use of _8 is a statement like _7 = (int) _8;,
2061                      adjust it into _7 = 0; or _7 = 1;.  */
2062                   if (gimple_assign_cast_p (use_stmt)
2063                       && oe->op == gimple_assign_rhs1 (use_stmt))
2064                     {
2065                       tree lhs = gimple_assign_lhs (use_stmt);
2066                       if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2067                         {
2068                           gimple_stmt_iterator gsi2
2069                             = gsi_for_stmt (use_stmt);
2070                           tree expr = build_int_cst (TREE_TYPE (lhs),
2071                                                      oe->rank == BIT_IOR_EXPR
2072                                                      ? 0 : 1);
2073                           gimple_assign_set_rhs_with_ops (&gsi2,
2074                                                           INTEGER_CST,
2075                                                           expr, NULL_TREE);
2076                           update_stmt (use_stmt);
2077                           continue;
2078                         }
2079                     }
2080                   /* Otherwise replace the use with 0 or 1.  */
2081                   FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2082                     SET_USE (use_p,
2083                              build_int_cst (TREE_TYPE (oe->op),
2084                                             oe->rank == BIT_IOR_EXPR
2085                                             ? 0 : 1));
2086                   update_stmt (use_stmt);
2087                 }
2088             }
2089           else
2090             {
2091               /* If range test was a GIMPLE_COND, simply change it
2092                  into an always false or always true condition.  */
2093               stmt = last_stmt (BASIC_BLOCK (oe->id));
2094               if (oe->rank == BIT_IOR_EXPR)
2095                 gimple_cond_make_false (stmt);
2096               else
2097                 gimple_cond_make_true (stmt);
2098               update_stmt (stmt);
2099             }
2100         }
2101       oe->op = error_mark_node;
2102       range->exp = NULL_TREE;
2103     }
2104   return true;
2105 }
2106
2107 /* Optimize range tests, similarly how fold_range_test optimizes
2108    it on trees.  The tree code for the binary
2109    operation between all the operands is OPCODE.
2110    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2111    maybe_optimize_range_tests for inter-bb range optimization.
2112    In that case if oe->op is NULL, oe->id is bb->index whose
2113    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2114    the actual opcode.  */
2115
2116 static void
2117 optimize_range_tests (enum tree_code opcode,
2118                       vec<operand_entry_t> *ops)
2119 {
2120   unsigned int length = ops->length (), i, j, first;
2121   operand_entry_t oe;
2122   struct range_entry *ranges;
2123   bool any_changes = false;
2124
2125   if (length == 1)
2126     return;
2127
2128   ranges = XNEWVEC (struct range_entry, length);
2129   for (i = 0; i < length; i++)
2130     {
2131       oe = (*ops)[i];
2132       ranges[i].idx = i;
2133       init_range_entry (ranges + i, oe->op,
2134                         oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2135       /* For | invert it now, we will invert it again before emitting
2136          the optimized expression.  */
2137       if (opcode == BIT_IOR_EXPR
2138           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2139         ranges[i].in_p = !ranges[i].in_p;
2140     }
2141
2142   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2143   for (i = 0; i < length; i++)
2144     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2145       break;
2146
2147   /* Try to merge ranges.  */
2148   for (first = i; i < length; i++)
2149     {
2150       tree low = ranges[i].low;
2151       tree high = ranges[i].high;
2152       int in_p = ranges[i].in_p;
2153       bool strict_overflow_p = ranges[i].strict_overflow_p;
2154       int update_fail_count = 0;
2155
2156       for (j = i + 1; j < length; j++)
2157         {
2158           if (ranges[i].exp != ranges[j].exp)
2159             break;
2160           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2161                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2162             break;
2163           strict_overflow_p |= ranges[j].strict_overflow_p;
2164         }
2165
2166       if (j == i + 1)
2167         continue;
2168
2169       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2170                              ops, ranges[i].exp, in_p, low, high,
2171                              strict_overflow_p))
2172         {
2173           i = j - 1;
2174           any_changes = true;
2175         }
2176       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2177          while update_range_test would fail.  */
2178       else if (update_fail_count == 64)
2179         i = j - 1;
2180       else
2181         ++update_fail_count;
2182     }
2183
2184   /* Optimize X == CST1 || X == CST2
2185      if popcount (CST1 ^ CST2) == 1 into
2186      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2187      Similarly for ranges.  E.g.
2188      X != 2 && X != 3 && X != 10 && X != 11
2189      will be transformed by the above loop into
2190      (X - 2U) <= 1U && (X - 10U) <= 1U
2191      and this loop can transform that into
2192      ((X & ~8) - 2U) <= 1U.  */
2193   for (i = first; i < length; i++)
2194     {
2195       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2196
2197       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2198         continue;
2199       type = TREE_TYPE (ranges[i].exp);
2200       if (!INTEGRAL_TYPE_P (type))
2201         continue;
2202       lowi = ranges[i].low;
2203       if (lowi == NULL_TREE)
2204         lowi = TYPE_MIN_VALUE (type);
2205       highi = ranges[i].high;
2206       if (highi == NULL_TREE)
2207         continue;
2208       for (j = i + 1; j < length && j < i + 64; j++)
2209         {
2210           if (ranges[j].exp == NULL_TREE)
2211             continue;
2212           if (ranges[i].exp != ranges[j].exp)
2213             break;
2214           if (ranges[j].in_p)
2215             continue;
2216           lowj = ranges[j].low;
2217           if (lowj == NULL_TREE)
2218             continue;
2219           highj = ranges[j].high;
2220           if (highj == NULL_TREE)
2221             highj = TYPE_MAX_VALUE (type);
2222           tem = fold_binary (GT_EXPR, boolean_type_node,
2223                              lowj, highi);
2224           if (tem == NULL_TREE || !integer_onep (tem))
2225             continue;
2226           lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2227           if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2228             continue;
2229           gcc_checking_assert (!integer_zerop (lowxor));
2230           tem = fold_binary (MINUS_EXPR, type, lowxor,
2231                              build_int_cst (type, 1));
2232           if (tem == NULL_TREE)
2233             continue;
2234           tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2235           if (tem == NULL_TREE || !integer_zerop (tem))
2236             continue;
2237           highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2238           if (!tree_int_cst_equal (lowxor, highxor))
2239             continue;
2240           tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2241           exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2242           lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2243           highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2244           if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2245                                  ranges[i].in_p, lowj, highj,
2246                                  ranges[i].strict_overflow_p
2247                                  || ranges[j].strict_overflow_p))
2248             {
2249               any_changes = true;
2250               break;
2251             }
2252         }
2253     }
2254
2255   if (any_changes && opcode != ERROR_MARK)
2256     {
2257       j = 0;
2258       FOR_EACH_VEC_ELT (*ops, i, oe)
2259         {
2260           if (oe->op == error_mark_node)
2261             continue;
2262           else if (i != j)
2263             (*ops)[j] = oe;
2264           j++;
2265         }
2266       ops->truncate (j);
2267     }
2268
2269   XDELETEVEC (ranges);
2270 }
2271
2272 /* Return true if STMT is a cast like:
2273    <bb N>:
2274    ...
2275    _123 = (int) _234;
2276
2277    <bb M>:
2278    # _345 = PHI <_123(N), 1(...), 1(...)>
2279    where _234 has bool type, _123 has single use and
2280    bb N has a single successor M.  This is commonly used in
2281    the last block of a range test.  */
2282
2283 static bool
2284 final_range_test_p (gimple stmt)
2285 {
2286   basic_block bb, rhs_bb;
2287   edge e;
2288   tree lhs, rhs;
2289   use_operand_p use_p;
2290   gimple use_stmt;
2291
2292   if (!gimple_assign_cast_p (stmt))
2293     return false;
2294   bb = gimple_bb (stmt);
2295   if (!single_succ_p (bb))
2296     return false;
2297   e = single_succ_edge (bb);
2298   if (e->flags & EDGE_COMPLEX)
2299     return false;
2300
2301   lhs = gimple_assign_lhs (stmt);
2302   rhs = gimple_assign_rhs1 (stmt);
2303   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2304       || TREE_CODE (rhs) != SSA_NAME
2305       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2306     return false;
2307
2308   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2309   if (!single_imm_use (lhs, &use_p, &use_stmt))
2310     return false;
2311
2312   if (gimple_code (use_stmt) != GIMPLE_PHI
2313       || gimple_bb (use_stmt) != e->dest)
2314     return false;
2315
2316   /* And that the rhs is defined in the same loop.  */
2317   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2318   if (rhs_bb == NULL
2319       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2320     return false;
2321
2322   return true;
2323 }
2324
2325 /* Return true if BB is suitable basic block for inter-bb range test
2326    optimization.  If BACKWARD is true, BB should be the only predecessor
2327    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2328    or compared with to find a common basic block to which all conditions
2329    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
2330    be the only predecessor of BB.  */
2331
2332 static bool
2333 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2334                   bool backward)
2335 {
2336   edge_iterator ei, ei2;
2337   edge e, e2;
2338   gimple stmt;
2339   gimple_stmt_iterator gsi;
2340   bool other_edge_seen = false;
2341   bool is_cond;
2342
2343   if (test_bb == bb)
2344     return false;
2345   /* Check last stmt first.  */
2346   stmt = last_stmt (bb);
2347   if (stmt == NULL
2348       || (gimple_code (stmt) != GIMPLE_COND
2349           && (backward || !final_range_test_p (stmt)))
2350       || gimple_visited_p (stmt)
2351       || stmt_could_throw_p (stmt)
2352       || *other_bb == bb)
2353     return false;
2354   is_cond = gimple_code (stmt) == GIMPLE_COND;
2355   if (is_cond)
2356     {
2357       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2358          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2359          to *OTHER_BB (if not set yet, try to find it out).  */
2360       if (EDGE_COUNT (bb->succs) != 2)
2361         return false;
2362       FOR_EACH_EDGE (e, ei, bb->succs)
2363         {
2364           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2365             return false;
2366           if (e->dest == test_bb)
2367             {
2368               if (backward)
2369                 continue;
2370               else
2371                 return false;
2372             }
2373           if (e->dest == bb)
2374             return false;
2375           if (*other_bb == NULL)
2376             {
2377               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2378                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2379                   return false;
2380                 else if (e->dest == e2->dest)
2381                   *other_bb = e->dest;
2382               if (*other_bb == NULL)
2383                 return false;
2384             }
2385           if (e->dest == *other_bb)
2386             other_edge_seen = true;
2387           else if (backward)
2388             return false;
2389         }
2390       if (*other_bb == NULL || !other_edge_seen)
2391         return false;
2392     }
2393   else if (single_succ (bb) != *other_bb)
2394     return false;
2395
2396   /* Now check all PHIs of *OTHER_BB.  */
2397   e = find_edge (bb, *other_bb);
2398   e2 = find_edge (test_bb, *other_bb);
2399   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2400     {
2401       gimple phi = gsi_stmt (gsi);
2402       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2403          corresponding to BB and TEST_BB predecessor must be the same.  */
2404       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2405                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
2406         {
2407           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2408              one of the PHIs should have the lhs of the last stmt in
2409              that block as PHI arg and that PHI should have 0 or 1
2410              corresponding to it in all other range test basic blocks
2411              considered.  */
2412           if (!is_cond)
2413             {
2414               if (gimple_phi_arg_def (phi, e->dest_idx)
2415                   == gimple_assign_lhs (stmt)
2416                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2417                       || integer_onep (gimple_phi_arg_def (phi,
2418                                                            e2->dest_idx))))
2419                 continue;
2420             }
2421           else
2422             {
2423               gimple test_last = last_stmt (test_bb);
2424               if (gimple_code (test_last) != GIMPLE_COND
2425                   && gimple_phi_arg_def (phi, e2->dest_idx)
2426                      == gimple_assign_lhs (test_last)
2427                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2428                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2429                 continue;
2430             }
2431
2432           return false;
2433         }
2434     }
2435   return true;
2436 }
2437
2438 /* Return true if BB doesn't have side-effects that would disallow
2439    range test optimization, all SSA_NAMEs set in the bb are consumed
2440    in the bb and there are no PHIs.  */
2441
2442 static bool
2443 no_side_effect_bb (basic_block bb)
2444 {
2445   gimple_stmt_iterator gsi;
2446   gimple last;
2447
2448   if (!gimple_seq_empty_p (phi_nodes (bb)))
2449     return false;
2450   last = last_stmt (bb);
2451   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2452     {
2453       gimple stmt = gsi_stmt (gsi);
2454       tree lhs;
2455       imm_use_iterator imm_iter;
2456       use_operand_p use_p;
2457
2458       if (is_gimple_debug (stmt))
2459         continue;
2460       if (gimple_has_side_effects (stmt))
2461         return false;
2462       if (stmt == last)
2463         return true;
2464       if (!is_gimple_assign (stmt))
2465         return false;
2466       lhs = gimple_assign_lhs (stmt);
2467       if (TREE_CODE (lhs) != SSA_NAME)
2468         return false;
2469       if (gimple_assign_rhs_could_trap_p (stmt))
2470         return false;
2471       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2472         {
2473           gimple use_stmt = USE_STMT (use_p);
2474           if (is_gimple_debug (use_stmt))
2475             continue;
2476           if (gimple_bb (use_stmt) != bb)
2477             return false;
2478         }
2479     }
2480   return false;
2481 }
2482
2483 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2484    return true and fill in *OPS recursively.  */
2485
2486 static bool
2487 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2488          struct loop *loop)
2489 {
2490   gimple stmt = SSA_NAME_DEF_STMT (var);
2491   tree rhs[2];
2492   int i;
2493
2494   if (!is_reassociable_op (stmt, code, loop))
2495     return false;
2496
2497   rhs[0] = gimple_assign_rhs1 (stmt);
2498   rhs[1] = gimple_assign_rhs2 (stmt);
2499   gimple_set_visited (stmt, true);
2500   for (i = 0; i < 2; i++)
2501     if (TREE_CODE (rhs[i]) == SSA_NAME
2502         && !get_ops (rhs[i], code, ops, loop)
2503         && has_single_use (rhs[i]))
2504       {
2505         operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2506
2507         oe->op = rhs[i];
2508         oe->rank = code;
2509         oe->id = 0;
2510         oe->count = 1;
2511         ops->safe_push (oe);
2512       }
2513   return true;
2514 }
2515
2516 /* Inter-bb range test optimization.  */
2517
2518 static void
2519 maybe_optimize_range_tests (gimple stmt)
2520 {
2521   basic_block first_bb = gimple_bb (stmt);
2522   basic_block last_bb = first_bb;
2523   basic_block other_bb = NULL;
2524   basic_block bb;
2525   edge_iterator ei;
2526   edge e;
2527   vec<operand_entry_t> ops = vNULL;
2528
2529   /* Consider only basic blocks that end with GIMPLE_COND or
2530      a cast statement satisfying final_range_test_p.  All
2531      but the last bb in the first_bb .. last_bb range
2532      should end with GIMPLE_COND.  */
2533   if (gimple_code (stmt) == GIMPLE_COND)
2534     {
2535       if (EDGE_COUNT (first_bb->succs) != 2)
2536         return;
2537     }
2538   else if (final_range_test_p (stmt))
2539     other_bb = single_succ (first_bb);
2540   else
2541     return;
2542
2543   if (stmt_could_throw_p (stmt))
2544     return;
2545
2546   /* As relative ordering of post-dominator sons isn't fixed,
2547      maybe_optimize_range_tests can be called first on any
2548      bb in the range we want to optimize.  So, start searching
2549      backwards, if first_bb can be set to a predecessor.  */
2550   while (single_pred_p (first_bb))
2551     {
2552       basic_block pred_bb = single_pred (first_bb);
2553       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2554         break;
2555       if (!no_side_effect_bb (first_bb))
2556         break;
2557       first_bb = pred_bb;
2558     }
2559   /* If first_bb is last_bb, other_bb hasn't been computed yet.
2560      Before starting forward search in last_bb successors, find
2561      out the other_bb.  */
2562   if (first_bb == last_bb)
2563     {
2564       other_bb = NULL;
2565       /* As non-GIMPLE_COND last stmt always terminates the range,
2566          if forward search didn't discover anything, just give up.  */
2567       if (gimple_code (stmt) != GIMPLE_COND)
2568         return;
2569       /* Look at both successors.  Either it ends with a GIMPLE_COND
2570          and satisfies suitable_cond_bb, or ends with a cast and
2571          other_bb is that cast's successor.  */
2572       FOR_EACH_EDGE (e, ei, first_bb->succs)
2573         if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2574             || e->dest == first_bb)
2575           return;
2576         else if (single_pred_p (e->dest))
2577           {
2578             stmt = last_stmt (e->dest);
2579             if (stmt
2580                 && gimple_code (stmt) == GIMPLE_COND
2581                 && EDGE_COUNT (e->dest->succs) == 2)
2582               {
2583                 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2584                   break;
2585                 else
2586                   other_bb = NULL;
2587               }
2588             else if (stmt
2589                      && final_range_test_p (stmt)
2590                      && find_edge (first_bb, single_succ (e->dest)))
2591               {
2592                 other_bb = single_succ (e->dest);
2593                 if (other_bb == first_bb)
2594                   other_bb = NULL;
2595               }
2596           }
2597       if (other_bb == NULL)
2598         return;
2599     }
2600   /* Now do the forward search, moving last_bb to successor bbs
2601      that aren't other_bb.  */
2602   while (EDGE_COUNT (last_bb->succs) == 2)
2603     {
2604       FOR_EACH_EDGE (e, ei, last_bb->succs)
2605         if (e->dest != other_bb)
2606           break;
2607       if (e == NULL)
2608         break;
2609       if (!single_pred_p (e->dest))
2610         break;
2611       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2612         break;
2613       if (!no_side_effect_bb (e->dest))
2614         break;
2615       last_bb = e->dest;
2616     }
2617   if (first_bb == last_bb)
2618     return;
2619   /* Here basic blocks first_bb through last_bb's predecessor
2620      end with GIMPLE_COND, all of them have one of the edges to
2621      other_bb and another to another block in the range,
2622      all blocks except first_bb don't have side-effects and
2623      last_bb ends with either GIMPLE_COND, or cast satisfying
2624      final_range_test_p.  */
2625   for (bb = last_bb; ; bb = single_pred (bb))
2626     {
2627       enum tree_code code;
2628       tree lhs, rhs;
2629
2630       e = find_edge (bb, other_bb);
2631       stmt = last_stmt (bb);
2632       gimple_set_visited (stmt, true);
2633       if (gimple_code (stmt) != GIMPLE_COND)
2634         {
2635           use_operand_p use_p;
2636           gimple phi;
2637           edge e2;
2638           unsigned int d;
2639
2640           lhs = gimple_assign_lhs (stmt);
2641           rhs = gimple_assign_rhs1 (stmt);
2642           gcc_assert (bb == last_bb);
2643
2644           /* stmt is
2645              _123 = (int) _234;
2646
2647              followed by:
2648              <bb M>:
2649              # _345 = PHI <_123(N), 1(...), 1(...)>
2650
2651              or 0 instead of 1.  If it is 0, the _234
2652              range test is anded together with all the
2653              other range tests, if it is 1, it is ored with
2654              them.  */
2655           single_imm_use (lhs, &use_p, &phi);
2656           gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2657           e2 = find_edge (first_bb, other_bb);
2658           d = e2->dest_idx;
2659           gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2660           if (integer_zerop (gimple_phi_arg_def (phi, d)))
2661             code = BIT_AND_EXPR;
2662           else
2663             {
2664               gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2665               code = BIT_IOR_EXPR;
2666             }
2667
2668           /* If _234 SSA_NAME_DEF_STMT is
2669              _234 = _567 | _789;
2670              (or &, corresponding to 1/0 in the phi arguments,
2671              push into ops the individual range test arguments
2672              of the bitwise or resp. and, recursively.  */
2673           if (!get_ops (rhs, code, &ops,
2674                         loop_containing_stmt (stmt))
2675               && has_single_use (rhs))
2676             {
2677               /* Otherwise, push the _234 range test itself.  */
2678               operand_entry_t oe
2679                 = (operand_entry_t) pool_alloc (operand_entry_pool);
2680
2681               oe->op = rhs;
2682               oe->rank = code;
2683               oe->id = 0;
2684               oe->count = 1;
2685               ops.safe_push (oe);
2686             }
2687           continue;
2688         }
2689       /* Otherwise stmt is GIMPLE_COND.  */
2690       code = gimple_cond_code (stmt);
2691       lhs = gimple_cond_lhs (stmt);
2692       rhs = gimple_cond_rhs (stmt);
2693       if (TREE_CODE (lhs) == SSA_NAME
2694           && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2695           && ((code != EQ_EXPR && code != NE_EXPR)
2696               || rhs != boolean_false_node
2697                  /* Either push into ops the individual bitwise
2698                     or resp. and operands, depending on which
2699                     edge is other_bb.  */
2700               || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2701                                  ^ (code == EQ_EXPR))
2702                                 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2703                            loop_containing_stmt (stmt))))
2704         {
2705           /* Or push the GIMPLE_COND stmt itself.  */
2706           operand_entry_t oe
2707             = (operand_entry_t) pool_alloc (operand_entry_pool);
2708
2709           oe->op = NULL;
2710           oe->rank = (e->flags & EDGE_TRUE_VALUE)
2711                      ? BIT_IOR_EXPR : BIT_AND_EXPR;
2712           /* oe->op = NULL signs that there is no SSA_NAME
2713              for the range test, and oe->id instead is the
2714              basic block number, at which's end the GIMPLE_COND
2715              is.  */
2716           oe->id = bb->index;
2717           oe->count = 1;
2718           ops.safe_push (oe);
2719         }
2720       if (bb == first_bb)
2721         break;
2722     }
2723   if (ops.length () > 1)
2724     optimize_range_tests (ERROR_MARK, &ops);
2725   ops.release ();
2726 }
2727
2728 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2729    of STMT in it's operands.  This is also known as a "destructive
2730    update" operation.  */
2731
2732 static bool
2733 is_phi_for_stmt (gimple stmt, tree operand)
2734 {
2735   gimple def_stmt;
2736   tree lhs;
2737   use_operand_p arg_p;
2738   ssa_op_iter i;
2739
2740   if (TREE_CODE (operand) != SSA_NAME)
2741     return false;
2742
2743   lhs = gimple_assign_lhs (stmt);
2744
2745   def_stmt = SSA_NAME_DEF_STMT (operand);
2746   if (gimple_code (def_stmt) != GIMPLE_PHI)
2747     return false;
2748
2749   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2750     if (lhs == USE_FROM_PTR (arg_p))
2751       return true;
2752   return false;
2753 }
2754
2755 /* Remove def stmt of VAR if VAR has zero uses and recurse
2756    on rhs1 operand if so.  */
2757
2758 static void
2759 remove_visited_stmt_chain (tree var)
2760 {
2761   gimple stmt;
2762   gimple_stmt_iterator gsi;
2763
2764   while (1)
2765     {
2766       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2767         return;
2768       stmt = SSA_NAME_DEF_STMT (var);
2769       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2770         {
2771           var = gimple_assign_rhs1 (stmt);
2772           gsi = gsi_for_stmt (stmt);
2773           gsi_remove (&gsi, true);
2774           release_defs (stmt);
2775         }
2776       else
2777         return;
2778     }
2779 }
2780
2781 /* This function checks three consequtive operands in
2782    passed operands vector OPS starting from OPINDEX and
2783    swaps two operands if it is profitable for binary operation
2784    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2785
2786    We pair ops with the same rank if possible.
2787
2788    The alternative we try is to see if STMT is a destructive
2789    update style statement, which is like:
2790    b = phi (a, ...)
2791    a = c + b;
2792    In that case, we want to use the destructive update form to
2793    expose the possible vectorizer sum reduction opportunity.
2794    In that case, the third operand will be the phi node. This
2795    check is not performed if STMT is null.
2796
2797    We could, of course, try to be better as noted above, and do a
2798    lot of work to try to find these opportunities in >3 operand
2799    cases, but it is unlikely to be worth it.  */
2800
2801 static void
2802 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2803                           unsigned int opindex, gimple stmt)
2804 {
2805   operand_entry_t oe1, oe2, oe3;
2806
2807   oe1 = ops[opindex];
2808   oe2 = ops[opindex + 1];
2809   oe3 = ops[opindex + 2];
2810
2811   if ((oe1->rank == oe2->rank
2812        && oe2->rank != oe3->rank)
2813       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2814           && !is_phi_for_stmt (stmt, oe1->op)
2815           && !is_phi_for_stmt (stmt, oe2->op)))
2816     {
2817       struct operand_entry temp = *oe3;
2818       oe3->op = oe1->op;
2819       oe3->rank = oe1->rank;
2820       oe1->op = temp.op;
2821       oe1->rank= temp.rank;
2822     }
2823   else if ((oe1->rank == oe3->rank
2824             && oe2->rank != oe3->rank)
2825            || (stmt && is_phi_for_stmt (stmt, oe2->op)
2826                && !is_phi_for_stmt (stmt, oe1->op)
2827                && !is_phi_for_stmt (stmt, oe3->op)))
2828     {
2829       struct operand_entry temp = *oe2;
2830       oe2->op = oe1->op;
2831       oe2->rank = oe1->rank;
2832       oe1->op = temp.op;
2833       oe1->rank= temp.rank;
2834     }
2835 }
2836
2837 /* Recursively rewrite our linearized statements so that the operators
2838    match those in OPS[OPINDEX], putting the computation in rank
2839    order.  */
2840
2841 static void
2842 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2843                    vec<operand_entry_t> ops, bool moved)
2844 {
2845   tree rhs1 = gimple_assign_rhs1 (stmt);
2846   tree rhs2 = gimple_assign_rhs2 (stmt);
2847   operand_entry_t oe;
2848
2849   /* If we have three operands left, then we want to make sure the ones
2850      that get the double binary op are chosen wisely.  */
2851   if (opindex + 3 == ops.length ())
2852     swap_ops_for_binary_stmt (ops, opindex, stmt);
2853
2854   /* The final recursion case for this function is that you have
2855      exactly two operations left.
2856      If we had one exactly one op in the entire list to start with, we
2857      would have never called this function, and the tail recursion
2858      rewrites them one at a time.  */
2859   if (opindex + 2 == ops.length ())
2860     {
2861       operand_entry_t oe1, oe2;
2862
2863       oe1 = ops[opindex];
2864       oe2 = ops[opindex + 1];
2865
2866       if (rhs1 != oe1->op || rhs2 != oe2->op)
2867         {
2868           if (dump_file && (dump_flags & TDF_DETAILS))
2869             {
2870               fprintf (dump_file, "Transforming ");
2871               print_gimple_stmt (dump_file, stmt, 0, 0);
2872             }
2873
2874           gimple_assign_set_rhs1 (stmt, oe1->op);
2875           gimple_assign_set_rhs2 (stmt, oe2->op);
2876           update_stmt (stmt);
2877           if (rhs1 != oe1->op && rhs1 != oe2->op)
2878             remove_visited_stmt_chain (rhs1);
2879
2880           if (dump_file && (dump_flags & TDF_DETAILS))
2881             {
2882               fprintf (dump_file, " into ");
2883               print_gimple_stmt (dump_file, stmt, 0, 0);
2884             }
2885         }
2886       return;
2887     }
2888
2889   /* If we hit here, we should have 3 or more ops left.  */
2890   gcc_assert (opindex + 2 < ops.length ());
2891
2892   /* Rewrite the next operator.  */
2893   oe = ops[opindex];
2894
2895   if (oe->op != rhs2)
2896     {
2897       if (!moved)
2898         {
2899           gimple_stmt_iterator gsinow, gsirhs1;
2900           gimple stmt1 = stmt, stmt2;
2901           unsigned int count;
2902
2903           gsinow = gsi_for_stmt (stmt);
2904           count = ops.length () - opindex - 2;
2905           while (count-- != 0)
2906             {
2907               stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2908               gsirhs1 = gsi_for_stmt (stmt2);
2909               gsi_move_before (&gsirhs1, &gsinow);
2910               gsi_prev (&gsinow);
2911               stmt1 = stmt2;
2912             }
2913           moved = true;
2914         }
2915
2916       if (dump_file && (dump_flags & TDF_DETAILS))
2917         {
2918           fprintf (dump_file, "Transforming ");
2919           print_gimple_stmt (dump_file, stmt, 0, 0);
2920         }
2921
2922       gimple_assign_set_rhs2 (stmt, oe->op);
2923       update_stmt (stmt);
2924
2925       if (dump_file && (dump_flags & TDF_DETAILS))
2926         {
2927           fprintf (dump_file, " into ");
2928           print_gimple_stmt (dump_file, stmt, 0, 0);
2929         }
2930     }
2931   /* Recurse on the LHS of the binary operator, which is guaranteed to
2932      be the non-leaf side.  */
2933   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2934 }
2935
2936 /* Find out how many cycles we need to compute statements chain.
2937    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
2938    maximum number of independent statements we may execute per cycle.  */
2939
2940 static int
2941 get_required_cycles (int ops_num, int cpu_width)
2942 {
2943   int res;
2944   int elog;
2945   unsigned int rest;
2946
2947   /* While we have more than 2 * cpu_width operands
2948      we may reduce number of operands by cpu_width
2949      per cycle.  */
2950   res = ops_num / (2 * cpu_width);
2951
2952   /* Remained operands count may be reduced twice per cycle
2953      until we have only one operand.  */
2954   rest = (unsigned)(ops_num - res * cpu_width);
2955   elog = exact_log2 (rest);
2956   if (elog >= 0)
2957     res += elog;
2958   else
2959     res += floor_log2 (rest) + 1;
2960
2961   return res;
2962 }
2963
2964 /* Returns an optimal number of registers to use for computation of
2965    given statements.  */
2966
2967 static int
2968 get_reassociation_width (int ops_num, enum tree_code opc,
2969                          enum machine_mode mode)
2970 {
2971   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2972   int width;
2973   int width_min;
2974   int cycles_best;
2975
2976   if (param_width > 0)
2977     width = param_width;
2978   else
2979     width = targetm.sched.reassociation_width (opc, mode);
2980
2981   if (width == 1)
2982     return width;
2983
2984   /* Get the minimal time required for sequence computation.  */
2985   cycles_best = get_required_cycles (ops_num, width);
2986
2987   /* Check if we may use less width and still compute sequence for
2988      the same time.  It will allow us to reduce registers usage.
2989      get_required_cycles is monotonically increasing with lower width
2990      so we can perform a binary search for the minimal width that still
2991      results in the optimal cycle count.  */
2992   width_min = 1;
2993   while (width > width_min)
2994     {
2995       int width_mid = (width + width_min) / 2;
2996
2997       if (get_required_cycles (ops_num, width_mid) == cycles_best)
2998         width = width_mid;
2999       else if (width_min < width_mid)
3000         width_min = width_mid;
3001       else
3002         break;
3003     }
3004
3005   return width;
3006 }
3007
3008 /* Recursively rewrite our linearized statements so that the operators
3009    match those in OPS[OPINDEX], putting the computation in rank
3010    order and trying to allow operations to be executed in
3011    parallel.  */
3012
3013 static void
3014 rewrite_expr_tree_parallel (gimple stmt, int width,
3015                             vec<operand_entry_t>  ops)
3016 {
3017   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3018   int op_num = ops.length ();
3019   int stmt_num = op_num - 1;
3020   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3021   int op_index = op_num - 1;
3022   int stmt_index = 0;
3023   int ready_stmts_end = 0;
3024   int i = 0;
3025   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3026
3027   /* We start expression rewriting from the top statements.
3028      So, in this loop we create a full list of statements
3029      we will work with.  */
3030   stmts[stmt_num - 1] = stmt;
3031   for (i = stmt_num - 2; i >= 0; i--)
3032     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3033
3034   for (i = 0; i < stmt_num; i++)
3035     {
3036       tree op1, op2;
3037
3038       /* Determine whether we should use results of
3039          already handled statements or not.  */
3040       if (ready_stmts_end == 0
3041           && (i - stmt_index >= width || op_index < 1))
3042         ready_stmts_end = i;
3043
3044       /* Now we choose operands for the next statement.  Non zero
3045          value in ready_stmts_end means here that we should use
3046          the result of already generated statements as new operand.  */
3047       if (ready_stmts_end > 0)
3048         {
3049           op1 = gimple_assign_lhs (stmts[stmt_index++]);
3050           if (ready_stmts_end > stmt_index)
3051             op2 = gimple_assign_lhs (stmts[stmt_index++]);
3052           else if (op_index >= 0)
3053             op2 = ops[op_index--]->op;
3054           else
3055             {
3056               gcc_assert (stmt_index < i);
3057               op2 = gimple_assign_lhs (stmts[stmt_index++]);
3058             }
3059
3060           if (stmt_index >= ready_stmts_end)
3061             ready_stmts_end = 0;
3062         }
3063       else
3064         {
3065           if (op_index > 1)
3066             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3067           op2 = ops[op_index--]->op;
3068           op1 = ops[op_index--]->op;
3069         }
3070
3071       /* If we emit the last statement then we should put
3072          operands into the last statement.  It will also
3073          break the loop.  */
3074       if (op_index < 0 && stmt_index == i)
3075         i = stmt_num - 1;
3076
3077       if (dump_file && (dump_flags & TDF_DETAILS))
3078         {
3079           fprintf (dump_file, "Transforming ");
3080           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3081         }
3082
3083       /* We keep original statement only for the last one.  All
3084          others are recreated.  */
3085       if (i == stmt_num - 1)
3086         {
3087           gimple_assign_set_rhs1 (stmts[i], op1);
3088           gimple_assign_set_rhs2 (stmts[i], op2);
3089           update_stmt (stmts[i]);
3090         }
3091       else
3092         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3093
3094       if (dump_file && (dump_flags & TDF_DETAILS))
3095         {
3096           fprintf (dump_file, " into ");
3097           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3098         }
3099     }
3100
3101   remove_visited_stmt_chain (last_rhs1);
3102 }
3103
3104 /* Transform STMT, which is really (A +B) + (C + D) into the left
3105    linear form, ((A+B)+C)+D.
3106    Recurse on D if necessary.  */
3107
3108 static void
3109 linearize_expr (gimple stmt)
3110 {
3111   gimple_stmt_iterator gsinow, gsirhs;
3112   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3113   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3114   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3115   gimple newbinrhs = NULL;
3116   struct loop *loop = loop_containing_stmt (stmt);
3117
3118   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3119               && is_reassociable_op (binrhs, rhscode, loop));
3120
3121   gsinow = gsi_for_stmt (stmt);
3122   gsirhs = gsi_for_stmt (binrhs);
3123   gsi_move_before (&gsirhs, &gsinow);
3124
3125   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3126   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3127   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3128
3129   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3130     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3131
3132   if (dump_file && (dump_flags & TDF_DETAILS))
3133     {
3134       fprintf (dump_file, "Linearized: ");
3135       print_gimple_stmt (dump_file, stmt, 0, 0);
3136     }
3137
3138   reassociate_stats.linearized++;
3139   update_stmt (binrhs);
3140   update_stmt (binlhs);
3141   update_stmt (stmt);
3142
3143   gimple_set_visited (stmt, true);
3144   gimple_set_visited (binlhs, true);
3145   gimple_set_visited (binrhs, true);
3146
3147   /* Tail recurse on the new rhs if it still needs reassociation.  */
3148   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3149     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3150            want to change the algorithm while converting to tuples.  */
3151     linearize_expr (stmt);
3152 }
3153
3154 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3155    it.  Otherwise, return NULL.  */
3156
3157 static gimple
3158 get_single_immediate_use (tree lhs)
3159 {
3160   use_operand_p immuse;
3161   gimple immusestmt;
3162
3163   if (TREE_CODE (lhs) == SSA_NAME
3164       && single_imm_use (lhs, &immuse, &immusestmt)
3165       && is_gimple_assign (immusestmt))
3166     return immusestmt;
3167
3168   return NULL;
3169 }
3170
3171 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3172    representing the negated value.  Insertions of any necessary
3173    instructions go before GSI.
3174    This function is recursive in that, if you hand it "a_5" as the
3175    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3176    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3177
3178 static tree
3179 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3180 {
3181   gimple negatedefstmt= NULL;
3182   tree resultofnegate;
3183
3184   /* If we are trying to negate a name, defined by an add, negate the
3185      add operands instead.  */
3186   if (TREE_CODE (tonegate) == SSA_NAME)
3187     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3188   if (TREE_CODE (tonegate) == SSA_NAME
3189       && is_gimple_assign (negatedefstmt)
3190       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3191       && has_single_use (gimple_assign_lhs (negatedefstmt))
3192       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3193     {
3194       gimple_stmt_iterator gsi;
3195       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3196       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3197
3198       gsi = gsi_for_stmt (negatedefstmt);
3199       rhs1 = negate_value (rhs1, &gsi);
3200       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3201
3202       gsi = gsi_for_stmt (negatedefstmt);
3203       rhs2 = negate_value (rhs2, &gsi);
3204       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3205
3206       update_stmt (negatedefstmt);
3207       return gimple_assign_lhs (negatedefstmt);
3208     }
3209
3210   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3211   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3212                                              NULL_TREE, true, GSI_SAME_STMT);
3213   return resultofnegate;
3214 }
3215
3216 /* Return true if we should break up the subtract in STMT into an add
3217    with negate.  This is true when we the subtract operands are really
3218    adds, or the subtract itself is used in an add expression.  In
3219    either case, breaking up the subtract into an add with negate
3220    exposes the adds to reassociation.  */
3221
3222 static bool
3223 should_break_up_subtract (gimple stmt)
3224 {
3225   tree lhs = gimple_assign_lhs (stmt);
3226   tree binlhs = gimple_assign_rhs1 (stmt);
3227   tree binrhs = gimple_assign_rhs2 (stmt);
3228   gimple immusestmt;
3229   struct loop *loop = loop_containing_stmt (stmt);
3230
3231   if (TREE_CODE (binlhs) == SSA_NAME
3232       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3233     return true;
3234
3235   if (TREE_CODE (binrhs) == SSA_NAME
3236       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3237     return true;
3238
3239   if (TREE_CODE (lhs) == SSA_NAME
3240       && (immusestmt = get_single_immediate_use (lhs))
3241       && is_gimple_assign (immusestmt)
3242       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3243           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3244     return true;
3245   return false;
3246 }
3247
3248 /* Transform STMT from A - B into A + -B.  */
3249
3250 static void
3251 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3252 {
3253   tree rhs1 = gimple_assign_rhs1 (stmt);
3254   tree rhs2 = gimple_assign_rhs2 (stmt);
3255
3256   if (dump_file && (dump_flags & TDF_DETAILS))
3257     {
3258       fprintf (dump_file, "Breaking up subtract ");
3259       print_gimple_stmt (dump_file, stmt, 0, 0);
3260     }
3261
3262   rhs2 = negate_value (rhs2, gsip);
3263   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3264   update_stmt (stmt);
3265 }
3266
3267 /* Determine whether STMT is a builtin call that raises an SSA name
3268    to an integer power and has only one use.  If so, and this is early
3269    reassociation and unsafe math optimizations are permitted, place
3270    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3271    If any of these conditions does not hold, return FALSE.  */
3272
3273 static bool
3274 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3275 {
3276   tree fndecl, arg1;
3277   REAL_VALUE_TYPE c, cint;
3278
3279   if (!first_pass_instance
3280       || !flag_unsafe_math_optimizations
3281       || !is_gimple_call (stmt)
3282       || !has_single_use (gimple_call_lhs (stmt)))
3283     return false;
3284
3285   fndecl = gimple_call_fndecl (stmt);
3286
3287   if (!fndecl
3288       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3289     return false;
3290
3291   switch (DECL_FUNCTION_CODE (fndecl))
3292     {
3293     CASE_FLT_FN (BUILT_IN_POW):
3294       *base = gimple_call_arg (stmt, 0);
3295       arg1 = gimple_call_arg (stmt, 1);
3296
3297       if (TREE_CODE (arg1) != REAL_CST)
3298         return false;
3299
3300       c = TREE_REAL_CST (arg1);
3301
3302       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3303         return false;
3304
3305       *exponent = real_to_integer (&c);
3306       real_from_integer (&cint, VOIDmode, *exponent,
3307                          *exponent < 0 ? -1 : 0, 0);
3308       if (!real_identical (&c, &cint))
3309         return false;
3310
3311       break;
3312
3313     CASE_FLT_FN (BUILT_IN_POWI):
3314       *base = gimple_call_arg (stmt, 0);
3315       arg1 = gimple_call_arg (stmt, 1);
3316
3317       if (!host_integerp (arg1, 0))
3318         return false;
3319
3320       *exponent = TREE_INT_CST_LOW (arg1);
3321       break;
3322
3323     default:
3324       return false;
3325     }
3326
3327   /* Expanding negative exponents is generally unproductive, so we don't
3328      complicate matters with those.  Exponents of zero and one should
3329      have been handled by expression folding.  */
3330   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3331     return false;
3332
3333   return true;
3334 }
3335
3336 /* Recursively linearize a binary expression that is the RHS of STMT.
3337    Place the operands of the expression tree in the vector named OPS.  */
3338
3339 static void
3340 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3341                      bool is_associative, bool set_visited)
3342 {
3343   tree binlhs = gimple_assign_rhs1 (stmt);
3344   tree binrhs = gimple_assign_rhs2 (stmt);
3345   gimple binlhsdef = NULL, binrhsdef = NULL;
3346   bool binlhsisreassoc = false;
3347   bool binrhsisreassoc = false;
3348   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3349   struct loop *loop = loop_containing_stmt (stmt);
3350   tree base = NULL_TREE;
3351   HOST_WIDE_INT exponent = 0;
3352
3353   if (set_visited)
3354     gimple_set_visited (stmt, true);
3355
3356   if (TREE_CODE (binlhs) == SSA_NAME)
3357     {
3358       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3359       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3360                          && !stmt_could_throw_p (binlhsdef));
3361     }
3362
3363   if (TREE_CODE (binrhs) == SSA_NAME)
3364     {
3365       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3366       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3367                          && !stmt_could_throw_p (binrhsdef));
3368     }
3369
3370   /* If the LHS is not reassociable, but the RHS is, we need to swap
3371      them.  If neither is reassociable, there is nothing we can do, so
3372      just put them in the ops vector.  If the LHS is reassociable,
3373      linearize it.  If both are reassociable, then linearize the RHS
3374      and the LHS.  */
3375
3376   if (!binlhsisreassoc)
3377     {
3378       tree temp;
3379
3380       /* If this is not a associative operation like division, give up.  */
3381       if (!is_associative)
3382         {
3383           add_to_ops_vec (ops, binrhs);
3384           return;
3385         }
3386
3387       if (!binrhsisreassoc)
3388         {
3389           if (rhscode == MULT_EXPR
3390               && TREE_CODE (binrhs) == SSA_NAME
3391               && acceptable_pow_call (binrhsdef, &base, &exponent))
3392             {
3393               add_repeat_to_ops_vec (ops, base, exponent);
3394               gimple_set_visited (binrhsdef, true);
3395             }
3396           else
3397             add_to_ops_vec (ops, binrhs);
3398
3399           if (rhscode == MULT_EXPR
3400               && TREE_CODE (binlhs) == SSA_NAME
3401               && acceptable_pow_call (binlhsdef, &base, &exponent))
3402             {
3403               add_repeat_to_ops_vec (ops, base, exponent);
3404               gimple_set_visited (binlhsdef, true);
3405             }
3406           else
3407             add_to_ops_vec (ops, binlhs);
3408
3409           return;
3410         }
3411
3412       if (dump_file && (dump_flags & TDF_DETAILS))
3413         {
3414           fprintf (dump_file, "swapping operands of ");
3415           print_gimple_stmt (dump_file, stmt, 0, 0);
3416         }
3417
3418       swap_tree_operands (stmt,
3419                           gimple_assign_rhs1_ptr (stmt),
3420                           gimple_assign_rhs2_ptr (stmt));
3421       update_stmt (stmt);
3422
3423       if (dump_file && (dump_flags & TDF_DETAILS))
3424         {
3425           fprintf (dump_file, " is now ");
3426           print_gimple_stmt (dump_file, stmt, 0, 0);
3427         }
3428
3429       /* We want to make it so the lhs is always the reassociative op,
3430          so swap.  */
3431       temp = binlhs;
3432       binlhs = binrhs;
3433       binrhs = temp;
3434     }
3435   else if (binrhsisreassoc)
3436     {
3437       linearize_expr (stmt);
3438       binlhs = gimple_assign_rhs1 (stmt);
3439       binrhs = gimple_assign_rhs2 (stmt);
3440     }
3441
3442   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3443               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3444                                       rhscode, loop));
3445   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3446                        is_associative, set_visited);
3447
3448   if (rhscode == MULT_EXPR
3449       && TREE_CODE (binrhs) == SSA_NAME
3450       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3451     {
3452       add_repeat_to_ops_vec (ops, base, exponent);
3453       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3454     }
3455   else
3456     add_to_ops_vec (ops, binrhs);
3457 }
3458
3459 /* Repropagate the negates back into subtracts, since no other pass
3460    currently does it.  */
3461
3462 static void
3463 repropagate_negates (void)
3464 {
3465   unsigned int i = 0;
3466   tree negate;
3467
3468   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3469     {
3470       gimple user = get_single_immediate_use (negate);
3471
3472       if (!user || !is_gimple_assign (user))
3473         continue;
3474
3475       /* The negate operand can be either operand of a PLUS_EXPR
3476          (it can be the LHS if the RHS is a constant for example).
3477
3478          Force the negate operand to the RHS of the PLUS_EXPR, then
3479          transform the PLUS_EXPR into a MINUS_EXPR.  */
3480       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3481         {
3482           /* If the negated operand appears on the LHS of the
3483              PLUS_EXPR, exchange the operands of the PLUS_EXPR
3484              to force the negated operand to the RHS of the PLUS_EXPR.  */
3485           if (gimple_assign_rhs1 (user) == negate)
3486             {
3487               swap_tree_operands (user,
3488                                   gimple_assign_rhs1_ptr (user),
3489                                   gimple_assign_rhs2_ptr (user));
3490             }
3491
3492           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3493              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3494           if (gimple_assign_rhs2 (user) == negate)
3495             {
3496               tree rhs1 = gimple_assign_rhs1 (user);
3497               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3498               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3499               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3500               update_stmt (user);
3501             }
3502         }
3503       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3504         {
3505           if (gimple_assign_rhs1 (user) == negate)
3506             {
3507               /* We have
3508                    x = -a
3509                    y = x - b
3510                  which we transform into
3511                    x = a + b
3512                    y = -x .
3513                  This pushes down the negate which we possibly can merge
3514                  into some other operation, hence insert it into the
3515                  plus_negates vector.  */
3516               gimple feed = SSA_NAME_DEF_STMT (negate);
3517               tree a = gimple_assign_rhs1 (feed);
3518               tree rhs2 = gimple_assign_rhs2 (user);
3519               gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3520               gimple_replace_lhs (feed, negate);
3521               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3522               update_stmt (gsi_stmt (gsi));
3523               gsi2 = gsi_for_stmt (user);
3524               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3525               update_stmt (gsi_stmt (gsi2));
3526               gsi_move_before (&gsi, &gsi2);
3527               plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3528             }
3529           else
3530             {
3531               /* Transform "x = -a; y = b - x" into "y = b + a", getting
3532                  rid of one operation.  */
3533               gimple feed = SSA_NAME_DEF_STMT (negate);
3534               tree a = gimple_assign_rhs1 (feed);
3535               tree rhs1 = gimple_assign_rhs1 (user);
3536               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3537               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3538               update_stmt (gsi_stmt (gsi));
3539             }
3540         }
3541     }
3542 }
3543
3544 /* Returns true if OP is of a type for which we can do reassociation.
3545    That is for integral or non-saturating fixed-point types, and for
3546    floating point type when associative-math is enabled.  */
3547
3548 static bool
3549 can_reassociate_p (tree op)
3550 {
3551   tree type = TREE_TYPE (op);
3552   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3553       || NON_SAT_FIXED_POINT_TYPE_P (type)
3554       || (flag_associative_math && FLOAT_TYPE_P (type)))
3555     return true;
3556   return false;
3557 }
3558
3559 /* Break up subtract operations in block BB.
3560
3561    We do this top down because we don't know whether the subtract is
3562    part of a possible chain of reassociation except at the top.
3563
3564    IE given
3565    d = f + g
3566    c = a + e
3567    b = c - d
3568    q = b - r
3569    k = t - q
3570
3571    we want to break up k = t - q, but we won't until we've transformed q
3572    = b - r, which won't be broken up until we transform b = c - d.
3573
3574    En passant, clear the GIMPLE visited flag on every statement.  */
3575
3576 static void
3577 break_up_subtract_bb (basic_block bb)
3578 {
3579   gimple_stmt_iterator gsi;
3580   basic_block son;
3581
3582   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3583     {
3584       gimple stmt = gsi_stmt (gsi);
3585       gimple_set_visited (stmt, false);
3586
3587       if (!is_gimple_assign (stmt)
3588           || !can_reassociate_p (gimple_assign_lhs (stmt)))
3589         continue;
3590
3591       /* Look for simple gimple subtract operations.  */
3592       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3593         {
3594           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3595               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3596             continue;
3597
3598           /* Check for a subtract used only in an addition.  If this
3599              is the case, transform it into add of a negate for better
3600              reassociation.  IE transform C = A-B into C = A + -B if C
3601              is only used in an addition.  */
3602           if (should_break_up_subtract (stmt))
3603             break_up_subtract (stmt, &gsi);
3604         }
3605       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3606                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3607         plus_negates.safe_push (gimple_assign_lhs (stmt));
3608     }
3609   for (son = first_dom_son (CDI_DOMINATORS, bb);
3610        son;
3611        son = next_dom_son (CDI_DOMINATORS, son))
3612     break_up_subtract_bb (son);
3613 }
3614
3615 /* Used for repeated factor analysis.  */
3616 struct repeat_factor_d
3617 {
3618   /* An SSA name that occurs in a multiply chain.  */
3619   tree factor;
3620
3621   /* Cached rank of the factor.  */
3622   unsigned rank;
3623
3624   /* Number of occurrences of the factor in the chain.  */
3625   HOST_WIDE_INT count;
3626
3627   /* An SSA name representing the product of this factor and
3628      all factors appearing later in the repeated factor vector.  */
3629   tree repr;
3630 };
3631
3632 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3633 typedef const struct repeat_factor_d *const_repeat_factor_t;
3634
3635
3636 static vec<repeat_factor> repeat_factor_vec;
3637
3638 /* Used for sorting the repeat factor vector.  Sort primarily by
3639    ascending occurrence count, secondarily by descending rank.  */
3640
3641 static int
3642 compare_repeat_factors (const void *x1, const void *x2)
3643 {
3644   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3645   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3646
3647   if (rf1->count != rf2->count)
3648     return rf1->count - rf2->count;
3649
3650   return rf2->rank - rf1->rank;
3651 }
3652
3653 /* Look for repeated operands in OPS in the multiply tree rooted at
3654    STMT.  Replace them with an optimal sequence of multiplies and powi
3655    builtin calls, and remove the used operands from OPS.  Return an
3656    SSA name representing the value of the replacement sequence.  */
3657
3658 static tree
3659 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3660 {
3661   unsigned i, j, vec_len;
3662   int ii;
3663   operand_entry_t oe;
3664   repeat_factor_t rf1, rf2;
3665   repeat_factor rfnew;
3666   tree result = NULL_TREE;
3667   tree target_ssa, iter_result;
3668   tree type = TREE_TYPE (gimple_get_lhs (stmt));
3669   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3670   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3671   gimple mul_stmt, pow_stmt;
3672
3673   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3674      target.  */
3675   if (!powi_fndecl)
3676     return NULL_TREE;
3677
3678   /* Allocate the repeated factor vector.  */
3679   repeat_factor_vec.create (10);
3680
3681   /* Scan the OPS vector for all SSA names in the product and build
3682      up a vector of occurrence counts for each factor.  */
3683   FOR_EACH_VEC_ELT (*ops, i, oe)
3684     {
3685       if (TREE_CODE (oe->op) == SSA_NAME)
3686         {
3687           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3688             {
3689               if (rf1->factor == oe->op)
3690                 {
3691                   rf1->count += oe->count;
3692                   break;
3693                 }
3694             }
3695
3696           if (j >= repeat_factor_vec.length ())
3697             {
3698               rfnew.factor = oe->op;
3699               rfnew.rank = oe->rank;
3700               rfnew.count = oe->count;
3701               rfnew.repr = NULL_TREE;
3702               repeat_factor_vec.safe_push (rfnew);
3703             }
3704         }
3705     }
3706
3707   /* Sort the repeated factor vector by (a) increasing occurrence count,
3708      and (b) decreasing rank.  */
3709   repeat_factor_vec.qsort (compare_repeat_factors);
3710
3711   /* It is generally best to combine as many base factors as possible
3712      into a product before applying __builtin_powi to the result.
3713      However, the sort order chosen for the repeated factor vector
3714      allows us to cache partial results for the product of the base
3715      factors for subsequent use.  When we already have a cached partial
3716      result from a previous iteration, it is best to make use of it
3717      before looking for another __builtin_pow opportunity.
3718
3719      As an example, consider x * x * y * y * y * z * z * z * z.
3720      We want to first compose the product x * y * z, raise it to the
3721      second power, then multiply this by y * z, and finally multiply
3722      by z.  This can be done in 5 multiplies provided we cache y * z
3723      for use in both expressions:
3724
3725         t1 = y * z
3726         t2 = t1 * x
3727         t3 = t2 * t2
3728         t4 = t1 * t3
3729         result = t4 * z
3730
3731      If we instead ignored the cached y * z and first multiplied by
3732      the __builtin_pow opportunity z * z, we would get the inferior:
3733
3734         t1 = y * z
3735         t2 = t1 * x
3736         t3 = t2 * t2
3737         t4 = z * z
3738         t5 = t3 * t4
3739         result = t5 * y  */
3740
3741   vec_len = repeat_factor_vec.length ();
3742
3743   /* Repeatedly look for opportunities to create a builtin_powi call.  */
3744   while (true)
3745     {
3746       HOST_WIDE_INT power;
3747
3748       /* First look for the largest cached product of factors from
3749          preceding iterations.  If found, create a builtin_powi for
3750          it if the minimum occurrence count for its factors is at
3751          least 2, or just use this cached product as our next
3752          multiplicand if the minimum occurrence count is 1.  */
3753       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3754         {
3755           if (rf1->repr && rf1->count > 0)
3756             break;
3757         }
3758
3759       if (j < vec_len)
3760         {
3761           power = rf1->count;
3762
3763           if (power == 1)
3764             {
3765               iter_result = rf1->repr;
3766
3767               if (dump_file && (dump_flags & TDF_DETAILS))
3768                 {
3769                   unsigned elt;
3770                   repeat_factor_t rf;
3771                   fputs ("Multiplying by cached product ", dump_file);
3772                   for (elt = j; elt < vec_len; elt++)
3773                     {
3774                       rf = &repeat_factor_vec[elt];
3775                       print_generic_expr (dump_file, rf->factor, 0);
3776                       if (elt < vec_len - 1)
3777                         fputs (" * ", dump_file);
3778                     }
3779                   fputs ("\n", dump_file);
3780                 }
3781             }
3782           else
3783             {
3784               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3785               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3786                                             build_int_cst (integer_type_node,
3787                                                            power));
3788               gimple_call_set_lhs (pow_stmt, iter_result);
3789               gimple_set_location (pow_stmt, gimple_location (stmt));
3790               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3791
3792               if (dump_file && (dump_flags & TDF_DETAILS))
3793                 {
3794                   unsigned elt;
3795                   repeat_factor_t rf;
3796                   fputs ("Building __builtin_pow call for cached product (",
3797                          dump_file);
3798                   for (elt = j; elt < vec_len; elt++)
3799                     {
3800                       rf = &repeat_factor_vec[elt];
3801                       print_generic_expr (dump_file, rf->factor, 0);
3802                       if (elt < vec_len - 1)
3803                         fputs (" * ", dump_file);
3804                     }
3805                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3806                            power);
3807                 }
3808             }
3809         }
3810       else
3811         {
3812           /* Otherwise, find the first factor in the repeated factor
3813              vector whose occurrence count is at least 2.  If no such
3814              factor exists, there are no builtin_powi opportunities
3815              remaining.  */
3816           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3817             {
3818               if (rf1->count >= 2)
3819                 break;
3820             }
3821
3822           if (j >= vec_len)
3823             break;
3824
3825           power = rf1->count;
3826
3827           if (dump_file && (dump_flags & TDF_DETAILS))
3828             {
3829               unsigned elt;
3830               repeat_factor_t rf;
3831               fputs ("Building __builtin_pow call for (", dump_file);
3832               for (elt = j; elt < vec_len; elt++)
3833                 {
3834                   rf = &repeat_factor_vec[elt];
3835                   print_generic_expr (dump_file, rf->factor, 0);
3836                   if (elt < vec_len - 1)
3837                     fputs (" * ", dump_file);
3838                 }
3839               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3840             }
3841
3842           reassociate_stats.pows_created++;
3843
3844           /* Visit each element of the vector in reverse order (so that
3845              high-occurrence elements are visited first, and within the
3846              same occurrence count, lower-ranked elements are visited
3847              first).  Form a linear product of all elements in this order
3848              whose occurrencce count is at least that of element J.
3849              Record the SSA name representing the product of each element
3850              with all subsequent elements in the vector.  */
3851           if (j == vec_len - 1)
3852             rf1->repr = rf1->factor;
3853           else
3854             {
3855               for (ii = vec_len - 2; ii >= (int)j; ii--)
3856                 {
3857                   tree op1, op2;
3858
3859                   rf1 = &repeat_factor_vec[ii];
3860                   rf2 = &repeat_factor_vec[ii + 1];
3861
3862                   /* Init the last factor's representative to be itself.  */
3863                   if (!rf2->repr)
3864                     rf2->repr = rf2->factor;
3865
3866                   op1 = rf1->factor;
3867                   op2 = rf2->repr;
3868
3869                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3870                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3871                                                            target_ssa,
3872                                                            op1, op2);
3873                   gimple_set_location (mul_stmt, gimple_location (stmt));
3874                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3875                   rf1->repr = target_ssa;
3876
3877                   /* Don't reprocess the multiply we just introduced.  */
3878                   gimple_set_visited (mul_stmt, true);
3879                 }
3880             }
3881
3882           /* Form a call to __builtin_powi for the maximum product
3883              just formed, raised to the power obtained earlier.  */
3884           rf1 = &repeat_factor_vec[j];
3885           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3886           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3887                                         build_int_cst (integer_type_node,
3888                                                        power));
3889           gimple_call_set_lhs (pow_stmt, iter_result);
3890           gimple_set_location (pow_stmt, gimple_location (stmt));
3891           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3892         }
3893
3894       /* If we previously formed at least one other builtin_powi call,
3895          form the product of this one and those others.  */
3896       if (result)
3897         {
3898           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3899           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3900                                                    result, iter_result);
3901           gimple_set_location (mul_stmt, gimple_location (stmt));
3902           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3903           gimple_set_visited (mul_stmt, true);
3904           result = new_result;
3905         }
3906       else
3907         result = iter_result;
3908
3909       /* Decrement the occurrence count of each element in the product
3910          by the count found above, and remove this many copies of each
3911          factor from OPS.  */
3912       for (i = j; i < vec_len; i++)
3913         {
3914           unsigned k = power;
3915           unsigned n;
3916
3917           rf1 = &repeat_factor_vec[i];
3918           rf1->count -= power;
3919
3920           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3921             {
3922               if (oe->op == rf1->factor)
3923                 {
3924                   if (oe->count <= k)
3925                     {
3926                       ops->ordered_remove (n);
3927                       k -= oe->count;
3928
3929                       if (k == 0)
3930                         break;
3931                     }
3932                   else
3933                     {
3934                       oe->count -= k;
3935                       break;
3936                     }
3937                 }
3938             }
3939         }
3940     }
3941
3942   /* At this point all elements in the repeated factor vector have a
3943      remaining occurrence count of 0 or 1, and those with a count of 1
3944      don't have cached representatives.  Re-sort the ops vector and
3945      clean up.  */
3946   ops->qsort (sort_by_operand_rank);
3947   repeat_factor_vec.release ();
3948
3949   /* Return the final product computed herein.  Note that there may
3950      still be some elements with single occurrence count left in OPS;
3951      those will be handled by the normal reassociation logic.  */
3952   return result;
3953 }
3954
3955 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
3956
3957 static void
3958 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3959 {
3960   tree rhs1;
3961
3962   if (dump_file && (dump_flags & TDF_DETAILS))
3963     {
3964       fprintf (dump_file, "Transforming ");
3965       print_gimple_stmt (dump_file, stmt, 0, 0);
3966     }
3967
3968   rhs1 = gimple_assign_rhs1 (stmt);
3969   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3970   update_stmt (stmt);
3971   remove_visited_stmt_chain (rhs1);
3972
3973   if (dump_file && (dump_flags & TDF_DETAILS))
3974     {
3975       fprintf (dump_file, " into ");
3976       print_gimple_stmt (dump_file, stmt, 0, 0);
3977     }
3978 }
3979
3980 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
3981
3982 static void
3983 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3984                             tree rhs1, tree rhs2)
3985 {
3986   if (dump_file && (dump_flags & TDF_DETAILS))
3987     {
3988       fprintf (dump_file, "Transforming ");
3989       print_gimple_stmt (dump_file, stmt, 0, 0);
3990     }
3991
3992   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3993   update_stmt (gsi_stmt (*gsi));
3994   remove_visited_stmt_chain (rhs1);
3995
3996   if (dump_file && (dump_flags & TDF_DETAILS))
3997     {
3998       fprintf (dump_file, " into ");
3999       print_gimple_stmt (dump_file, stmt, 0, 0);
4000     }
4001 }
4002
4003 /* Reassociate expressions in basic block BB and its post-dominator as
4004    children.  */
4005
4006 static void
4007 reassociate_bb (basic_block bb)
4008 {
4009   gimple_stmt_iterator gsi;
4010   basic_block son;
4011   gimple stmt = last_stmt (bb);
4012
4013   if (stmt && !gimple_visited_p (stmt))
4014     maybe_optimize_range_tests (stmt);
4015
4016   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4017     {
4018       stmt = gsi_stmt (gsi);
4019
4020       if (is_gimple_assign (stmt)
4021           && !stmt_could_throw_p (stmt))
4022         {
4023           tree lhs, rhs1, rhs2;
4024           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4025
4026           /* If this is not a gimple binary expression, there is
4027              nothing for us to do with it.  */
4028           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4029             continue;
4030
4031           /* If this was part of an already processed statement,
4032              we don't need to touch it again. */
4033           if (gimple_visited_p (stmt))
4034             {
4035               /* This statement might have become dead because of previous
4036                  reassociations.  */
4037               if (has_zero_uses (gimple_get_lhs (stmt)))
4038                 {
4039                   gsi_remove (&gsi, true);
4040                   release_defs (stmt);
4041                   /* We might end up removing the last stmt above which
4042                      places the iterator to the end of the sequence.
4043                      Reset it to the last stmt in this case which might
4044                      be the end of the sequence as well if we removed
4045                      the last statement of the sequence.  In which case
4046                      we need to bail out.  */
4047                   if (gsi_end_p (gsi))
4048                     {
4049                       gsi = gsi_last_bb (bb);
4050                       if (gsi_end_p (gsi))
4051                         break;
4052                     }
4053                 }
4054               continue;
4055             }
4056
4057           lhs = gimple_assign_lhs (stmt);
4058           rhs1 = gimple_assign_rhs1 (stmt);
4059           rhs2 = gimple_assign_rhs2 (stmt);
4060
4061           /* For non-bit or min/max operations we can't associate
4062              all types.  Verify that here.  */
4063           if (rhs_code != BIT_IOR_EXPR
4064               && rhs_code != BIT_AND_EXPR
4065               && rhs_code != BIT_XOR_EXPR
4066               && rhs_code != MIN_EXPR
4067               && rhs_code != MAX_EXPR
4068               && (!can_reassociate_p (lhs)
4069                   || !can_reassociate_p (rhs1)
4070                   || !can_reassociate_p (rhs2)))
4071             continue;
4072
4073           if (associative_tree_code (rhs_code))
4074             {
4075               vec<operand_entry_t> ops = vNULL;
4076               tree powi_result = NULL_TREE;
4077
4078               /* There may be no immediate uses left by the time we
4079                  get here because we may have eliminated them all.  */
4080               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4081                 continue;
4082
4083               gimple_set_visited (stmt, true);
4084               linearize_expr_tree (&ops, stmt, true, true);
4085               ops.qsort (sort_by_operand_rank);
4086               optimize_ops_list (rhs_code, &ops);
4087               if (undistribute_ops_list (rhs_code, &ops,
4088                                          loop_containing_stmt (stmt)))
4089                 {
4090                   ops.qsort (sort_by_operand_rank);
4091                   optimize_ops_list (rhs_code, &ops);
4092                 }
4093
4094               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4095                 optimize_range_tests (rhs_code, &ops);
4096
4097               if (first_pass_instance
4098                   && rhs_code == MULT_EXPR
4099                   && flag_unsafe_math_optimizations)
4100                 powi_result = attempt_builtin_powi (stmt, &ops);
4101
4102               /* If the operand vector is now empty, all operands were
4103                  consumed by the __builtin_powi optimization.  */
4104               if (ops.length () == 0)
4105                 transform_stmt_to_copy (&gsi, stmt, powi_result);
4106               else if (ops.length () == 1)
4107                 {
4108                   tree last_op = ops.last ()->op;
4109
4110                   if (powi_result)
4111                     transform_stmt_to_multiply (&gsi, stmt, last_op,
4112                                                 powi_result);
4113                   else
4114                     transform_stmt_to_copy (&gsi, stmt, last_op);
4115                 }
4116               else
4117                 {
4118                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4119                   int ops_num = ops.length ();
4120                   int width = get_reassociation_width (ops_num, rhs_code, mode);
4121
4122                   if (dump_file && (dump_flags & TDF_DETAILS))
4123                     fprintf (dump_file,
4124                              "Width = %d was chosen for reassociation\n", width);
4125
4126                   if (width > 1
4127                       && ops.length () > 3)
4128                     rewrite_expr_tree_parallel (stmt, width, ops);
4129                   else
4130                     rewrite_expr_tree (stmt, 0, ops, false);
4131
4132                   /* If we combined some repeated factors into a
4133                      __builtin_powi call, multiply that result by the
4134                      reassociated operands.  */
4135                   if (powi_result)
4136                     {
4137                       gimple mul_stmt;
4138                       tree type = TREE_TYPE (gimple_get_lhs (stmt));
4139                       tree target_ssa = make_temp_ssa_name (type, NULL,
4140                                                             "reassocpow");
4141                       gimple_set_lhs (stmt, target_ssa);
4142                       update_stmt (stmt);
4143                       mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4144                                                                powi_result,
4145                                                                target_ssa);
4146                       gimple_set_location (mul_stmt, gimple_location (stmt));
4147                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4148                     }
4149                 }
4150
4151               ops.release ();
4152             }
4153         }
4154     }
4155   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4156        son;
4157        son = next_dom_son (CDI_POST_DOMINATORS, son))
4158     reassociate_bb (son);
4159 }
4160
4161 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4162 void debug_ops_vector (vec<operand_entry_t> ops);
4163
4164 /* Dump the operand entry vector OPS to FILE.  */
4165
4166 void
4167 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4168 {
4169   operand_entry_t oe;
4170   unsigned int i;
4171
4172   FOR_EACH_VEC_ELT (ops, i, oe)
4173     {
4174       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4175       print_generic_expr (file, oe->op, 0);
4176     }
4177 }
4178
4179 /* Dump the operand entry vector OPS to STDERR.  */
4180
4181 DEBUG_FUNCTION void
4182 debug_ops_vector (vec<operand_entry_t> ops)
4183 {
4184   dump_ops_vector (stderr, ops);
4185 }
4186
4187 static void
4188 do_reassoc (void)
4189 {
4190   break_up_subtract_bb (ENTRY_BLOCK_PTR);
4191   reassociate_bb (EXIT_BLOCK_PTR);
4192 }
4193
4194 /* Initialize the reassociation pass.  */
4195
4196 static void
4197 init_reassoc (void)
4198 {
4199   int i;
4200   long rank = 2;
4201   int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4202
4203   /* Find the loops, so that we can prevent moving calculations in
4204      them.  */
4205   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4206
4207   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4208
4209   operand_entry_pool = create_alloc_pool ("operand entry pool",
4210                                           sizeof (struct operand_entry), 30);
4211   next_operand_entry_id = 0;
4212
4213   /* Reverse RPO (Reverse Post Order) will give us something where
4214      deeper loops come later.  */
4215   pre_and_rev_post_order_compute (NULL, bbs, false);
4216   bb_rank = XCNEWVEC (long, last_basic_block);
4217   operand_rank = pointer_map_create ();
4218
4219   /* Give each default definition a distinct rank.  This includes
4220      parameters and the static chain.  Walk backwards over all
4221      SSA names so that we get proper rank ordering according
4222      to tree_swap_operands_p.  */
4223   for (i = num_ssa_names - 1; i > 0; --i)
4224     {
4225       tree name = ssa_name (i);
4226       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4227         insert_operand_rank (name, ++rank);
4228     }
4229
4230   /* Set up rank for each BB  */
4231   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4232     bb_rank[bbs[i]] = ++rank  << 16;
4233
4234   free (bbs);
4235   calculate_dominance_info (CDI_POST_DOMINATORS);
4236   plus_negates = vNULL;
4237 }
4238
4239 /* Cleanup after the reassociation pass, and print stats if
4240    requested.  */
4241
4242 static void
4243 fini_reassoc (void)
4244 {
4245   statistics_counter_event (cfun, "Linearized",
4246                             reassociate_stats.linearized);
4247   statistics_counter_event (cfun, "Constants eliminated",
4248                             reassociate_stats.constants_eliminated);
4249   statistics_counter_event (cfun, "Ops eliminated",
4250                             reassociate_stats.ops_eliminated);
4251   statistics_counter_event (cfun, "Statements rewritten",
4252                             reassociate_stats.rewritten);
4253   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4254                             reassociate_stats.pows_encountered);
4255   statistics_counter_event (cfun, "Built-in powi calls created",
4256                             reassociate_stats.pows_created);
4257
4258   pointer_map_destroy (operand_rank);
4259   free_alloc_pool (operand_entry_pool);
4260   free (bb_rank);
4261   plus_negates.release ();
4262   free_dominance_info (CDI_POST_DOMINATORS);
4263   loop_optimizer_finalize ();
4264 }
4265
4266 /* Gate and execute functions for Reassociation.  */
4267
4268 static unsigned int
4269 execute_reassoc (void)
4270 {
4271   init_reassoc ();
4272
4273   do_reassoc ();
4274   repropagate_negates ();
4275
4276   fini_reassoc ();
4277   return 0;
4278 }
4279
4280 static bool
4281 gate_tree_ssa_reassoc (void)
4282 {
4283   return flag_tree_reassoc != 0;
4284 }
4285
4286 struct gimple_opt_pass pass_reassoc =
4287 {
4288  {
4289   GIMPLE_PASS,
4290   "reassoc",                            /* name */
4291   OPTGROUP_NONE,                        /* optinfo_flags */
4292   gate_tree_ssa_reassoc,                /* gate */
4293   execute_reassoc,                      /* execute */
4294   NULL,                                 /* sub */
4295   NULL,                                 /* next */
4296   0,                                    /* static_pass_number */
4297   TV_TREE_REASSOC,                      /* tv_id */
4298   PROP_cfg | PROP_ssa,                  /* properties_required */
4299   0,                                    /* properties_provided */
4300   0,                                    /* properties_destroyed */
4301   0,                                    /* todo_flags_start */
4302   TODO_verify_ssa
4303     | TODO_update_ssa_only_virtuals
4304     | TODO_verify_flow
4305     | TODO_ggc_collect                  /* todo_flags_finish */
4306  }
4307 };