gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "backend.h"
  68 #include "rtl.h"
  69 #include "tree.h"
  70 #include "gimple.h"
  71 #include "cfghooks.h"
  72 #include "tree-pass.h"
  73 #include "tm_p.h"
  74 #include "ssa.h"
  75 #include "expmed.h"
  76 #include "insn-config.h"
  77 #include "emit-rtl.h"
  78 #include "recog.h"
  79 #include "cgraph.h"
  80 #include "gimple-pretty-print.h"
  81 #include "alias.h"
  82 #include "fold-const.h"
  83 #include "stor-layout.h"
  84 #include "tree-eh.h"
  85 #include "gimplify.h"
  86 #include "gimple-iterator.h"
  87 #include "gimplify-me.h"
  88 #include "tree-cfg.h"
  89 #include "tree-ssa-loop-ivopts.h"
  90 #include "tree-ssa-loop-manip.h"
  91 #include "tree-ssa-loop-niter.h"
  92 #include "tree-ssa-loop.h"
  93 #include "explow.h"
  94 #include "expr.h"
  95 #include "tree-dfa.h"
  96 #include "tree-ssa.h"
  97 #include "cfgloop.h"
  98 #include "tree-scalar-evolution.h"
  99 #include "params.h"
 100 #include "tree-affine.h"
 101 #include "tree-ssa-propagate.h"
 102 #include "tree-ssa-address.h"
 103 #include "builtins.h"
 104 #include "tree-vectorizer.h"
 105
 106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 107    cost of different addressing modes.  This should be moved to a TBD
 108    interface between the GIMPLE and RTL worlds.  */
 109
 110 /* The infinite cost.  */
 111 #define INFTY 10000000
 112
 113 #define AVG_LOOP_NITER(LOOP) 5
 114
 115 /* Returns the expected number of loop iterations for LOOP.
 116    The average trip count is computed from profile data if it
 117    exists. */
 118
 119 static inline HOST_WIDE_INT
 120 avg_loop_niter (struct loop *loop)
 121 {
 122   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 123   if (niter == -1)
 124     return AVG_LOOP_NITER (loop);
 125
 126   return niter;
 127 }
 128
 129 /* Representation of the induction variable.  */
 130 struct iv
 131 {
 132   tree base;            /* Initial value of the iv.  */
 133   tree base_object;     /* A memory object to that the induction variable points.  */
 134   tree step;            /* Step of the iv (constant only).  */
 135   tree ssa_name;        /* The ssa name with the value.  */
 136   unsigned use_id;      /* The identifier in the use if it is the case.  */
 137   bool biv_p;           /* Is it a biv?  */
 138   bool have_use_for;    /* Do we already have a use for it?  */
 139   bool no_overflow;     /* True if the iv doesn't overflow.  */
 140   bool have_address_use;/* For biv, indicate if it's used in any address
 141                            type use.  */
 142 };
 143
 144 /* Per-ssa version information (induction variable descriptions, etc.).  */
 145 struct version_info
 146 {
 147   tree name;            /* The ssa name.  */
 148   struct iv *iv;        /* Induction variable description.  */
 149   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 150                            an expression that is not an induction variable.  */
 151   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 152   unsigned inv_id;      /* Id of an invariant.  */
 153 };
 154
 155 /* Types of uses.  */
 156 enum use_type
 157 {
 158   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 159   USE_ADDRESS,          /* Use in an address.  */
 160   USE_COMPARE           /* Use is a compare.  */
 161 };
 162
 163 /* Cost of a computation.  */
 164 struct comp_cost
 165 {
 166   int cost;             /* The runtime cost.  */
 167   unsigned complexity;  /* The estimate of the complexity of the code for
 168                            the computation (in no concrete units --
 169                            complexity field should be larger for more
 170                            complex expressions and addressing modes).  */
 171   int scratch;          /* Scratch used during cost computation.  */
 172 };
 173
 174 static const comp_cost no_cost = {0, 0, 0};
 175 static const comp_cost infinite_cost = {INFTY, INFTY, INFTY};
 176
 177 /* The candidate - cost pair.  */
 178 struct cost_pair
 179 {
 180   struct iv_cand *cand; /* The candidate.  */
 181   comp_cost cost;       /* The cost.  */
 182   bitmap depends_on;    /* The list of invariants that have to be
 183                            preserved.  */
 184   tree value;           /* For final value elimination, the expression for
 185                            the final value of the iv.  For iv elimination,
 186                            the new bound to compare with.  */
 187   enum tree_code comp;  /* For iv elimination, the comparison.  */
 188   int inv_expr_id;      /* Loop invariant expression id.  */
 189 };
 190
 191 /* Use.  */
 192 struct iv_use
 193 {
 194   unsigned id;          /* The id of the use.  */
 195   unsigned sub_id;      /* The id of the sub use.  */
 196   enum use_type type;   /* Type of the use.  */
 197   struct iv *iv;        /* The induction variable it is based on.  */
 198   gimple *stmt;         /* Statement in that it occurs.  */
 199   tree *op_p;           /* The place where it occurs.  */
 200   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 201                            important ones.  */
 202
 203   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 204   struct cost_pair *cost_map;
 205                         /* The costs wrto the iv candidates.  */
 206
 207   struct iv_cand *selected;
 208                         /* The selected candidate.  */
 209
 210   struct iv_use *next;  /* The next sub use.  */
 211   tree addr_base;       /* Base address with const offset stripped.  */
 212   unsigned HOST_WIDE_INT addr_offset;
 213                         /* Const offset stripped from base address.  */
 214 };
 215
 216 /* The position where the iv is computed.  */
 217 enum iv_position
 218 {
 219   IP_NORMAL,            /* At the end, just before the exit condition.  */
 220   IP_END,               /* At the end of the latch block.  */
 221   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 222   IP_AFTER_USE,         /* Immediately after a specific use.  */
 223   IP_ORIGINAL           /* The original biv.  */
 224 };
 225
 226 /* The induction variable candidate.  */
 227 struct iv_cand
 228 {
 229   unsigned id;          /* The number of the candidate.  */
 230   bool important;       /* Whether this is an "important" candidate, i.e. such
 231                            that it should be considered by all uses.  */
 232   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 233   gimple *incremented_at;/* For original biv, the statement where it is
 234                            incremented.  */
 235   tree var_before;      /* The variable used for it before increment.  */
 236   tree var_after;       /* The variable used for it after increment.  */
 237   struct iv *iv;        /* The value of the candidate.  NULL for
 238                            "pseudocandidate" used to indicate the possibility
 239                            to replace the final value of an iv by direct
 240                            computation of the value.  */
 241   unsigned cost;        /* Cost of the candidate.  */
 242   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 243   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 244                               where it is incremented.  */
 245   bitmap depends_on;    /* The list of invariants that are used in step of the
 246                            biv.  */
 247   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 248                            smaller type.  */
 249 };
 250
 251 /* Hashtable entry for common candidate derived from iv uses.  */
 252 struct iv_common_cand
 253 {
 254   tree base;
 255   tree step;
 256   /* IV uses from which this common candidate is derived.  */
 257   auto_vec<iv_use *> uses;
 258   hashval_t hash;
 259 };
 260
 261 /* Hashtable helpers.  */
 262
 263 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 264 {
 265   static inline hashval_t hash (const iv_common_cand *);
 266   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 267 };
 268
 269 /* Hash function for possible common candidates.  */
 270
 271 inline hashval_t
 272 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 273 {
 274   return ccand->hash;
 275 }
 276
 277 /* Hash table equality function for common candidates.  */
 278
 279 inline bool
 280 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 281                               const iv_common_cand *ccand2)
 282 {
 283   return (ccand1->hash == ccand2->hash
 284           && operand_equal_p (ccand1->base, ccand2->base, 0)
 285           && operand_equal_p (ccand1->step, ccand2->step, 0)
 286           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 287               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 288 }
 289
 290 /* Loop invariant expression hashtable entry.  */
 291 struct iv_inv_expr_ent
 292 {
 293   tree expr;
 294   int id;
 295   hashval_t hash;
 296 };
 297
 298 /* Hashtable helpers.  */
 299
 300 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 301 {
 302   static inline hashval_t hash (const iv_inv_expr_ent *);
 303   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 304 };
 305
 306 /* Hash function for loop invariant expressions.  */
 307
 308 inline hashval_t
 309 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 310 {
 311   return expr->hash;
 312 }
 313
 314 /* Hash table equality function for expressions.  */
 315
 316 inline bool
 317 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 318                            const iv_inv_expr_ent *expr2)
 319 {
 320   return expr1->hash == expr2->hash
 321          && operand_equal_p (expr1->expr, expr2->expr, 0);
 322 }
 323
 324 struct ivopts_data
 325 {
 326   /* The currently optimized loop.  */
 327   struct loop *current_loop;
 328   source_location loop_loc;
 329
 330   /* Numbers of iterations for all exits of the current loop.  */
 331   hash_map<edge, tree_niter_desc *> *niters;
 332
 333   /* Number of registers used in it.  */
 334   unsigned regs_used;
 335
 336   /* The size of version_info array allocated.  */
 337   unsigned version_info_size;
 338
 339   /* The array of information for the ssa names.  */
 340   struct version_info *version_info;
 341
 342   /* The hashtable of loop invariant expressions created
 343      by ivopt.  */
 344   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 345
 346   /* Loop invariant expression id.  */
 347   int inv_expr_id;
 348
 349   /* The bitmap of indices in version_info whose value was changed.  */
 350   bitmap relevant;
 351
 352   /* The uses of induction variables.  */
 353   vec<iv_use *> iv_uses;
 354
 355   /* The candidates.  */
 356   vec<iv_cand *> iv_candidates;
 357
 358   /* A bitmap of important candidates.  */
 359   bitmap important_candidates;
 360
 361   /* Cache used by tree_to_aff_combination_expand.  */
 362   hash_map<tree, name_expansion *> *name_expansion_cache;
 363
 364   /* The hashtable of common candidates derived from iv uses.  */
 365   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 366
 367   /* The common candidates.  */
 368   vec<iv_common_cand *> iv_common_cands;
 369
 370   /* The maximum invariant id.  */
 371   unsigned max_inv_id;
 372
 373   /* Number of no_overflow BIVs which are not used in memory address.  */
 374   unsigned bivs_not_used_in_addr;
 375
 376   /* Obstack for iv structure.  */
 377   struct obstack iv_obstack;
 378
 379   /* Whether to consider just related and important candidates when replacing a
 380      use.  */
 381   bool consider_all_candidates;
 382
 383   /* Are we optimizing for speed?  */
 384   bool speed;
 385
 386   /* Whether the loop body includes any function calls.  */
 387   bool body_includes_call;
 388
 389   /* Whether the loop body can only be exited via single exit.  */
 390   bool loop_single_exit_p;
 391 };
 392
 393 /* An assignment of iv candidates to uses.  */
 394
 395 struct iv_ca
 396 {
 397   /* The number of uses covered by the assignment.  */
 398   unsigned upto;
 399
 400   /* Number of uses that cannot be expressed by the candidates in the set.  */
 401   unsigned bad_uses;
 402
 403   /* Candidate assigned to a use, together with the related costs.  */
 404   struct cost_pair **cand_for_use;
 405
 406   /* Number of times each candidate is used.  */
 407   unsigned *n_cand_uses;
 408
 409   /* The candidates used.  */
 410   bitmap cands;
 411
 412   /* The number of candidates in the set.  */
 413   unsigned n_cands;
 414
 415   /* Total number of registers needed.  */
 416   unsigned n_regs;
 417
 418   /* Total cost of expressing uses.  */
 419   comp_cost cand_use_cost;
 420
 421   /* Total cost of candidates.  */
 422   unsigned cand_cost;
 423
 424   /* Number of times each invariant is used.  */
 425   unsigned *n_invariant_uses;
 426
 427   /* The array holding the number of uses of each loop
 428      invariant expressions created by ivopt.  */
 429   unsigned *used_inv_expr;
 430
 431   /* The number of created loop invariants.  */
 432   unsigned num_used_inv_expr;
 433
 434   /* Total cost of the assignment.  */
 435   comp_cost cost;
 436 };
 437
 438 /* Difference of two iv candidate assignments.  */
 439
 440 struct iv_ca_delta
 441 {
 442   /* Changed use.  */
 443   struct iv_use *use;
 444
 445   /* An old assignment (for rollback purposes).  */
 446   struct cost_pair *old_cp;
 447
 448   /* A new assignment.  */
 449   struct cost_pair *new_cp;
 450
 451   /* Next change in the list.  */
 452   struct iv_ca_delta *next_change;
 453 };
 454
 455 /* Bound on number of candidates below that all candidates are considered.  */
 456
 457 #define CONSIDER_ALL_CANDIDATES_BOUND \
 458   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 459
 460 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 461    optimizing such a loop would help, and it would take ages).  */
 462
 463 #define MAX_CONSIDERED_USES \
 464   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 465
 466 /* If there are at most this number of ivs in the set, try removing unnecessary
 467    ivs from the set always.  */
 468
 469 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 470   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 471
 472 /* The list of trees for that the decl_rtl field must be reset is stored
 473    here.  */
 474
 475 static vec<tree> decl_rtl_to_reset;
 476
 477 static comp_cost force_expr_to_var_cost (tree, bool);
 478
 479 /* Number of uses recorded in DATA.  */
 480
 481 static inline unsigned
 482 n_iv_uses (struct ivopts_data *data)
 483 {
 484   return data->iv_uses.length ();
 485 }
 486
 487 /* Ith use recorded in DATA.  */
 488
 489 static inline struct iv_use *
 490 iv_use (struct ivopts_data *data, unsigned i)
 491 {
 492   return data->iv_uses[i];
 493 }
 494
 495 /* Number of candidates recorded in DATA.  */
 496
 497 static inline unsigned
 498 n_iv_cands (struct ivopts_data *data)
 499 {
 500   return data->iv_candidates.length ();
 501 }
 502
 503 /* Ith candidate recorded in DATA.  */
 504
 505 static inline struct iv_cand *
 506 iv_cand (struct ivopts_data *data, unsigned i)
 507 {
 508   return data->iv_candidates[i];
 509 }
 510
 511 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 512
 513 edge
 514 single_dom_exit (struct loop *loop)
 515 {
 516   edge exit = single_exit (loop);
 517
 518   if (!exit)
 519     return NULL;
 520
 521   if (!just_once_each_iteration_p (loop, exit->src))
 522     return NULL;
 523
 524   return exit;
 525 }
 526
 527 /* Dumps information about the induction variable IV to FILE.  */
 528
 529 void
 530 dump_iv (FILE *file, struct iv *iv, bool dump_name)
 531 {
 532   if (iv->ssa_name && dump_name)
 533     {
 534       fprintf (file, "ssa name ");
 535       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 536       fprintf (file, "\n");
 537     }
 538
 539   fprintf (file, "  type ");
 540   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 541   fprintf (file, "\n");
 542
 543   if (iv->step)
 544     {
 545       fprintf (file, "  base ");
 546       print_generic_expr (file, iv->base, TDF_SLIM);
 547       fprintf (file, "\n");
 548
 549       fprintf (file, "  step ");
 550       print_generic_expr (file, iv->step, TDF_SLIM);
 551       fprintf (file, "\n");
 552     }
 553   else
 554     {
 555       fprintf (file, "  invariant ");
 556       print_generic_expr (file, iv->base, TDF_SLIM);
 557       fprintf (file, "\n");
 558     }
 559
 560   if (iv->base_object)
 561     {
 562       fprintf (file, "  base object ");
 563       print_generic_expr (file, iv->base_object, TDF_SLIM);
 564       fprintf (file, "\n");
 565     }
 566
 567   if (iv->biv_p)
 568     fprintf (file, "  is a biv\n");
 569
 570   if (iv->no_overflow)
 571     fprintf (file, "  iv doesn't overflow wrto loop niter\n");
 572 }
 573
 574 /* Dumps information about the USE to FILE.  */
 575
 576 void
 577 dump_use (FILE *file, struct iv_use *use)
 578 {
 579   fprintf (file, "use %d", use->id);
 580   if (use->sub_id)
 581     fprintf (file, ".%d", use->sub_id);
 582
 583   fprintf (file, "\n");
 584
 585   switch (use->type)
 586     {
 587     case USE_NONLINEAR_EXPR:
 588       fprintf (file, "  generic\n");
 589       break;
 590
 591     case USE_ADDRESS:
 592       fprintf (file, "  address\n");
 593       break;
 594
 595     case USE_COMPARE:
 596       fprintf (file, "  compare\n");
 597       break;
 598
 599     default:
 600       gcc_unreachable ();
 601     }
 602
 603   fprintf (file, "  in statement ");
 604   print_gimple_stmt (file, use->stmt, 0, 0);
 605   fprintf (file, "\n");
 606
 607   fprintf (file, "  at position ");
 608   if (use->op_p)
 609     print_generic_expr (file, *use->op_p, TDF_SLIM);
 610   fprintf (file, "\n");
 611
 612   dump_iv (file, use->iv, false);
 613
 614   if (use->related_cands)
 615     {
 616       fprintf (file, "  related candidates ");
 617       dump_bitmap (file, use->related_cands);
 618     }
 619 }
 620
 621 /* Dumps information about the uses to FILE.  */
 622
 623 void
 624 dump_uses (FILE *file, struct ivopts_data *data)
 625 {
 626   unsigned i;
 627   struct iv_use *use;
 628
 629   for (i = 0; i < n_iv_uses (data); i++)
 630     {
 631       use = iv_use (data, i);
 632       do
 633         {
 634           dump_use (file, use);
 635           use = use->next;
 636         }
 637       while (use);
 638       fprintf (file, "\n");
 639     }
 640 }
 641
 642 /* Dumps information about induction variable candidate CAND to FILE.  */
 643
 644 void
 645 dump_cand (FILE *file, struct iv_cand *cand)
 646 {
 647   struct iv *iv = cand->iv;
 648
 649   fprintf (file, "candidate %d%s\n",
 650            cand->id, cand->important ? " (important)" : "");
 651
 652   if (cand->depends_on)
 653     {
 654       fprintf (file, "  depends on ");
 655       dump_bitmap (file, cand->depends_on);
 656     }
 657
 658   if (!iv)
 659     {
 660       fprintf (file, "  final value replacement\n");
 661       return;
 662     }
 663
 664   if (cand->var_before)
 665     {
 666       fprintf (file, "  var_before ");
 667       print_generic_expr (file, cand->var_before, TDF_SLIM);
 668       fprintf (file, "\n");
 669     }
 670   if (cand->var_after)
 671     {
 672       fprintf (file, "  var_after ");
 673       print_generic_expr (file, cand->var_after, TDF_SLIM);
 674       fprintf (file, "\n");
 675     }
 676
 677   switch (cand->pos)
 678     {
 679     case IP_NORMAL:
 680       fprintf (file, "  incremented before exit test\n");
 681       break;
 682
 683     case IP_BEFORE_USE:
 684       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 685       break;
 686
 687     case IP_AFTER_USE:
 688       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 689       break;
 690
 691     case IP_END:
 692       fprintf (file, "  incremented at end\n");
 693       break;
 694
 695     case IP_ORIGINAL:
 696       fprintf (file, "  original biv\n");
 697       break;
 698     }
 699
 700   dump_iv (file, iv, false);
 701 }
 702
 703 /* Returns the info for ssa version VER.  */
 704
 705 static inline struct version_info *
 706 ver_info (struct ivopts_data *data, unsigned ver)
 707 {
 708   return data->version_info + ver;
 709 }
 710
 711 /* Returns the info for ssa name NAME.  */
 712
 713 static inline struct version_info *
 714 name_info (struct ivopts_data *data, tree name)
 715 {
 716   return ver_info (data, SSA_NAME_VERSION (name));
 717 }
 718
 719 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 720    emitted in LOOP.  */
 721
 722 static bool
 723 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 724 {
 725   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 726
 727   gcc_assert (bb);
 728
 729   if (sbb == loop->latch)
 730     return true;
 731
 732   if (sbb != bb)
 733     return false;
 734
 735   return stmt == last_stmt (bb);
 736 }
 737
 738 /* Returns true if STMT if after the place where the original induction
 739    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 740    if the positions are identical.  */
 741
 742 static bool
 743 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 744 {
 745   basic_block cand_bb = gimple_bb (cand->incremented_at);
 746   basic_block stmt_bb = gimple_bb (stmt);
 747
 748   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 749     return false;
 750
 751   if (stmt_bb != cand_bb)
 752     return true;
 753
 754   if (true_if_equal
 755       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 756     return true;
 757   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 758 }
 759
 760 /* Returns true if STMT if after the place where the induction variable
 761    CAND is incremented in LOOP.  */
 762
 763 static bool
 764 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 765 {
 766   switch (cand->pos)
 767     {
 768     case IP_END:
 769       return false;
 770
 771     case IP_NORMAL:
 772       return stmt_after_ip_normal_pos (loop, stmt);
 773
 774     case IP_ORIGINAL:
 775     case IP_AFTER_USE:
 776       return stmt_after_inc_pos (cand, stmt, false);
 777
 778     case IP_BEFORE_USE:
 779       return stmt_after_inc_pos (cand, stmt, true);
 780
 781     default:
 782       gcc_unreachable ();
 783     }
 784 }
 785
 786 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 787
 788 static bool
 789 abnormal_ssa_name_p (tree exp)
 790 {
 791   if (!exp)
 792     return false;
 793
 794   if (TREE_CODE (exp) != SSA_NAME)
 795     return false;
 796
 797   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 798 }
 799
 800 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 801    abnormal phi node.  Callback for for_each_index.  */
 802
 803 static bool
 804 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 805                                   void *data ATTRIBUTE_UNUSED)
 806 {
 807   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 808     {
 809       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 810         return false;
 811       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 812         return false;
 813     }
 814
 815   return !abnormal_ssa_name_p (*index);
 816 }
 817
 818 /* Returns true if EXPR contains a ssa name that occurs in an
 819    abnormal phi node.  */
 820
 821 bool
 822 contains_abnormal_ssa_name_p (tree expr)
 823 {
 824   enum tree_code code;
 825   enum tree_code_class codeclass;
 826
 827   if (!expr)
 828     return false;
 829
 830   code = TREE_CODE (expr);
 831   codeclass = TREE_CODE_CLASS (code);
 832
 833   if (code == SSA_NAME)
 834     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 835
 836   if (code == INTEGER_CST
 837       || is_gimple_min_invariant (expr))
 838     return false;
 839
 840   if (code == ADDR_EXPR)
 841     return !for_each_index (&TREE_OPERAND (expr, 0),
 842                             idx_contains_abnormal_ssa_name_p,
 843                             NULL);
 844
 845   if (code == COND_EXPR)
 846     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 847       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 848       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 849
 850   switch (codeclass)
 851     {
 852     case tcc_binary:
 853     case tcc_comparison:
 854       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 855         return true;
 856
 857       /* Fallthru.  */
 858     case tcc_unary:
 859       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 860         return true;
 861
 862       break;
 863
 864     default:
 865       gcc_unreachable ();
 866     }
 867
 868   return false;
 869 }
 870
 871 /*  Returns the structure describing number of iterations determined from
 872     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 873
 874 static struct tree_niter_desc *
 875 niter_for_exit (struct ivopts_data *data, edge exit)
 876 {
 877   struct tree_niter_desc *desc;
 878   tree_niter_desc **slot;
 879
 880   if (!data->niters)
 881     {
 882       data->niters = new hash_map<edge, tree_niter_desc *>;
 883       slot = NULL;
 884     }
 885   else
 886     slot = data->niters->get (exit);
 887
 888   if (!slot)
 889     {
 890       /* Try to determine number of iterations.  We cannot safely work with ssa
 891          names that appear in phi nodes on abnormal edges, so that we do not
 892          create overlapping life ranges for them (PR 27283).  */
 893       desc = XNEW (struct tree_niter_desc);
 894       if (!number_of_iterations_exit (data->current_loop,
 895                                       exit, desc, true)
 896           || contains_abnormal_ssa_name_p (desc->niter))
 897         {
 898           XDELETE (desc);
 899           desc = NULL;
 900         }
 901       data->niters->put (exit, desc);
 902     }
 903   else
 904     desc = *slot;
 905
 906   return desc;
 907 }
 908
 909 /* Returns the structure describing number of iterations determined from
 910    single dominating exit of DATA->current_loop, or NULL if something
 911    goes wrong.  */
 912
 913 static struct tree_niter_desc *
 914 niter_for_single_dom_exit (struct ivopts_data *data)
 915 {
 916   edge exit = single_dom_exit (data->current_loop);
 917
 918   if (!exit)
 919     return NULL;
 920
 921   return niter_for_exit (data, exit);
 922 }
 923
 924 /* Initializes data structures used by the iv optimization pass, stored
 925    in DATA.  */
 926
 927 static void
 928 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 929 {
 930   data->version_info_size = 2 * num_ssa_names;
 931   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 932   data->relevant = BITMAP_ALLOC (NULL);
 933   data->important_candidates = BITMAP_ALLOC (NULL);
 934   data->max_inv_id = 0;
 935   data->niters = NULL;
 936   data->iv_uses.create (20);
 937   data->iv_candidates.create (20);
 938   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 939   data->inv_expr_id = 0;
 940   data->name_expansion_cache = NULL;
 941   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
 942   data->iv_common_cands.create (20);
 943   decl_rtl_to_reset.create (20);
 944   gcc_obstack_init (&data->iv_obstack);
 945 }
 946
 947 /* Returns a memory object to that EXPR points.  In case we are able to
 948    determine that it does not point to any such object, NULL is returned.  */
 949
 950 static tree
 951 determine_base_object (tree expr)
 952 {
 953   enum tree_code code = TREE_CODE (expr);
 954   tree base, obj;
 955
 956   /* If this is a pointer casted to any type, we need to determine
 957      the base object for the pointer; so handle conversions before
 958      throwing away non-pointer expressions.  */
 959   if (CONVERT_EXPR_P (expr))
 960     return determine_base_object (TREE_OPERAND (expr, 0));
 961
 962   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 963     return NULL_TREE;
 964
 965   switch (code)
 966     {
 967     case INTEGER_CST:
 968       return NULL_TREE;
 969
 970     case ADDR_EXPR:
 971       obj = TREE_OPERAND (expr, 0);
 972       base = get_base_address (obj);
 973
 974       if (!base)
 975         return expr;
 976
 977       if (TREE_CODE (base) == MEM_REF)
 978         return determine_base_object (TREE_OPERAND (base, 0));
 979
 980       return fold_convert (ptr_type_node,
 981                            build_fold_addr_expr (base));
 982
 983     case POINTER_PLUS_EXPR:
 984       return determine_base_object (TREE_OPERAND (expr, 0));
 985
 986     case PLUS_EXPR:
 987     case MINUS_EXPR:
 988       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 989       gcc_unreachable ();
 990
 991     default:
 992       return fold_convert (ptr_type_node, expr);
 993     }
 994 }
 995
 996 /* Return true if address expression with non-DECL_P operand appears
 997    in EXPR.  */
 998
 999 static bool
1000 contain_complex_addr_expr (tree expr)
1001 {
1002   bool res = false;
1003
1004   STRIP_NOPS (expr);
1005   switch (TREE_CODE (expr))
1006     {
1007     case POINTER_PLUS_EXPR:
1008     case PLUS_EXPR:
1009     case MINUS_EXPR:
1010       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1011       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1012       break;
1013
1014     case ADDR_EXPR:
1015       return (!DECL_P (TREE_OPERAND (expr, 0)));
1016
1017     default:
1018       return false;
1019     }
1020
1021   return res;
1022 }
1023
1024 /* Allocates an induction variable with given initial value BASE and step STEP
1025    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1026
1027 static struct iv *
1028 alloc_iv (struct ivopts_data *data, tree base, tree step,
1029           bool no_overflow = false)
1030 {
1031   tree expr = base;
1032   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1033                                               sizeof (struct iv));
1034   gcc_assert (step != NULL_TREE);
1035
1036   /* Lower address expression in base except ones with DECL_P as operand.
1037      By doing this:
1038        1) More accurate cost can be computed for address expressions;
1039        2) Duplicate candidates won't be created for bases in different
1040           forms, like &a[0] and &a.  */
1041   STRIP_NOPS (expr);
1042   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1043       || contain_complex_addr_expr (expr))
1044     {
1045       aff_tree comb;
1046       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1047       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1048     }
1049
1050   iv->base = base;
1051   iv->base_object = determine_base_object (base);
1052   iv->step = step;
1053   iv->biv_p = false;
1054   iv->have_use_for = false;
1055   iv->use_id = 0;
1056   iv->ssa_name = NULL_TREE;
1057   iv->no_overflow = no_overflow;
1058   iv->have_address_use = false;
1059
1060   return iv;
1061 }
1062
1063 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1064    doesn't overflow.  */
1065
1066 static void
1067 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1068         bool no_overflow)
1069 {
1070   struct version_info *info = name_info (data, iv);
1071
1072   gcc_assert (!info->iv);
1073
1074   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1075   info->iv = alloc_iv (data, base, step, no_overflow);
1076   info->iv->ssa_name = iv;
1077 }
1078
1079 /* Finds induction variable declaration for VAR.  */
1080
1081 static struct iv *
1082 get_iv (struct ivopts_data *data, tree var)
1083 {
1084   basic_block bb;
1085   tree type = TREE_TYPE (var);
1086
1087   if (!POINTER_TYPE_P (type)
1088       && !INTEGRAL_TYPE_P (type))
1089     return NULL;
1090
1091   if (!name_info (data, var)->iv)
1092     {
1093       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1094
1095       if (!bb
1096           || !flow_bb_inside_loop_p (data->current_loop, bb))
1097         set_iv (data, var, var, build_int_cst (type, 0), true);
1098     }
1099
1100   return name_info (data, var)->iv;
1101 }
1102
1103 /* Return the first non-invariant ssa var found in EXPR.  */
1104
1105 static tree
1106 extract_single_var_from_expr (tree expr)
1107 {
1108   int i, n;
1109   tree tmp;
1110   enum tree_code code;
1111
1112   if (!expr || is_gimple_min_invariant (expr))
1113     return NULL;
1114
1115   code = TREE_CODE (expr);
1116   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1117     {
1118       n = TREE_OPERAND_LENGTH (expr);
1119       for (i = 0; i < n; i++)
1120         {
1121           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1122
1123           if (tmp)
1124             return tmp;
1125         }
1126     }
1127   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1128 }
1129
1130 /* Finds basic ivs.  */
1131
1132 static bool
1133 find_bivs (struct ivopts_data *data)
1134 {
1135   gphi *phi;
1136   affine_iv iv;
1137   tree step, type, base, stop;
1138   bool found = false;
1139   struct loop *loop = data->current_loop;
1140   gphi_iterator psi;
1141
1142   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1143     {
1144       phi = psi.phi ();
1145
1146       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1147         continue;
1148
1149       if (virtual_operand_p (PHI_RESULT (phi)))
1150         continue;
1151
1152       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1153         continue;
1154
1155       if (integer_zerop (iv.step))
1156         continue;
1157
1158       step = iv.step;
1159       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1160       /* Stop expanding iv base at the first ssa var referred by iv step.
1161          Ideally we should stop at any ssa var, because that's expensive
1162          and unusual to happen, we just do it on the first one.
1163
1164          See PR64705 for the rationale.  */
1165       stop = extract_single_var_from_expr (step);
1166       base = expand_simple_operations (base, stop);
1167       if (contains_abnormal_ssa_name_p (base)
1168           || contains_abnormal_ssa_name_p (step))
1169         continue;
1170
1171       type = TREE_TYPE (PHI_RESULT (phi));
1172       base = fold_convert (type, base);
1173       if (step)
1174         {
1175           if (POINTER_TYPE_P (type))
1176             step = convert_to_ptrofftype (step);
1177           else
1178             step = fold_convert (type, step);
1179         }
1180
1181       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1182       found = true;
1183     }
1184
1185   return found;
1186 }
1187
1188 /* Marks basic ivs.  */
1189
1190 static void
1191 mark_bivs (struct ivopts_data *data)
1192 {
1193   gphi *phi;
1194   gimple *def;
1195   tree var;
1196   struct iv *iv, *incr_iv;
1197   struct loop *loop = data->current_loop;
1198   basic_block incr_bb;
1199   gphi_iterator psi;
1200
1201   data->bivs_not_used_in_addr = 0;
1202   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1203     {
1204       phi = psi.phi ();
1205
1206       iv = get_iv (data, PHI_RESULT (phi));
1207       if (!iv)
1208         continue;
1209
1210       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1211       def = SSA_NAME_DEF_STMT (var);
1212       /* Don't mark iv peeled from other one as biv.  */
1213       if (def
1214           && gimple_code (def) == GIMPLE_PHI
1215           && gimple_bb (def) == loop->header)
1216         continue;
1217
1218       incr_iv = get_iv (data, var);
1219       if (!incr_iv)
1220         continue;
1221
1222       /* If the increment is in the subloop, ignore it.  */
1223       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1224       if (incr_bb->loop_father != data->current_loop
1225           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1226         continue;
1227
1228       iv->biv_p = true;
1229       incr_iv->biv_p = true;
1230       if (iv->no_overflow)
1231         data->bivs_not_used_in_addr++;
1232       if (incr_iv->no_overflow)
1233         data->bivs_not_used_in_addr++;
1234     }
1235 }
1236
1237 /* Checks whether STMT defines a linear induction variable and stores its
1238    parameters to IV.  */
1239
1240 static bool
1241 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1242 {
1243   tree lhs, stop;
1244   struct loop *loop = data->current_loop;
1245
1246   iv->base = NULL_TREE;
1247   iv->step = NULL_TREE;
1248
1249   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1250     return false;
1251
1252   lhs = gimple_assign_lhs (stmt);
1253   if (TREE_CODE (lhs) != SSA_NAME)
1254     return false;
1255
1256   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1257     return false;
1258
1259   /* Stop expanding iv base at the first ssa var referred by iv step.
1260      Ideally we should stop at any ssa var, because that's expensive
1261      and unusual to happen, we just do it on the first one.
1262
1263      See PR64705 for the rationale.  */
1264   stop = extract_single_var_from_expr (iv->step);
1265   iv->base = expand_simple_operations (iv->base, stop);
1266   if (contains_abnormal_ssa_name_p (iv->base)
1267       || contains_abnormal_ssa_name_p (iv->step))
1268     return false;
1269
1270   /* If STMT could throw, then do not consider STMT as defining a GIV.
1271      While this will suppress optimizations, we can not safely delete this
1272      GIV and associated statements, even if it appears it is not used.  */
1273   if (stmt_could_throw_p (stmt))
1274     return false;
1275
1276   return true;
1277 }
1278
1279 /* Finds general ivs in statement STMT.  */
1280
1281 static void
1282 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1283 {
1284   affine_iv iv;
1285
1286   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1287     return;
1288
1289   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1290 }
1291
1292 /* Finds general ivs in basic block BB.  */
1293
1294 static void
1295 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1296 {
1297   gimple_stmt_iterator bsi;
1298
1299   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1300     find_givs_in_stmt (data, gsi_stmt (bsi));
1301 }
1302
1303 /* Finds general ivs.  */
1304
1305 static void
1306 find_givs (struct ivopts_data *data)
1307 {
1308   struct loop *loop = data->current_loop;
1309   basic_block *body = get_loop_body_in_dom_order (loop);
1310   unsigned i;
1311
1312   for (i = 0; i < loop->num_nodes; i++)
1313     find_givs_in_bb (data, body[i]);
1314   free (body);
1315 }
1316
1317 /* For each ssa name defined in LOOP determines whether it is an induction
1318    variable and if so, its initial value and step.  */
1319
1320 static bool
1321 find_induction_variables (struct ivopts_data *data)
1322 {
1323   unsigned i;
1324   bitmap_iterator bi;
1325
1326   if (!find_bivs (data))
1327     return false;
1328
1329   find_givs (data);
1330   mark_bivs (data);
1331
1332   if (dump_file && (dump_flags & TDF_DETAILS))
1333     {
1334       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1335
1336       if (niter)
1337         {
1338           fprintf (dump_file, "  number of iterations ");
1339           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1340           if (!integer_zerop (niter->may_be_zero))
1341             {
1342               fprintf (dump_file, "; zero if ");
1343               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1344             }
1345           fprintf (dump_file, "\n\n");
1346         };
1347
1348       fprintf (dump_file, "Induction variables:\n\n");
1349
1350       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1351         {
1352           if (ver_info (data, i)->iv)
1353             dump_iv (dump_file, ver_info (data, i)->iv, true);
1354         }
1355     }
1356
1357   return true;
1358 }
1359
1360 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1361    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1362    is the const offset stripped from IV base.  For uses of other types,
1363    ADDR_BASE and ADDR_OFFSET are zero by default.  */
1364
1365 static struct iv_use *
1366 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1367             gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1368             unsigned HOST_WIDE_INT addr_offset = 0)
1369 {
1370   struct iv_use *use = XCNEW (struct iv_use);
1371
1372   use->id = n_iv_uses (data);
1373   use->sub_id = 0;
1374   use->type = use_type;
1375   use->iv = iv;
1376   use->stmt = stmt;
1377   use->op_p = use_p;
1378   use->related_cands = BITMAP_ALLOC (NULL);
1379   use->next = NULL;
1380   use->addr_base = addr_base;
1381   use->addr_offset = addr_offset;
1382
1383   data->iv_uses.safe_push (use);
1384
1385   return use;
1386 }
1387
1388 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1389    The sub use is recorded under the one whose use id is ID_GROUP.  */
1390
1391 static struct iv_use *
1392 record_sub_use (struct ivopts_data *data, tree *use_p,
1393                     struct iv *iv, gimple *stmt, enum use_type use_type,
1394                     tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1395                     unsigned int id_group)
1396 {
1397   struct iv_use *use = XCNEW (struct iv_use);
1398   struct iv_use *group = iv_use (data, id_group);
1399
1400   use->id = group->id;
1401   use->sub_id = 0;
1402   use->type = use_type;
1403   use->iv = iv;
1404   use->stmt = stmt;
1405   use->op_p = use_p;
1406   use->related_cands = NULL;
1407   use->addr_base = addr_base;
1408   use->addr_offset = addr_offset;
1409
1410   /* Sub use list is maintained in offset ascending order.  */
1411   if (addr_offset <= group->addr_offset)
1412     {
1413       use->related_cands = group->related_cands;
1414       group->related_cands = NULL;
1415       use->next = group;
1416       data->iv_uses[id_group] = use;
1417     }
1418   else
1419     {
1420       struct iv_use *pre;
1421       do
1422         {
1423           pre = group;
1424           group = group->next;
1425         }
1426       while (group && addr_offset > group->addr_offset);
1427       use->next = pre->next;
1428       pre->next = use;
1429     }
1430
1431   return use;
1432 }
1433
1434 /* Checks whether OP is a loop-level invariant and if so, records it.
1435    NONLINEAR_USE is true if the invariant is used in a way we do not
1436    handle specially.  */
1437
1438 static void
1439 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1440 {
1441   basic_block bb;
1442   struct version_info *info;
1443
1444   if (TREE_CODE (op) != SSA_NAME
1445       || virtual_operand_p (op))
1446     return;
1447
1448   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1449   if (bb
1450       && flow_bb_inside_loop_p (data->current_loop, bb))
1451     return;
1452
1453   info = name_info (data, op);
1454   info->name = op;
1455   info->has_nonlin_use |= nonlinear_use;
1456   if (!info->inv_id)
1457     info->inv_id = ++data->max_inv_id;
1458   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1459 }
1460
1461 /* Checks whether the use OP is interesting and if so, records it.  */
1462
1463 static struct iv_use *
1464 find_interesting_uses_op (struct ivopts_data *data, tree op)
1465 {
1466   struct iv *iv;
1467   gimple *stmt;
1468   struct iv_use *use;
1469
1470   if (TREE_CODE (op) != SSA_NAME)
1471     return NULL;
1472
1473   iv = get_iv (data, op);
1474   if (!iv)
1475     return NULL;
1476
1477   if (iv->have_use_for)
1478     {
1479       use = iv_use (data, iv->use_id);
1480
1481       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1482       return use;
1483     }
1484
1485   if (integer_zerop (iv->step))
1486     {
1487       record_invariant (data, op, true);
1488       return NULL;
1489     }
1490   iv->have_use_for = true;
1491
1492   stmt = SSA_NAME_DEF_STMT (op);
1493   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1494               || is_gimple_assign (stmt));
1495
1496   use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1497   iv->use_id = use->id;
1498
1499   return use;
1500 }
1501
1502 /* Given a condition in statement STMT, checks whether it is a compare
1503    of an induction variable and an invariant.  If this is the case,
1504    CONTROL_VAR is set to location of the iv, BOUND to the location of
1505    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1506    induction variable descriptions, and true is returned.  If this is not
1507    the case, CONTROL_VAR and BOUND are set to the arguments of the
1508    condition and false is returned.  */
1509
1510 static bool
1511 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1512                        tree **control_var, tree **bound,
1513                        struct iv **iv_var, struct iv **iv_bound)
1514 {
1515   /* The objects returned when COND has constant operands.  */
1516   static struct iv const_iv;
1517   static tree zero;
1518   tree *op0 = &zero, *op1 = &zero;
1519   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1520   bool ret = false;
1521
1522   if (gimple_code (stmt) == GIMPLE_COND)
1523     {
1524       gcond *cond_stmt = as_a <gcond *> (stmt);
1525       op0 = gimple_cond_lhs_ptr (cond_stmt);
1526       op1 = gimple_cond_rhs_ptr (cond_stmt);
1527     }
1528   else
1529     {
1530       op0 = gimple_assign_rhs1_ptr (stmt);
1531       op1 = gimple_assign_rhs2_ptr (stmt);
1532     }
1533
1534   zero = integer_zero_node;
1535   const_iv.step = integer_zero_node;
1536
1537   if (TREE_CODE (*op0) == SSA_NAME)
1538     iv0 = get_iv (data, *op0);
1539   if (TREE_CODE (*op1) == SSA_NAME)
1540     iv1 = get_iv (data, *op1);
1541
1542   /* Exactly one of the compared values must be an iv, and the other one must
1543      be an invariant.  */
1544   if (!iv0 || !iv1)
1545     goto end;
1546
1547   if (integer_zerop (iv0->step))
1548     {
1549       /* Control variable may be on the other side.  */
1550       std::swap (op0, op1);
1551       std::swap (iv0, iv1);
1552     }
1553   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1554
1555 end:
1556   if (control_var)
1557     *control_var = op0;
1558   if (iv_var)
1559     *iv_var = iv0;
1560   if (bound)
1561     *bound = op1;
1562   if (iv_bound)
1563     *iv_bound = iv1;
1564
1565   return ret;
1566 }
1567
1568 /* Checks whether the condition in STMT is interesting and if so,
1569    records it.  */
1570
1571 static void
1572 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1573 {
1574   tree *var_p, *bound_p;
1575   struct iv *var_iv;
1576
1577   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1578     {
1579       find_interesting_uses_op (data, *var_p);
1580       find_interesting_uses_op (data, *bound_p);
1581       return;
1582     }
1583
1584   record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1585 }
1586
1587 /* Returns the outermost loop EXPR is obviously invariant in
1588    relative to the loop LOOP, i.e. if all its operands are defined
1589    outside of the returned loop.  Returns NULL if EXPR is not
1590    even obviously invariant in LOOP.  */
1591
1592 struct loop *
1593 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1594 {
1595   basic_block def_bb;
1596   unsigned i, len;
1597
1598   if (is_gimple_min_invariant (expr))
1599     return current_loops->tree_root;
1600
1601   if (TREE_CODE (expr) == SSA_NAME)
1602     {
1603       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1604       if (def_bb)
1605         {
1606           if (flow_bb_inside_loop_p (loop, def_bb))
1607             return NULL;
1608           return superloop_at_depth (loop,
1609                                      loop_depth (def_bb->loop_father) + 1);
1610         }
1611
1612       return current_loops->tree_root;
1613     }
1614
1615   if (!EXPR_P (expr))
1616     return NULL;
1617
1618   unsigned maxdepth = 0;
1619   len = TREE_OPERAND_LENGTH (expr);
1620   for (i = 0; i < len; i++)
1621     {
1622       struct loop *ivloop;
1623       if (!TREE_OPERAND (expr, i))
1624         continue;
1625
1626       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1627       if (!ivloop)
1628         return NULL;
1629       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1630     }
1631
1632   return superloop_at_depth (loop, maxdepth);
1633 }
1634
1635 /* Returns true if expression EXPR is obviously invariant in LOOP,
1636    i.e. if all its operands are defined outside of the LOOP.  LOOP
1637    should not be the function body.  */
1638
1639 bool
1640 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1641 {
1642   basic_block def_bb;
1643   unsigned i, len;
1644
1645   gcc_assert (loop_depth (loop) > 0);
1646
1647   if (is_gimple_min_invariant (expr))
1648     return true;
1649
1650   if (TREE_CODE (expr) == SSA_NAME)
1651     {
1652       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1653       if (def_bb
1654           && flow_bb_inside_loop_p (loop, def_bb))
1655         return false;
1656
1657       return true;
1658     }
1659
1660   if (!EXPR_P (expr))
1661     return false;
1662
1663   len = TREE_OPERAND_LENGTH (expr);
1664   for (i = 0; i < len; i++)
1665     if (TREE_OPERAND (expr, i)
1666         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1667       return false;
1668
1669   return true;
1670 }
1671
1672 /* Given expression EXPR which computes inductive values with respect
1673    to loop recorded in DATA, this function returns biv from which EXPR
1674    is derived by tracing definition chains of ssa variables in EXPR.  */
1675
1676 static struct iv*
1677 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1678 {
1679   struct iv *iv;
1680   unsigned i, n;
1681   tree e2, e1;
1682   enum tree_code code;
1683   gimple *stmt;
1684
1685   if (expr == NULL_TREE)
1686     return NULL;
1687
1688   if (is_gimple_min_invariant (expr))
1689     return NULL;
1690
1691   code = TREE_CODE (expr);
1692   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1693     {
1694       n = TREE_OPERAND_LENGTH (expr);
1695       for (i = 0; i < n; i++)
1696         {
1697           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1698           if (iv)
1699             return iv;
1700         }
1701     }
1702
1703   /* Stop if it's not ssa name.  */
1704   if (code != SSA_NAME)
1705     return NULL;
1706
1707   iv = get_iv (data, expr);
1708   if (!iv || integer_zerop (iv->step))
1709     return NULL;
1710   else if (iv->biv_p)
1711     return iv;
1712
1713   stmt = SSA_NAME_DEF_STMT (expr);
1714   if (gphi *phi = dyn_cast <gphi *> (stmt))
1715     {
1716       ssa_op_iter iter;
1717       use_operand_p use_p;
1718
1719       if (virtual_operand_p (gimple_phi_result (phi)))
1720         return NULL;
1721
1722       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1723         {
1724           tree use = USE_FROM_PTR (use_p);
1725           iv = find_deriving_biv_for_expr (data, use);
1726           if (iv)
1727             return iv;
1728         }
1729       return NULL;
1730     }
1731   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1732     return NULL;
1733
1734   e1 = gimple_assign_rhs1 (stmt);
1735   code = gimple_assign_rhs_code (stmt);
1736   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1737     return find_deriving_biv_for_expr (data, e1);
1738
1739   switch (code)
1740     {
1741     case MULT_EXPR:
1742     case PLUS_EXPR:
1743     case MINUS_EXPR:
1744     case POINTER_PLUS_EXPR:
1745       /* Increments, decrements and multiplications by a constant
1746          are simple.  */
1747       e2 = gimple_assign_rhs2 (stmt);
1748       iv = find_deriving_biv_for_expr (data, e2);
1749       if (iv)
1750         return iv;
1751
1752       /* Fallthru.  */
1753     CASE_CONVERT:
1754       /* Casts are simple.  */
1755       return find_deriving_biv_for_expr (data, e1);
1756
1757     default:
1758       break;
1759     }
1760
1761   return NULL;
1762 }
1763
1764 /* Record BIV, its predecessor and successor that they are used in
1765    address type uses.  */
1766
1767 static void
1768 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1769 {
1770   unsigned i;
1771   tree type, base_1, base_2;
1772   bitmap_iterator bi;
1773
1774   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1775       || biv->have_address_use || !biv->no_overflow)
1776     return;
1777
1778   type = TREE_TYPE (biv->base);
1779   if (!INTEGRAL_TYPE_P (type))
1780     return;
1781
1782   biv->have_address_use = true;
1783   data->bivs_not_used_in_addr--;
1784   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1785   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1786     {
1787       struct iv *iv = ver_info (data, i)->iv;
1788
1789       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1790           || iv->have_address_use || !iv->no_overflow)
1791         continue;
1792
1793       if (type != TREE_TYPE (iv->base)
1794           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1795         continue;
1796
1797       if (!operand_equal_p (biv->step, iv->step, 0))
1798         continue;
1799
1800       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1801       if (operand_equal_p (base_1, iv->base, 0)
1802           || operand_equal_p (base_2, biv->base, 0))
1803         {
1804           iv->have_address_use = true;
1805           data->bivs_not_used_in_addr--;
1806         }
1807     }
1808 }
1809
1810 /* Cumulates the steps of indices into DATA and replaces their values with the
1811    initial ones.  Returns false when the value of the index cannot be determined.
1812    Callback for for_each_index.  */
1813
1814 struct ifs_ivopts_data
1815 {
1816   struct ivopts_data *ivopts_data;
1817   gimple *stmt;
1818   tree step;
1819 };
1820
1821 static bool
1822 idx_find_step (tree base, tree *idx, void *data)
1823 {
1824   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1825   struct iv *iv;
1826   bool use_overflow_semantics = false;
1827   tree step, iv_base, iv_step, lbound, off;
1828   struct loop *loop = dta->ivopts_data->current_loop;
1829
1830   /* If base is a component ref, require that the offset of the reference
1831      be invariant.  */
1832   if (TREE_CODE (base) == COMPONENT_REF)
1833     {
1834       off = component_ref_field_offset (base);
1835       return expr_invariant_in_loop_p (loop, off);
1836     }
1837
1838   /* If base is array, first check whether we will be able to move the
1839      reference out of the loop (in order to take its address in strength
1840      reduction).  In order for this to work we need both lower bound
1841      and step to be loop invariants.  */
1842   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1843     {
1844       /* Moreover, for a range, the size needs to be invariant as well.  */
1845       if (TREE_CODE (base) == ARRAY_RANGE_REF
1846           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1847         return false;
1848
1849       step = array_ref_element_size (base);
1850       lbound = array_ref_low_bound (base);
1851
1852       if (!expr_invariant_in_loop_p (loop, step)
1853           || !expr_invariant_in_loop_p (loop, lbound))
1854         return false;
1855     }
1856
1857   if (TREE_CODE (*idx) != SSA_NAME)
1858     return true;
1859
1860   iv = get_iv (dta->ivopts_data, *idx);
1861   if (!iv)
1862     return false;
1863
1864   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1865           *&x[0], which is not folded and does not trigger the
1866           ARRAY_REF path below.  */
1867   *idx = iv->base;
1868
1869   if (integer_zerop (iv->step))
1870     return true;
1871
1872   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1873     {
1874       step = array_ref_element_size (base);
1875
1876       /* We only handle addresses whose step is an integer constant.  */
1877       if (TREE_CODE (step) != INTEGER_CST)
1878         return false;
1879     }
1880   else
1881     /* The step for pointer arithmetics already is 1 byte.  */
1882     step = size_one_node;
1883
1884   iv_base = iv->base;
1885   iv_step = iv->step;
1886   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1887     use_overflow_semantics = true;
1888
1889   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1890                             sizetype, &iv_base, &iv_step, dta->stmt,
1891                             use_overflow_semantics))
1892     {
1893       /* The index might wrap.  */
1894       return false;
1895     }
1896
1897   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1898   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1899
1900   if (dta->ivopts_data->bivs_not_used_in_addr)
1901     {
1902       if (!iv->biv_p)
1903         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1904
1905       record_biv_for_address_use (dta->ivopts_data, iv);
1906     }
1907   return true;
1908 }
1909
1910 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1911    object is passed to it in DATA.  */
1912
1913 static bool
1914 idx_record_use (tree base, tree *idx,
1915                 void *vdata)
1916 {
1917   struct ivopts_data *data = (struct ivopts_data *) vdata;
1918   find_interesting_uses_op (data, *idx);
1919   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1920     {
1921       find_interesting_uses_op (data, array_ref_element_size (base));
1922       find_interesting_uses_op (data, array_ref_low_bound (base));
1923     }
1924   return true;
1925 }
1926
1927 /* If we can prove that TOP = cst * BOT for some constant cst,
1928    store cst to MUL and return true.  Otherwise return false.
1929    The returned value is always sign-extended, regardless of the
1930    signedness of TOP and BOT.  */
1931
1932 static bool
1933 constant_multiple_of (tree top, tree bot, widest_int *mul)
1934 {
1935   tree mby;
1936   enum tree_code code;
1937   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1938   widest_int res, p0, p1;
1939
1940   STRIP_NOPS (top);
1941   STRIP_NOPS (bot);
1942
1943   if (operand_equal_p (top, bot, 0))
1944     {
1945       *mul = 1;
1946       return true;
1947     }
1948
1949   code = TREE_CODE (top);
1950   switch (code)
1951     {
1952     case MULT_EXPR:
1953       mby = TREE_OPERAND (top, 1);
1954       if (TREE_CODE (mby) != INTEGER_CST)
1955         return false;
1956
1957       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1958         return false;
1959
1960       *mul = wi::sext (res * wi::to_widest (mby), precision);
1961       return true;
1962
1963     case PLUS_EXPR:
1964     case MINUS_EXPR:
1965       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1966           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1967         return false;
1968
1969       if (code == MINUS_EXPR)
1970         p1 = -p1;
1971       *mul = wi::sext (p0 + p1, precision);
1972       return true;
1973
1974     case INTEGER_CST:
1975       if (TREE_CODE (bot) != INTEGER_CST)
1976         return false;
1977
1978       p0 = widest_int::from (top, SIGNED);
1979       p1 = widest_int::from (bot, SIGNED);
1980       if (p1 == 0)
1981         return false;
1982       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1983       return res == 0;
1984
1985     default:
1986       return false;
1987     }
1988 }
1989
1990 /* Return true if memory reference REF with step STEP may be unaligned.  */
1991
1992 static bool
1993 may_be_unaligned_p (tree ref, tree step)
1994 {
1995   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1996      thus they are not misaligned.  */
1997   if (TREE_CODE (ref) == TARGET_MEM_REF)
1998     return false;
1999
2000   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2001   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2002     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2003
2004   unsigned HOST_WIDE_INT bitpos;
2005   unsigned int ref_align;
2006   get_object_alignment_1 (ref, &ref_align, &bitpos);
2007   if (ref_align < align
2008       || (bitpos % align) != 0
2009       || (bitpos % BITS_PER_UNIT) != 0)
2010     return true;
2011
2012   unsigned int trailing_zeros = tree_ctz (step);
2013   if (trailing_zeros < HOST_BITS_PER_INT
2014       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2015     return true;
2016
2017   return false;
2018 }
2019
2020 /* Return true if EXPR may be non-addressable.   */
2021
2022 bool
2023 may_be_nonaddressable_p (tree expr)
2024 {
2025   switch (TREE_CODE (expr))
2026     {
2027     case TARGET_MEM_REF:
2028       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2029          target, thus they are always addressable.  */
2030       return false;
2031
2032     case MEM_REF:
2033       /* Likewise for MEM_REFs, modulo the storage order.  */
2034       return REF_REVERSE_STORAGE_ORDER (expr);
2035
2036     case BIT_FIELD_REF:
2037       if (REF_REVERSE_STORAGE_ORDER (expr))
2038         return true;
2039       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2040
2041     case COMPONENT_REF:
2042       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2043         return true;
2044       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2045              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2046
2047     case ARRAY_REF:
2048     case ARRAY_RANGE_REF:
2049       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2050         return true;
2051       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2052
2053     case VIEW_CONVERT_EXPR:
2054       /* This kind of view-conversions may wrap non-addressable objects
2055          and make them look addressable.  After some processing the
2056          non-addressability may be uncovered again, causing ADDR_EXPRs
2057          of inappropriate objects to be built.  */
2058       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2059           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2060         return true;
2061       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2062
2063     CASE_CONVERT:
2064       return true;
2065
2066     default:
2067       break;
2068     }
2069
2070   return false;
2071 }
2072
2073 static tree
2074 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2075
2076 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2077    If there is an existing use which has same stripped iv base and step,
2078    this function records this one as a sub use to that; otherwise records
2079    it as a normal one.  */
2080
2081 static struct iv_use *
2082 record_group_use (struct ivopts_data *data, tree *use_p,
2083                   struct iv *iv, gimple *stmt, enum use_type use_type)
2084 {
2085   unsigned int i;
2086   struct iv_use *use;
2087   tree addr_base;
2088   unsigned HOST_WIDE_INT addr_offset;
2089
2090   /* Only support sub use for address type uses, that is, with base
2091      object.  */
2092   if (!iv->base_object)
2093     return record_use (data, use_p, iv, stmt, use_type);
2094
2095   addr_base = strip_offset (iv->base, &addr_offset);
2096   for (i = 0; i < n_iv_uses (data); i++)
2097     {
2098       use = iv_use (data, i);
2099       if (use->type != USE_ADDRESS || !use->iv->base_object)
2100         continue;
2101
2102       /* Check if it has the same stripped base and step.  */
2103       if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2104           && operand_equal_p (iv->step, use->iv->step, 0)
2105           && operand_equal_p (addr_base, use->addr_base, 0))
2106         break;
2107     }
2108
2109   if (i == n_iv_uses (data))
2110     return record_use (data, use_p, iv, stmt,
2111                        use_type, addr_base, addr_offset);
2112   else
2113     return record_sub_use (data, use_p, iv, stmt,
2114                            use_type, addr_base, addr_offset, i);
2115 }
2116
2117 /* Finds addresses in *OP_P inside STMT.  */
2118
2119 static void
2120 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2121                                tree *op_p)
2122 {
2123   tree base = *op_p, step = size_zero_node;
2124   struct iv *civ;
2125   struct ifs_ivopts_data ifs_ivopts_data;
2126
2127   /* Do not play with volatile memory references.  A bit too conservative,
2128      perhaps, but safe.  */
2129   if (gimple_has_volatile_ops (stmt))
2130     goto fail;
2131
2132   /* Ignore bitfields for now.  Not really something terribly complicated
2133      to handle.  TODO.  */
2134   if (TREE_CODE (base) == BIT_FIELD_REF)
2135     goto fail;
2136
2137   base = unshare_expr (base);
2138
2139   if (TREE_CODE (base) == TARGET_MEM_REF)
2140     {
2141       tree type = build_pointer_type (TREE_TYPE (base));
2142       tree astep;
2143
2144       if (TMR_BASE (base)
2145           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2146         {
2147           civ = get_iv (data, TMR_BASE (base));
2148           if (!civ)
2149             goto fail;
2150
2151           TMR_BASE (base) = civ->base;
2152           step = civ->step;
2153         }
2154       if (TMR_INDEX2 (base)
2155           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2156         {
2157           civ = get_iv (data, TMR_INDEX2 (base));
2158           if (!civ)
2159             goto fail;
2160
2161           TMR_INDEX2 (base) = civ->base;
2162           step = civ->step;
2163         }
2164       if (TMR_INDEX (base)
2165           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2166         {
2167           civ = get_iv (data, TMR_INDEX (base));
2168           if (!civ)
2169             goto fail;
2170
2171           TMR_INDEX (base) = civ->base;
2172           astep = civ->step;
2173
2174           if (astep)
2175             {
2176               if (TMR_STEP (base))
2177                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2178
2179               step = fold_build2 (PLUS_EXPR, type, step, astep);
2180             }
2181         }
2182
2183       if (integer_zerop (step))
2184         goto fail;
2185       base = tree_mem_ref_addr (type, base);
2186     }
2187   else
2188     {
2189       ifs_ivopts_data.ivopts_data = data;
2190       ifs_ivopts_data.stmt = stmt;
2191       ifs_ivopts_data.step = size_zero_node;
2192       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2193           || integer_zerop (ifs_ivopts_data.step))
2194         goto fail;
2195       step = ifs_ivopts_data.step;
2196
2197       /* Check that the base expression is addressable.  This needs
2198          to be done after substituting bases of IVs into it.  */
2199       if (may_be_nonaddressable_p (base))
2200         goto fail;
2201
2202       /* Moreover, on strict alignment platforms, check that it is
2203          sufficiently aligned.  */
2204       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2205         goto fail;
2206
2207       base = build_fold_addr_expr (base);
2208
2209       /* Substituting bases of IVs into the base expression might
2210          have caused folding opportunities.  */
2211       if (TREE_CODE (base) == ADDR_EXPR)
2212         {
2213           tree *ref = &TREE_OPERAND (base, 0);
2214           while (handled_component_p (*ref))
2215             ref = &TREE_OPERAND (*ref, 0);
2216           if (TREE_CODE (*ref) == MEM_REF)
2217             {
2218               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2219                                       TREE_OPERAND (*ref, 0),
2220                                       TREE_OPERAND (*ref, 1));
2221               if (tem)
2222                 *ref = tem;
2223             }
2224         }
2225     }
2226
2227   civ = alloc_iv (data, base, step);
2228   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2229   return;
2230
2231 fail:
2232   for_each_index (op_p, idx_record_use, data);
2233 }
2234
2235 /* Finds and records invariants used in STMT.  */
2236
2237 static void
2238 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2239 {
2240   ssa_op_iter iter;
2241   use_operand_p use_p;
2242   tree op;
2243
2244   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2245     {
2246       op = USE_FROM_PTR (use_p);
2247       record_invariant (data, op, false);
2248     }
2249 }
2250
2251 /* Finds interesting uses of induction variables in the statement STMT.  */
2252
2253 static void
2254 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2255 {
2256   struct iv *iv;
2257   tree op, *lhs, *rhs;
2258   ssa_op_iter iter;
2259   use_operand_p use_p;
2260   enum tree_code code;
2261
2262   find_invariants_stmt (data, stmt);
2263
2264   if (gimple_code (stmt) == GIMPLE_COND)
2265     {
2266       find_interesting_uses_cond (data, stmt);
2267       return;
2268     }
2269
2270   if (is_gimple_assign (stmt))
2271     {
2272       lhs = gimple_assign_lhs_ptr (stmt);
2273       rhs = gimple_assign_rhs1_ptr (stmt);
2274
2275       if (TREE_CODE (*lhs) == SSA_NAME)
2276         {
2277           /* If the statement defines an induction variable, the uses are not
2278              interesting by themselves.  */
2279
2280           iv = get_iv (data, *lhs);
2281
2282           if (iv && !integer_zerop (iv->step))
2283             return;
2284         }
2285
2286       code = gimple_assign_rhs_code (stmt);
2287       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2288           && (REFERENCE_CLASS_P (*rhs)
2289               || is_gimple_val (*rhs)))
2290         {
2291           if (REFERENCE_CLASS_P (*rhs))
2292             find_interesting_uses_address (data, stmt, rhs);
2293           else
2294             find_interesting_uses_op (data, *rhs);
2295
2296           if (REFERENCE_CLASS_P (*lhs))
2297             find_interesting_uses_address (data, stmt, lhs);
2298           return;
2299         }
2300       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2301         {
2302           find_interesting_uses_cond (data, stmt);
2303           return;
2304         }
2305
2306       /* TODO -- we should also handle address uses of type
2307
2308          memory = call (whatever);
2309
2310          and
2311
2312          call (memory).  */
2313     }
2314
2315   if (gimple_code (stmt) == GIMPLE_PHI
2316       && gimple_bb (stmt) == data->current_loop->header)
2317     {
2318       iv = get_iv (data, PHI_RESULT (stmt));
2319
2320       if (iv && !integer_zerop (iv->step))
2321         return;
2322     }
2323
2324   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2325     {
2326       op = USE_FROM_PTR (use_p);
2327
2328       if (TREE_CODE (op) != SSA_NAME)
2329         continue;
2330
2331       iv = get_iv (data, op);
2332       if (!iv)
2333         continue;
2334
2335       find_interesting_uses_op (data, op);
2336     }
2337 }
2338
2339 /* Finds interesting uses of induction variables outside of loops
2340    on loop exit edge EXIT.  */
2341
2342 static void
2343 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2344 {
2345   gphi *phi;
2346   gphi_iterator psi;
2347   tree def;
2348
2349   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2350     {
2351       phi = psi.phi ();
2352       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2353       if (!virtual_operand_p (def))
2354         find_interesting_uses_op (data, def);
2355     }
2356 }
2357
2358 /* Finds uses of the induction variables that are interesting.  */
2359
2360 static void
2361 find_interesting_uses (struct ivopts_data *data)
2362 {
2363   basic_block bb;
2364   gimple_stmt_iterator bsi;
2365   basic_block *body = get_loop_body (data->current_loop);
2366   unsigned i;
2367   struct version_info *info;
2368   edge e;
2369
2370   if (dump_file && (dump_flags & TDF_DETAILS))
2371     fprintf (dump_file, "Uses:\n\n");
2372
2373   for (i = 0; i < data->current_loop->num_nodes; i++)
2374     {
2375       edge_iterator ei;
2376       bb = body[i];
2377
2378       FOR_EACH_EDGE (e, ei, bb->succs)
2379         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2380             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2381           find_interesting_uses_outside (data, e);
2382
2383       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2384         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2385       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2386         if (!is_gimple_debug (gsi_stmt (bsi)))
2387           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2388     }
2389
2390   if (dump_file && (dump_flags & TDF_DETAILS))
2391     {
2392       bitmap_iterator bi;
2393
2394       fprintf (dump_file, "\n");
2395
2396       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2397         {
2398           info = ver_info (data, i);
2399           if (info->inv_id)
2400             {
2401               fprintf (dump_file, "  ");
2402               print_generic_expr (dump_file, info->name, TDF_SLIM);
2403               fprintf (dump_file, " is invariant (%d)%s\n",
2404                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2405             }
2406         }
2407
2408       fprintf (dump_file, "\n");
2409     }
2410
2411   free (body);
2412 }
2413
2414 /* Compute maximum offset of [base + offset] addressing mode
2415    for memory reference represented by USE.  */
2416
2417 static HOST_WIDE_INT
2418 compute_max_addr_offset (struct iv_use *use)
2419 {
2420   int width;
2421   rtx reg, addr;
2422   HOST_WIDE_INT i, off;
2423   unsigned list_index, num;
2424   addr_space_t as;
2425   machine_mode mem_mode, addr_mode;
2426   static vec<HOST_WIDE_INT> max_offset_list;
2427
2428   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2429   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2430
2431   num = max_offset_list.length ();
2432   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2433   if (list_index >= num)
2434     {
2435       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2436       for (; num < max_offset_list.length (); num++)
2437         max_offset_list[num] = -1;
2438     }
2439
2440   off = max_offset_list[list_index];
2441   if (off != -1)
2442     return off;
2443
2444   addr_mode = targetm.addr_space.address_mode (as);
2445   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2446   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2447
2448   width = GET_MODE_BITSIZE (addr_mode) - 1;
2449   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2450     width = HOST_BITS_PER_WIDE_INT - 1;
2451
2452   for (i = width; i > 0; i--)
2453     {
2454       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2455       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2456       if (memory_address_addr_space_p (mem_mode, addr, as))
2457         break;
2458
2459       /* For some strict-alignment targets, the offset must be naturally
2460          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2461       off = ((unsigned HOST_WIDE_INT) 1 << i);
2462       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2463         {
2464           off -= GET_MODE_SIZE (mem_mode);
2465           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2466           if (memory_address_addr_space_p (mem_mode, addr, as))
2467             break;
2468         }
2469     }
2470   if (i == 0)
2471     off = 0;
2472
2473   max_offset_list[list_index] = off;
2474   return off;
2475 }
2476
2477 /* Check if all small groups should be split.  Return true if and
2478    only if:
2479
2480      1) At least one groups contain two uses with different offsets.
2481      2) No group contains more than two uses with different offsets.
2482
2483    Return false otherwise.  We want to split such groups because:
2484
2485      1) Small groups don't have much benefit and may interfer with
2486         general candidate selection.
2487      2) Size for problem with only small groups is usually small and
2488         general algorithm can handle it well.
2489
2490    TODO -- Above claim may not hold when auto increment is supported.  */
2491
2492 static bool
2493 split_all_small_groups (struct ivopts_data *data)
2494 {
2495   bool split_p = false;
2496   unsigned int i, n, distinct;
2497   struct iv_use *pre, *use;
2498
2499   n = n_iv_uses (data);
2500   for (i = 0; i < n; i++)
2501     {
2502       use = iv_use (data, i);
2503       if (!use->next)
2504         continue;
2505
2506       distinct = 1;
2507       gcc_assert (use->type == USE_ADDRESS);
2508       for (pre = use, use = use->next; use; pre = use, use = use->next)
2509         {
2510           if (pre->addr_offset != use->addr_offset)
2511             distinct++;
2512
2513           if (distinct > 2)
2514             return false;
2515         }
2516       if (distinct == 2)
2517         split_p = true;
2518     }
2519
2520   return split_p;
2521 }
2522
2523 /* For each group of address type uses, this function further groups
2524    these uses according to the maximum offset supported by target's
2525    [base + offset] addressing mode.  */
2526
2527 static void
2528 group_address_uses (struct ivopts_data *data)
2529 {
2530   HOST_WIDE_INT max_offset = -1;
2531   unsigned int i, n, sub_id;
2532   struct iv_use *pre, *use;
2533   unsigned HOST_WIDE_INT addr_offset_first;
2534
2535   /* Reset max offset to split all small groups.  */
2536   if (split_all_small_groups (data))
2537     max_offset = 0;
2538
2539   n = n_iv_uses (data);
2540   for (i = 0; i < n; i++)
2541     {
2542       use = iv_use (data, i);
2543       if (!use->next)
2544         continue;
2545
2546       gcc_assert (use->type == USE_ADDRESS);
2547       if (max_offset != 0)
2548         max_offset = compute_max_addr_offset (use);
2549
2550       while (use)
2551         {
2552           sub_id = 0;
2553           addr_offset_first = use->addr_offset;
2554           /* Only uses with offset that can fit in offset part against
2555              the first use can be grouped together.  */
2556           for (pre = use, use = use->next;
2557                use && (use->addr_offset - addr_offset_first
2558                        <= (unsigned HOST_WIDE_INT) max_offset);
2559                pre = use, use = use->next)
2560             {
2561               use->id = pre->id;
2562               use->sub_id = ++sub_id;
2563             }
2564
2565           /* Break the list and create new group.  */
2566           if (use)
2567             {
2568               pre->next = NULL;
2569               use->id = n_iv_uses (data);
2570               use->related_cands = BITMAP_ALLOC (NULL);
2571               data->iv_uses.safe_push (use);
2572             }
2573         }
2574     }
2575
2576   if (dump_file && (dump_flags & TDF_DETAILS))
2577     dump_uses (dump_file, data);
2578 }
2579
2580 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2581    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2582    we are at the top-level of the processed address.  */
2583
2584 static tree
2585 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2586                 HOST_WIDE_INT *offset)
2587 {
2588   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2589   enum tree_code code;
2590   tree type, orig_type = TREE_TYPE (expr);
2591   HOST_WIDE_INT off0, off1, st;
2592   tree orig_expr = expr;
2593
2594   STRIP_NOPS (expr);
2595
2596   type = TREE_TYPE (expr);
2597   code = TREE_CODE (expr);
2598   *offset = 0;
2599
2600   switch (code)
2601     {
2602     case INTEGER_CST:
2603       if (!cst_and_fits_in_hwi (expr)
2604           || integer_zerop (expr))
2605         return orig_expr;
2606
2607       *offset = int_cst_value (expr);
2608       return build_int_cst (orig_type, 0);
2609
2610     case POINTER_PLUS_EXPR:
2611     case PLUS_EXPR:
2612     case MINUS_EXPR:
2613       op0 = TREE_OPERAND (expr, 0);
2614       op1 = TREE_OPERAND (expr, 1);
2615
2616       op0 = strip_offset_1 (op0, false, false, &off0);
2617       op1 = strip_offset_1 (op1, false, false, &off1);
2618
2619       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2620       if (op0 == TREE_OPERAND (expr, 0)
2621           && op1 == TREE_OPERAND (expr, 1))
2622         return orig_expr;
2623
2624       if (integer_zerop (op1))
2625         expr = op0;
2626       else if (integer_zerop (op0))
2627         {
2628           if (code == MINUS_EXPR)
2629             expr = fold_build1 (NEGATE_EXPR, type, op1);
2630           else
2631             expr = op1;
2632         }
2633       else
2634         expr = fold_build2 (code, type, op0, op1);
2635
2636       return fold_convert (orig_type, expr);
2637
2638     case MULT_EXPR:
2639       op1 = TREE_OPERAND (expr, 1);
2640       if (!cst_and_fits_in_hwi (op1))
2641         return orig_expr;
2642
2643       op0 = TREE_OPERAND (expr, 0);
2644       op0 = strip_offset_1 (op0, false, false, &off0);
2645       if (op0 == TREE_OPERAND (expr, 0))
2646         return orig_expr;
2647
2648       *offset = off0 * int_cst_value (op1);
2649       if (integer_zerop (op0))
2650         expr = op0;
2651       else
2652         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2653
2654       return fold_convert (orig_type, expr);
2655
2656     case ARRAY_REF:
2657     case ARRAY_RANGE_REF:
2658       if (!inside_addr)
2659         return orig_expr;
2660
2661       step = array_ref_element_size (expr);
2662       if (!cst_and_fits_in_hwi (step))
2663         break;
2664
2665       st = int_cst_value (step);
2666       op1 = TREE_OPERAND (expr, 1);
2667       op1 = strip_offset_1 (op1, false, false, &off1);
2668       *offset = off1 * st;
2669
2670       if (top_compref
2671           && integer_zerop (op1))
2672         {
2673           /* Strip the component reference completely.  */
2674           op0 = TREE_OPERAND (expr, 0);
2675           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2676           *offset += off0;
2677           return op0;
2678         }
2679       break;
2680
2681     case COMPONENT_REF:
2682       {
2683         tree field;
2684
2685         if (!inside_addr)
2686           return orig_expr;
2687
2688         tmp = component_ref_field_offset (expr);
2689         field = TREE_OPERAND (expr, 1);
2690         if (top_compref
2691             && cst_and_fits_in_hwi (tmp)
2692             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2693           {
2694             HOST_WIDE_INT boffset, abs_off;
2695
2696             /* Strip the component reference completely.  */
2697             op0 = TREE_OPERAND (expr, 0);
2698             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2699             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2700             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2701             if (boffset < 0)
2702               abs_off = -abs_off;
2703
2704             *offset = off0 + int_cst_value (tmp) + abs_off;
2705             return op0;
2706           }
2707       }
2708       break;
2709
2710     case ADDR_EXPR:
2711       op0 = TREE_OPERAND (expr, 0);
2712       op0 = strip_offset_1 (op0, true, true, &off0);
2713       *offset += off0;
2714
2715       if (op0 == TREE_OPERAND (expr, 0))
2716         return orig_expr;
2717
2718       expr = build_fold_addr_expr (op0);
2719       return fold_convert (orig_type, expr);
2720
2721     case MEM_REF:
2722       /* ???  Offset operand?  */
2723       inside_addr = false;
2724       break;
2725
2726     default:
2727       return orig_expr;
2728     }
2729
2730   /* Default handling of expressions for that we want to recurse into
2731      the first operand.  */
2732   op0 = TREE_OPERAND (expr, 0);
2733   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2734   *offset += off0;
2735
2736   if (op0 == TREE_OPERAND (expr, 0)
2737       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2738     return orig_expr;
2739
2740   expr = copy_node (expr);
2741   TREE_OPERAND (expr, 0) = op0;
2742   if (op1)
2743     TREE_OPERAND (expr, 1) = op1;
2744
2745   /* Inside address, we might strip the top level component references,
2746      thus changing type of the expression.  Handling of ADDR_EXPR
2747      will fix that.  */
2748   expr = fold_convert (orig_type, expr);
2749
2750   return expr;
2751 }
2752
2753 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2754
2755 static tree
2756 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2757 {
2758   HOST_WIDE_INT off;
2759   tree core = strip_offset_1 (expr, false, false, &off);
2760   *offset = off;
2761   return core;
2762 }
2763
2764 /* Returns variant of TYPE that can be used as base for different uses.
2765    We return unsigned type with the same precision, which avoids problems
2766    with overflows.  */
2767
2768 static tree
2769 generic_type_for (tree type)
2770 {
2771   if (POINTER_TYPE_P (type))
2772     return unsigned_type_for (type);
2773
2774   if (TYPE_UNSIGNED (type))
2775     return type;
2776
2777   return unsigned_type_for (type);
2778 }
2779
2780 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2781    the bitmap to that we should store it.  */
2782
2783 static struct ivopts_data *fd_ivopts_data;
2784 static tree
2785 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2786 {
2787   bitmap *depends_on = (bitmap *) data;
2788   struct version_info *info;
2789
2790   if (TREE_CODE (*expr_p) != SSA_NAME)
2791     return NULL_TREE;
2792   info = name_info (fd_ivopts_data, *expr_p);
2793
2794   if (!info->inv_id || info->has_nonlin_use)
2795     return NULL_TREE;
2796
2797   if (!*depends_on)
2798     *depends_on = BITMAP_ALLOC (NULL);
2799   bitmap_set_bit (*depends_on, info->inv_id);
2800
2801   return NULL_TREE;
2802 }
2803
2804 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2805    position to POS.  If USE is not NULL, the candidate is set as related to
2806    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2807    replacement of the final value of the iv by a direct computation.  */
2808
2809 static struct iv_cand *
2810 add_candidate_1 (struct ivopts_data *data,
2811                  tree base, tree step, bool important, enum iv_position pos,
2812                  struct iv_use *use, gimple *incremented_at,
2813                  struct iv *orig_iv = NULL)
2814 {
2815   unsigned i;
2816   struct iv_cand *cand = NULL;
2817   tree type, orig_type;
2818
2819   /* -fkeep-gc-roots-live means that we have to keep a real pointer
2820      live, but the ivopts code may replace a real pointer with one
2821      pointing before or after the memory block that is then adjusted
2822      into the memory block during the loop.  FIXME: It would likely be
2823      better to actually force the pointer live and still use ivopts;
2824      for example, it would be enough to write the pointer into memory
2825      and keep it there until after the loop.  */
2826   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2827     return NULL;
2828
2829   /* For non-original variables, make sure their values are computed in a type
2830      that does not invoke undefined behavior on overflows (since in general,
2831      we cannot prove that these induction variables are non-wrapping).  */
2832   if (pos != IP_ORIGINAL)
2833     {
2834       orig_type = TREE_TYPE (base);
2835       type = generic_type_for (orig_type);
2836       if (type != orig_type)
2837         {
2838           base = fold_convert (type, base);
2839           step = fold_convert (type, step);
2840         }
2841     }
2842
2843   for (i = 0; i < n_iv_cands (data); i++)
2844     {
2845       cand = iv_cand (data, i);
2846
2847       if (cand->pos != pos)
2848         continue;
2849
2850       if (cand->incremented_at != incremented_at
2851           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2852               && cand->ainc_use != use))
2853         continue;
2854
2855       if (!cand->iv)
2856         {
2857           if (!base && !step)
2858             break;
2859
2860           continue;
2861         }
2862
2863       if (!base && !step)
2864         continue;
2865
2866       if (operand_equal_p (base, cand->iv->base, 0)
2867           && operand_equal_p (step, cand->iv->step, 0)
2868           && (TYPE_PRECISION (TREE_TYPE (base))
2869               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2870         break;
2871     }
2872
2873   if (i == n_iv_cands (data))
2874     {
2875       cand = XCNEW (struct iv_cand);
2876       cand->id = i;
2877
2878       if (!base && !step)
2879         cand->iv = NULL;
2880       else
2881         cand->iv = alloc_iv (data, base, step);
2882
2883       cand->pos = pos;
2884       if (pos != IP_ORIGINAL && cand->iv)
2885         {
2886           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2887           cand->var_after = cand->var_before;
2888         }
2889       cand->important = important;
2890       cand->incremented_at = incremented_at;
2891       data->iv_candidates.safe_push (cand);
2892
2893       if (step
2894           && TREE_CODE (step) != INTEGER_CST)
2895         {
2896           fd_ivopts_data = data;
2897           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2898         }
2899
2900       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2901         cand->ainc_use = use;
2902       else
2903         cand->ainc_use = NULL;
2904
2905       cand->orig_iv = orig_iv;
2906       if (dump_file && (dump_flags & TDF_DETAILS))
2907         dump_cand (dump_file, cand);
2908     }
2909
2910   if (important && !cand->important)
2911     {
2912       cand->important = true;
2913       if (dump_file && (dump_flags & TDF_DETAILS))
2914         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2915     }
2916
2917   if (use)
2918     {
2919       bitmap_set_bit (use->related_cands, i);
2920       if (dump_file && (dump_flags & TDF_DETAILS))
2921         fprintf (dump_file, "Candidate %d is related to use %d\n",
2922                  cand->id, use->id);
2923     }
2924
2925   return cand;
2926 }
2927
2928 /* Returns true if incrementing the induction variable at the end of the LOOP
2929    is allowed.
2930
2931    The purpose is to avoid splitting latch edge with a biv increment, thus
2932    creating a jump, possibly confusing other optimization passes and leaving
2933    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2934    is not available (so we do not have a better alternative), or if the latch
2935    edge is already nonempty.  */
2936
2937 static bool
2938 allow_ip_end_pos_p (struct loop *loop)
2939 {
2940   if (!ip_normal_pos (loop))
2941     return true;
2942
2943   if (!empty_block_p (ip_end_pos (loop)))
2944     return true;
2945
2946   return false;
2947 }
2948
2949 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2950    Important field is set to IMPORTANT.  */
2951
2952 static void
2953 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2954                         bool important, struct iv_use *use)
2955 {
2956   basic_block use_bb = gimple_bb (use->stmt);
2957   machine_mode mem_mode;
2958   unsigned HOST_WIDE_INT cstepi;
2959
2960   /* If we insert the increment in any position other than the standard
2961      ones, we must ensure that it is incremented once per iteration.
2962      It must not be in an inner nested loop, or one side of an if
2963      statement.  */
2964   if (use_bb->loop_father != data->current_loop
2965       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2966       || stmt_could_throw_p (use->stmt)
2967       || !cst_and_fits_in_hwi (step))
2968     return;
2969
2970   cstepi = int_cst_value (step);
2971
2972   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2973   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2974         || USE_STORE_PRE_INCREMENT (mem_mode))
2975        && GET_MODE_SIZE (mem_mode) == cstepi)
2976       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2977            || USE_STORE_PRE_DECREMENT (mem_mode))
2978           && GET_MODE_SIZE (mem_mode) == -cstepi))
2979     {
2980       enum tree_code code = MINUS_EXPR;
2981       tree new_base;
2982       tree new_step = step;
2983
2984       if (POINTER_TYPE_P (TREE_TYPE (base)))
2985         {
2986           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2987           code = POINTER_PLUS_EXPR;
2988         }
2989       else
2990         new_step = fold_convert (TREE_TYPE (base), new_step);
2991       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2992       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2993                        use->stmt);
2994     }
2995   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2996         || USE_STORE_POST_INCREMENT (mem_mode))
2997        && GET_MODE_SIZE (mem_mode) == cstepi)
2998       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2999            || USE_STORE_POST_DECREMENT (mem_mode))
3000           && GET_MODE_SIZE (mem_mode) == -cstepi))
3001     {
3002       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3003                        use->stmt);
3004     }
3005 }
3006
3007 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3008    position to POS.  If USE is not NULL, the candidate is set as related to
3009    it.  The candidate computation is scheduled before exit condition and at
3010    the end of loop.  */
3011
3012 static void
3013 add_candidate (struct ivopts_data *data,
3014                tree base, tree step, bool important, struct iv_use *use,
3015                struct iv *orig_iv = NULL)
3016 {
3017   gcc_assert (use == NULL || use->sub_id == 0);
3018
3019   if (ip_normal_pos (data->current_loop))
3020     add_candidate_1 (data, base, step, important,
3021                      IP_NORMAL, use, NULL, orig_iv);
3022   if (ip_end_pos (data->current_loop)
3023       && allow_ip_end_pos_p (data->current_loop))
3024     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3025 }
3026
3027 /* Adds standard iv candidates.  */
3028
3029 static void
3030 add_standard_iv_candidates (struct ivopts_data *data)
3031 {
3032   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3033
3034   /* The same for a double-integer type if it is still fast enough.  */
3035   if (TYPE_PRECISION
3036         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3037       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3038     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3039                    build_int_cst (long_integer_type_node, 1), true, NULL);
3040
3041   /* The same for a double-integer type if it is still fast enough.  */
3042   if (TYPE_PRECISION
3043         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3044       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3045     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3046                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3047 }
3048
3049
3050 /* Adds candidates bases on the old induction variable IV.  */
3051
3052 static void
3053 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3054 {
3055   gimple *phi;
3056   tree def;
3057   struct iv_cand *cand;
3058
3059   /* Check if this biv is used in address type use.  */
3060   if (iv->no_overflow  && iv->have_address_use
3061       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3062       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3063     {
3064       tree base = fold_convert (sizetype, iv->base);
3065       tree step = fold_convert (sizetype, iv->step);
3066
3067       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3068       add_candidate (data, base, step, true, NULL, iv);
3069       /* Add iv cand of the original type only if it has nonlinear use.  */
3070       if (iv->have_use_for)
3071         add_candidate (data, iv->base, iv->step, true, NULL);
3072     }
3073   else
3074     add_candidate (data, iv->base, iv->step, true, NULL);
3075
3076   /* The same, but with initial value zero.  */
3077   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3078     add_candidate (data, size_int (0), iv->step, true, NULL);
3079   else
3080     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3081                    iv->step, true, NULL);
3082
3083   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3084   if (gimple_code (phi) == GIMPLE_PHI)
3085     {
3086       /* Additionally record the possibility of leaving the original iv
3087          untouched.  */
3088       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3089       /* Don't add candidate if it's from another PHI node because
3090          it's an affine iv appearing in the form of PEELED_CHREC.  */
3091       phi = SSA_NAME_DEF_STMT (def);
3092       if (gimple_code (phi) != GIMPLE_PHI)
3093         {
3094           cand = add_candidate_1 (data,
3095                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3096                                   SSA_NAME_DEF_STMT (def));
3097           if (cand)
3098             {
3099               cand->var_before = iv->ssa_name;
3100               cand->var_after = def;
3101             }
3102         }
3103       else
3104         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3105     }
3106 }
3107
3108 /* Adds candidates based on the old induction variables.  */
3109
3110 static void
3111 add_iv_candidate_for_bivs (struct ivopts_data *data)
3112 {
3113   unsigned i;
3114   struct iv *iv;
3115   bitmap_iterator bi;
3116
3117   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3118     {
3119       iv = ver_info (data, i)->iv;
3120       if (iv && iv->biv_p && !integer_zerop (iv->step))
3121         add_iv_candidate_for_biv (data, iv);
3122     }
3123 }
3124
3125 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3126
3127 static void
3128 record_common_cand (struct ivopts_data *data, tree base,
3129                     tree step, struct iv_use *use)
3130 {
3131   struct iv_common_cand ent;
3132   struct iv_common_cand **slot;
3133
3134   gcc_assert (use != NULL);
3135
3136   ent.base = base;
3137   ent.step = step;
3138   ent.hash = iterative_hash_expr (base, 0);
3139   ent.hash = iterative_hash_expr (step, ent.hash);
3140
3141   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3142   if (*slot == NULL)
3143     {
3144       *slot = new iv_common_cand ();
3145       (*slot)->base = base;
3146       (*slot)->step = step;
3147       (*slot)->uses.create (8);
3148       (*slot)->hash = ent.hash;
3149       data->iv_common_cands.safe_push ((*slot));
3150     }
3151   (*slot)->uses.safe_push (use);
3152   return;
3153 }
3154
3155 /* Comparison function used to sort common candidates.  */
3156
3157 static int
3158 common_cand_cmp (const void *p1, const void *p2)
3159 {
3160   unsigned n1, n2;
3161   const struct iv_common_cand *const *const ccand1
3162     = (const struct iv_common_cand *const *)p1;
3163   const struct iv_common_cand *const *const ccand2
3164     = (const struct iv_common_cand *const *)p2;
3165
3166   n1 = (*ccand1)->uses.length ();
3167   n2 = (*ccand2)->uses.length ();
3168   return n2 - n1;
3169 }
3170
3171 /* Adds IV candidates based on common candidated recorded.  */
3172
3173 static void
3174 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3175 {
3176   unsigned i, j;
3177   struct iv_cand *cand_1, *cand_2;
3178
3179   data->iv_common_cands.qsort (common_cand_cmp);
3180   for (i = 0; i < data->iv_common_cands.length (); i++)
3181     {
3182       struct iv_common_cand *ptr = data->iv_common_cands[i];
3183
3184       /* Only add IV candidate if it's derived from multiple uses.  */
3185       if (ptr->uses.length () <= 1)
3186         break;
3187
3188       cand_1 = NULL;
3189       cand_2 = NULL;
3190       if (ip_normal_pos (data->current_loop))
3191         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3192                                   false, IP_NORMAL, NULL, NULL);
3193
3194       if (ip_end_pos (data->current_loop)
3195           && allow_ip_end_pos_p (data->current_loop))
3196         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3197                                   false, IP_END, NULL, NULL);
3198
3199       /* Bind deriving uses and the new candidates.  */
3200       for (j = 0; j < ptr->uses.length (); j++)
3201         {
3202           struct iv_use *use = ptr->uses[j];
3203           if (cand_1)
3204             bitmap_set_bit (use->related_cands, cand_1->id);
3205           if (cand_2)
3206             bitmap_set_bit (use->related_cands, cand_2->id);
3207         }
3208     }
3209
3210   /* Release data since it is useless from this point.  */
3211   data->iv_common_cand_tab->empty ();
3212   data->iv_common_cands.truncate (0);
3213 }
3214
3215 /* Adds candidates based on the value of USE's iv.  */
3216
3217 static void
3218 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3219 {
3220   unsigned HOST_WIDE_INT offset;
3221   tree base;
3222   tree basetype;
3223   struct iv *iv = use->iv;
3224
3225   add_candidate (data, iv->base, iv->step, false, use);
3226
3227   /* Record common candidate for use in case it can be shared by others.  */
3228   record_common_cand (data, iv->base, iv->step, use);
3229
3230   /* Record common candidate with initial value zero.  */
3231   basetype = TREE_TYPE (iv->base);
3232   if (POINTER_TYPE_P (basetype))
3233     basetype = sizetype;
3234   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3235
3236   /* Record common candidate with constant offset stripped in base.
3237      Like the use itself, we also add candidate directly for it.  */
3238   base = strip_offset (iv->base, &offset);
3239   if (offset || base != iv->base)
3240     {
3241       record_common_cand (data, base, iv->step, use);
3242       add_candidate (data, base, iv->step, false, use);
3243     }
3244
3245   /* Record common candidate with base_object removed in base.  */
3246   if (iv->base_object != NULL)
3247     {
3248       unsigned i;
3249       aff_tree aff_base;
3250       tree step, base_object = iv->base_object;
3251
3252       base = iv->base;
3253       step = iv->step;
3254       STRIP_NOPS (base);
3255       STRIP_NOPS (step);
3256       STRIP_NOPS (base_object);
3257       tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3258       for (i = 0; i < aff_base.n; i++)
3259         {
3260           if (aff_base.elts[i].coef != 1)
3261             continue;
3262
3263           if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3264             break;
3265         }
3266       if (i < aff_base.n)
3267         {
3268           aff_combination_remove_elt (&aff_base, i);
3269           base = aff_combination_to_tree (&aff_base);
3270           basetype = TREE_TYPE (base);
3271           if (POINTER_TYPE_P (basetype))
3272             basetype = sizetype;
3273
3274           step = fold_convert (basetype, step);
3275           record_common_cand (data, base, step, use);
3276           /* Also record common candidate with offset stripped.  */
3277           base = strip_offset (base, &offset);
3278           if (offset)
3279             record_common_cand (data, base, step, use);
3280         }
3281     }
3282
3283   /* At last, add auto-incremental candidates.  Make such variables
3284      important since other iv uses with same base object may be based
3285      on it.  */
3286   if (use != NULL && use->type == USE_ADDRESS)
3287     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3288 }
3289
3290 /* Adds candidates based on the uses.  */
3291
3292 static void
3293 add_iv_candidate_for_uses (struct ivopts_data *data)
3294 {
3295   unsigned i;
3296
3297   for (i = 0; i < n_iv_uses (data); i++)
3298     {
3299       struct iv_use *use = iv_use (data, i);
3300
3301       if (!use)
3302         continue;
3303
3304       switch (use->type)
3305         {
3306         case USE_NONLINEAR_EXPR:
3307         case USE_COMPARE:
3308         case USE_ADDRESS:
3309           /* Just add the ivs based on the value of the iv used here.  */
3310           add_iv_candidate_for_use (data, use);
3311           break;
3312
3313         default:
3314           gcc_unreachable ();
3315         }
3316     }
3317   add_iv_candidate_derived_from_uses (data);
3318 }
3319
3320 /* Record important candidates and add them to related_cands bitmaps.  */
3321
3322 static void
3323 record_important_candidates (struct ivopts_data *data)
3324 {
3325   unsigned i;
3326   struct iv_use *use;
3327
3328   for (i = 0; i < n_iv_cands (data); i++)
3329     {
3330       struct iv_cand *cand = iv_cand (data, i);
3331
3332       if (cand->important)
3333         bitmap_set_bit (data->important_candidates, i);
3334     }
3335
3336   data->consider_all_candidates = (n_iv_cands (data)
3337                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3338
3339   /* Add important candidates to uses' related_cands bitmaps.  */
3340   for (i = 0; i < n_iv_uses (data); i++)
3341     {
3342       use = iv_use (data, i);
3343       bitmap_ior_into (use->related_cands, data->important_candidates);
3344     }
3345 }
3346
3347 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3348    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3349    we allocate a simple list to every use.  */
3350
3351 static void
3352 alloc_use_cost_map (struct ivopts_data *data)
3353 {
3354   unsigned i, size, s;
3355
3356   for (i = 0; i < n_iv_uses (data); i++)
3357     {
3358       struct iv_use *use = iv_use (data, i);
3359
3360       if (data->consider_all_candidates)
3361         size = n_iv_cands (data);
3362       else
3363         {
3364           s = bitmap_count_bits (use->related_cands);
3365
3366           /* Round up to the power of two, so that moduling by it is fast.  */
3367           size = s ? (1 << ceil_log2 (s)) : 1;
3368         }
3369
3370       use->n_map_members = size;
3371       use->cost_map = XCNEWVEC (struct cost_pair, size);
3372     }
3373 }
3374
3375 /* Returns description of computation cost of expression whose runtime
3376    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
3377
3378 static comp_cost
3379 new_cost (unsigned runtime, unsigned complexity)
3380 {
3381   comp_cost cost;
3382
3383   cost.cost = runtime;
3384   cost.complexity = complexity;
3385
3386   return cost;
3387 }
3388
3389 /* Returns true if COST is infinite.  */
3390
3391 static bool
3392 infinite_cost_p (comp_cost cost)
3393 {
3394   return cost.cost == INFTY;
3395 }
3396
3397 /* Adds costs COST1 and COST2.  */
3398
3399 static comp_cost
3400 add_costs (comp_cost cost1, comp_cost cost2)
3401 {
3402   if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3403     return infinite_cost;
3404
3405   cost1.cost += cost2.cost;
3406   cost1.complexity += cost2.complexity;
3407
3408   return cost1;
3409 }
3410 /* Subtracts costs COST1 and COST2.  */
3411
3412 static comp_cost
3413 sub_costs (comp_cost cost1, comp_cost cost2)
3414 {
3415   cost1.cost -= cost2.cost;
3416   cost1.complexity -= cost2.complexity;
3417
3418   return cost1;
3419 }
3420
3421 /* Returns a negative number if COST1 < COST2, a positive number if
3422    COST1 > COST2, and 0 if COST1 = COST2.  */
3423
3424 static int
3425 compare_costs (comp_cost cost1, comp_cost cost2)
3426 {
3427   if (cost1.cost == cost2.cost)
3428     return cost1.complexity - cost2.complexity;
3429
3430   return cost1.cost - cost2.cost;
3431 }
3432
3433 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3434    on invariants DEPENDS_ON and that the value used in expressing it
3435    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3436
3437 static void
3438 set_use_iv_cost (struct ivopts_data *data,
3439                  struct iv_use *use, struct iv_cand *cand,
3440                  comp_cost cost, bitmap depends_on, tree value,
3441                  enum tree_code comp, int inv_expr_id)
3442 {
3443   unsigned i, s;
3444
3445   if (infinite_cost_p (cost))
3446     {
3447       BITMAP_FREE (depends_on);
3448       return;
3449     }
3450
3451   if (data->consider_all_candidates)
3452     {
3453       use->cost_map[cand->id].cand = cand;
3454       use->cost_map[cand->id].cost = cost;
3455       use->cost_map[cand->id].depends_on = depends_on;
3456       use->cost_map[cand->id].value = value;
3457       use->cost_map[cand->id].comp = comp;
3458       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3459       return;
3460     }
3461
3462   /* n_map_members is a power of two, so this computes modulo.  */
3463   s = cand->id & (use->n_map_members - 1);
3464   for (i = s; i < use->n_map_members; i++)
3465     if (!use->cost_map[i].cand)
3466       goto found;
3467   for (i = 0; i < s; i++)
3468     if (!use->cost_map[i].cand)
3469       goto found;
3470
3471   gcc_unreachable ();
3472
3473 found:
3474   use->cost_map[i].cand = cand;
3475   use->cost_map[i].cost = cost;
3476   use->cost_map[i].depends_on = depends_on;
3477   use->cost_map[i].value = value;
3478   use->cost_map[i].comp = comp;
3479   use->cost_map[i].inv_expr_id = inv_expr_id;
3480 }
3481
3482 /* Gets cost of (USE, CANDIDATE) pair.  */
3483
3484 static struct cost_pair *
3485 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3486                  struct iv_cand *cand)
3487 {
3488   unsigned i, s;
3489   struct cost_pair *ret;
3490
3491   if (!cand)
3492     return NULL;
3493
3494   if (data->consider_all_candidates)
3495     {
3496       ret = use->cost_map + cand->id;
3497       if (!ret->cand)
3498         return NULL;
3499
3500       return ret;
3501     }
3502
3503   /* n_map_members is a power of two, so this computes modulo.  */
3504   s = cand->id & (use->n_map_members - 1);
3505   for (i = s; i < use->n_map_members; i++)
3506     if (use->cost_map[i].cand == cand)
3507       return use->cost_map + i;
3508     else if (use->cost_map[i].cand == NULL)
3509       return NULL;
3510   for (i = 0; i < s; i++)
3511     if (use->cost_map[i].cand == cand)
3512       return use->cost_map + i;
3513     else if (use->cost_map[i].cand == NULL)
3514       return NULL;
3515
3516   return NULL;
3517 }
3518
3519 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3520 static rtx
3521 produce_memory_decl_rtl (tree obj, int *regno)
3522 {
3523   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3524   machine_mode address_mode = targetm.addr_space.address_mode (as);
3525   rtx x;
3526
3527   gcc_assert (obj);
3528   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3529     {
3530       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3531       x = gen_rtx_SYMBOL_REF (address_mode, name);
3532       SET_SYMBOL_REF_DECL (x, obj);
3533       x = gen_rtx_MEM (DECL_MODE (obj), x);
3534       set_mem_addr_space (x, as);
3535       targetm.encode_section_info (obj, x, true);
3536     }
3537   else
3538     {
3539       x = gen_raw_REG (address_mode, (*regno)++);
3540       x = gen_rtx_MEM (DECL_MODE (obj), x);
3541       set_mem_addr_space (x, as);
3542     }
3543
3544   return x;
3545 }
3546
3547 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3548    walk_tree.  DATA contains the actual fake register number.  */
3549
3550 static tree
3551 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3552 {
3553   tree obj = NULL_TREE;
3554   rtx x = NULL_RTX;
3555   int *regno = (int *) data;
3556
3557   switch (TREE_CODE (*expr_p))
3558     {
3559     case ADDR_EXPR:
3560       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3561            handled_component_p (*expr_p);
3562            expr_p = &TREE_OPERAND (*expr_p, 0))
3563         continue;
3564       obj = *expr_p;
3565       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3566         x = produce_memory_decl_rtl (obj, regno);
3567       break;
3568
3569     case SSA_NAME:
3570       *ws = 0;
3571       obj = SSA_NAME_VAR (*expr_p);
3572       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3573       if (!obj)
3574         return NULL_TREE;
3575       if (!DECL_RTL_SET_P (obj))
3576         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3577       break;
3578
3579     case VAR_DECL:
3580     case PARM_DECL:
3581     case RESULT_DECL:
3582       *ws = 0;
3583       obj = *expr_p;
3584
3585       if (DECL_RTL_SET_P (obj))
3586         break;
3587
3588       if (DECL_MODE (obj) == BLKmode)
3589         x = produce_memory_decl_rtl (obj, regno);
3590       else
3591         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3592
3593       break;
3594
3595     default:
3596       break;
3597     }
3598
3599   if (x)
3600     {
3601       decl_rtl_to_reset.safe_push (obj);
3602       SET_DECL_RTL (obj, x);
3603     }
3604
3605   return NULL_TREE;
3606 }
3607
3608 /* Determines cost of the computation of EXPR.  */
3609
3610 static unsigned
3611 computation_cost (tree expr, bool speed)
3612 {
3613   rtx_insn *seq;
3614   rtx rslt;
3615   tree type = TREE_TYPE (expr);
3616   unsigned cost;
3617   /* Avoid using hard regs in ways which may be unsupported.  */
3618   int regno = LAST_VIRTUAL_REGISTER + 1;
3619   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3620   enum node_frequency real_frequency = node->frequency;
3621
3622   node->frequency = NODE_FREQUENCY_NORMAL;
3623   crtl->maybe_hot_insn_p = speed;
3624   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3625   start_sequence ();
3626   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3627   seq = get_insns ();
3628   end_sequence ();
3629   default_rtl_profile ();
3630   node->frequency = real_frequency;
3631
3632   cost = seq_cost (seq, speed);
3633   if (MEM_P (rslt))
3634     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3635                           TYPE_ADDR_SPACE (type), speed);
3636   else if (!REG_P (rslt))
3637     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3638
3639   return cost;
3640 }
3641
3642 /* Returns variable containing the value of candidate CAND at statement AT.  */
3643
3644 static tree
3645 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3646 {
3647   if (stmt_after_increment (loop, cand, stmt))
3648     return cand->var_after;
3649   else
3650     return cand->var_before;
3651 }
3652
3653 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3654    same precision that is at least as wide as the precision of TYPE, stores
3655    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3656    type of A and B.  */
3657
3658 static tree
3659 determine_common_wider_type (tree *a, tree *b)
3660 {
3661   tree wider_type = NULL;
3662   tree suba, subb;
3663   tree atype = TREE_TYPE (*a);
3664
3665   if (CONVERT_EXPR_P (*a))
3666     {
3667       suba = TREE_OPERAND (*a, 0);
3668       wider_type = TREE_TYPE (suba);
3669       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3670         return atype;
3671     }
3672   else
3673     return atype;
3674
3675   if (CONVERT_EXPR_P (*b))
3676     {
3677       subb = TREE_OPERAND (*b, 0);
3678       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3679         return atype;
3680     }
3681   else
3682     return atype;
3683
3684   *a = suba;
3685   *b = subb;
3686   return wider_type;
3687 }
3688
3689 /* Determines the expression by that USE is expressed from induction variable
3690    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3691    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3692
3693 static bool
3694 get_computation_aff (struct loop *loop,
3695                      struct iv_use *use, struct iv_cand *cand, gimple *at,
3696                      struct aff_tree *aff)
3697 {
3698   tree ubase = use->iv->base;
3699   tree ustep = use->iv->step;
3700   tree cbase = cand->iv->base;
3701   tree cstep = cand->iv->step, cstep_common;
3702   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3703   tree common_type, var;
3704   tree uutype;
3705   aff_tree cbase_aff, var_aff;
3706   widest_int rat;
3707
3708   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3709     {
3710       /* We do not have a precision to express the values of use.  */
3711       return false;
3712     }
3713
3714   var = var_at_stmt (loop, cand, at);
3715   uutype = unsigned_type_for (utype);
3716
3717   /* If the conversion is not noop, perform it.  */
3718   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3719     {
3720       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3721           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3722         {
3723           tree inner_base, inner_step, inner_type;
3724           inner_base = TREE_OPERAND (cbase, 0);
3725           if (CONVERT_EXPR_P (cstep))
3726             inner_step = TREE_OPERAND (cstep, 0);
3727           else
3728             inner_step = cstep;
3729
3730           inner_type = TREE_TYPE (inner_base);
3731           /* If candidate is added from a biv whose type is smaller than
3732              ctype, we know both candidate and the biv won't overflow.
3733              In this case, it's safe to skip the convertion in candidate.
3734              As an example, (unsigned short)((unsigned long)A) equals to
3735              (unsigned short)A, if A has a type no larger than short.  */
3736           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3737             {
3738               cbase = inner_base;
3739               cstep = inner_step;
3740             }
3741         }
3742       cstep = fold_convert (uutype, cstep);
3743       cbase = fold_convert (uutype, cbase);
3744       var = fold_convert (uutype, var);
3745     }
3746
3747   /* Ratio is 1 when computing the value of biv cand by itself.
3748      We can't rely on constant_multiple_of in this case because the
3749      use is created after the original biv is selected.  The call
3750      could fail because of inconsistent fold behavior.  See PR68021
3751      for more information.  */
3752   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3753     {
3754       gcc_assert (is_gimple_assign (use->stmt));
3755       gcc_assert (use->iv->ssa_name == cand->var_after);
3756       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3757       rat = 1;
3758     }
3759   else if (!constant_multiple_of (ustep, cstep, &rat))
3760     return false;
3761
3762   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3763      type, we achieve better folding by computing their difference in this
3764      wider type, and cast the result to UUTYPE.  We do not need to worry about
3765      overflows, as all the arithmetics will in the end be performed in UUTYPE
3766      anyway.  */
3767   common_type = determine_common_wider_type (&ubase, &cbase);
3768
3769   /* use = ubase - ratio * cbase + ratio * var.  */
3770   tree_to_aff_combination (ubase, common_type, aff);
3771   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3772   tree_to_aff_combination (var, uutype, &var_aff);
3773
3774   /* We need to shift the value if we are after the increment.  */
3775   if (stmt_after_increment (loop, cand, at))
3776     {
3777       aff_tree cstep_aff;
3778
3779       if (common_type != uutype)
3780         cstep_common = fold_convert (common_type, cstep);
3781       else
3782         cstep_common = cstep;
3783
3784       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3785       aff_combination_add (&cbase_aff, &cstep_aff);
3786     }
3787
3788   aff_combination_scale (&cbase_aff, -rat);
3789   aff_combination_add (aff, &cbase_aff);
3790   if (common_type != uutype)
3791     aff_combination_convert (aff, uutype);
3792
3793   aff_combination_scale (&var_aff, rat);
3794   aff_combination_add (aff, &var_aff);
3795
3796   return true;
3797 }
3798
3799 /* Return the type of USE.  */
3800
3801 static tree
3802 get_use_type (struct iv_use *use)
3803 {
3804   tree base_type = TREE_TYPE (use->iv->base);
3805   tree type;
3806
3807   if (use->type == USE_ADDRESS)
3808     {
3809       /* The base_type may be a void pointer.  Create a pointer type based on
3810          the mem_ref instead.  */
3811       type = build_pointer_type (TREE_TYPE (*use->op_p));
3812       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3813                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3814     }
3815   else
3816     type = base_type;
3817
3818   return type;
3819 }
3820
3821 /* Determines the expression by that USE is expressed from induction variable
3822    CAND at statement AT in LOOP.  The computation is unshared.  */
3823
3824 static tree
3825 get_computation_at (struct loop *loop,
3826                     struct iv_use *use, struct iv_cand *cand, gimple *at)
3827 {
3828   aff_tree aff;
3829   tree type = get_use_type (use);
3830
3831   if (!get_computation_aff (loop, use, cand, at, &aff))
3832     return NULL_TREE;
3833   unshare_aff_combination (&aff);
3834   return fold_convert (type, aff_combination_to_tree (&aff));
3835 }
3836
3837 /* Determines the expression by that USE is expressed from induction variable
3838    CAND in LOOP.  The computation is unshared.  */
3839
3840 static tree
3841 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3842 {
3843   return get_computation_at (loop, use, cand, use->stmt);
3844 }
3845
3846 /* Adjust the cost COST for being in loop setup rather than loop body.
3847    If we're optimizing for space, the loop setup overhead is constant;
3848    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3849 static unsigned
3850 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3851 {
3852   if (cost == INFTY)
3853     return cost;
3854   else if (optimize_loop_for_speed_p (data->current_loop))
3855     return cost / avg_loop_niter (data->current_loop);
3856   else
3857     return cost;
3858 }
3859
3860 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3861    validity for a memory reference accessing memory of mode MODE in
3862    address space AS.  */
3863
3864
3865 bool
3866 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3867                                  addr_space_t as)
3868 {
3869 #define MAX_RATIO 128
3870   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3871   static vec<sbitmap> valid_mult_list;
3872   sbitmap valid_mult;
3873
3874   if (data_index >= valid_mult_list.length ())
3875     valid_mult_list.safe_grow_cleared (data_index + 1);
3876
3877   valid_mult = valid_mult_list[data_index];
3878   if (!valid_mult)
3879     {
3880       machine_mode address_mode = targetm.addr_space.address_mode (as);
3881       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3882       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3883       rtx addr, scaled;
3884       HOST_WIDE_INT i;
3885
3886       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3887       bitmap_clear (valid_mult);
3888       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3889       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3890       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3891         {
3892           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3893           if (memory_address_addr_space_p (mode, addr, as)
3894               || memory_address_addr_space_p (mode, scaled, as))
3895             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3896         }
3897
3898       if (dump_file && (dump_flags & TDF_DETAILS))
3899         {
3900           fprintf (dump_file, "  allowed multipliers:");
3901           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3902             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3903               fprintf (dump_file, " %d", (int) i);
3904           fprintf (dump_file, "\n");
3905           fprintf (dump_file, "\n");
3906         }
3907
3908       valid_mult_list[data_index] = valid_mult;
3909     }
3910
3911   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3912     return false;
3913
3914   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3915 }
3916
3917 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3918    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3919    variable is omitted.  Compute the cost for a memory reference that accesses
3920    a memory location of mode MEM_MODE in address space AS.
3921
3922    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3923    size of MEM_MODE / RATIO) is available.  To make this determination, we
3924    look at the size of the increment to be made, which is given in CSTEP.
3925    CSTEP may be zero if the step is unknown.
3926    STMT_AFTER_INC is true iff the statement we're looking at is after the
3927    increment of the original biv.
3928
3929    TODO -- there must be some better way.  This all is quite crude.  */
3930
3931 enum ainc_type
3932 {
3933   AINC_PRE_INC,         /* Pre increment.  */
3934   AINC_PRE_DEC,         /* Pre decrement.  */
3935   AINC_POST_INC,        /* Post increment.  */
3936   AINC_POST_DEC,        /* Post decrement.  */
3937   AINC_NONE             /* Also the number of auto increment types.  */
3938 };
3939
3940 struct address_cost_data
3941 {
3942   HOST_WIDE_INT min_offset, max_offset;
3943   unsigned costs[2][2][2][2];
3944   unsigned ainc_costs[AINC_NONE];
3945 };
3946
3947
3948 static comp_cost
3949 get_address_cost (bool symbol_present, bool var_present,
3950                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3951                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3952                   addr_space_t as, bool speed,
3953                   bool stmt_after_inc, bool *may_autoinc)
3954 {
3955   machine_mode address_mode = targetm.addr_space.address_mode (as);
3956   static vec<address_cost_data *> address_cost_data_list;
3957   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3958   address_cost_data *data;
3959   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3960   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3961   unsigned cost, acost, complexity;
3962   enum ainc_type autoinc_type;
3963   bool offset_p, ratio_p, autoinc;
3964   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3965   unsigned HOST_WIDE_INT mask;
3966   unsigned bits;
3967
3968   if (data_index >= address_cost_data_list.length ())
3969     address_cost_data_list.safe_grow_cleared (data_index + 1);
3970
3971   data = address_cost_data_list[data_index];
3972   if (!data)
3973     {
3974       HOST_WIDE_INT i;
3975       HOST_WIDE_INT rat, off = 0;
3976       int old_cse_not_expected, width;
3977       unsigned sym_p, var_p, off_p, rat_p, add_c;
3978       rtx_insn *seq;
3979       rtx addr, base;
3980       rtx reg0, reg1;
3981
3982       data = (address_cost_data *) xcalloc (1, sizeof (*data));
3983
3984       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3985
3986       width = GET_MODE_BITSIZE (address_mode) - 1;
3987       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3988         width = HOST_BITS_PER_WIDE_INT - 1;
3989       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3990
3991       for (i = width; i >= 0; i--)
3992         {
3993           off = -((unsigned HOST_WIDE_INT) 1 << i);
3994           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3995           if (memory_address_addr_space_p (mem_mode, addr, as))
3996             break;
3997         }
3998       data->min_offset = (i == -1? 0 : off);
3999
4000       for (i = width; i >= 0; i--)
4001         {
4002           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4003           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004           if (memory_address_addr_space_p (mem_mode, addr, as))
4005             break;
4006           /* For some strict-alignment targets, the offset must be naturally
4007              aligned.  Try an aligned offset if mem_mode is not QImode.  */
4008           off = mem_mode != QImode
4009                 ? ((unsigned HOST_WIDE_INT) 1 << i)
4010                     - GET_MODE_SIZE (mem_mode)
4011                 : 0;
4012           if (off > 0)
4013             {
4014               XEXP (addr, 1) = gen_int_mode (off, address_mode);
4015               if (memory_address_addr_space_p (mem_mode, addr, as))
4016                 break;
4017             }
4018         }
4019       if (i == -1)
4020         off = 0;
4021       data->max_offset = off;
4022
4023       if (dump_file && (dump_flags & TDF_DETAILS))
4024         {
4025           fprintf (dump_file, "get_address_cost:\n");
4026           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4027                    GET_MODE_NAME (mem_mode),
4028                    data->min_offset);
4029           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4030                    GET_MODE_NAME (mem_mode),
4031                    data->max_offset);
4032         }
4033
4034       rat = 1;
4035       for (i = 2; i <= MAX_RATIO; i++)
4036         if (multiplier_allowed_in_address_p (i, mem_mode, as))
4037           {
4038             rat = i;
4039             break;
4040           }
4041
4042       /* Compute the cost of various addressing modes.  */
4043       acost = 0;
4044       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4045       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4046
4047       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4048           || USE_STORE_PRE_DECREMENT (mem_mode))
4049         {
4050           addr = gen_rtx_PRE_DEC (address_mode, reg0);
4051           has_predec[mem_mode]
4052             = memory_address_addr_space_p (mem_mode, addr, as);
4053
4054           if (has_predec[mem_mode])
4055             data->ainc_costs[AINC_PRE_DEC]
4056               = address_cost (addr, mem_mode, as, speed);
4057         }
4058       if (USE_LOAD_POST_DECREMENT (mem_mode)
4059           || USE_STORE_POST_DECREMENT (mem_mode))
4060         {
4061           addr = gen_rtx_POST_DEC (address_mode, reg0);
4062           has_postdec[mem_mode]
4063             = memory_address_addr_space_p (mem_mode, addr, as);
4064
4065           if (has_postdec[mem_mode])
4066             data->ainc_costs[AINC_POST_DEC]
4067               = address_cost (addr, mem_mode, as, speed);
4068         }
4069       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4070           || USE_STORE_PRE_DECREMENT (mem_mode))
4071         {
4072           addr = gen_rtx_PRE_INC (address_mode, reg0);
4073           has_preinc[mem_mode]
4074             = memory_address_addr_space_p (mem_mode, addr, as);
4075
4076           if (has_preinc[mem_mode])
4077             data->ainc_costs[AINC_PRE_INC]
4078               = address_cost (addr, mem_mode, as, speed);
4079         }
4080       if (USE_LOAD_POST_INCREMENT (mem_mode)
4081           || USE_STORE_POST_INCREMENT (mem_mode))
4082         {
4083           addr = gen_rtx_POST_INC (address_mode, reg0);
4084           has_postinc[mem_mode]
4085             = memory_address_addr_space_p (mem_mode, addr, as);
4086
4087           if (has_postinc[mem_mode])
4088             data->ainc_costs[AINC_POST_INC]
4089               = address_cost (addr, mem_mode, as, speed);
4090         }
4091       for (i = 0; i < 16; i++)
4092         {
4093           sym_p = i & 1;
4094           var_p = (i >> 1) & 1;
4095           off_p = (i >> 2) & 1;
4096           rat_p = (i >> 3) & 1;
4097
4098           addr = reg0;
4099           if (rat_p)
4100             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4101                                    gen_int_mode (rat, address_mode));
4102
4103           if (var_p)
4104             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4105
4106           if (sym_p)
4107             {
4108               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4109               /* ??? We can run into trouble with some backends by presenting
4110                  it with symbols which haven't been properly passed through
4111                  targetm.encode_section_info.  By setting the local bit, we
4112                  enhance the probability of things working.  */
4113               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4114
4115               if (off_p)
4116                 base = gen_rtx_fmt_e (CONST, address_mode,
4117                                       gen_rtx_fmt_ee
4118                                         (PLUS, address_mode, base,
4119                                          gen_int_mode (off, address_mode)));
4120             }
4121           else if (off_p)
4122             base = gen_int_mode (off, address_mode);
4123           else
4124             base = NULL_RTX;
4125
4126           if (base)
4127             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4128
4129           start_sequence ();
4130           /* To avoid splitting addressing modes, pretend that no cse will
4131              follow.  */
4132           old_cse_not_expected = cse_not_expected;
4133           cse_not_expected = true;
4134           addr = memory_address_addr_space (mem_mode, addr, as);
4135           cse_not_expected = old_cse_not_expected;
4136           seq = get_insns ();
4137           end_sequence ();
4138
4139           acost = seq_cost (seq, speed);
4140           acost += address_cost (addr, mem_mode, as, speed);
4141
4142           if (!acost)
4143             acost = 1;
4144           data->costs[sym_p][var_p][off_p][rat_p] = acost;
4145         }
4146
4147       /* On some targets, it is quite expensive to load symbol to a register,
4148          which makes addresses that contain symbols look much more expensive.
4149          However, the symbol will have to be loaded in any case before the
4150          loop (and quite likely we have it in register already), so it does not
4151          make much sense to penalize them too heavily.  So make some final
4152          tweaks for the SYMBOL_PRESENT modes:
4153
4154          If VAR_PRESENT is false, and the mode obtained by changing symbol to
4155          var is cheaper, use this mode with small penalty.
4156          If VAR_PRESENT is true, try whether the mode with
4157          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4158          if this is the case, use it.  */
4159       add_c = add_cost (speed, address_mode);
4160       for (i = 0; i < 8; i++)
4161         {
4162           var_p = i & 1;
4163           off_p = (i >> 1) & 1;
4164           rat_p = (i >> 2) & 1;
4165
4166           acost = data->costs[0][1][off_p][rat_p] + 1;
4167           if (var_p)
4168             acost += add_c;
4169
4170           if (acost < data->costs[1][var_p][off_p][rat_p])
4171             data->costs[1][var_p][off_p][rat_p] = acost;
4172         }
4173
4174       if (dump_file && (dump_flags & TDF_DETAILS))
4175         {
4176           fprintf (dump_file, "Address costs:\n");
4177
4178           for (i = 0; i < 16; i++)
4179             {
4180               sym_p = i & 1;
4181               var_p = (i >> 1) & 1;
4182               off_p = (i >> 2) & 1;
4183               rat_p = (i >> 3) & 1;
4184
4185               fprintf (dump_file, "  ");
4186               if (sym_p)
4187                 fprintf (dump_file, "sym + ");
4188               if (var_p)
4189                 fprintf (dump_file, "var + ");
4190               if (off_p)
4191                 fprintf (dump_file, "cst + ");
4192               if (rat_p)
4193                 fprintf (dump_file, "rat * ");
4194
4195               acost = data->costs[sym_p][var_p][off_p][rat_p];
4196               fprintf (dump_file, "index costs %d\n", acost);
4197             }
4198           if (has_predec[mem_mode] || has_postdec[mem_mode]
4199               || has_preinc[mem_mode] || has_postinc[mem_mode])
4200             fprintf (dump_file, "  May include autoinc/dec\n");
4201           fprintf (dump_file, "\n");
4202         }
4203
4204       address_cost_data_list[data_index] = data;
4205     }
4206
4207   bits = GET_MODE_BITSIZE (address_mode);
4208   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4209   offset &= mask;
4210   if ((offset >> (bits - 1) & 1))
4211     offset |= ~mask;
4212   s_offset = offset;
4213
4214   autoinc = false;
4215   autoinc_type = AINC_NONE;
4216   msize = GET_MODE_SIZE (mem_mode);
4217   autoinc_offset = offset;
4218   if (stmt_after_inc)
4219     autoinc_offset += ratio * cstep;
4220   if (symbol_present || var_present || ratio != 1)
4221     autoinc = false;
4222   else
4223     {
4224       if (has_postinc[mem_mode] && autoinc_offset == 0
4225           && msize == cstep)
4226         autoinc_type = AINC_POST_INC;
4227       else if (has_postdec[mem_mode] && autoinc_offset == 0
4228                && msize == -cstep)
4229         autoinc_type = AINC_POST_DEC;
4230       else if (has_preinc[mem_mode] && autoinc_offset == msize
4231                && msize == cstep)
4232         autoinc_type = AINC_PRE_INC;
4233       else if (has_predec[mem_mode] && autoinc_offset == -msize
4234                && msize == -cstep)
4235         autoinc_type = AINC_PRE_DEC;
4236
4237       if (autoinc_type != AINC_NONE)
4238         autoinc = true;
4239     }
4240
4241   cost = 0;
4242   offset_p = (s_offset != 0
4243               && data->min_offset <= s_offset
4244               && s_offset <= data->max_offset);
4245   ratio_p = (ratio != 1
4246              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4247
4248   if (ratio != 1 && !ratio_p)
4249     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4250
4251   if (s_offset && !offset_p && !symbol_present)
4252     cost += add_cost (speed, address_mode);
4253
4254   if (may_autoinc)
4255     *may_autoinc = autoinc;
4256   if (autoinc)
4257     acost = data->ainc_costs[autoinc_type];
4258   else
4259     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4260   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4261   return new_cost (cost + acost, complexity);
4262 }
4263
4264  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4265     EXPR operand holding the shift.  COST0 and COST1 are the costs for
4266     calculating the operands of EXPR.  Returns true if successful, and returns
4267     the cost in COST.  */
4268
4269 static bool
4270 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4271                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4272 {
4273   comp_cost res;
4274   tree op1 = TREE_OPERAND (expr, 1);
4275   tree cst = TREE_OPERAND (mult, 1);
4276   tree multop = TREE_OPERAND (mult, 0);
4277   int m = exact_log2 (int_cst_value (cst));
4278   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4279   int as_cost, sa_cost;
4280   bool mult_in_op1;
4281
4282   if (!(m >= 0 && m < maxm))
4283     return false;
4284
4285   STRIP_NOPS (op1);
4286   mult_in_op1 = operand_equal_p (op1, mult, 0);
4287
4288   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4289
4290   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4291      use that in preference to a shift insn followed by an add insn.  */
4292   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4293              ? shiftadd_cost (speed, mode, m)
4294              : (mult_in_op1
4295                 ? shiftsub1_cost (speed, mode, m)
4296                 : shiftsub0_cost (speed, mode, m)));
4297
4298   res = new_cost (MIN (as_cost, sa_cost), 0);
4299   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4300
4301   STRIP_NOPS (multop);
4302   if (!is_gimple_val (multop))
4303     res = add_costs (res, force_expr_to_var_cost (multop, speed));
4304
4305   *cost = res;
4306   return true;
4307 }
4308
4309 /* Estimates cost of forcing expression EXPR into a variable.  */
4310
4311 static comp_cost
4312 force_expr_to_var_cost (tree expr, bool speed)
4313 {
4314   static bool costs_initialized = false;
4315   static unsigned integer_cost [2];
4316   static unsigned symbol_cost [2];
4317   static unsigned address_cost [2];
4318   tree op0, op1;
4319   comp_cost cost0, cost1, cost;
4320   machine_mode mode;
4321
4322   if (!costs_initialized)
4323     {
4324       tree type = build_pointer_type (integer_type_node);
4325       tree var, addr;
4326       rtx x;
4327       int i;
4328
4329       var = create_tmp_var_raw (integer_type_node, "test_var");
4330       TREE_STATIC (var) = 1;
4331       x = produce_memory_decl_rtl (var, NULL);
4332       SET_DECL_RTL (var, x);
4333
4334       addr = build1 (ADDR_EXPR, type, var);
4335
4336
4337       for (i = 0; i < 2; i++)
4338         {
4339           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4340                                                              2000), i);
4341
4342           symbol_cost[i] = computation_cost (addr, i) + 1;
4343
4344           address_cost[i]
4345             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4346           if (dump_file && (dump_flags & TDF_DETAILS))
4347             {
4348               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4349               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4350               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4351               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4352               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4353               fprintf (dump_file, "\n");
4354             }
4355         }
4356
4357       costs_initialized = true;
4358     }
4359
4360   STRIP_NOPS (expr);
4361
4362   if (SSA_VAR_P (expr))
4363     return no_cost;
4364
4365   if (is_gimple_min_invariant (expr))
4366     {
4367       if (TREE_CODE (expr) == INTEGER_CST)
4368         return new_cost (integer_cost [speed], 0);
4369
4370       if (TREE_CODE (expr) == ADDR_EXPR)
4371         {
4372           tree obj = TREE_OPERAND (expr, 0);
4373
4374           if (TREE_CODE (obj) == VAR_DECL
4375               || TREE_CODE (obj) == PARM_DECL
4376               || TREE_CODE (obj) == RESULT_DECL)
4377             return new_cost (symbol_cost [speed], 0);
4378         }
4379
4380       return new_cost (address_cost [speed], 0);
4381     }
4382
4383   switch (TREE_CODE (expr))
4384     {
4385     case POINTER_PLUS_EXPR:
4386     case PLUS_EXPR:
4387     case MINUS_EXPR:
4388     case MULT_EXPR:
4389       op0 = TREE_OPERAND (expr, 0);
4390       op1 = TREE_OPERAND (expr, 1);
4391       STRIP_NOPS (op0);
4392       STRIP_NOPS (op1);
4393       break;
4394
4395     CASE_CONVERT:
4396     case NEGATE_EXPR:
4397       op0 = TREE_OPERAND (expr, 0);
4398       STRIP_NOPS (op0);
4399       op1 = NULL_TREE;
4400       break;
4401
4402     default:
4403       /* Just an arbitrary value, FIXME.  */
4404       return new_cost (target_spill_cost[speed], 0);
4405     }
4406
4407   if (op0 == NULL_TREE
4408       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4409     cost0 = no_cost;
4410   else
4411     cost0 = force_expr_to_var_cost (op0, speed);
4412
4413   if (op1 == NULL_TREE
4414       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4415     cost1 = no_cost;
4416   else
4417     cost1 = force_expr_to_var_cost (op1, speed);
4418
4419   mode = TYPE_MODE (TREE_TYPE (expr));
4420   switch (TREE_CODE (expr))
4421     {
4422     case POINTER_PLUS_EXPR:
4423     case PLUS_EXPR:
4424     case MINUS_EXPR:
4425     case NEGATE_EXPR:
4426       cost = new_cost (add_cost (speed, mode), 0);
4427       if (TREE_CODE (expr) != NEGATE_EXPR)
4428         {
4429           tree mult = NULL_TREE;
4430           comp_cost sa_cost;
4431           if (TREE_CODE (op1) == MULT_EXPR)
4432             mult = op1;
4433           else if (TREE_CODE (op0) == MULT_EXPR)
4434             mult = op0;
4435
4436           if (mult != NULL_TREE
4437               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4438               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4439                                     speed, &sa_cost))
4440             return sa_cost;
4441         }
4442       break;
4443
4444     CASE_CONVERT:
4445       {
4446         tree inner_mode, outer_mode;
4447         outer_mode = TREE_TYPE (expr);
4448         inner_mode = TREE_TYPE (op0);
4449         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4450                                        TYPE_MODE (inner_mode), speed), 0);
4451       }
4452       break;
4453
4454     case MULT_EXPR:
4455       if (cst_and_fits_in_hwi (op0))
4456         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4457                                              mode, speed), 0);
4458       else if (cst_and_fits_in_hwi (op1))
4459         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4460                                              mode, speed), 0);
4461       else
4462         return new_cost (target_spill_cost [speed], 0);
4463       break;
4464
4465     default:
4466       gcc_unreachable ();
4467     }
4468
4469   cost = add_costs (cost, cost0);
4470   cost = add_costs (cost, cost1);
4471
4472   /* Bound the cost by target_spill_cost.  The parts of complicated
4473      computations often are either loop invariant or at least can
4474      be shared between several iv uses, so letting this grow without
4475      limits would not give reasonable results.  */
4476   if (cost.cost > (int) target_spill_cost [speed])
4477     cost.cost = target_spill_cost [speed];
4478
4479   return cost;
4480 }
4481
4482 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4483    invariants the computation depends on.  */
4484
4485 static comp_cost
4486 force_var_cost (struct ivopts_data *data,
4487                 tree expr, bitmap *depends_on)
4488 {
4489   if (depends_on)
4490     {
4491       fd_ivopts_data = data;
4492       walk_tree (&expr, find_depends, depends_on, NULL);
4493     }
4494
4495   return force_expr_to_var_cost (expr, data->speed);
4496 }
4497
4498 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4499    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4500    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4501    invariants the computation depends on.  */
4502
4503 static comp_cost
4504 split_address_cost (struct ivopts_data *data,
4505                     tree addr, bool *symbol_present, bool *var_present,
4506                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4507 {
4508   tree core;
4509   HOST_WIDE_INT bitsize;
4510   HOST_WIDE_INT bitpos;
4511   tree toffset;
4512   machine_mode mode;
4513   int unsignedp, reversep, volatilep;
4514
4515   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4516                               &unsignedp, &reversep, &volatilep, false);
4517
4518   if (toffset != 0
4519       || bitpos % BITS_PER_UNIT != 0
4520       || reversep
4521       || TREE_CODE (core) != VAR_DECL)
4522     {
4523       *symbol_present = false;
4524       *var_present = true;
4525       fd_ivopts_data = data;
4526       if (depends_on)
4527         walk_tree (&addr, find_depends, depends_on, NULL);
4528
4529       return new_cost (target_spill_cost[data->speed], 0);
4530     }
4531
4532   *offset += bitpos / BITS_PER_UNIT;
4533   if (TREE_STATIC (core)
4534       || DECL_EXTERNAL (core))
4535     {
4536       *symbol_present = true;
4537       *var_present = false;
4538       return no_cost;
4539     }
4540
4541   *symbol_present = false;
4542   *var_present = true;
4543   return no_cost;
4544 }
4545
4546 /* Estimates cost of expressing difference of addresses E1 - E2 as
4547    var + symbol + offset.  The value of offset is added to OFFSET,
4548    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4549    part is missing.  DEPENDS_ON is a set of the invariants the computation
4550    depends on.  */
4551
4552 static comp_cost
4553 ptr_difference_cost (struct ivopts_data *data,
4554                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4555                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4556 {
4557   HOST_WIDE_INT diff = 0;
4558   aff_tree aff_e1, aff_e2;
4559   tree type;
4560
4561   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4562
4563   if (ptr_difference_const (e1, e2, &diff))
4564     {
4565       *offset += diff;
4566       *symbol_present = false;
4567       *var_present = false;
4568       return no_cost;
4569     }
4570
4571   if (integer_zerop (e2))
4572     return split_address_cost (data, TREE_OPERAND (e1, 0),
4573                                symbol_present, var_present, offset, depends_on);
4574
4575   *symbol_present = false;
4576   *var_present = true;
4577
4578   type = signed_type_for (TREE_TYPE (e1));
4579   tree_to_aff_combination (e1, type, &aff_e1);
4580   tree_to_aff_combination (e2, type, &aff_e2);
4581   aff_combination_scale (&aff_e2, -1);
4582   aff_combination_add (&aff_e1, &aff_e2);
4583
4584   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4585 }
4586
4587 /* Estimates cost of expressing difference E1 - E2 as
4588    var + symbol + offset.  The value of offset is added to OFFSET,
4589    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4590    part is missing.  DEPENDS_ON is a set of the invariants the computation
4591    depends on.  */
4592
4593 static comp_cost
4594 difference_cost (struct ivopts_data *data,
4595                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4596                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4597 {
4598   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4599   unsigned HOST_WIDE_INT off1, off2;
4600   aff_tree aff_e1, aff_e2;
4601   tree type;
4602
4603   e1 = strip_offset (e1, &off1);
4604   e2 = strip_offset (e2, &off2);
4605   *offset += off1 - off2;
4606
4607   STRIP_NOPS (e1);
4608   STRIP_NOPS (e2);
4609
4610   if (TREE_CODE (e1) == ADDR_EXPR)
4611     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4612                                 offset, depends_on);
4613   *symbol_present = false;
4614
4615   if (operand_equal_p (e1, e2, 0))
4616     {
4617       *var_present = false;
4618       return no_cost;
4619     }
4620
4621   *var_present = true;
4622
4623   if (integer_zerop (e2))
4624     return force_var_cost (data, e1, depends_on);
4625
4626   if (integer_zerop (e1))
4627     {
4628       comp_cost cost = force_var_cost (data, e2, depends_on);
4629       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4630       return cost;
4631     }
4632
4633   type = signed_type_for (TREE_TYPE (e1));
4634   tree_to_aff_combination (e1, type, &aff_e1);
4635   tree_to_aff_combination (e2, type, &aff_e2);
4636   aff_combination_scale (&aff_e2, -1);
4637   aff_combination_add (&aff_e1, &aff_e2);
4638
4639   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4640 }
4641
4642 /* Returns true if AFF1 and AFF2 are identical.  */
4643
4644 static bool
4645 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4646 {
4647   unsigned i;
4648
4649   if (aff1->n != aff2->n)
4650     return false;
4651
4652   for (i = 0; i < aff1->n; i++)
4653     {
4654       if (aff1->elts[i].coef != aff2->elts[i].coef)
4655         return false;
4656
4657       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4658         return false;
4659     }
4660   return true;
4661 }
4662
4663 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4664
4665 static int
4666 get_expr_id (struct ivopts_data *data, tree expr)
4667 {
4668   struct iv_inv_expr_ent ent;
4669   struct iv_inv_expr_ent **slot;
4670
4671   ent.expr = expr;
4672   ent.hash = iterative_hash_expr (expr, 0);
4673   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4674   if (*slot)
4675     return (*slot)->id;
4676
4677   *slot = XNEW (struct iv_inv_expr_ent);
4678   (*slot)->expr = expr;
4679   (*slot)->hash = ent.hash;
4680   (*slot)->id = data->inv_expr_id++;
4681   return (*slot)->id;
4682 }
4683
4684 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4685    requires a new compiler generated temporary.  Returns -1 otherwise.
4686    ADDRESS_P is a flag indicating if the expression is for address
4687    computation.  */
4688
4689 static int
4690 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4691                             tree cbase, HOST_WIDE_INT ratio,
4692                             bool address_p)
4693 {
4694   aff_tree ubase_aff, cbase_aff;
4695   tree expr, ub, cb;
4696
4697   STRIP_NOPS (ubase);
4698   STRIP_NOPS (cbase);
4699   ub = ubase;
4700   cb = cbase;
4701
4702   if ((TREE_CODE (ubase) == INTEGER_CST)
4703       && (TREE_CODE (cbase) == INTEGER_CST))
4704     return -1;
4705
4706   /* Strips the constant part. */
4707   if (TREE_CODE (ubase) == PLUS_EXPR
4708       || TREE_CODE (ubase) == MINUS_EXPR
4709       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4710     {
4711       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4712         ubase = TREE_OPERAND (ubase, 0);
4713     }
4714
4715   /* Strips the constant part. */
4716   if (TREE_CODE (cbase) == PLUS_EXPR
4717       || TREE_CODE (cbase) == MINUS_EXPR
4718       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4719     {
4720       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4721         cbase = TREE_OPERAND (cbase, 0);
4722     }
4723
4724   if (address_p)
4725     {
4726       if (((TREE_CODE (ubase) == SSA_NAME)
4727            || (TREE_CODE (ubase) == ADDR_EXPR
4728                && is_gimple_min_invariant (ubase)))
4729           && (TREE_CODE (cbase) == INTEGER_CST))
4730         return -1;
4731
4732       if (((TREE_CODE (cbase) == SSA_NAME)
4733            || (TREE_CODE (cbase) == ADDR_EXPR
4734                && is_gimple_min_invariant (cbase)))
4735           && (TREE_CODE (ubase) == INTEGER_CST))
4736         return -1;
4737     }
4738
4739   if (ratio == 1)
4740     {
4741       if (operand_equal_p (ubase, cbase, 0))
4742         return -1;
4743
4744       if (TREE_CODE (ubase) == ADDR_EXPR
4745           && TREE_CODE (cbase) == ADDR_EXPR)
4746         {
4747           tree usym, csym;
4748
4749           usym = TREE_OPERAND (ubase, 0);
4750           csym = TREE_OPERAND (cbase, 0);
4751           if (TREE_CODE (usym) == ARRAY_REF)
4752             {
4753               tree ind = TREE_OPERAND (usym, 1);
4754               if (TREE_CODE (ind) == INTEGER_CST
4755                   && tree_fits_shwi_p (ind)
4756                   && tree_to_shwi (ind) == 0)
4757                 usym = TREE_OPERAND (usym, 0);
4758             }
4759           if (TREE_CODE (csym) == ARRAY_REF)
4760             {
4761               tree ind = TREE_OPERAND (csym, 1);
4762               if (TREE_CODE (ind) == INTEGER_CST
4763                   && tree_fits_shwi_p (ind)
4764                   && tree_to_shwi (ind) == 0)
4765                 csym = TREE_OPERAND (csym, 0);
4766             }
4767           if (operand_equal_p (usym, csym, 0))
4768             return -1;
4769         }
4770       /* Now do more complex comparison  */
4771       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4772       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4773       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4774         return -1;
4775     }
4776
4777   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4778   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4779
4780   aff_combination_scale (&cbase_aff, -1 * ratio);
4781   aff_combination_add (&ubase_aff, &cbase_aff);
4782   expr = aff_combination_to_tree (&ubase_aff);
4783   return get_expr_id (data, expr);
4784 }
4785
4786
4787
4788 /* Determines the cost of the computation by that USE is expressed
4789    from induction variable CAND.  If ADDRESS_P is true, we just need
4790    to create an address from it, otherwise we want to get it into
4791    register.  A set of invariants we depend on is stored in
4792    DEPENDS_ON.  AT is the statement at that the value is computed.
4793    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4794    addressing is likely.  */
4795
4796 static comp_cost
4797 get_computation_cost_at (struct ivopts_data *data,
4798                          struct iv_use *use, struct iv_cand *cand,
4799                          bool address_p, bitmap *depends_on, gimple *at,
4800                          bool *can_autoinc,
4801                          int *inv_expr_id)
4802 {
4803   tree ubase = use->iv->base, ustep = use->iv->step;
4804   tree cbase, cstep;
4805   tree utype = TREE_TYPE (ubase), ctype;
4806   unsigned HOST_WIDE_INT cstepi, offset = 0;
4807   HOST_WIDE_INT ratio, aratio;
4808   bool var_present, symbol_present, stmt_is_after_inc;
4809   comp_cost cost;
4810   widest_int rat;
4811   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4812   machine_mode mem_mode = (address_p
4813                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4814                                 : VOIDmode);
4815
4816   if (depends_on)
4817     *depends_on = NULL;
4818
4819   /* Only consider real candidates.  */
4820   if (!cand->iv)
4821     return infinite_cost;
4822
4823   cbase = cand->iv->base;
4824   cstep = cand->iv->step;
4825   ctype = TREE_TYPE (cbase);
4826
4827   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4828     {
4829       /* We do not have a precision to express the values of use.  */
4830       return infinite_cost;
4831     }
4832
4833   if (address_p
4834       || (use->iv->base_object
4835           && cand->iv->base_object
4836           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4837           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4838     {
4839       /* Do not try to express address of an object with computation based
4840          on address of a different object.  This may cause problems in rtl
4841          level alias analysis (that does not expect this to be happening,
4842          as this is illegal in C), and would be unlikely to be useful
4843          anyway.  */
4844       if (use->iv->base_object
4845           && cand->iv->base_object
4846           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4847         return infinite_cost;
4848     }
4849
4850   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4851     {
4852       /* TODO -- add direct handling of this case.  */
4853       goto fallback;
4854     }
4855
4856   /* CSTEPI is removed from the offset in case statement is after the
4857      increment.  If the step is not constant, we use zero instead.
4858      This is a bit imprecise (there is the extra addition), but
4859      redundancy elimination is likely to transform the code so that
4860      it uses value of the variable before increment anyway,
4861      so it is not that much unrealistic.  */
4862   if (cst_and_fits_in_hwi (cstep))
4863     cstepi = int_cst_value (cstep);
4864   else
4865     cstepi = 0;
4866
4867   if (!constant_multiple_of (ustep, cstep, &rat))
4868     return infinite_cost;
4869
4870   if (wi::fits_shwi_p (rat))
4871     ratio = rat.to_shwi ();
4872   else
4873     return infinite_cost;
4874
4875   STRIP_NOPS (cbase);
4876   ctype = TREE_TYPE (cbase);
4877
4878   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4879
4880   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4881      or ratio == 1, it is better to handle this like
4882
4883      ubase - ratio * cbase + ratio * var
4884
4885      (also holds in the case ratio == -1, TODO.  */
4886
4887   if (cst_and_fits_in_hwi (cbase))
4888     {
4889       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4890       cost = difference_cost (data,
4891                               ubase, build_int_cst (utype, 0),
4892                               &symbol_present, &var_present, &offset,
4893                               depends_on);
4894       cost.cost /= avg_loop_niter (data->current_loop);
4895     }
4896   else if (ratio == 1)
4897     {
4898       tree real_cbase = cbase;
4899
4900       /* Check to see if any adjustment is needed.  */
4901       if (cstepi == 0 && stmt_is_after_inc)
4902         {
4903           aff_tree real_cbase_aff;
4904           aff_tree cstep_aff;
4905
4906           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4907                                    &real_cbase_aff);
4908           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4909
4910           aff_combination_add (&real_cbase_aff, &cstep_aff);
4911           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4912         }
4913
4914       cost = difference_cost (data,
4915                               ubase, real_cbase,
4916                               &symbol_present, &var_present, &offset,
4917                               depends_on);
4918       cost.cost /= avg_loop_niter (data->current_loop);
4919     }
4920   else if (address_p
4921            && !POINTER_TYPE_P (ctype)
4922            && multiplier_allowed_in_address_p
4923                 (ratio, mem_mode,
4924                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4925     {
4926       if (cstepi == 0 && stmt_is_after_inc)
4927         {
4928           if (POINTER_TYPE_P (ctype))
4929             cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4930           else
4931             cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4932         }
4933       cbase
4934         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4935       cost = difference_cost (data,
4936                               ubase, cbase,
4937                               &symbol_present, &var_present, &offset,
4938                               depends_on);
4939       cost.cost /= avg_loop_niter (data->current_loop);
4940     }
4941   else
4942     {
4943       cost = force_var_cost (data, cbase, depends_on);
4944       cost = add_costs (cost,
4945                         difference_cost (data,
4946                                          ubase, build_int_cst (utype, 0),
4947                                          &symbol_present, &var_present,
4948                                          &offset, depends_on));
4949       cost.cost /= avg_loop_niter (data->current_loop);
4950       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4951     }
4952
4953   /* Record setup cost in scrach field.  */
4954   cost.scratch = cost.cost;
4955   /* Set of invariants depended on by sub use has already been computed
4956      for the first use in the group.  */
4957   if (use->sub_id)
4958     {
4959       cost.cost = 0;
4960       if (depends_on && *depends_on)
4961         bitmap_clear (*depends_on);
4962     }
4963   else if (inv_expr_id)
4964     {
4965       *inv_expr_id =
4966           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4967       /* Clear depends on.  */
4968       if (*inv_expr_id != -1 && depends_on && *depends_on)
4969         bitmap_clear (*depends_on);
4970     }
4971
4972   /* If we are after the increment, the value of the candidate is higher by
4973      one iteration.  */
4974   if (stmt_is_after_inc)
4975     offset -= ratio * cstepi;
4976
4977   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4978      (symbol/var1/const parts may be omitted).  If we are looking for an
4979      address, find the cost of addressing this.  */
4980   if (address_p)
4981     return add_costs (cost,
4982                       get_address_cost (symbol_present, var_present,
4983                                         offset, ratio, cstepi,
4984                                         mem_mode,
4985                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4986                                         speed, stmt_is_after_inc,
4987                                         can_autoinc));
4988
4989   /* Otherwise estimate the costs for computing the expression.  */
4990   if (!symbol_present && !var_present && !offset)
4991     {
4992       if (ratio != 1)
4993         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4994       return cost;
4995     }
4996
4997   /* Symbol + offset should be compile-time computable so consider that they
4998       are added once to the variable, if present.  */
4999   if (var_present && (symbol_present || offset))
5000     cost.cost += adjust_setup_cost (data,
5001                                     add_cost (speed, TYPE_MODE (ctype)));
5002
5003   /* Having offset does not affect runtime cost in case it is added to
5004      symbol, but it increases complexity.  */
5005   if (offset)
5006     cost.complexity++;
5007
5008   cost.cost += add_cost (speed, TYPE_MODE (ctype));
5009
5010   aratio = ratio > 0 ? ratio : -ratio;
5011   if (aratio != 1)
5012     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5013   return cost;
5014
5015 fallback:
5016   if (can_autoinc)
5017     *can_autoinc = false;
5018
5019   {
5020     /* Just get the expression, expand it and measure the cost.  */
5021     tree comp = get_computation_at (data->current_loop, use, cand, at);
5022
5023     if (!comp)
5024       return infinite_cost;
5025
5026     if (address_p)
5027       comp = build_simple_mem_ref (comp);
5028
5029     cost = new_cost (computation_cost (comp, speed), 0);
5030     cost.scratch = 0;
5031     return cost;
5032   }
5033 }
5034
5035 /* Determines the cost of the computation by that USE is expressed
5036    from induction variable CAND.  If ADDRESS_P is true, we just need
5037    to create an address from it, otherwise we want to get it into
5038    register.  A set of invariants we depend on is stored in
5039    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
5040    autoinc addressing is likely.  */
5041
5042 static comp_cost
5043 get_computation_cost (struct ivopts_data *data,
5044                       struct iv_use *use, struct iv_cand *cand,
5045                       bool address_p, bitmap *depends_on,
5046                       bool *can_autoinc, int *inv_expr_id)
5047 {
5048   return get_computation_cost_at (data,
5049                                   use, cand, address_p, depends_on, use->stmt,
5050                                   can_autoinc, inv_expr_id);
5051 }
5052
5053 /* Determines cost of basing replacement of USE on CAND in a generic
5054    expression.  */
5055
5056 static bool
5057 determine_use_iv_cost_generic (struct ivopts_data *data,
5058                                struct iv_use *use, struct iv_cand *cand)
5059 {
5060   bitmap depends_on;
5061   comp_cost cost;
5062   int inv_expr_id = -1;
5063
5064   /* The simple case first -- if we need to express value of the preserved
5065      original biv, the cost is 0.  This also prevents us from counting the
5066      cost of increment twice -- once at this use and once in the cost of
5067      the candidate.  */
5068   if (cand->pos == IP_ORIGINAL
5069       && cand->incremented_at == use->stmt)
5070     {
5071       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5072                        ERROR_MARK, -1);
5073       return true;
5074     }
5075
5076   cost = get_computation_cost (data, use, cand, false, &depends_on,
5077                                NULL, &inv_expr_id);
5078
5079   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5080                    inv_expr_id);
5081
5082   return !infinite_cost_p (cost);
5083 }
5084
5085 /* Determines cost of basing replacement of USE on CAND in an address.  */
5086
5087 static bool
5088 determine_use_iv_cost_address (struct ivopts_data *data,
5089                                struct iv_use *use, struct iv_cand *cand)
5090 {
5091   bitmap depends_on;
5092   bool can_autoinc, first;
5093   int inv_expr_id = -1;
5094   struct iv_use *sub_use;
5095   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
5096                                          &can_autoinc, &inv_expr_id);
5097   comp_cost sub_cost = cost;
5098
5099   if (cand->ainc_use == use)
5100     {
5101       if (can_autoinc)
5102         cost.cost -= cand->cost_step;
5103       /* If we generated the candidate solely for exploiting autoincrement
5104          opportunities, and it turns out it can't be used, set the cost to
5105          infinity to make sure we ignore it.  */
5106       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5107         cost = infinite_cost;
5108     }
5109
5110   if (!infinite_cost_p (cost) && use->next)
5111     {
5112       first = true;
5113       sub_use = use->next;
5114       /* We don't want to add setup cost for sub-uses.  */
5115       sub_cost.cost -= sub_cost.scratch;
5116       /* Add cost for sub uses in group.  */
5117       do
5118         {
5119           /* Compute cost for the first sub use with different offset to
5120              the main use and add it afterwards.  Costs for these uses
5121              could be quite different.  Given below uses in a group:
5122                use 0  : {base + A + offset_0, step}
5123                use 0.1: {base + A + offset_0, step}
5124                use 0.2: {base + A + offset_1, step}
5125                use 0.3: {base + A + offset_2, step}
5126              when we need to compute costs with candidate:
5127                cand 1 : {base + B + offset_0, step}
5128
5129              The first sub use with different offset is use 0.2, its cost
5130              is larger than cost of use 0/0.1 because we need to compute:
5131                A - B + offset_1 - offset_0
5132              rather than:
5133                A - B.  */
5134           if (first && use->addr_offset != sub_use->addr_offset)
5135             {
5136               first = false;
5137               sub_cost = get_computation_cost (data, sub_use, cand, true,
5138                                                NULL, &can_autoinc, NULL);
5139               if (infinite_cost_p (sub_cost))
5140                 {
5141                   cost = infinite_cost;
5142                   break;
5143                 }
5144             }
5145
5146           cost = add_costs (cost, sub_cost);
5147           sub_use = sub_use->next;
5148         }
5149       while (sub_use);
5150     }
5151
5152   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5153                    inv_expr_id);
5154
5155   return !infinite_cost_p (cost);
5156 }
5157
5158 /* Computes value of candidate CAND at position AT in iteration NITER, and
5159    stores it to VAL.  */
5160
5161 static void
5162 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5163                aff_tree *val)
5164 {
5165   aff_tree step, delta, nit;
5166   struct iv *iv = cand->iv;
5167   tree type = TREE_TYPE (iv->base);
5168   tree steptype = type;
5169   if (POINTER_TYPE_P (type))
5170     steptype = sizetype;
5171   steptype = unsigned_type_for (type);
5172
5173   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5174   aff_combination_convert (&step, steptype);
5175   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5176   aff_combination_convert (&nit, steptype);
5177   aff_combination_mult (&nit, &step, &delta);
5178   if (stmt_after_increment (loop, cand, at))
5179     aff_combination_add (&delta, &step);
5180
5181   tree_to_aff_combination (iv->base, type, val);
5182   if (!POINTER_TYPE_P (type))
5183     aff_combination_convert (val, steptype);
5184   aff_combination_add (val, &delta);
5185 }
5186
5187 /* Returns period of induction variable iv.  */
5188
5189 static tree
5190 iv_period (struct iv *iv)
5191 {
5192   tree step = iv->step, period, type;
5193   tree pow2div;
5194
5195   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5196
5197   type = unsigned_type_for (TREE_TYPE (step));
5198   /* Period of the iv is lcm (step, type_range)/step -1,
5199      i.e., N*type_range/step - 1. Since type range is power
5200      of two, N == (step >> num_of_ending_zeros_binary (step),
5201      so the final result is
5202
5203        (type_range >> num_of_ending_zeros_binary (step)) - 1
5204
5205   */
5206   pow2div = num_ending_zeros (step);
5207
5208   period = build_low_bits_mask (type,
5209                                 (TYPE_PRECISION (type)
5210                                  - tree_to_uhwi (pow2div)));
5211
5212   return period;
5213 }
5214
5215 /* Returns the comparison operator used when eliminating the iv USE.  */
5216
5217 static enum tree_code
5218 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5219 {
5220   struct loop *loop = data->current_loop;
5221   basic_block ex_bb;
5222   edge exit;
5223
5224   ex_bb = gimple_bb (use->stmt);
5225   exit = EDGE_SUCC (ex_bb, 0);
5226   if (flow_bb_inside_loop_p (loop, exit->dest))
5227     exit = EDGE_SUCC (ex_bb, 1);
5228
5229   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5230 }
5231
5232 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5233    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5234    calculation is performed in non-wrapping type.
5235
5236    TODO: More generally, we could test for the situation that
5237          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5238          This would require knowing the sign of OFFSET.  */
5239
5240 static bool
5241 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5242 {
5243   enum tree_code code;
5244   tree e1, e2;
5245   aff_tree aff_e1, aff_e2, aff_offset;
5246
5247   if (!nowrap_type_p (TREE_TYPE (base)))
5248     return false;
5249
5250   base = expand_simple_operations (base);
5251
5252   if (TREE_CODE (base) == SSA_NAME)
5253     {
5254       gimple *stmt = SSA_NAME_DEF_STMT (base);
5255
5256       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5257         return false;
5258
5259       code = gimple_assign_rhs_code (stmt);
5260       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5261         return false;
5262
5263       e1 = gimple_assign_rhs1 (stmt);
5264       e2 = gimple_assign_rhs2 (stmt);
5265     }
5266   else
5267     {
5268       code = TREE_CODE (base);
5269       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5270         return false;
5271       e1 = TREE_OPERAND (base, 0);
5272       e2 = TREE_OPERAND (base, 1);
5273     }
5274
5275   /* Use affine expansion as deeper inspection to prove the equality.  */
5276   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5277                                   &aff_e2, &data->name_expansion_cache);
5278   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5279                                   &aff_offset, &data->name_expansion_cache);
5280   aff_combination_scale (&aff_offset, -1);
5281   switch (code)
5282     {
5283     case PLUS_EXPR:
5284       aff_combination_add (&aff_e2, &aff_offset);
5285       if (aff_combination_zero_p (&aff_e2))
5286         return true;
5287
5288       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5289                                       &aff_e1, &data->name_expansion_cache);
5290       aff_combination_add (&aff_e1, &aff_offset);
5291       return aff_combination_zero_p (&aff_e1);
5292
5293     case POINTER_PLUS_EXPR:
5294       aff_combination_add (&aff_e2, &aff_offset);
5295       return aff_combination_zero_p (&aff_e2);
5296
5297     default:
5298       return false;
5299     }
5300 }
5301
5302 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5303    comparison with CAND.  NITER describes the number of iterations of
5304    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5305
5306    We aim to handle the following situation:
5307
5308    sometype *base, *p;
5309    int a, b, i;
5310
5311    i = a;
5312    p = p_0 = base + a;
5313
5314    do
5315      {
5316        bla (*p);
5317        p++;
5318        i++;
5319      }
5320    while (i < b);
5321
5322    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5323    We aim to optimize this to
5324
5325    p = p_0 = base + a;
5326    do
5327      {
5328        bla (*p);
5329        p++;
5330      }
5331    while (p < p_0 - a + b);
5332
5333    This preserves the correctness, since the pointer arithmetics does not
5334    overflow.  More precisely:
5335
5336    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5337       overflow in computing it or the values of p.
5338    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5339       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5340
5341 static bool
5342 iv_elimination_compare_lt (struct ivopts_data *data,
5343                            struct iv_cand *cand, enum tree_code *comp_p,
5344                            struct tree_niter_desc *niter)
5345 {
5346   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5347   struct aff_tree nit, tmpa, tmpb;
5348   enum tree_code comp;
5349   HOST_WIDE_INT step;
5350
5351   /* We need to know that the candidate induction variable does not overflow.
5352      While more complex analysis may be used to prove this, for now just
5353      check that the variable appears in the original program and that it
5354      is computed in a type that guarantees no overflows.  */
5355   cand_type = TREE_TYPE (cand->iv->base);
5356   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5357     return false;
5358
5359   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5360      the calculation of the BOUND could overflow, making the comparison
5361      invalid.  */
5362   if (!data->loop_single_exit_p)
5363     return false;
5364
5365   /* We need to be able to decide whether candidate is increasing or decreasing
5366      in order to choose the right comparison operator.  */
5367   if (!cst_and_fits_in_hwi (cand->iv->step))
5368     return false;
5369   step = int_cst_value (cand->iv->step);
5370
5371   /* Check that the number of iterations matches the expected pattern:
5372      a + 1 > b ? 0 : b - a - 1.  */
5373   mbz = niter->may_be_zero;
5374   if (TREE_CODE (mbz) == GT_EXPR)
5375     {
5376       /* Handle a + 1 > b.  */
5377       tree op0 = TREE_OPERAND (mbz, 0);
5378       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5379         {
5380           a = TREE_OPERAND (op0, 0);
5381           b = TREE_OPERAND (mbz, 1);
5382         }
5383       else
5384         return false;
5385     }
5386   else if (TREE_CODE (mbz) == LT_EXPR)
5387     {
5388       tree op1 = TREE_OPERAND (mbz, 1);
5389
5390       /* Handle b < a + 1.  */
5391       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5392         {
5393           a = TREE_OPERAND (op1, 0);
5394           b = TREE_OPERAND (mbz, 0);
5395         }
5396       else
5397         return false;
5398     }
5399   else
5400     return false;
5401
5402   /* Expected number of iterations is B - A - 1.  Check that it matches
5403      the actual number, i.e., that B - A - NITER = 1.  */
5404   tree_to_aff_combination (niter->niter, nit_type, &nit);
5405   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5406   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5407   aff_combination_scale (&nit, -1);
5408   aff_combination_scale (&tmpa, -1);
5409   aff_combination_add (&tmpb, &tmpa);
5410   aff_combination_add (&tmpb, &nit);
5411   if (tmpb.n != 0 || tmpb.offset != 1)
5412     return false;
5413
5414   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5415      overflow.  */
5416   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5417                         cand->iv->step,
5418                         fold_convert (TREE_TYPE (cand->iv->step), a));
5419   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5420     return false;
5421
5422   /* Determine the new comparison operator.  */
5423   comp = step < 0 ? GT_EXPR : LT_EXPR;
5424   if (*comp_p == NE_EXPR)
5425     *comp_p = comp;
5426   else if (*comp_p == EQ_EXPR)
5427     *comp_p = invert_tree_comparison (comp, false);
5428   else
5429     gcc_unreachable ();
5430
5431   return true;
5432 }
5433
5434 /* Check whether it is possible to express the condition in USE by comparison
5435    of candidate CAND.  If so, store the value compared with to BOUND, and the
5436    comparison operator to COMP.  */
5437
5438 static bool
5439 may_eliminate_iv (struct ivopts_data *data,
5440                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5441                   enum tree_code *comp)
5442 {
5443   basic_block ex_bb;
5444   edge exit;
5445   tree period;
5446   struct loop *loop = data->current_loop;
5447   aff_tree bnd;
5448   struct tree_niter_desc *desc = NULL;
5449
5450   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5451     return false;
5452
5453   /* For now works only for exits that dominate the loop latch.
5454      TODO: extend to other conditions inside loop body.  */
5455   ex_bb = gimple_bb (use->stmt);
5456   if (use->stmt != last_stmt (ex_bb)
5457       || gimple_code (use->stmt) != GIMPLE_COND
5458       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5459     return false;
5460
5461   exit = EDGE_SUCC (ex_bb, 0);
5462   if (flow_bb_inside_loop_p (loop, exit->dest))
5463     exit = EDGE_SUCC (ex_bb, 1);
5464   if (flow_bb_inside_loop_p (loop, exit->dest))
5465     return false;
5466
5467   desc = niter_for_exit (data, exit);
5468   if (!desc)
5469     return false;
5470
5471   /* Determine whether we can use the variable to test the exit condition.
5472      This is the case iff the period of the induction variable is greater
5473      than the number of iterations for which the exit condition is true.  */
5474   period = iv_period (cand->iv);
5475
5476   /* If the number of iterations is constant, compare against it directly.  */
5477   if (TREE_CODE (desc->niter) == INTEGER_CST)
5478     {
5479       /* See cand_value_at.  */
5480       if (stmt_after_increment (loop, cand, use->stmt))
5481         {
5482           if (!tree_int_cst_lt (desc->niter, period))
5483             return false;
5484         }
5485       else
5486         {
5487           if (tree_int_cst_lt (period, desc->niter))
5488             return false;
5489         }
5490     }
5491
5492   /* If not, and if this is the only possible exit of the loop, see whether
5493      we can get a conservative estimate on the number of iterations of the
5494      entire loop and compare against that instead.  */
5495   else
5496     {
5497       widest_int period_value, max_niter;
5498
5499       max_niter = desc->max;
5500       if (stmt_after_increment (loop, cand, use->stmt))
5501         max_niter += 1;
5502       period_value = wi::to_widest (period);
5503       if (wi::gtu_p (max_niter, period_value))
5504         {
5505           /* See if we can take advantage of inferred loop bound information.  */
5506           if (data->loop_single_exit_p)
5507             {
5508               if (!max_loop_iterations (loop, &max_niter))
5509                 return false;
5510               /* The loop bound is already adjusted by adding 1.  */
5511               if (wi::gtu_p (max_niter, period_value))
5512                 return false;
5513             }
5514           else
5515             return false;
5516         }
5517     }
5518
5519   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5520
5521   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5522                          aff_combination_to_tree (&bnd));
5523   *comp = iv_elimination_compare (data, use);
5524
5525   /* It is unlikely that computing the number of iterations using division
5526      would be more profitable than keeping the original induction variable.  */
5527   if (expression_expensive_p (*bound))
5528     return false;
5529
5530   /* Sometimes, it is possible to handle the situation that the number of
5531      iterations may be zero unless additional assumtions by using <
5532      instead of != in the exit condition.
5533
5534      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5535            base the exit condition on it.  However, that is often too
5536            expensive.  */
5537   if (!integer_zerop (desc->may_be_zero))
5538     return iv_elimination_compare_lt (data, cand, comp, desc);
5539
5540   return true;
5541 }
5542
5543  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5544     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5545
5546 static int
5547 parm_decl_cost (struct ivopts_data *data, tree bound)
5548 {
5549   tree sbound = bound;
5550   STRIP_NOPS (sbound);
5551
5552   if (TREE_CODE (sbound) == SSA_NAME
5553       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5554       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5555       && data->body_includes_call)
5556     return COSTS_N_INSNS (1);
5557
5558   return 0;
5559 }
5560
5561 /* Determines cost of basing replacement of USE on CAND in a condition.  */
5562
5563 static bool
5564 determine_use_iv_cost_condition (struct ivopts_data *data,
5565                                  struct iv_use *use, struct iv_cand *cand)
5566 {
5567   tree bound = NULL_TREE;
5568   struct iv *cmp_iv;
5569   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5570   comp_cost elim_cost, express_cost, cost, bound_cost;
5571   bool ok;
5572   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5573   tree *control_var, *bound_cst;
5574   enum tree_code comp = ERROR_MARK;
5575
5576   /* Only consider real candidates.  */
5577   if (!cand->iv)
5578     {
5579       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5580                        ERROR_MARK, -1);
5581       return false;
5582     }
5583
5584   /* Try iv elimination.  */
5585   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5586     {
5587       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5588       if (elim_cost.cost == 0)
5589         elim_cost.cost = parm_decl_cost (data, bound);
5590       else if (TREE_CODE (bound) == INTEGER_CST)
5591         elim_cost.cost = 0;
5592       /* If we replace a loop condition 'i < n' with 'p < base + n',
5593          depends_on_elim will have 'base' and 'n' set, which implies
5594          that both 'base' and 'n' will be live during the loop.  More likely,
5595          'base + n' will be loop invariant, resulting in only one live value
5596          during the loop.  So in that case we clear depends_on_elim and set
5597         elim_inv_expr_id instead.  */
5598       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5599         {
5600           elim_inv_expr_id = get_expr_id (data, bound);
5601           bitmap_clear (depends_on_elim);
5602         }
5603       /* The bound is a loop invariant, so it will be only computed
5604          once.  */
5605       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5606     }
5607   else
5608     elim_cost = infinite_cost;
5609
5610   /* Try expressing the original giv.  If it is compared with an invariant,
5611      note that we cannot get rid of it.  */
5612   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5613                               NULL, &cmp_iv);
5614   gcc_assert (ok);
5615
5616   /* When the condition is a comparison of the candidate IV against
5617      zero, prefer this IV.
5618
5619      TODO: The constant that we're subtracting from the cost should
5620      be target-dependent.  This information should be added to the
5621      target costs for each backend.  */
5622   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5623       && integer_zerop (*bound_cst)
5624       && (operand_equal_p (*control_var, cand->var_after, 0)
5625           || operand_equal_p (*control_var, cand->var_before, 0)))
5626     elim_cost.cost -= 1;
5627
5628   express_cost = get_computation_cost (data, use, cand, false,
5629                                        &depends_on_express, NULL,
5630                                        &express_inv_expr_id);
5631   fd_ivopts_data = data;
5632   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5633
5634   /* Count the cost of the original bound as well.  */
5635   bound_cost = force_var_cost (data, *bound_cst, NULL);
5636   if (bound_cost.cost == 0)
5637     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5638   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5639     bound_cost.cost = 0;
5640   express_cost.cost += bound_cost.cost;
5641
5642   /* Choose the better approach, preferring the eliminated IV. */
5643   if (compare_costs (elim_cost, express_cost) <= 0)
5644     {
5645       cost = elim_cost;
5646       depends_on = depends_on_elim;
5647       depends_on_elim = NULL;
5648       inv_expr_id = elim_inv_expr_id;
5649     }
5650   else
5651     {
5652       cost = express_cost;
5653       depends_on = depends_on_express;
5654       depends_on_express = NULL;
5655       bound = NULL_TREE;
5656       comp = ERROR_MARK;
5657       inv_expr_id = express_inv_expr_id;
5658     }
5659
5660   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5661
5662   if (depends_on_elim)
5663     BITMAP_FREE (depends_on_elim);
5664   if (depends_on_express)
5665     BITMAP_FREE (depends_on_express);
5666
5667   return !infinite_cost_p (cost);
5668 }
5669
5670 /* Determines cost of basing replacement of USE on CAND.  Returns false
5671    if USE cannot be based on CAND.  */
5672
5673 static bool
5674 determine_use_iv_cost (struct ivopts_data *data,
5675                        struct iv_use *use, struct iv_cand *cand)
5676 {
5677   switch (use->type)
5678     {
5679     case USE_NONLINEAR_EXPR:
5680       return determine_use_iv_cost_generic (data, use, cand);
5681
5682     case USE_ADDRESS:
5683       return determine_use_iv_cost_address (data, use, cand);
5684
5685     case USE_COMPARE:
5686       return determine_use_iv_cost_condition (data, use, cand);
5687
5688     default:
5689       gcc_unreachable ();
5690     }
5691 }
5692
5693 /* Return true if get_computation_cost indicates that autoincrement is
5694    a possibility for the pair of USE and CAND, false otherwise.  */
5695
5696 static bool
5697 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5698                            struct iv_cand *cand)
5699 {
5700   bitmap depends_on;
5701   bool can_autoinc;
5702   comp_cost cost;
5703
5704   if (use->type != USE_ADDRESS)
5705     return false;
5706
5707   cost = get_computation_cost (data, use, cand, true, &depends_on,
5708                                &can_autoinc, NULL);
5709
5710   BITMAP_FREE (depends_on);
5711
5712   return !infinite_cost_p (cost) && can_autoinc;
5713 }
5714
5715 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5716    use that allows autoincrement, and set their AINC_USE if possible.  */
5717
5718 static void
5719 set_autoinc_for_original_candidates (struct ivopts_data *data)
5720 {
5721   unsigned i, j;
5722
5723   for (i = 0; i < n_iv_cands (data); i++)
5724     {
5725       struct iv_cand *cand = iv_cand (data, i);
5726       struct iv_use *closest_before = NULL;
5727       struct iv_use *closest_after = NULL;
5728       if (cand->pos != IP_ORIGINAL)
5729         continue;
5730
5731       for (j = 0; j < n_iv_uses (data); j++)
5732         {
5733           struct iv_use *use = iv_use (data, j);
5734           unsigned uid = gimple_uid (use->stmt);
5735
5736           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5737             continue;
5738
5739           if (uid < gimple_uid (cand->incremented_at)
5740               && (closest_before == NULL
5741                   || uid > gimple_uid (closest_before->stmt)))
5742             closest_before = use;
5743
5744           if (uid > gimple_uid (cand->incremented_at)
5745               && (closest_after == NULL
5746                   || uid < gimple_uid (closest_after->stmt)))
5747             closest_after = use;
5748         }
5749
5750       if (closest_before != NULL
5751           && autoinc_possible_for_pair (data, closest_before, cand))
5752         cand->ainc_use = closest_before;
5753       else if (closest_after != NULL
5754                && autoinc_possible_for_pair (data, closest_after, cand))
5755         cand->ainc_use = closest_after;
5756     }
5757 }
5758
5759 /* Finds the candidates for the induction variables.  */
5760
5761 static void
5762 find_iv_candidates (struct ivopts_data *data)
5763 {
5764   /* Add commonly used ivs.  */
5765   add_standard_iv_candidates (data);
5766
5767   /* Add old induction variables.  */
5768   add_iv_candidate_for_bivs (data);
5769
5770   /* Add induction variables derived from uses.  */
5771   add_iv_candidate_for_uses (data);
5772
5773   set_autoinc_for_original_candidates (data);
5774
5775   /* Record the important candidates.  */
5776   record_important_candidates (data);
5777 }
5778
5779 /* Determines costs of basing the use of the iv on an iv candidate.  */
5780
5781 static void
5782 determine_use_iv_costs (struct ivopts_data *data)
5783 {
5784   unsigned i, j;
5785   struct iv_use *use;
5786   struct iv_cand *cand;
5787   bitmap to_clear = BITMAP_ALLOC (NULL);
5788
5789   alloc_use_cost_map (data);
5790
5791   for (i = 0; i < n_iv_uses (data); i++)
5792     {
5793       use = iv_use (data, i);
5794
5795       if (data->consider_all_candidates)
5796         {
5797           for (j = 0; j < n_iv_cands (data); j++)
5798             {
5799               cand = iv_cand (data, j);
5800               determine_use_iv_cost (data, use, cand);
5801             }
5802         }
5803       else
5804         {
5805           bitmap_iterator bi;
5806
5807           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5808             {
5809               cand = iv_cand (data, j);
5810               if (!determine_use_iv_cost (data, use, cand))
5811                 bitmap_set_bit (to_clear, j);
5812             }
5813
5814           /* Remove the candidates for that the cost is infinite from
5815              the list of related candidates.  */
5816           bitmap_and_compl_into (use->related_cands, to_clear);
5817           bitmap_clear (to_clear);
5818         }
5819     }
5820
5821   BITMAP_FREE (to_clear);
5822
5823   if (dump_file && (dump_flags & TDF_DETAILS))
5824     {
5825       fprintf (dump_file, "Use-candidate costs:\n");
5826
5827       for (i = 0; i < n_iv_uses (data); i++)
5828         {
5829           use = iv_use (data, i);
5830
5831           fprintf (dump_file, "Use %d:\n", i);
5832           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5833           for (j = 0; j < use->n_map_members; j++)
5834             {
5835               if (!use->cost_map[j].cand
5836                   || infinite_cost_p (use->cost_map[j].cost))
5837                 continue;
5838
5839               fprintf (dump_file, "  %d\t%d\t%d\t",
5840                        use->cost_map[j].cand->id,
5841                        use->cost_map[j].cost.cost,
5842                        use->cost_map[j].cost.complexity);
5843               if (use->cost_map[j].depends_on)
5844                 bitmap_print (dump_file,
5845                               use->cost_map[j].depends_on, "","");
5846               if (use->cost_map[j].inv_expr_id != -1)
5847                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5848               fprintf (dump_file, "\n");
5849             }
5850
5851           fprintf (dump_file, "\n");
5852         }
5853       fprintf (dump_file, "\n");
5854     }
5855 }
5856
5857 /* Determines cost of the candidate CAND.  */
5858
5859 static void
5860 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5861 {
5862   comp_cost cost_base;
5863   unsigned cost, cost_step;
5864   tree base;
5865
5866   if (!cand->iv)
5867     {
5868       cand->cost = 0;
5869       return;
5870     }
5871
5872   /* There are two costs associated with the candidate -- its increment
5873      and its initialization.  The second is almost negligible for any loop
5874      that rolls enough, so we take it just very little into account.  */
5875
5876   base = cand->iv->base;
5877   cost_base = force_var_cost (data, base, NULL);
5878   /* It will be exceptional that the iv register happens to be initialized with
5879      the proper value at no cost.  In general, there will at least be a regcopy
5880      or a const set.  */
5881   if (cost_base.cost == 0)
5882     cost_base.cost = COSTS_N_INSNS (1);
5883   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5884
5885   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5886
5887   /* Prefer the original ivs unless we may gain something by replacing it.
5888      The reason is to make debugging simpler; so this is not relevant for
5889      artificial ivs created by other optimization passes.  */
5890   if (cand->pos != IP_ORIGINAL
5891       || !SSA_NAME_VAR (cand->var_before)
5892       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5893     cost++;
5894
5895   /* Prefer not to insert statements into latch unless there are some
5896      already (so that we do not create unnecessary jumps).  */
5897   if (cand->pos == IP_END
5898       && empty_block_p (ip_end_pos (data->current_loop)))
5899     cost++;
5900
5901   cand->cost = cost;
5902   cand->cost_step = cost_step;
5903 }
5904
5905 /* Determines costs of computation of the candidates.  */
5906
5907 static void
5908 determine_iv_costs (struct ivopts_data *data)
5909 {
5910   unsigned i;
5911
5912   if (dump_file && (dump_flags & TDF_DETAILS))
5913     {
5914       fprintf (dump_file, "Candidate costs:\n");
5915       fprintf (dump_file, "  cand\tcost\n");
5916     }
5917
5918   for (i = 0; i < n_iv_cands (data); i++)
5919     {
5920       struct iv_cand *cand = iv_cand (data, i);
5921
5922       determine_iv_cost (data, cand);
5923
5924       if (dump_file && (dump_flags & TDF_DETAILS))
5925         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5926     }
5927
5928   if (dump_file && (dump_flags & TDF_DETAILS))
5929     fprintf (dump_file, "\n");
5930 }
5931
5932 /* Calculates cost for having SIZE induction variables.  */
5933
5934 static unsigned
5935 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5936 {
5937   /* We add size to the cost, so that we prefer eliminating ivs
5938      if possible.  */
5939   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5940                                             data->body_includes_call);
5941 }
5942
5943 /* For each size of the induction variable set determine the penalty.  */
5944
5945 static void
5946 determine_set_costs (struct ivopts_data *data)
5947 {
5948   unsigned j, n;
5949   gphi *phi;
5950   gphi_iterator psi;
5951   tree op;
5952   struct loop *loop = data->current_loop;
5953   bitmap_iterator bi;
5954
5955   if (dump_file && (dump_flags & TDF_DETAILS))
5956     {
5957       fprintf (dump_file, "Global costs:\n");
5958       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5959       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5960       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5961       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5962     }
5963
5964   n = 0;
5965   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5966     {
5967       phi = psi.phi ();
5968       op = PHI_RESULT (phi);
5969
5970       if (virtual_operand_p (op))
5971         continue;
5972
5973       if (get_iv (data, op))
5974         continue;
5975
5976       n++;
5977     }
5978
5979   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5980     {
5981       struct version_info *info = ver_info (data, j);
5982
5983       if (info->inv_id && info->has_nonlin_use)
5984         n++;
5985     }
5986
5987   data->regs_used = n;
5988   if (dump_file && (dump_flags & TDF_DETAILS))
5989     fprintf (dump_file, "  regs_used %d\n", n);
5990
5991   if (dump_file && (dump_flags & TDF_DETAILS))
5992     {
5993       fprintf (dump_file, "  cost for size:\n");
5994       fprintf (dump_file, "  ivs\tcost\n");
5995       for (j = 0; j <= 2 * target_avail_regs; j++)
5996         fprintf (dump_file, "  %d\t%d\n", j,
5997                  ivopts_global_cost_for_size (data, j));
5998       fprintf (dump_file, "\n");
5999     }
6000 }
6001
6002 /* Returns true if A is a cheaper cost pair than B.  */
6003
6004 static bool
6005 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6006 {
6007   int cmp;
6008
6009   if (!a)
6010     return false;
6011
6012   if (!b)
6013     return true;
6014
6015   cmp = compare_costs (a->cost, b->cost);
6016   if (cmp < 0)
6017     return true;
6018
6019   if (cmp > 0)
6020     return false;
6021
6022   /* In case the costs are the same, prefer the cheaper candidate.  */
6023   if (a->cand->cost < b->cand->cost)
6024     return true;
6025
6026   return false;
6027 }
6028
6029
6030 /* Returns candidate by that USE is expressed in IVS.  */
6031
6032 static struct cost_pair *
6033 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
6034 {
6035   return ivs->cand_for_use[use->id];
6036 }
6037
6038 /* Computes the cost field of IVS structure.  */
6039
6040 static void
6041 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6042 {
6043   comp_cost cost = ivs->cand_use_cost;
6044
6045   cost.cost += ivs->cand_cost;
6046
6047   cost.cost += ivopts_global_cost_for_size (data,
6048                                             ivs->n_regs + ivs->num_used_inv_expr);
6049
6050   ivs->cost = cost;
6051 }
6052
6053 /* Remove invariants in set INVS to set IVS.  */
6054
6055 static void
6056 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6057 {
6058   bitmap_iterator bi;
6059   unsigned iid;
6060
6061   if (!invs)
6062     return;
6063
6064   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6065     {
6066       ivs->n_invariant_uses[iid]--;
6067       if (ivs->n_invariant_uses[iid] == 0)
6068         ivs->n_regs--;
6069     }
6070 }
6071
6072 /* Set USE not to be expressed by any candidate in IVS.  */
6073
6074 static void
6075 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6076                  struct iv_use *use)
6077 {
6078   unsigned uid = use->id, cid;
6079   struct cost_pair *cp;
6080
6081   cp = ivs->cand_for_use[uid];
6082   if (!cp)
6083     return;
6084   cid = cp->cand->id;
6085
6086   ivs->bad_uses++;
6087   ivs->cand_for_use[uid] = NULL;
6088   ivs->n_cand_uses[cid]--;
6089
6090   if (ivs->n_cand_uses[cid] == 0)
6091     {
6092       bitmap_clear_bit (ivs->cands, cid);
6093       /* Do not count the pseudocandidates.  */
6094       if (cp->cand->iv)
6095         ivs->n_regs--;
6096       ivs->n_cands--;
6097       ivs->cand_cost -= cp->cand->cost;
6098
6099       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6100     }
6101
6102   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6103
6104   iv_ca_set_remove_invariants (ivs, cp->depends_on);
6105
6106   if (cp->inv_expr_id != -1)
6107     {
6108       ivs->used_inv_expr[cp->inv_expr_id]--;
6109       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
6110         ivs->num_used_inv_expr--;
6111     }
6112   iv_ca_recount_cost (data, ivs);
6113 }
6114
6115 /* Add invariants in set INVS to set IVS.  */
6116
6117 static void
6118 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6119 {
6120   bitmap_iterator bi;
6121   unsigned iid;
6122
6123   if (!invs)
6124     return;
6125
6126   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6127     {
6128       ivs->n_invariant_uses[iid]++;
6129       if (ivs->n_invariant_uses[iid] == 1)
6130         ivs->n_regs++;
6131     }
6132 }
6133
6134 /* Set cost pair for USE in set IVS to CP.  */
6135
6136 static void
6137 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6138               struct iv_use *use, struct cost_pair *cp)
6139 {
6140   unsigned uid = use->id, cid;
6141
6142   if (ivs->cand_for_use[uid] == cp)
6143     return;
6144
6145   if (ivs->cand_for_use[uid])
6146     iv_ca_set_no_cp (data, ivs, use);
6147
6148   if (cp)
6149     {
6150       cid = cp->cand->id;
6151
6152       ivs->bad_uses--;
6153       ivs->cand_for_use[uid] = cp;
6154       ivs->n_cand_uses[cid]++;
6155       if (ivs->n_cand_uses[cid] == 1)
6156         {
6157           bitmap_set_bit (ivs->cands, cid);
6158           /* Do not count the pseudocandidates.  */
6159           if (cp->cand->iv)
6160             ivs->n_regs++;
6161           ivs->n_cands++;
6162           ivs->cand_cost += cp->cand->cost;
6163
6164           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6165         }
6166
6167       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6168       iv_ca_set_add_invariants (ivs, cp->depends_on);
6169
6170       if (cp->inv_expr_id != -1)
6171         {
6172           ivs->used_inv_expr[cp->inv_expr_id]++;
6173           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
6174             ivs->num_used_inv_expr++;
6175         }
6176       iv_ca_recount_cost (data, ivs);
6177     }
6178 }
6179
6180 /* Extend set IVS by expressing USE by some of the candidates in it
6181    if possible.  Consider all important candidates if candidates in
6182    set IVS don't give any result.  */
6183
6184 static void
6185 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6186                struct iv_use *use)
6187 {
6188   struct cost_pair *best_cp = NULL, *cp;
6189   bitmap_iterator bi;
6190   unsigned i;
6191   struct iv_cand *cand;
6192
6193   gcc_assert (ivs->upto >= use->id);
6194   ivs->upto++;
6195   ivs->bad_uses++;
6196
6197   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6198     {
6199       cand = iv_cand (data, i);
6200       cp = get_use_iv_cost (data, use, cand);
6201       if (cheaper_cost_pair (cp, best_cp))
6202         best_cp = cp;
6203     }
6204
6205   if (best_cp == NULL)
6206     {
6207       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6208         {
6209           cand = iv_cand (data, i);
6210           cp = get_use_iv_cost (data, use, cand);
6211           if (cheaper_cost_pair (cp, best_cp))
6212             best_cp = cp;
6213         }
6214     }
6215
6216   iv_ca_set_cp (data, ivs, use, best_cp);
6217 }
6218
6219 /* Get cost for assignment IVS.  */
6220
6221 static comp_cost
6222 iv_ca_cost (struct iv_ca *ivs)
6223 {
6224   /* This was a conditional expression but it triggered a bug in
6225      Sun C 5.5.  */
6226   if (ivs->bad_uses)
6227     return infinite_cost;
6228   else
6229     return ivs->cost;
6230 }
6231
6232 /* Returns true if all dependences of CP are among invariants in IVS.  */
6233
6234 static bool
6235 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6236 {
6237   unsigned i;
6238   bitmap_iterator bi;
6239
6240   if (!cp->depends_on)
6241     return true;
6242
6243   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6244     {
6245       if (ivs->n_invariant_uses[i] == 0)
6246         return false;
6247     }
6248
6249   return true;
6250 }
6251
6252 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6253    it before NEXT_CHANGE.  */
6254
6255 static struct iv_ca_delta *
6256 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6257                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6258 {
6259   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6260
6261   change->use = use;
6262   change->old_cp = old_cp;
6263   change->new_cp = new_cp;
6264   change->next_change = next_change;
6265
6266   return change;
6267 }
6268
6269 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6270    are rewritten.  */
6271
6272 static struct iv_ca_delta *
6273 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6274 {
6275   struct iv_ca_delta *last;
6276
6277   if (!l2)
6278     return l1;
6279
6280   if (!l1)
6281     return l2;
6282
6283   for (last = l1; last->next_change; last = last->next_change)
6284     continue;
6285   last->next_change = l2;
6286
6287   return l1;
6288 }
6289
6290 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6291
6292 static struct iv_ca_delta *
6293 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6294 {
6295   struct iv_ca_delta *act, *next, *prev = NULL;
6296
6297   for (act = delta; act; act = next)
6298     {
6299       next = act->next_change;
6300       act->next_change = prev;
6301       prev = act;
6302
6303       std::swap (act->old_cp, act->new_cp);
6304     }
6305
6306   return prev;
6307 }
6308
6309 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6310    reverted instead.  */
6311
6312 static void
6313 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6314                     struct iv_ca_delta *delta, bool forward)
6315 {
6316   struct cost_pair *from, *to;
6317   struct iv_ca_delta *act;
6318
6319   if (!forward)
6320     delta = iv_ca_delta_reverse (delta);
6321
6322   for (act = delta; act; act = act->next_change)
6323     {
6324       from = act->old_cp;
6325       to = act->new_cp;
6326       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6327       iv_ca_set_cp (data, ivs, act->use, to);
6328     }
6329
6330   if (!forward)
6331     iv_ca_delta_reverse (delta);
6332 }
6333
6334 /* Returns true if CAND is used in IVS.  */
6335
6336 static bool
6337 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6338 {
6339   return ivs->n_cand_uses[cand->id] > 0;
6340 }
6341
6342 /* Returns number of induction variable candidates in the set IVS.  */
6343
6344 static unsigned
6345 iv_ca_n_cands (struct iv_ca *ivs)
6346 {
6347   return ivs->n_cands;
6348 }
6349
6350 /* Free the list of changes DELTA.  */
6351
6352 static void
6353 iv_ca_delta_free (struct iv_ca_delta **delta)
6354 {
6355   struct iv_ca_delta *act, *next;
6356
6357   for (act = *delta; act; act = next)
6358     {
6359       next = act->next_change;
6360       free (act);
6361     }
6362
6363   *delta = NULL;
6364 }
6365
6366 /* Allocates new iv candidates assignment.  */
6367
6368 static struct iv_ca *
6369 iv_ca_new (struct ivopts_data *data)
6370 {
6371   struct iv_ca *nw = XNEW (struct iv_ca);
6372
6373   nw->upto = 0;
6374   nw->bad_uses = 0;
6375   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6376   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6377   nw->cands = BITMAP_ALLOC (NULL);
6378   nw->n_cands = 0;
6379   nw->n_regs = 0;
6380   nw->cand_use_cost = no_cost;
6381   nw->cand_cost = 0;
6382   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6383   nw->cost = no_cost;
6384   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6385   nw->num_used_inv_expr = 0;
6386
6387   return nw;
6388 }
6389
6390 /* Free memory occupied by the set IVS.  */
6391
6392 static void
6393 iv_ca_free (struct iv_ca **ivs)
6394 {
6395   free ((*ivs)->cand_for_use);
6396   free ((*ivs)->n_cand_uses);
6397   BITMAP_FREE ((*ivs)->cands);
6398   free ((*ivs)->n_invariant_uses);
6399   free ((*ivs)->used_inv_expr);
6400   free (*ivs);
6401   *ivs = NULL;
6402 }
6403
6404 /* Dumps IVS to FILE.  */
6405
6406 static void
6407 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6408 {
6409   const char *pref = "  invariants ";
6410   unsigned i;
6411   comp_cost cost = iv_ca_cost (ivs);
6412
6413   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6414   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
6415            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6416   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6417
6418    for (i = 0; i < ivs->upto; i++)
6419     {
6420       struct iv_use *use = iv_use (data, i);
6421       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6422       if (cp)
6423         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6424                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6425       else
6426         fprintf (file, "   use:%d --> ??\n", use->id);
6427     }
6428
6429   for (i = 1; i <= data->max_inv_id; i++)
6430     if (ivs->n_invariant_uses[i])
6431       {
6432         fprintf (file, "%s%d", pref, i);
6433         pref = ", ";
6434       }
6435   fprintf (file, "\n\n");
6436 }
6437
6438 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6439    new set, and store differences in DELTA.  Number of induction variables
6440    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6441    the function will try to find a solution with mimimal iv candidates.  */
6442
6443 static comp_cost
6444 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6445               struct iv_cand *cand, struct iv_ca_delta **delta,
6446               unsigned *n_ivs, bool min_ncand)
6447 {
6448   unsigned i;
6449   comp_cost cost;
6450   struct iv_use *use;
6451   struct cost_pair *old_cp, *new_cp;
6452
6453   *delta = NULL;
6454   for (i = 0; i < ivs->upto; i++)
6455     {
6456       use = iv_use (data, i);
6457       old_cp = iv_ca_cand_for_use (ivs, use);
6458
6459       if (old_cp
6460           && old_cp->cand == cand)
6461         continue;
6462
6463       new_cp = get_use_iv_cost (data, use, cand);
6464       if (!new_cp)
6465         continue;
6466
6467       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6468         continue;
6469
6470       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6471         continue;
6472
6473       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6474     }
6475
6476   iv_ca_delta_commit (data, ivs, *delta, true);
6477   cost = iv_ca_cost (ivs);
6478   if (n_ivs)
6479     *n_ivs = iv_ca_n_cands (ivs);
6480   iv_ca_delta_commit (data, ivs, *delta, false);
6481
6482   return cost;
6483 }
6484
6485 /* Try narrowing set IVS by removing CAND.  Return the cost of
6486    the new set and store the differences in DELTA.  START is
6487    the candidate with which we start narrowing.  */
6488
6489 static comp_cost
6490 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6491               struct iv_cand *cand, struct iv_cand *start,
6492               struct iv_ca_delta **delta)
6493 {
6494   unsigned i, ci;
6495   struct iv_use *use;
6496   struct cost_pair *old_cp, *new_cp, *cp;
6497   bitmap_iterator bi;
6498   struct iv_cand *cnd;
6499   comp_cost cost, best_cost, acost;
6500
6501   *delta = NULL;
6502   for (i = 0; i < n_iv_uses (data); i++)
6503     {
6504       use = iv_use (data, i);
6505
6506       old_cp = iv_ca_cand_for_use (ivs, use);
6507       if (old_cp->cand != cand)
6508         continue;
6509
6510       best_cost = iv_ca_cost (ivs);
6511       /* Start narrowing with START.  */
6512       new_cp = get_use_iv_cost (data, use, start);
6513
6514       if (data->consider_all_candidates)
6515         {
6516           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6517             {
6518               if (ci == cand->id || (start && ci == start->id))
6519                 continue;
6520
6521               cnd = iv_cand (data, ci);
6522
6523               cp = get_use_iv_cost (data, use, cnd);
6524               if (!cp)
6525                 continue;
6526
6527               iv_ca_set_cp (data, ivs, use, cp);
6528               acost = iv_ca_cost (ivs);
6529
6530               if (compare_costs (acost, best_cost) < 0)
6531                 {
6532                   best_cost = acost;
6533                   new_cp = cp;
6534                 }
6535             }
6536         }
6537       else
6538         {
6539           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6540             {
6541               if (ci == cand->id || (start && ci == start->id))
6542                 continue;
6543
6544               cnd = iv_cand (data, ci);
6545
6546               cp = get_use_iv_cost (data, use, cnd);
6547               if (!cp)
6548                 continue;
6549
6550               iv_ca_set_cp (data, ivs, use, cp);
6551               acost = iv_ca_cost (ivs);
6552
6553               if (compare_costs (acost, best_cost) < 0)
6554                 {
6555                   best_cost = acost;
6556                   new_cp = cp;
6557                 }
6558             }
6559         }
6560       /* Restore to old cp for use.  */
6561       iv_ca_set_cp (data, ivs, use, old_cp);
6562
6563       if (!new_cp)
6564         {
6565           iv_ca_delta_free (delta);
6566           return infinite_cost;
6567         }
6568
6569       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6570     }
6571
6572   iv_ca_delta_commit (data, ivs, *delta, true);
6573   cost = iv_ca_cost (ivs);
6574   iv_ca_delta_commit (data, ivs, *delta, false);
6575
6576   return cost;
6577 }
6578
6579 /* Try optimizing the set of candidates IVS by removing candidates different
6580    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6581    differences in DELTA.  */
6582
6583 static comp_cost
6584 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6585              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6586 {
6587   bitmap_iterator bi;
6588   struct iv_ca_delta *act_delta, *best_delta;
6589   unsigned i;
6590   comp_cost best_cost, acost;
6591   struct iv_cand *cand;
6592
6593   best_delta = NULL;
6594   best_cost = iv_ca_cost (ivs);
6595
6596   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6597     {
6598       cand = iv_cand (data, i);
6599
6600       if (cand == except_cand)
6601         continue;
6602
6603       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6604
6605       if (compare_costs (acost, best_cost) < 0)
6606         {
6607           best_cost = acost;
6608           iv_ca_delta_free (&best_delta);
6609           best_delta = act_delta;
6610         }
6611       else
6612         iv_ca_delta_free (&act_delta);
6613     }
6614
6615   if (!best_delta)
6616     {
6617       *delta = NULL;
6618       return best_cost;
6619     }
6620
6621   /* Recurse to possibly remove other unnecessary ivs.  */
6622   iv_ca_delta_commit (data, ivs, best_delta, true);
6623   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6624   iv_ca_delta_commit (data, ivs, best_delta, false);
6625   *delta = iv_ca_delta_join (best_delta, *delta);
6626   return best_cost;
6627 }
6628
6629 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6630    cheaper local cost for USE than BEST_CP.  Return pointer to
6631    the corresponding cost_pair, otherwise just return BEST_CP.  */
6632
6633 static struct cost_pair*
6634 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6635                         unsigned int cand_idx, struct iv_cand *old_cand,
6636                         struct cost_pair *best_cp)
6637 {
6638   struct iv_cand *cand;
6639   struct cost_pair *cp;
6640
6641   gcc_assert (old_cand != NULL && best_cp != NULL);
6642   if (cand_idx == old_cand->id)
6643     return best_cp;
6644
6645   cand = iv_cand (data, cand_idx);
6646   cp = get_use_iv_cost (data, use, cand);
6647   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6648     return cp;
6649
6650   return best_cp;
6651 }
6652
6653 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6654    which are used by more than one iv uses.  For each of those candidates,
6655    this function tries to represent iv uses under that candidate using
6656    other ones with lower local cost, then tries to prune the new set.
6657    If the new set has lower cost, It returns the new cost after recording
6658    candidate replacement in list DELTA.  */
6659
6660 static comp_cost
6661 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6662                struct iv_ca_delta **delta)
6663 {
6664   bitmap_iterator bi, bj;
6665   unsigned int i, j, k;
6666   struct iv_use *use;
6667   struct iv_cand *cand;
6668   comp_cost orig_cost, acost;
6669   struct iv_ca_delta *act_delta, *tmp_delta;
6670   struct cost_pair *old_cp, *best_cp = NULL;
6671
6672   *delta = NULL;
6673   orig_cost = iv_ca_cost (ivs);
6674
6675   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6676     {
6677       if (ivs->n_cand_uses[i] == 1
6678           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6679         continue;
6680
6681       cand = iv_cand (data, i);
6682
6683       act_delta = NULL;
6684       /*  Represent uses under current candidate using other ones with
6685           lower local cost.  */
6686       for (j = 0; j < ivs->upto; j++)
6687         {
6688           use = iv_use (data, j);
6689           old_cp = iv_ca_cand_for_use (ivs, use);
6690
6691           if (old_cp->cand != cand)
6692             continue;
6693
6694           best_cp = old_cp;
6695           if (data->consider_all_candidates)
6696             for (k = 0; k < n_iv_cands (data); k++)
6697               best_cp = cheaper_cost_with_cand (data, use, k,
6698                                                 old_cp->cand, best_cp);
6699           else
6700             EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6701               best_cp = cheaper_cost_with_cand (data, use, k,
6702                                                 old_cp->cand, best_cp);
6703
6704           if (best_cp == old_cp)
6705             continue;
6706
6707           act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6708         }
6709       /* No need for further prune.  */
6710       if (!act_delta)
6711         continue;
6712
6713       /* Prune the new candidate set.  */
6714       iv_ca_delta_commit (data, ivs, act_delta, true);
6715       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6716       iv_ca_delta_commit (data, ivs, act_delta, false);
6717       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6718
6719       if (compare_costs (acost, orig_cost) < 0)
6720         {
6721           *delta = act_delta;
6722           return acost;
6723         }
6724       else
6725         iv_ca_delta_free (&act_delta);
6726     }
6727
6728   return orig_cost;
6729 }
6730
6731 /* Tries to extend the sets IVS in the best possible way in order
6732    to express the USE.  If ORIGINALP is true, prefer candidates from
6733    the original set of IVs, otherwise favor important candidates not
6734    based on any memory object.  */
6735
6736 static bool
6737 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6738                   struct iv_use *use, bool originalp)
6739 {
6740   comp_cost best_cost, act_cost;
6741   unsigned i;
6742   bitmap_iterator bi;
6743   struct iv_cand *cand;
6744   struct iv_ca_delta *best_delta = NULL, *act_delta;
6745   struct cost_pair *cp;
6746
6747   iv_ca_add_use (data, ivs, use);
6748   best_cost = iv_ca_cost (ivs);
6749   cp = iv_ca_cand_for_use (ivs, use);
6750   if (cp)
6751     {
6752       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6753       iv_ca_set_no_cp (data, ivs, use);
6754     }
6755
6756   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6757      first try important candidates not based on any memory object.  Only if
6758      this fails, try the specific ones.  Rationale -- in loops with many
6759      variables the best choice often is to use just one generic biv.  If we
6760      added here many ivs specific to the uses, the optimization algorithm later
6761      would be likely to get stuck in a local minimum, thus causing us to create
6762      too many ivs.  The approach from few ivs to more seems more likely to be
6763      successful -- starting from few ivs, replacing an expensive use by a
6764      specific iv should always be a win.  */
6765   EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
6766     {
6767       cand = iv_cand (data, i);
6768
6769       if (originalp && cand->pos !=IP_ORIGINAL)
6770         continue;
6771
6772       if (!originalp && cand->iv->base_object != NULL_TREE)
6773         continue;
6774
6775       if (iv_ca_cand_used_p (ivs, cand))
6776         continue;
6777
6778       cp = get_use_iv_cost (data, use, cand);
6779       if (!cp)
6780         continue;
6781
6782       iv_ca_set_cp (data, ivs, use, cp);
6783       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6784                                true);
6785       iv_ca_set_no_cp (data, ivs, use);
6786       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6787
6788       if (compare_costs (act_cost, best_cost) < 0)
6789         {
6790           best_cost = act_cost;
6791
6792           iv_ca_delta_free (&best_delta);
6793           best_delta = act_delta;
6794         }
6795       else
6796         iv_ca_delta_free (&act_delta);
6797     }
6798
6799   if (infinite_cost_p (best_cost))
6800     {
6801       for (i = 0; i < use->n_map_members; i++)
6802         {
6803           cp = use->cost_map + i;
6804           cand = cp->cand;
6805           if (!cand)
6806             continue;
6807
6808           /* Already tried this.  */
6809           if (cand->important)
6810             {
6811               if (originalp && cand->pos == IP_ORIGINAL)
6812                 continue;
6813               if (!originalp && cand->iv->base_object == NULL_TREE)
6814                 continue;
6815             }
6816
6817           if (iv_ca_cand_used_p (ivs, cand))
6818             continue;
6819
6820           act_delta = NULL;
6821           iv_ca_set_cp (data, ivs, use, cp);
6822           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6823           iv_ca_set_no_cp (data, ivs, use);
6824           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6825                                        cp, act_delta);
6826
6827           if (compare_costs (act_cost, best_cost) < 0)
6828             {
6829               best_cost = act_cost;
6830
6831               if (best_delta)
6832                 iv_ca_delta_free (&best_delta);
6833               best_delta = act_delta;
6834             }
6835           else
6836             iv_ca_delta_free (&act_delta);
6837         }
6838     }
6839
6840   iv_ca_delta_commit (data, ivs, best_delta, true);
6841   iv_ca_delta_free (&best_delta);
6842
6843   return !infinite_cost_p (best_cost);
6844 }
6845
6846 /* Finds an initial assignment of candidates to uses.  */
6847
6848 static struct iv_ca *
6849 get_initial_solution (struct ivopts_data *data, bool originalp)
6850 {
6851   struct iv_ca *ivs = iv_ca_new (data);
6852   unsigned i;
6853
6854   for (i = 0; i < n_iv_uses (data); i++)
6855     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6856       {
6857         iv_ca_free (&ivs);
6858         return NULL;
6859       }
6860
6861   return ivs;
6862 }
6863
6864 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6865    points to a bool variable, this function tries to break local
6866    optimal fixed-point by replacing candidates in IVS if it's true.  */
6867
6868 static bool
6869 try_improve_iv_set (struct ivopts_data *data,
6870                     struct iv_ca *ivs, bool *try_replace_p)
6871 {
6872   unsigned i, n_ivs;
6873   comp_cost acost, best_cost = iv_ca_cost (ivs);
6874   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6875   struct iv_cand *cand;
6876
6877   /* Try extending the set of induction variables by one.  */
6878   for (i = 0; i < n_iv_cands (data); i++)
6879     {
6880       cand = iv_cand (data, i);
6881
6882       if (iv_ca_cand_used_p (ivs, cand))
6883         continue;
6884
6885       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6886       if (!act_delta)
6887         continue;
6888
6889       /* If we successfully added the candidate and the set is small enough,
6890          try optimizing it by removing other candidates.  */
6891       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6892         {
6893           iv_ca_delta_commit (data, ivs, act_delta, true);
6894           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6895           iv_ca_delta_commit (data, ivs, act_delta, false);
6896           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6897         }
6898
6899       if (compare_costs (acost, best_cost) < 0)
6900         {
6901           best_cost = acost;
6902           iv_ca_delta_free (&best_delta);
6903           best_delta = act_delta;
6904         }
6905       else
6906         iv_ca_delta_free (&act_delta);
6907     }
6908
6909   if (!best_delta)
6910     {
6911       /* Try removing the candidates from the set instead.  */
6912       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6913
6914       if (!best_delta && *try_replace_p)
6915         {
6916           *try_replace_p = false;
6917           /* So far candidate selecting algorithm tends to choose fewer IVs
6918              so that it can handle cases in which loops have many variables
6919              but the best choice is often to use only one general biv.  One
6920              weakness is it can't handle opposite cases, in which different
6921              candidates should be chosen with respect to each use.  To solve
6922              the problem, we replace candidates in a manner described by the
6923              comments of iv_ca_replace, thus give general algorithm a chance
6924              to break local optimal fixed-point in these cases.  */
6925           best_cost = iv_ca_replace (data, ivs, &best_delta);
6926         }
6927
6928       if (!best_delta)
6929         return false;
6930     }
6931
6932   iv_ca_delta_commit (data, ivs, best_delta, true);
6933   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6934   iv_ca_delta_free (&best_delta);
6935   return true;
6936 }
6937
6938 /* Attempts to find the optimal set of induction variables.  We do simple
6939    greedy heuristic -- we try to replace at most one candidate in the selected
6940    solution and remove the unused ivs while this improves the cost.  */
6941
6942 static struct iv_ca *
6943 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6944 {
6945   struct iv_ca *set;
6946   bool try_replace_p = true;
6947
6948   /* Get the initial solution.  */
6949   set = get_initial_solution (data, originalp);
6950   if (!set)
6951     {
6952       if (dump_file && (dump_flags & TDF_DETAILS))
6953         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6954       return NULL;
6955     }
6956
6957   if (dump_file && (dump_flags & TDF_DETAILS))
6958     {
6959       fprintf (dump_file, "Initial set of candidates:\n");
6960       iv_ca_dump (data, dump_file, set);
6961     }
6962
6963   while (try_improve_iv_set (data, set, &try_replace_p))
6964     {
6965       if (dump_file && (dump_flags & TDF_DETAILS))
6966         {
6967           fprintf (dump_file, "Improved to:\n");
6968           iv_ca_dump (data, dump_file, set);
6969         }
6970     }
6971
6972   return set;
6973 }
6974
6975 static struct iv_ca *
6976 find_optimal_iv_set (struct ivopts_data *data)
6977 {
6978   unsigned i;
6979   struct iv_ca *set, *origset;
6980   struct iv_use *use;
6981   comp_cost cost, origcost;
6982
6983   /* Determine the cost based on a strategy that starts with original IVs,
6984      and try again using a strategy that prefers candidates not based
6985      on any IVs.  */
6986   origset = find_optimal_iv_set_1 (data, true);
6987   set = find_optimal_iv_set_1 (data, false);
6988
6989   if (!origset && !set)
6990     return NULL;
6991
6992   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6993   cost = set ? iv_ca_cost (set) : infinite_cost;
6994
6995   if (dump_file && (dump_flags & TDF_DETAILS))
6996     {
6997       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6998                origcost.cost, origcost.complexity);
6999       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7000                cost.cost, cost.complexity);
7001     }
7002
7003   /* Choose the one with the best cost.  */
7004   if (compare_costs (origcost, cost) <= 0)
7005     {
7006       if (set)
7007         iv_ca_free (&set);
7008       set = origset;
7009     }
7010   else if (origset)
7011     iv_ca_free (&origset);
7012
7013   for (i = 0; i < n_iv_uses (data); i++)
7014     {
7015       use = iv_use (data, i);
7016       use->selected = iv_ca_cand_for_use (set, use)->cand;
7017     }
7018
7019   return set;
7020 }
7021
7022 /* Creates a new induction variable corresponding to CAND.  */
7023
7024 static void
7025 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7026 {
7027   gimple_stmt_iterator incr_pos;
7028   tree base;
7029   bool after = false;
7030
7031   if (!cand->iv)
7032     return;
7033
7034   switch (cand->pos)
7035     {
7036     case IP_NORMAL:
7037       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7038       break;
7039
7040     case IP_END:
7041       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7042       after = true;
7043       break;
7044
7045     case IP_AFTER_USE:
7046       after = true;
7047       /* fall through */
7048     case IP_BEFORE_USE:
7049       incr_pos = gsi_for_stmt (cand->incremented_at);
7050       break;
7051
7052     case IP_ORIGINAL:
7053       /* Mark that the iv is preserved.  */
7054       name_info (data, cand->var_before)->preserve_biv = true;
7055       name_info (data, cand->var_after)->preserve_biv = true;
7056
7057       /* Rewrite the increment so that it uses var_before directly.  */
7058       find_interesting_uses_op (data, cand->var_after)->selected = cand;
7059       return;
7060     }
7061
7062   gimple_add_tmp_var (cand->var_before);
7063
7064   base = unshare_expr (cand->iv->base);
7065
7066   create_iv (base, unshare_expr (cand->iv->step),
7067              cand->var_before, data->current_loop,
7068              &incr_pos, after, &cand->var_before, &cand->var_after);
7069 }
7070
7071 /* Creates new induction variables described in SET.  */
7072
7073 static void
7074 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7075 {
7076   unsigned i;
7077   struct iv_cand *cand;
7078   bitmap_iterator bi;
7079
7080   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7081     {
7082       cand = iv_cand (data, i);
7083       create_new_iv (data, cand);
7084     }
7085
7086   if (dump_file && (dump_flags & TDF_DETAILS))
7087     {
7088       fprintf (dump_file, "Selected IV set for loop %d",
7089                data->current_loop->num);
7090       if (data->loop_loc != UNKNOWN_LOCATION)
7091         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7092                  LOCATION_LINE (data->loop_loc));
7093       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7094       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7095         {
7096           cand = iv_cand (data, i);
7097           dump_cand (dump_file, cand);
7098         }
7099       fprintf (dump_file, "\n");
7100     }
7101 }
7102
7103 /* Rewrites USE (definition of iv used in a nonlinear expression)
7104    using candidate CAND.  */
7105
7106 static void
7107 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7108                             struct iv_use *use, struct iv_cand *cand)
7109 {
7110   tree comp;
7111   tree op, tgt;
7112   gassign *ass;
7113   gimple_stmt_iterator bsi;
7114
7115   /* An important special case -- if we are asked to express value of
7116      the original iv by itself, just exit; there is no need to
7117      introduce a new computation (that might also need casting the
7118      variable to unsigned and back).  */
7119   if (cand->pos == IP_ORIGINAL
7120       && cand->incremented_at == use->stmt)
7121     {
7122       enum tree_code stmt_code;
7123
7124       gcc_assert (is_gimple_assign (use->stmt));
7125       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7126
7127       /* Check whether we may leave the computation unchanged.
7128          This is the case only if it does not rely on other
7129          computations in the loop -- otherwise, the computation
7130          we rely upon may be removed in remove_unused_ivs,
7131          thus leading to ICE.  */
7132       stmt_code = gimple_assign_rhs_code (use->stmt);
7133       if (stmt_code == PLUS_EXPR
7134           || stmt_code == MINUS_EXPR
7135           || stmt_code == POINTER_PLUS_EXPR)
7136         {
7137           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7138             op = gimple_assign_rhs2 (use->stmt);
7139           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7140             op = gimple_assign_rhs1 (use->stmt);
7141           else
7142             op = NULL_TREE;
7143         }
7144       else
7145         op = NULL_TREE;
7146
7147       if (op && expr_invariant_in_loop_p (data->current_loop, op))
7148         return;
7149     }
7150
7151   comp = get_computation (data->current_loop, use, cand);
7152   gcc_assert (comp != NULL_TREE);
7153
7154   switch (gimple_code (use->stmt))
7155     {
7156     case GIMPLE_PHI:
7157       tgt = PHI_RESULT (use->stmt);
7158
7159       /* If we should keep the biv, do not replace it.  */
7160       if (name_info (data, tgt)->preserve_biv)
7161         return;
7162
7163       bsi = gsi_after_labels (gimple_bb (use->stmt));
7164       break;
7165
7166     case GIMPLE_ASSIGN:
7167       tgt = gimple_assign_lhs (use->stmt);
7168       bsi = gsi_for_stmt (use->stmt);
7169       break;
7170
7171     default:
7172       gcc_unreachable ();
7173     }
7174
7175   if (!valid_gimple_rhs_p (comp)
7176       || (gimple_code (use->stmt) != GIMPLE_PHI
7177           /* We can't allow re-allocating the stmt as it might be pointed
7178              to still.  */
7179           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7180               >= gimple_num_ops (gsi_stmt (bsi)))))
7181     {
7182       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7183                                        true, GSI_SAME_STMT);
7184       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7185         {
7186           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7187           /* As this isn't a plain copy we have to reset alignment
7188              information.  */
7189           if (SSA_NAME_PTR_INFO (comp))
7190             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7191         }
7192     }
7193
7194   if (gimple_code (use->stmt) == GIMPLE_PHI)
7195     {
7196       ass = gimple_build_assign (tgt, comp);
7197       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7198
7199       bsi = gsi_for_stmt (use->stmt);
7200       remove_phi_node (&bsi, false);
7201     }
7202   else
7203     {
7204       gimple_assign_set_rhs_from_tree (&bsi, comp);
7205       use->stmt = gsi_stmt (bsi);
7206     }
7207 }
7208
7209 /* Performs a peephole optimization to reorder the iv update statement with
7210    a mem ref to enable instruction combining in later phases. The mem ref uses
7211    the iv value before the update, so the reordering transformation requires
7212    adjustment of the offset. CAND is the selected IV_CAND.
7213
7214    Example:
7215
7216    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7217    iv2 = iv1 + 1;
7218
7219    if (t < val)      (1)
7220      goto L;
7221    goto Head;
7222
7223
7224    directly propagating t over to (1) will introduce overlapping live range
7225    thus increase register pressure. This peephole transform it into:
7226
7227
7228    iv2 = iv1 + 1;
7229    t = MEM_REF (base, iv2, 8, 8);
7230    if (t < val)
7231      goto L;
7232    goto Head;
7233 */
7234
7235 static void
7236 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7237 {
7238   tree var_after;
7239   gimple *iv_update, *stmt;
7240   basic_block bb;
7241   gimple_stmt_iterator gsi, gsi_iv;
7242
7243   if (cand->pos != IP_NORMAL)
7244     return;
7245
7246   var_after = cand->var_after;
7247   iv_update = SSA_NAME_DEF_STMT (var_after);
7248
7249   bb = gimple_bb (iv_update);
7250   gsi = gsi_last_nondebug_bb (bb);
7251   stmt = gsi_stmt (gsi);
7252
7253   /* Only handle conditional statement for now.  */
7254   if (gimple_code (stmt) != GIMPLE_COND)
7255     return;
7256
7257   gsi_prev_nondebug (&gsi);
7258   stmt = gsi_stmt (gsi);
7259   if (stmt != iv_update)
7260     return;
7261
7262   gsi_prev_nondebug (&gsi);
7263   if (gsi_end_p (gsi))
7264     return;
7265
7266   stmt = gsi_stmt (gsi);
7267   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7268     return;
7269
7270   if (stmt != use->stmt)
7271     return;
7272
7273   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7274     return;
7275
7276   if (dump_file && (dump_flags & TDF_DETAILS))
7277     {
7278       fprintf (dump_file, "Reordering \n");
7279       print_gimple_stmt (dump_file, iv_update, 0, 0);
7280       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7281       fprintf (dump_file, "\n");
7282     }
7283
7284   gsi = gsi_for_stmt (use->stmt);
7285   gsi_iv = gsi_for_stmt (iv_update);
7286   gsi_move_before (&gsi_iv, &gsi);
7287
7288   cand->pos = IP_BEFORE_USE;
7289   cand->incremented_at = use->stmt;
7290 }
7291
7292 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7293
7294 static void
7295 rewrite_use_address_1 (struct ivopts_data *data,
7296                        struct iv_use *use, struct iv_cand *cand)
7297 {
7298   aff_tree aff;
7299   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7300   tree base_hint = NULL_TREE;
7301   tree ref, iv;
7302   bool ok;
7303
7304   adjust_iv_update_pos (cand, use);
7305   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7306   gcc_assert (ok);
7307   unshare_aff_combination (&aff);
7308
7309   /* To avoid undefined overflow problems, all IV candidates use unsigned
7310      integer types.  The drawback is that this makes it impossible for
7311      create_mem_ref to distinguish an IV that is based on a memory object
7312      from one that represents simply an offset.
7313
7314      To work around this problem, we pass a hint to create_mem_ref that
7315      indicates which variable (if any) in aff is an IV based on a memory
7316      object.  Note that we only consider the candidate.  If this is not
7317      based on an object, the base of the reference is in some subexpression
7318      of the use -- but these will use pointer types, so they are recognized
7319      by the create_mem_ref heuristics anyway.  */
7320   if (cand->iv->base_object)
7321     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7322
7323   iv = var_at_stmt (data->current_loop, cand, use->stmt);
7324   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7325                         reference_alias_ptr_type (*use->op_p),
7326                         iv, base_hint, data->speed);
7327   copy_ref_info (ref, *use->op_p);
7328   *use->op_p = ref;
7329 }
7330
7331 /* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
7332    first use of a group, rewrites sub uses in the group too.  */
7333
7334 static void
7335 rewrite_use_address (struct ivopts_data *data,
7336                       struct iv_use *use, struct iv_cand *cand)
7337 {
7338   struct iv_use *next;
7339
7340   gcc_assert (use->sub_id == 0);
7341   rewrite_use_address_1 (data, use, cand);
7342   update_stmt (use->stmt);
7343
7344   for (next = use->next; next != NULL; next = next->next)
7345     {
7346       rewrite_use_address_1 (data, next, cand);
7347       update_stmt (next->stmt);
7348     }
7349
7350   return;
7351 }
7352
7353 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7354    candidate CAND.  */
7355
7356 static void
7357 rewrite_use_compare (struct ivopts_data *data,
7358                      struct iv_use *use, struct iv_cand *cand)
7359 {
7360   tree comp, *var_p, op, bound;
7361   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7362   enum tree_code compare;
7363   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7364   bool ok;
7365
7366   bound = cp->value;
7367   if (bound)
7368     {
7369       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7370       tree var_type = TREE_TYPE (var);
7371       gimple_seq stmts;
7372
7373       if (dump_file && (dump_flags & TDF_DETAILS))
7374         {
7375           fprintf (dump_file, "Replacing exit test: ");
7376           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7377         }
7378       compare = cp->comp;
7379       bound = unshare_expr (fold_convert (var_type, bound));
7380       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7381       if (stmts)
7382         gsi_insert_seq_on_edge_immediate (
7383                 loop_preheader_edge (data->current_loop),
7384                 stmts);
7385
7386       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7387       gimple_cond_set_lhs (cond_stmt, var);
7388       gimple_cond_set_code (cond_stmt, compare);
7389       gimple_cond_set_rhs (cond_stmt, op);
7390       return;
7391     }
7392
7393   /* The induction variable elimination failed; just express the original
7394      giv.  */
7395   comp = get_computation (data->current_loop, use, cand);
7396   gcc_assert (comp != NULL_TREE);
7397
7398   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7399   gcc_assert (ok);
7400
7401   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7402                                      true, GSI_SAME_STMT);
7403 }
7404
7405 /* Rewrites USE using candidate CAND.  */
7406
7407 static void
7408 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7409 {
7410   switch (use->type)
7411     {
7412       case USE_NONLINEAR_EXPR:
7413         rewrite_use_nonlinear_expr (data, use, cand);
7414         break;
7415
7416       case USE_ADDRESS:
7417         rewrite_use_address (data, use, cand);
7418         break;
7419
7420       case USE_COMPARE:
7421         rewrite_use_compare (data, use, cand);
7422         break;
7423
7424       default:
7425         gcc_unreachable ();
7426     }
7427
7428   update_stmt (use->stmt);
7429 }
7430
7431 /* Rewrite the uses using the selected induction variables.  */
7432
7433 static void
7434 rewrite_uses (struct ivopts_data *data)
7435 {
7436   unsigned i;
7437   struct iv_cand *cand;
7438   struct iv_use *use;
7439
7440   for (i = 0; i < n_iv_uses (data); i++)
7441     {
7442       use = iv_use (data, i);
7443       cand = use->selected;
7444       gcc_assert (cand);
7445
7446       rewrite_use (data, use, cand);
7447     }
7448 }
7449
7450 /* Removes the ivs that are not used after rewriting.  */
7451
7452 static void
7453 remove_unused_ivs (struct ivopts_data *data)
7454 {
7455   unsigned j;
7456   bitmap_iterator bi;
7457   bitmap toremove = BITMAP_ALLOC (NULL);
7458
7459   /* Figure out an order in which to release SSA DEFs so that we don't
7460      release something that we'd have to propagate into a debug stmt
7461      afterwards.  */
7462   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7463     {
7464       struct version_info *info;
7465
7466       info = ver_info (data, j);
7467       if (info->iv
7468           && !integer_zerop (info->iv->step)
7469           && !info->inv_id
7470           && !info->iv->have_use_for
7471           && !info->preserve_biv)
7472         {
7473           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7474
7475           tree def = info->iv->ssa_name;
7476
7477           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7478             {
7479               imm_use_iterator imm_iter;
7480               use_operand_p use_p;
7481               gimple *stmt;
7482               int count = 0;
7483
7484               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7485                 {
7486                   if (!gimple_debug_bind_p (stmt))
7487                     continue;
7488
7489                   /* We just want to determine whether to do nothing
7490                      (count == 0), to substitute the computed
7491                      expression into a single use of the SSA DEF by
7492                      itself (count == 1), or to use a debug temp
7493                      because the SSA DEF is used multiple times or as
7494                      part of a larger expression (count > 1). */
7495                   count++;
7496                   if (gimple_debug_bind_get_value (stmt) != def)
7497                     count++;
7498
7499                   if (count > 1)
7500                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7501                 }
7502
7503               if (!count)
7504                 continue;
7505
7506               struct iv_use dummy_use;
7507               struct iv_cand *best_cand = NULL, *cand;
7508               unsigned i, best_pref = 0, cand_pref;
7509
7510               memset (&dummy_use, 0, sizeof (dummy_use));
7511               dummy_use.iv = info->iv;
7512               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7513                 {
7514                   cand = iv_use (data, i)->selected;
7515                   if (cand == best_cand)
7516                     continue;
7517                   cand_pref = operand_equal_p (cand->iv->step,
7518                                                info->iv->step, 0)
7519                     ? 4 : 0;
7520                   cand_pref
7521                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7522                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7523                     ? 2 : 0;
7524                   cand_pref
7525                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7526                     ? 1 : 0;
7527                   if (best_cand == NULL || best_pref < cand_pref)
7528                     {
7529                       best_cand = cand;
7530                       best_pref = cand_pref;
7531                     }
7532                 }
7533
7534               if (!best_cand)
7535                 continue;
7536
7537               tree comp = get_computation_at (data->current_loop,
7538                                               &dummy_use, best_cand,
7539                                               SSA_NAME_DEF_STMT (def));
7540               if (!comp)
7541                 continue;
7542
7543               if (count > 1)
7544                 {
7545                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7546                   DECL_ARTIFICIAL (vexpr) = 1;
7547                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7548                   if (SSA_NAME_VAR (def))
7549                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7550                   else
7551                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7552                   gdebug *def_temp
7553                     = gimple_build_debug_bind (vexpr, comp, NULL);
7554                   gimple_stmt_iterator gsi;
7555
7556                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7557                     gsi = gsi_after_labels (gimple_bb
7558                                             (SSA_NAME_DEF_STMT (def)));
7559                   else
7560                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7561
7562                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7563                   comp = vexpr;
7564                 }
7565
7566               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7567                 {
7568                   if (!gimple_debug_bind_p (stmt))
7569                     continue;
7570
7571                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7572                     SET_USE (use_p, comp);
7573
7574                   update_stmt (stmt);
7575                 }
7576             }
7577         }
7578     }
7579
7580   release_defs_bitset (toremove);
7581
7582   BITMAP_FREE (toremove);
7583 }
7584
7585 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7586    for hash_map::traverse.  */
7587
7588 bool
7589 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7590 {
7591   free (value);
7592   return true;
7593 }
7594
7595 /* Frees data allocated by the optimization of a single loop.  */
7596
7597 static void
7598 free_loop_data (struct ivopts_data *data)
7599 {
7600   unsigned i, j;
7601   bitmap_iterator bi;
7602   tree obj;
7603
7604   if (data->niters)
7605     {
7606       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7607       delete data->niters;
7608       data->niters = NULL;
7609     }
7610
7611   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7612     {
7613       struct version_info *info;
7614
7615       info = ver_info (data, i);
7616       info->iv = NULL;
7617       info->has_nonlin_use = false;
7618       info->preserve_biv = false;
7619       info->inv_id = 0;
7620     }
7621   bitmap_clear (data->relevant);
7622   bitmap_clear (data->important_candidates);
7623
7624   for (i = 0; i < n_iv_uses (data); i++)
7625     {
7626       struct iv_use *use = iv_use (data, i);
7627       struct iv_use *pre = use, *sub = use->next;
7628
7629       while (sub)
7630         {
7631           gcc_assert (sub->related_cands == NULL);
7632           gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7633
7634           pre = sub;
7635           sub = sub->next;
7636           free (pre);
7637         }
7638
7639       BITMAP_FREE (use->related_cands);
7640       for (j = 0; j < use->n_map_members; j++)
7641         if (use->cost_map[j].depends_on)
7642           BITMAP_FREE (use->cost_map[j].depends_on);
7643       free (use->cost_map);
7644       free (use);
7645     }
7646   data->iv_uses.truncate (0);
7647
7648   for (i = 0; i < n_iv_cands (data); i++)
7649     {
7650       struct iv_cand *cand = iv_cand (data, i);
7651
7652       if (cand->depends_on)
7653         BITMAP_FREE (cand->depends_on);
7654       free (cand);
7655     }
7656   data->iv_candidates.truncate (0);
7657
7658   if (data->version_info_size < num_ssa_names)
7659     {
7660       data->version_info_size = 2 * num_ssa_names;
7661       free (data->version_info);
7662       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7663     }
7664
7665   data->max_inv_id = 0;
7666
7667   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7668     SET_DECL_RTL (obj, NULL_RTX);
7669
7670   decl_rtl_to_reset.truncate (0);
7671
7672   data->inv_expr_tab->empty ();
7673   data->inv_expr_id = 0;
7674
7675   data->iv_common_cand_tab->empty ();
7676   data->iv_common_cands.truncate (0);
7677 }
7678
7679 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7680    loop tree.  */
7681
7682 static void
7683 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7684 {
7685   free_loop_data (data);
7686   free (data->version_info);
7687   BITMAP_FREE (data->relevant);
7688   BITMAP_FREE (data->important_candidates);
7689
7690   decl_rtl_to_reset.release ();
7691   data->iv_uses.release ();
7692   data->iv_candidates.release ();
7693   delete data->inv_expr_tab;
7694   data->inv_expr_tab = NULL;
7695   free_affine_expand_cache (&data->name_expansion_cache);
7696   delete data->iv_common_cand_tab;
7697   data->iv_common_cand_tab = NULL;
7698   data->iv_common_cands.release ();
7699   obstack_free (&data->iv_obstack, NULL);
7700 }
7701
7702 /* Returns true if the loop body BODY includes any function calls.  */
7703
7704 static bool
7705 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7706 {
7707   gimple_stmt_iterator gsi;
7708   unsigned i;
7709
7710   for (i = 0; i < num_nodes; i++)
7711     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7712       {
7713         gimple *stmt = gsi_stmt (gsi);
7714         if (is_gimple_call (stmt)
7715             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7716           return true;
7717       }
7718   return false;
7719 }
7720
7721 /* Optimizes the LOOP.  Returns true if anything changed.  */
7722
7723 static bool
7724 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7725 {
7726   bool changed = false;
7727   struct iv_ca *iv_ca;
7728   edge exit = single_dom_exit (loop);
7729   basic_block *body;
7730
7731   gcc_assert (!data->niters);
7732   data->current_loop = loop;
7733   data->loop_loc = find_loop_location (loop);
7734   data->speed = optimize_loop_for_speed_p (loop);
7735
7736   if (dump_file && (dump_flags & TDF_DETAILS))
7737     {
7738       fprintf (dump_file, "Processing loop %d", loop->num);
7739       if (data->loop_loc != UNKNOWN_LOCATION)
7740         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7741                  LOCATION_LINE (data->loop_loc));
7742       fprintf (dump_file, "\n");
7743
7744       if (exit)
7745         {
7746           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7747                    exit->src->index, exit->dest->index);
7748           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7749           fprintf (dump_file, "\n");
7750         }
7751
7752       fprintf (dump_file, "\n");
7753     }
7754
7755   body = get_loop_body (loop);
7756   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7757   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7758   free (body);
7759
7760   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7761
7762   /* For each ssa name determines whether it behaves as an induction variable
7763      in some loop.  */
7764   if (!find_induction_variables (data))
7765     goto finish;
7766
7767   /* Finds interesting uses (item 1).  */
7768   find_interesting_uses (data);
7769   group_address_uses (data);
7770   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7771     goto finish;
7772
7773   /* Finds candidates for the induction variables (item 2).  */
7774   find_iv_candidates (data);
7775
7776   /* Calculates the costs (item 3, part 1).  */
7777   determine_iv_costs (data);
7778   determine_use_iv_costs (data);
7779   determine_set_costs (data);
7780
7781   /* Find the optimal set of induction variables (item 3, part 2).  */
7782   iv_ca = find_optimal_iv_set (data);
7783   if (!iv_ca)
7784     goto finish;
7785   changed = true;
7786
7787   /* Create the new induction variables (item 4, part 1).  */
7788   create_new_ivs (data, iv_ca);
7789   iv_ca_free (&iv_ca);
7790
7791   /* Rewrite the uses (item 4, part 2).  */
7792   rewrite_uses (data);
7793
7794   /* Remove the ivs that are unused after rewriting.  */
7795   remove_unused_ivs (data);
7796
7797   /* We have changed the structure of induction variables; it might happen
7798      that definitions in the scev database refer to some of them that were
7799      eliminated.  */
7800   scev_reset ();
7801
7802 finish:
7803   free_loop_data (data);
7804
7805   return changed;
7806 }
7807
7808 /* Main entry point.  Optimizes induction variables in loops.  */
7809
7810 void
7811 tree_ssa_iv_optimize (void)
7812 {
7813   struct loop *loop;
7814   struct ivopts_data data;
7815
7816   tree_ssa_iv_optimize_init (&data);
7817
7818   /* Optimize the loops starting with the innermost ones.  */
7819   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7820     {
7821       if (dump_file && (dump_flags & TDF_DETAILS))
7822         flow_loop_dump (loop, dump_file, NULL, 1);
7823
7824       tree_ssa_iv_optimize_loop (&data, loop);
7825     }
7826
7827   tree_ssa_iv_optimize_finalize (&data);
7828 }