gcc/loop-unroll.c

   1 /* Loop unrolling.
   2    Copyright (C) 2002-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "cfghooks.h"
  25 #include "rtl.h"
  26 #include "alias.h"
  27 #include "tree.h"
  28 #include "profile.h"
  29 #include "cfgrtl.h"
  30 #include "cfgloop.h"
  31 #include "params.h"
  32 #include "insn-codes.h"
  33 #include "optabs.h"
  34 #include "flags.h"
  35 #include "insn-config.h"
  36 #include "expmed.h"
  37 #include "dojump.h"
  38 #include "explow.h"
  39 #include "calls.h"
  40 #include "emit-rtl.h"
  41 #include "varasm.h"
  42 #include "stmt.h"
  43 #include "expr.h"
  44 #include "recog.h"
  45 #include "target.h"
  46 #include "dumpfile.h"
  47
  48 /* This pass performs loop unrolling.  We only perform this
  49    optimization on innermost loops (with single exception) because
  50    the impact on performance is greatest here, and we want to avoid
  51    unnecessary code size growth.  The gain is caused by greater sequentiality
  52    of code, better code to optimize for further passes and in some cases
  53    by fewer testings of exit conditions.  The main problem is code growth,
  54    that impacts performance negatively due to effect of caches.
  55
  56    What we do:
  57
  58    -- unrolling of loops that roll constant times; this is almost always
  59       win, as we get rid of exit condition tests.
  60    -- unrolling of loops that roll number of times that we can compute
  61       in runtime; we also get rid of exit condition tests here, but there
  62       is the extra expense for calculating the number of iterations
  63    -- simple unrolling of remaining loops; this is performed only if we
  64       are asked to, as the gain is questionable in this case and often
  65       it may even slow down the code
  66    For more detailed descriptions of each of those, see comments at
  67    appropriate function below.
  68
  69    There is a lot of parameters (defined and described in params.def) that
  70    control how much we unroll.
  71
  72    ??? A great problem is that we don't have a good way how to determine
  73    how many times we should unroll the loop; the experiments I have made
  74    showed that this choice may affect performance in order of several %.
  75    */
  76
  77 /* Information about induction variables to split.  */
  78
  79 struct iv_to_split
  80 {
  81   rtx_insn *insn;       /* The insn in that the induction variable occurs.  */
  82   rtx orig_var;         /* The variable (register) for the IV before split.  */
  83   rtx base_var;         /* The variable on that the values in the further
  84                            iterations are based.  */
  85   rtx step;             /* Step of the induction variable.  */
  86   struct iv_to_split *next; /* Next entry in walking order.  */
  87 };
  88
  89 /* Information about accumulators to expand.  */
  90
  91 struct var_to_expand
  92 {
  93   rtx_insn *insn;                  /* The insn in that the variable expansion occurs.  */
  94   rtx reg;                         /* The accumulator which is expanded.  */
  95   vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  96   struct var_to_expand *next;      /* Next entry in walking order.  */
  97   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  98                                       or multiplication.  */
  99   int expansion_count;             /* Count the number of expansions generated so far.  */
 100   int reuse_expansion;             /* The expansion we intend to reuse to expand
 101                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 102                                       the original accumulator.  Else use
 103                                       var_expansions[REUSE_EXPANSION - 1].  */
 104 };
 105
 106 /* Hashtable helper for iv_to_split.  */
 107
 108 struct iv_split_hasher : free_ptr_hash <iv_to_split>
 109 {
 110   static inline hashval_t hash (const iv_to_split *);
 111   static inline bool equal (const iv_to_split *, const iv_to_split *);
 112 };
 113
 114
 115 /* A hash function for information about insns to split.  */
 116
 117 inline hashval_t
 118 iv_split_hasher::hash (const iv_to_split *ivts)
 119 {
 120   return (hashval_t) INSN_UID (ivts->insn);
 121 }
 122
 123 /* An equality functions for information about insns to split.  */
 124
 125 inline bool
 126 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
 127 {
 128   return i1->insn == i2->insn;
 129 }
 130
 131 /* Hashtable helper for iv_to_split.  */
 132
 133 struct var_expand_hasher : free_ptr_hash <var_to_expand>
 134 {
 135   static inline hashval_t hash (const var_to_expand *);
 136   static inline bool equal (const var_to_expand *, const var_to_expand *);
 137 };
 138
 139 /* Return a hash for VES.  */
 140
 141 inline hashval_t
 142 var_expand_hasher::hash (const var_to_expand *ves)
 143 {
 144   return (hashval_t) INSN_UID (ves->insn);
 145 }
 146
 147 /* Return true if I1 and I2 refer to the same instruction.  */
 148
 149 inline bool
 150 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
 151 {
 152   return i1->insn == i2->insn;
 153 }
 154
 155 /* Information about optimization applied in
 156    the unrolled loop.  */
 157
 158 struct opt_info
 159 {
 160   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
 161                                                   split.  */
 162   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
 163   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
 164   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
 165                                         insns with accumulators to expand.  */
 166   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
 167   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
 168   unsigned first_new_block;        /* The first basic block that was
 169                                       duplicated.  */
 170   basic_block loop_exit;           /* The loop exit basic block.  */
 171   basic_block loop_preheader;      /* The loop preheader basic block.  */
 172 };
 173
 174 static void decide_unroll_stupid (struct loop *, int);
 175 static void decide_unroll_constant_iterations (struct loop *, int);
 176 static void decide_unroll_runtime_iterations (struct loop *, int);
 177 static void unroll_loop_stupid (struct loop *);
 178 static void decide_unrolling (int);
 179 static void unroll_loop_constant_iterations (struct loop *);
 180 static void unroll_loop_runtime_iterations (struct loop *);
 181 static struct opt_info *analyze_insns_in_loop (struct loop *);
 182 static void opt_info_start_duplication (struct opt_info *);
 183 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 184 static void free_opt_info (struct opt_info *);
 185 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
 186 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
 187 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
 188 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
 189 static void insert_var_expansion_initialization (struct var_to_expand *,
 190                                                  basic_block);
 191 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
 192                                              basic_block);
 193 static rtx get_expansion (struct var_to_expand *);
 194
 195 /* Emit a message summarizing the unroll that will be
 196    performed for LOOP, along with the loop's location LOCUS, if
 197    appropriate given the dump or -fopt-info settings.  */
 198
 199 static void
 200 report_unroll (struct loop *loop, location_t locus)
 201 {
 202   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
 203
 204   if (loop->lpt_decision.decision == LPT_NONE)
 205     return;
 206
 207   if (!dump_enabled_p ())
 208     return;
 209
 210   dump_printf_loc (report_flags, locus,
 211                    "loop unrolled %d times",
 212                    loop->lpt_decision.times);
 213   if (profile_info)
 214     dump_printf (report_flags,
 215                  " (header execution count %d)",
 216                  (int)loop->header->count);
 217
 218   dump_printf (report_flags, "\n");
 219 }
 220
 221 /* Decide whether unroll loops and how much.  */
 222 static void
 223 decide_unrolling (int flags)
 224 {
 225   struct loop *loop;
 226
 227   /* Scan the loops, inner ones first.  */
 228   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 229     {
 230       loop->lpt_decision.decision = LPT_NONE;
 231       location_t locus = get_loop_location (loop);
 232
 233       if (dump_enabled_p ())
 234         dump_printf_loc (TDF_RTL, locus,
 235                          ";; *** Considering loop %d at BB %d for "
 236                          "unrolling ***\n",
 237                          loop->num, loop->header->index);
 238
 239       /* Do not peel cold areas.  */
 240       if (optimize_loop_for_size_p (loop))
 241         {
 242           if (dump_file)
 243             fprintf (dump_file, ";; Not considering loop, cold area\n");
 244           continue;
 245         }
 246
 247       /* Can the loop be manipulated?  */
 248       if (!can_duplicate_loop_p (loop))
 249         {
 250           if (dump_file)
 251             fprintf (dump_file,
 252                      ";; Not considering loop, cannot duplicate\n");
 253           continue;
 254         }
 255
 256       /* Skip non-innermost loops.  */
 257       if (loop->inner)
 258         {
 259           if (dump_file)
 260             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 261           continue;
 262         }
 263
 264       loop->ninsns = num_loop_insns (loop);
 265       loop->av_ninsns = average_num_loop_insns (loop);
 266
 267       /* Try transformations one by one in decreasing order of
 268          priority.  */
 269
 270       decide_unroll_constant_iterations (loop, flags);
 271       if (loop->lpt_decision.decision == LPT_NONE)
 272         decide_unroll_runtime_iterations (loop, flags);
 273       if (loop->lpt_decision.decision == LPT_NONE)
 274         decide_unroll_stupid (loop, flags);
 275
 276       report_unroll (loop, locus);
 277     }
 278 }
 279
 280 /* Unroll LOOPS.  */
 281 void
 282 unroll_loops (int flags)
 283 {
 284   struct loop *loop;
 285   bool changed = false;
 286
 287   /* Now decide rest of unrolling.  */
 288   decide_unrolling (flags);
 289
 290   /* Scan the loops, inner ones first.  */
 291   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 292     {
 293       /* And perform the appropriate transformations.  */
 294       switch (loop->lpt_decision.decision)
 295         {
 296         case LPT_UNROLL_CONSTANT:
 297           unroll_loop_constant_iterations (loop);
 298           changed = true;
 299           break;
 300         case LPT_UNROLL_RUNTIME:
 301           unroll_loop_runtime_iterations (loop);
 302           changed = true;
 303           break;
 304         case LPT_UNROLL_STUPID:
 305           unroll_loop_stupid (loop);
 306           changed = true;
 307           break;
 308         case LPT_NONE:
 309           break;
 310         default:
 311           gcc_unreachable ();
 312         }
 313     }
 314
 315     if (changed)
 316       {
 317         calculate_dominance_info (CDI_DOMINATORS);
 318         fix_loop_structure (NULL);
 319       }
 320
 321   iv_analysis_done ();
 322 }
 323
 324 /* Check whether exit of the LOOP is at the end of loop body.  */
 325
 326 static bool
 327 loop_exit_at_end_p (struct loop *loop)
 328 {
 329   struct niter_desc *desc = get_simple_loop_desc (loop);
 330   rtx_insn *insn;
 331
 332   /* We should never have conditional in latch block.  */
 333   gcc_assert (desc->in_edge->dest != loop->header);
 334
 335   if (desc->in_edge->dest != loop->latch)
 336     return false;
 337
 338   /* Check that the latch is empty.  */
 339   FOR_BB_INSNS (loop->latch, insn)
 340     {
 341       if (INSN_P (insn) && active_insn_p (insn))
 342         return false;
 343     }
 344
 345   return true;
 346 }
 347
 348 /* Decide whether to unroll LOOP iterating constant number of times
 349    and how much.  */
 350
 351 static void
 352 decide_unroll_constant_iterations (struct loop *loop, int flags)
 353 {
 354   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 355   struct niter_desc *desc;
 356   widest_int iterations;
 357
 358   if (!(flags & UAP_UNROLL))
 359     {
 360       /* We were not asked to, just return back silently.  */
 361       return;
 362     }
 363
 364   if (dump_file)
 365     fprintf (dump_file,
 366              "\n;; Considering unrolling loop with constant "
 367              "number of iterations\n");
 368
 369   /* nunroll = total number of copies of the original loop body in
 370      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 371   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 372   nunroll_by_av
 373     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 374   if (nunroll > nunroll_by_av)
 375     nunroll = nunroll_by_av;
 376   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 377     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 378
 379   if (targetm.loop_unroll_adjust)
 380     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 381
 382   /* Skip big loops.  */
 383   if (nunroll <= 1)
 384     {
 385       if (dump_file)
 386         fprintf (dump_file, ";; Not considering loop, is too big\n");
 387       return;
 388     }
 389
 390   /* Check for simple loops.  */
 391   desc = get_simple_loop_desc (loop);
 392
 393   /* Check number of iterations.  */
 394   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 395     {
 396       if (dump_file)
 397         fprintf (dump_file,
 398                  ";; Unable to prove that the loop iterates constant times\n");
 399       return;
 400     }
 401
 402   /* Check whether the loop rolls enough to consider.
 403      Consult also loop bounds and profile; in the case the loop has more
 404      than one exit it may well loop less than determined maximal number
 405      of iterations.  */
 406   if (desc->niter < 2 * nunroll
 407       || ((get_estimated_loop_iterations (loop, &iterations)
 408            || get_max_loop_iterations (loop, &iterations))
 409           && wi::ltu_p (iterations, 2 * nunroll)))
 410     {
 411       if (dump_file)
 412         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 413       return;
 414     }
 415
 416   /* Success; now compute number of iterations to unroll.  We alter
 417      nunroll so that as few as possible copies of loop body are
 418      necessary, while still not decreasing the number of unrollings
 419      too much (at most by 1).  */
 420   best_copies = 2 * nunroll + 10;
 421
 422   i = 2 * nunroll + 2;
 423   if (i - 1 >= desc->niter)
 424     i = desc->niter - 2;
 425
 426   for (; i >= nunroll - 1; i--)
 427     {
 428       unsigned exit_mod = desc->niter % (i + 1);
 429
 430       if (!loop_exit_at_end_p (loop))
 431         n_copies = exit_mod + i + 1;
 432       else if (exit_mod != (unsigned) i
 433                || desc->noloop_assumptions != NULL_RTX)
 434         n_copies = exit_mod + i + 2;
 435       else
 436         n_copies = i + 1;
 437
 438       if (n_copies < best_copies)
 439         {
 440           best_copies = n_copies;
 441           best_unroll = i;
 442         }
 443     }
 444
 445   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 446   loop->lpt_decision.times = best_unroll;
 447 }
 448
 449 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
 450    The transformation does this:
 451
 452    for (i = 0; i < 102; i++)
 453      body;
 454
 455    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 456
 457    i = 0;
 458    body; i++;
 459    body; i++;
 460    while (i < 102)
 461      {
 462        body; i++;
 463        body; i++;
 464        body; i++;
 465        body; i++;
 466      }
 467   */
 468 static void
 469 unroll_loop_constant_iterations (struct loop *loop)
 470 {
 471   unsigned HOST_WIDE_INT niter;
 472   unsigned exit_mod;
 473   sbitmap wont_exit;
 474   unsigned i;
 475   edge e;
 476   unsigned max_unroll = loop->lpt_decision.times;
 477   struct niter_desc *desc = get_simple_loop_desc (loop);
 478   bool exit_at_end = loop_exit_at_end_p (loop);
 479   struct opt_info *opt_info = NULL;
 480   bool ok;
 481
 482   niter = desc->niter;
 483
 484   /* Should not get here (such loop should be peeled instead).  */
 485   gcc_assert (niter > max_unroll + 1);
 486
 487   exit_mod = niter % (max_unroll + 1);
 488
 489   wont_exit = sbitmap_alloc (max_unroll + 1);
 490   bitmap_ones (wont_exit);
 491
 492   auto_vec<edge> remove_edges;
 493   if (flag_split_ivs_in_unroller
 494       || flag_variable_expansion_in_unroller)
 495     opt_info = analyze_insns_in_loop (loop);
 496
 497   if (!exit_at_end)
 498     {
 499       /* The exit is not at the end of the loop; leave exit test
 500          in the first copy, so that the loops that start with test
 501          of exit condition have continuous body after unrolling.  */
 502
 503       if (dump_file)
 504         fprintf (dump_file, ";; Condition at beginning of loop.\n");
 505
 506       /* Peel exit_mod iterations.  */
 507       bitmap_clear_bit (wont_exit, 0);
 508       if (desc->noloop_assumptions)
 509         bitmap_clear_bit (wont_exit, 1);
 510
 511       if (exit_mod)
 512         {
 513           opt_info_start_duplication (opt_info);
 514           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 515                                               exit_mod,
 516                                               wont_exit, desc->out_edge,
 517                                               &remove_edges,
 518                                               DLTHE_FLAG_UPDATE_FREQ
 519                                               | (opt_info && exit_mod > 1
 520                                                  ? DLTHE_RECORD_COPY_NUMBER
 521                                                    : 0));
 522           gcc_assert (ok);
 523
 524           if (opt_info && exit_mod > 1)
 525             apply_opt_in_copies (opt_info, exit_mod, false, false);
 526
 527           desc->noloop_assumptions = NULL_RTX;
 528           desc->niter -= exit_mod;
 529           loop->nb_iterations_upper_bound -= exit_mod;
 530           if (loop->any_estimate
 531               && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
 532             loop->nb_iterations_estimate -= exit_mod;
 533           else
 534             loop->any_estimate = false;
 535         }
 536
 537       bitmap_set_bit (wont_exit, 1);
 538     }
 539   else
 540     {
 541       /* Leave exit test in last copy, for the same reason as above if
 542          the loop tests the condition at the end of loop body.  */
 543
 544       if (dump_file)
 545         fprintf (dump_file, ";; Condition at end of loop.\n");
 546
 547       /* We know that niter >= max_unroll + 2; so we do not need to care of
 548          case when we would exit before reaching the loop.  So just peel
 549          exit_mod + 1 iterations.  */
 550       if (exit_mod != max_unroll
 551           || desc->noloop_assumptions)
 552         {
 553           bitmap_clear_bit (wont_exit, 0);
 554           if (desc->noloop_assumptions)
 555             bitmap_clear_bit (wont_exit, 1);
 556
 557           opt_info_start_duplication (opt_info);
 558           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 559                                               exit_mod + 1,
 560                                               wont_exit, desc->out_edge,
 561                                               &remove_edges,
 562                                               DLTHE_FLAG_UPDATE_FREQ
 563                                               | (opt_info && exit_mod > 0
 564                                                  ? DLTHE_RECORD_COPY_NUMBER
 565                                                    : 0));
 566           gcc_assert (ok);
 567
 568           if (opt_info && exit_mod > 0)
 569             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 570
 571           desc->niter -= exit_mod + 1;
 572           loop->nb_iterations_upper_bound -= exit_mod + 1;
 573           if (loop->any_estimate
 574               && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
 575             loop->nb_iterations_estimate -= exit_mod + 1;
 576           else
 577             loop->any_estimate = false;
 578           desc->noloop_assumptions = NULL_RTX;
 579
 580           bitmap_set_bit (wont_exit, 0);
 581           bitmap_set_bit (wont_exit, 1);
 582         }
 583
 584       bitmap_clear_bit (wont_exit, max_unroll);
 585     }
 586
 587   /* Now unroll the loop.  */
 588
 589   opt_info_start_duplication (opt_info);
 590   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 591                                       max_unroll,
 592                                       wont_exit, desc->out_edge,
 593                                       &remove_edges,
 594                                       DLTHE_FLAG_UPDATE_FREQ
 595                                       | (opt_info
 596                                          ? DLTHE_RECORD_COPY_NUMBER
 597                                            : 0));
 598   gcc_assert (ok);
 599
 600   if (opt_info)
 601     {
 602       apply_opt_in_copies (opt_info, max_unroll, true, true);
 603       free_opt_info (opt_info);
 604     }
 605
 606   free (wont_exit);
 607
 608   if (exit_at_end)
 609     {
 610       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 611       /* Find a new in and out edge; they are in the last copy we have made.  */
 612
 613       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 614         {
 615           desc->out_edge = EDGE_SUCC (exit_block, 0);
 616           desc->in_edge = EDGE_SUCC (exit_block, 1);
 617         }
 618       else
 619         {
 620           desc->out_edge = EDGE_SUCC (exit_block, 1);
 621           desc->in_edge = EDGE_SUCC (exit_block, 0);
 622         }
 623     }
 624
 625   desc->niter /= max_unroll + 1;
 626   loop->nb_iterations_upper_bound
 627     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
 628   if (loop->any_estimate)
 629     loop->nb_iterations_estimate
 630       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
 631   desc->niter_expr = GEN_INT (desc->niter);
 632
 633   /* Remove the edges.  */
 634   FOR_EACH_VEC_ELT (remove_edges, i, e)
 635     remove_path (e);
 636
 637   if (dump_file)
 638     fprintf (dump_file,
 639              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 640              max_unroll, num_loop_insns (loop));
 641 }
 642
 643 /* Decide whether to unroll LOOP iterating runtime computable number of times
 644    and how much.  */
 645 static void
 646 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 647 {
 648   unsigned nunroll, nunroll_by_av, i;
 649   struct niter_desc *desc;
 650   widest_int iterations;
 651
 652   if (!(flags & UAP_UNROLL))
 653     {
 654       /* We were not asked to, just return back silently.  */
 655       return;
 656     }
 657
 658   if (dump_file)
 659     fprintf (dump_file,
 660              "\n;; Considering unrolling loop with runtime "
 661              "computable number of iterations\n");
 662
 663   /* nunroll = total number of copies of the original loop body in
 664      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 665   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 666   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 667   if (nunroll > nunroll_by_av)
 668     nunroll = nunroll_by_av;
 669   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 670     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 671
 672   if (targetm.loop_unroll_adjust)
 673     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 674
 675   /* Skip big loops.  */
 676   if (nunroll <= 1)
 677     {
 678       if (dump_file)
 679         fprintf (dump_file, ";; Not considering loop, is too big\n");
 680       return;
 681     }
 682
 683   /* Check for simple loops.  */
 684   desc = get_simple_loop_desc (loop);
 685
 686   /* Check simpleness.  */
 687   if (!desc->simple_p || desc->assumptions)
 688     {
 689       if (dump_file)
 690         fprintf (dump_file,
 691                  ";; Unable to prove that the number of iterations "
 692                  "can be counted in runtime\n");
 693       return;
 694     }
 695
 696   if (desc->const_iter)
 697     {
 698       if (dump_file)
 699         fprintf (dump_file, ";; Loop iterates constant times\n");
 700       return;
 701     }
 702
 703   /* Check whether the loop rolls.  */
 704   if ((get_estimated_loop_iterations (loop, &iterations)
 705        || get_max_loop_iterations (loop, &iterations))
 706       && wi::ltu_p (iterations, 2 * nunroll))
 707     {
 708       if (dump_file)
 709         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 710       return;
 711     }
 712
 713   /* Success; now force nunroll to be power of 2, as we are unable to
 714      cope with overflows in computation of number of iterations.  */
 715   for (i = 1; 2 * i <= nunroll; i *= 2)
 716     continue;
 717
 718   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 719   loop->lpt_decision.times = i - 1;
 720 }
 721
 722 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 723    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 724    and NULL is returned instead.  */
 725
 726 basic_block
 727 split_edge_and_insert (edge e, rtx_insn *insns)
 728 {
 729   basic_block bb;
 730
 731   if (!insns)
 732     return NULL;
 733   bb = split_edge (e);
 734   emit_insn_after (insns, BB_END (bb));
 735
 736   /* ??? We used to assume that INSNS can contain control flow insns, and
 737      that we had to try to find sub basic blocks in BB to maintain a valid
 738      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
 739      and call break_superblocks when going out of cfglayout mode.  But it
 740      turns out that this never happens; and that if it does ever happen,
 741      the verify_flow_info at the end of the RTL loop passes would fail.
 742
 743      There are two reasons why we expected we could have control flow insns
 744      in INSNS.  The first is when a comparison has to be done in parts, and
 745      the second is when the number of iterations is computed for loops with
 746      the number of iterations known at runtime.  In both cases, test cases
 747      to get control flow in INSNS appear to be impossible to construct:
 748
 749       * If do_compare_rtx_and_jump needs several branches to do comparison
 750         in a mode that needs comparison by parts, we cannot analyze the
 751         number of iterations of the loop, and we never get to unrolling it.
 752
 753       * The code in expand_divmod that was suspected to cause creation of
 754         branching code seems to be only accessed for signed division.  The
 755         divisions used by # of iterations analysis are always unsigned.
 756         Problems might arise on architectures that emits branching code
 757         for some operations that may appear in the unroller (especially
 758         for division), but we have no such architectures.
 759
 760      Considering all this, it was decided that we should for now assume
 761      that INSNS can in theory contain control flow insns, but in practice
 762      it never does.  So we don't handle the theoretical case, and should
 763      a real failure ever show up, we have a pretty good clue for how to
 764      fix it.  */
 765
 766   return bb;
 767 }
 768
 769 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
 770    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
 771    in order to create a jump.  */
 772
 773 static rtx_insn *
 774 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
 775                       rtx_code_label *label, int prob, rtx_insn *cinsn)
 776 {
 777   rtx_insn *seq;
 778   rtx_jump_insn *jump;
 779   rtx cond;
 780   machine_mode mode;
 781
 782   mode = GET_MODE (op0);
 783   if (mode == VOIDmode)
 784     mode = GET_MODE (op1);
 785
 786   start_sequence ();
 787   if (GET_MODE_CLASS (mode) == MODE_CC)
 788     {
 789       /* A hack -- there seems to be no easy generic way how to make a
 790          conditional jump from a ccmode comparison.  */
 791       gcc_assert (cinsn);
 792       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
 793       gcc_assert (GET_CODE (cond) == comp);
 794       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
 795       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
 796       emit_jump_insn (copy_insn (PATTERN (cinsn)));
 797       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 798       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
 799       LABEL_NUSES (JUMP_LABEL (jump))++;
 800       redirect_jump (jump, label, 0);
 801     }
 802   else
 803     {
 804       gcc_assert (!cinsn);
 805
 806       op0 = force_operand (op0, NULL_RTX);
 807       op1 = force_operand (op1, NULL_RTX);
 808       do_compare_rtx_and_jump (op0, op1, comp, 0,
 809                                mode, NULL_RTX, NULL, label, -1);
 810       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 811       jump->set_jump_target (label);
 812       LABEL_NUSES (label)++;
 813     }
 814   add_int_reg_note (jump, REG_BR_PROB, prob);
 815
 816   seq = get_insns ();
 817   end_sequence ();
 818
 819   return seq;
 820 }
 821
 822 /* Unroll LOOP for which we are able to count number of iterations in runtime
 823    LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
 824    extra care for case n < 0):
 825
 826    for (i = 0; i < n; i++)
 827      body;
 828
 829    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 830
 831    i = 0;
 832    mod = n % 4;
 833
 834    switch (mod)
 835      {
 836        case 3:
 837          body; i++;
 838        case 2:
 839          body; i++;
 840        case 1:
 841          body; i++;
 842        case 0: ;
 843      }
 844
 845    while (i < n)
 846      {
 847        body; i++;
 848        body; i++;
 849        body; i++;
 850        body; i++;
 851      }
 852    */
 853 static void
 854 unroll_loop_runtime_iterations (struct loop *loop)
 855 {
 856   rtx old_niter, niter, tmp;
 857   rtx_insn *init_code, *branch_code;
 858   unsigned i, j, p;
 859   basic_block preheader, *body, swtch, ezc_swtch;
 860   sbitmap wont_exit;
 861   int may_exit_copy;
 862   unsigned n_peel;
 863   edge e;
 864   bool extra_zero_check, last_may_exit;
 865   unsigned max_unroll = loop->lpt_decision.times;
 866   struct niter_desc *desc = get_simple_loop_desc (loop);
 867   bool exit_at_end = loop_exit_at_end_p (loop);
 868   struct opt_info *opt_info = NULL;
 869   bool ok;
 870
 871   if (flag_split_ivs_in_unroller
 872       || flag_variable_expansion_in_unroller)
 873     opt_info = analyze_insns_in_loop (loop);
 874
 875   /* Remember blocks whose dominators will have to be updated.  */
 876   auto_vec<basic_block> dom_bbs;
 877
 878   body = get_loop_body (loop);
 879   for (i = 0; i < loop->num_nodes; i++)
 880     {
 881       vec<basic_block> ldom;
 882       basic_block bb;
 883
 884       ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
 885       FOR_EACH_VEC_ELT (ldom, j, bb)
 886         if (!flow_bb_inside_loop_p (loop, bb))
 887           dom_bbs.safe_push (bb);
 888
 889       ldom.release ();
 890     }
 891   free (body);
 892
 893   if (!exit_at_end)
 894     {
 895       /* Leave exit in first copy (for explanation why see comment in
 896          unroll_loop_constant_iterations).  */
 897       may_exit_copy = 0;
 898       n_peel = max_unroll - 1;
 899       extra_zero_check = true;
 900       last_may_exit = false;
 901     }
 902   else
 903     {
 904       /* Leave exit in last copy (for explanation why see comment in
 905          unroll_loop_constant_iterations).  */
 906       may_exit_copy = max_unroll;
 907       n_peel = max_unroll;
 908       extra_zero_check = false;
 909       last_may_exit = true;
 910     }
 911
 912   /* Get expression for number of iterations.  */
 913   start_sequence ();
 914   old_niter = niter = gen_reg_rtx (desc->mode);
 915   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 916   if (tmp != niter)
 917     emit_move_insn (niter, tmp);
 918
 919   /* Count modulo by ANDing it with max_unroll; we use the fact that
 920      the number of unrollings is a power of two, and thus this is correct
 921      even if there is overflow in the computation.  */
 922   niter = expand_simple_binop (desc->mode, AND,
 923                                niter, gen_int_mode (max_unroll, desc->mode),
 924                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 925
 926   init_code = get_insns ();
 927   end_sequence ();
 928   unshare_all_rtl_in_chain (init_code);
 929
 930   /* Precondition the loop.  */
 931   split_edge_and_insert (loop_preheader_edge (loop), init_code);
 932
 933   auto_vec<edge> remove_edges;
 934
 935   wont_exit = sbitmap_alloc (max_unroll + 2);
 936
 937   /* Peel the first copy of loop body (almost always we must leave exit test
 938      here; the only exception is when we have extra zero check and the number
 939      of iterations is reliable.  Also record the place of (possible) extra
 940      zero check.  */
 941   bitmap_clear (wont_exit);
 942   if (extra_zero_check
 943       && !desc->noloop_assumptions)
 944     bitmap_set_bit (wont_exit, 1);
 945   ezc_swtch = loop_preheader_edge (loop)->src;
 946   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 947                                       1, wont_exit, desc->out_edge,
 948                                       &remove_edges,
 949                                       DLTHE_FLAG_UPDATE_FREQ);
 950   gcc_assert (ok);
 951
 952   /* Record the place where switch will be built for preconditioning.  */
 953   swtch = split_edge (loop_preheader_edge (loop));
 954
 955   for (i = 0; i < n_peel; i++)
 956     {
 957       /* Peel the copy.  */
 958       bitmap_clear (wont_exit);
 959       if (i != n_peel - 1 || !last_may_exit)
 960         bitmap_set_bit (wont_exit, 1);
 961       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 962                                           1, wont_exit, desc->out_edge,
 963                                           &remove_edges,
 964                                           DLTHE_FLAG_UPDATE_FREQ);
 965       gcc_assert (ok);
 966
 967       /* Create item for switch.  */
 968       j = n_peel - i - (extra_zero_check ? 0 : 1);
 969       p = REG_BR_PROB_BASE / (i + 2);
 970
 971       preheader = split_edge (loop_preheader_edge (loop));
 972       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
 973                                           block_label (preheader), p,
 974                                           NULL);
 975
 976       /* We rely on the fact that the compare and jump cannot be optimized out,
 977          and hence the cfg we create is correct.  */
 978       gcc_assert (branch_code != NULL_RTX);
 979
 980       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
 981       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
 982       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
 983       e = make_edge (swtch, preheader,
 984                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
 985       e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
 986       e->probability = p;
 987     }
 988
 989   if (extra_zero_check)
 990     {
 991       /* Add branch for zero iterations.  */
 992       p = REG_BR_PROB_BASE / (max_unroll + 1);
 993       swtch = ezc_swtch;
 994       preheader = split_edge (loop_preheader_edge (loop));
 995       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
 996                                           block_label (preheader), p,
 997                                           NULL);
 998       gcc_assert (branch_code != NULL_RTX);
 999
1000       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1001       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1002       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1003       e = make_edge (swtch, preheader,
1004                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1005       e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
1006       e->probability = p;
1007     }
1008
1009   /* Recount dominators for outer blocks.  */
1010   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1011
1012   /* And unroll loop.  */
1013
1014   bitmap_ones (wont_exit);
1015   bitmap_clear_bit (wont_exit, may_exit_copy);
1016   opt_info_start_duplication (opt_info);
1017
1018   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1019                                       max_unroll,
1020                                       wont_exit, desc->out_edge,
1021                                       &remove_edges,
1022                                       DLTHE_FLAG_UPDATE_FREQ
1023                                       | (opt_info
1024                                          ? DLTHE_RECORD_COPY_NUMBER
1025                                            : 0));
1026   gcc_assert (ok);
1027
1028   if (opt_info)
1029     {
1030       apply_opt_in_copies (opt_info, max_unroll, true, true);
1031       free_opt_info (opt_info);
1032     }
1033
1034   free (wont_exit);
1035
1036   if (exit_at_end)
1037     {
1038       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1039       /* Find a new in and out edge; they are in the last copy we have
1040          made.  */
1041
1042       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1043         {
1044           desc->out_edge = EDGE_SUCC (exit_block, 0);
1045           desc->in_edge = EDGE_SUCC (exit_block, 1);
1046         }
1047       else
1048         {
1049           desc->out_edge = EDGE_SUCC (exit_block, 1);
1050           desc->in_edge = EDGE_SUCC (exit_block, 0);
1051         }
1052     }
1053
1054   /* Remove the edges.  */
1055   FOR_EACH_VEC_ELT (remove_edges, i, e)
1056     remove_path (e);
1057
1058   /* We must be careful when updating the number of iterations due to
1059      preconditioning and the fact that the value must be valid at entry
1060      of the loop.  After passing through the above code, we see that
1061      the correct new number of iterations is this:  */
1062   gcc_assert (!desc->const_iter);
1063   desc->niter_expr =
1064     simplify_gen_binary (UDIV, desc->mode, old_niter,
1065                          gen_int_mode (max_unroll + 1, desc->mode));
1066   loop->nb_iterations_upper_bound
1067     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1068   if (loop->any_estimate)
1069     loop->nb_iterations_estimate
1070       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1071   if (exit_at_end)
1072     {
1073       desc->niter_expr =
1074         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1075       desc->noloop_assumptions = NULL_RTX;
1076       --loop->nb_iterations_upper_bound;
1077       if (loop->any_estimate
1078           && loop->nb_iterations_estimate != 0)
1079         --loop->nb_iterations_estimate;
1080       else
1081         loop->any_estimate = false;
1082     }
1083
1084   if (dump_file)
1085     fprintf (dump_file,
1086              ";; Unrolled loop %d times, counting # of iterations "
1087              "in runtime, %i insns\n",
1088              max_unroll, num_loop_insns (loop));
1089 }
1090
1091 /* Decide whether to unroll LOOP stupidly and how much.  */
1092 static void
1093 decide_unroll_stupid (struct loop *loop, int flags)
1094 {
1095   unsigned nunroll, nunroll_by_av, i;
1096   struct niter_desc *desc;
1097   widest_int iterations;
1098
1099   if (!(flags & UAP_UNROLL_ALL))
1100     {
1101       /* We were not asked to, just return back silently.  */
1102       return;
1103     }
1104
1105   if (dump_file)
1106     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1107
1108   /* nunroll = total number of copies of the original loop body in
1109      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1110   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1111   nunroll_by_av
1112     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1113   if (nunroll > nunroll_by_av)
1114     nunroll = nunroll_by_av;
1115   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1116     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1117
1118   if (targetm.loop_unroll_adjust)
1119     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1120
1121   /* Skip big loops.  */
1122   if (nunroll <= 1)
1123     {
1124       if (dump_file)
1125         fprintf (dump_file, ";; Not considering loop, is too big\n");
1126       return;
1127     }
1128
1129   /* Check for simple loops.  */
1130   desc = get_simple_loop_desc (loop);
1131
1132   /* Check simpleness.  */
1133   if (desc->simple_p && !desc->assumptions)
1134     {
1135       if (dump_file)
1136         fprintf (dump_file, ";; The loop is simple\n");
1137       return;
1138     }
1139
1140   /* Do not unroll loops with branches inside -- it increases number
1141      of mispredicts.
1142      TODO: this heuristic needs tunning; call inside the loop body
1143      is also relatively good reason to not unroll.  */
1144   if (num_loop_branches (loop) > 1)
1145     {
1146       if (dump_file)
1147         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1148       return;
1149     }
1150
1151   /* Check whether the loop rolls.  */
1152   if ((get_estimated_loop_iterations (loop, &iterations)
1153        || get_max_loop_iterations (loop, &iterations))
1154       && wi::ltu_p (iterations, 2 * nunroll))
1155     {
1156       if (dump_file)
1157         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1158       return;
1159     }
1160
1161   /* Success.  Now force nunroll to be power of 2, as it seems that this
1162      improves results (partially because of better alignments, partially
1163      because of some dark magic).  */
1164   for (i = 1; 2 * i <= nunroll; i *= 2)
1165     continue;
1166
1167   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1168   loop->lpt_decision.times = i - 1;
1169 }
1170
1171 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
1172
1173    while (cond)
1174      body;
1175
1176    ==>  (LOOP->LPT_DECISION.TIMES == 3)
1177
1178    while (cond)
1179      {
1180        body;
1181        if (!cond) break;
1182        body;
1183        if (!cond) break;
1184        body;
1185        if (!cond) break;
1186        body;
1187      }
1188    */
1189 static void
1190 unroll_loop_stupid (struct loop *loop)
1191 {
1192   sbitmap wont_exit;
1193   unsigned nunroll = loop->lpt_decision.times;
1194   struct niter_desc *desc = get_simple_loop_desc (loop);
1195   struct opt_info *opt_info = NULL;
1196   bool ok;
1197
1198   if (flag_split_ivs_in_unroller
1199       || flag_variable_expansion_in_unroller)
1200     opt_info = analyze_insns_in_loop (loop);
1201
1202
1203   wont_exit = sbitmap_alloc (nunroll + 1);
1204   bitmap_clear (wont_exit);
1205   opt_info_start_duplication (opt_info);
1206
1207   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1208                                       nunroll, wont_exit,
1209                                       NULL, NULL,
1210                                       DLTHE_FLAG_UPDATE_FREQ
1211                                       | (opt_info
1212                                          ? DLTHE_RECORD_COPY_NUMBER
1213                                            : 0));
1214   gcc_assert (ok);
1215
1216   if (opt_info)
1217     {
1218       apply_opt_in_copies (opt_info, nunroll, true, true);
1219       free_opt_info (opt_info);
1220     }
1221
1222   free (wont_exit);
1223
1224   if (desc->simple_p)
1225     {
1226       /* We indeed may get here provided that there are nontrivial assumptions
1227          for a loop to be really simple.  We could update the counts, but the
1228          problem is that we are unable to decide which exit will be taken
1229          (not really true in case the number of iterations is constant,
1230          but no one will do anything with this information, so we do not
1231          worry about it).  */
1232       desc->simple_p = false;
1233     }
1234
1235   if (dump_file)
1236     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1237              nunroll, num_loop_insns (loop));
1238 }
1239
1240 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1241    Set *DEBUG_USES to the number of debug insns that reference the
1242    variable.  */
1243
1244 static bool
1245 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1246                                   int *debug_uses)
1247 {
1248   basic_block *body, bb;
1249   unsigned i;
1250   int count_ref = 0;
1251   rtx_insn *insn;
1252
1253   body = get_loop_body (loop);
1254   for (i = 0; i < loop->num_nodes; i++)
1255     {
1256       bb = body[i];
1257
1258       FOR_BB_INSNS (bb, insn)
1259         if (!rtx_referenced_p (reg, insn))
1260           continue;
1261         else if (DEBUG_INSN_P (insn))
1262           ++*debug_uses;
1263         else if (++count_ref > 1)
1264           break;
1265     }
1266   free (body);
1267   return (count_ref  == 1);
1268 }
1269
1270 /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1271
1272 static void
1273 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1274 {
1275   basic_block *body, bb;
1276   unsigned i;
1277   rtx_insn *insn;
1278
1279   body = get_loop_body (loop);
1280   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1281     {
1282       bb = body[i];
1283
1284       FOR_BB_INSNS (bb, insn)
1285         if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1286           continue;
1287         else
1288           {
1289             validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1290                              gen_rtx_UNKNOWN_VAR_LOC (), 0);
1291             if (!--debug_uses)
1292               break;
1293           }
1294     }
1295   free (body);
1296 }
1297
1298 /* Determine whether INSN contains an accumulator
1299    which can be expanded into separate copies,
1300    one for each copy of the LOOP body.
1301
1302    for (i = 0 ; i < n; i++)
1303      sum += a[i];
1304
1305    ==>
1306
1307    sum += a[i]
1308    ....
1309    i = i+1;
1310    sum1 += a[i]
1311    ....
1312    i = i+1
1313    sum2 += a[i];
1314    ....
1315
1316    Return NULL if INSN contains no opportunity for expansion of accumulator.
1317    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1318    information and return a pointer to it.
1319 */
1320
1321 static struct var_to_expand *
1322 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1323 {
1324   rtx set, dest, src;
1325   struct var_to_expand *ves;
1326   unsigned accum_pos;
1327   enum rtx_code code;
1328   int debug_uses = 0;
1329
1330   set = single_set (insn);
1331   if (!set)
1332     return NULL;
1333
1334   dest = SET_DEST (set);
1335   src = SET_SRC (set);
1336   code = GET_CODE (src);
1337
1338   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1339     return NULL;
1340
1341   if (FLOAT_MODE_P (GET_MODE (dest)))
1342     {
1343       if (!flag_associative_math)
1344         return NULL;
1345       /* In the case of FMA, we're also changing the rounding.  */
1346       if (code == FMA && !flag_unsafe_math_optimizations)
1347         return NULL;
1348     }
1349
1350   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1351      in MD.  But if there is no optab to generate the insn, we can not
1352      perform the variable expansion.  This can happen if an MD provides
1353      an insn but not a named pattern to generate it, for example to avoid
1354      producing code that needs additional mode switches like for x87/mmx.
1355
1356      So we check have_insn_for which looks for an optab for the operation
1357      in SRC.  If it doesn't exist, we can't perform the expansion even
1358      though INSN is valid.  */
1359   if (!have_insn_for (code, GET_MODE (src)))
1360     return NULL;
1361
1362   if (!REG_P (dest)
1363       && !(GET_CODE (dest) == SUBREG
1364            && REG_P (SUBREG_REG (dest))))
1365     return NULL;
1366
1367   /* Find the accumulator use within the operation.  */
1368   if (code == FMA)
1369     {
1370       /* We only support accumulation via FMA in the ADD position.  */
1371       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1372         return NULL;
1373       accum_pos = 2;
1374     }
1375   else if (rtx_equal_p (dest, XEXP (src, 0)))
1376     accum_pos = 0;
1377   else if (rtx_equal_p (dest, XEXP (src, 1)))
1378     {
1379       /* The method of expansion that we are using; which includes the
1380          initialization of the expansions with zero and the summation of
1381          the expansions at the end of the computation will yield wrong
1382          results for (x = something - x) thus avoid using it in that case.  */
1383       if (code == MINUS)
1384         return NULL;
1385       accum_pos = 1;
1386     }
1387   else
1388     return NULL;
1389
1390   /* It must not otherwise be used.  */
1391   if (code == FMA)
1392     {
1393       if (rtx_referenced_p (dest, XEXP (src, 0))
1394           || rtx_referenced_p (dest, XEXP (src, 1)))
1395         return NULL;
1396     }
1397   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1398     return NULL;
1399
1400   /* It must be used in exactly one insn.  */
1401   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1402     return NULL;
1403
1404   if (dump_file)
1405     {
1406       fprintf (dump_file, "\n;; Expanding Accumulator ");
1407       print_rtl (dump_file, dest);
1408       fprintf (dump_file, "\n");
1409     }
1410
1411   if (debug_uses)
1412     /* Instead of resetting the debug insns, we could replace each
1413        debug use in the loop with the sum or product of all expanded
1414        accummulators.  Since we'll only know of all expansions at the
1415        end, we'd have to keep track of which vars_to_expand a debug
1416        insn in the loop references, take note of each copy of the
1417        debug insn during unrolling, and when it's all done, compute
1418        the sum or product of each variable and adjust the original
1419        debug insn and each copy thereof.  What a pain!  */
1420     reset_debug_uses_in_loop (loop, dest, debug_uses);
1421
1422   /* Record the accumulator to expand.  */
1423   ves = XNEW (struct var_to_expand);
1424   ves->insn = insn;
1425   ves->reg = copy_rtx (dest);
1426   ves->var_expansions.create (1);
1427   ves->next = NULL;
1428   ves->op = GET_CODE (src);
1429   ves->expansion_count = 0;
1430   ves->reuse_expansion = 0;
1431   return ves;
1432 }
1433
1434 /* Determine whether there is an induction variable in INSN that
1435    we would like to split during unrolling.
1436
1437    I.e. replace
1438
1439    i = i + 1;
1440    ...
1441    i = i + 1;
1442    ...
1443    i = i + 1;
1444    ...
1445
1446    type chains by
1447
1448    i0 = i + 1
1449    ...
1450    i = i0 + 1
1451    ...
1452    i = i0 + 2
1453    ...
1454
1455    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1456    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1457    pointer to it.  */
1458
1459 static struct iv_to_split *
1460 analyze_iv_to_split_insn (rtx_insn *insn)
1461 {
1462   rtx set, dest;
1463   struct rtx_iv iv;
1464   struct iv_to_split *ivts;
1465   bool ok;
1466
1467   /* For now we just split the basic induction variables.  Later this may be
1468      extended for example by selecting also addresses of memory references.  */
1469   set = single_set (insn);
1470   if (!set)
1471     return NULL;
1472
1473   dest = SET_DEST (set);
1474   if (!REG_P (dest))
1475     return NULL;
1476
1477   if (!biv_p (insn, dest))
1478     return NULL;
1479
1480   ok = iv_analyze_result (insn, dest, &iv);
1481
1482   /* This used to be an assert under the assumption that if biv_p returns
1483      true that iv_analyze_result must also return true.  However, that
1484      assumption is not strictly correct as evidenced by pr25569.
1485
1486      Returning NULL when iv_analyze_result returns false is safe and
1487      avoids the problems in pr25569 until the iv_analyze_* routines
1488      can be fixed, which is apparently hard and time consuming
1489      according to their author.  */
1490   if (! ok)
1491     return NULL;
1492
1493   if (iv.step == const0_rtx
1494       || iv.mode != iv.extend_mode)
1495     return NULL;
1496
1497   /* Record the insn to split.  */
1498   ivts = XNEW (struct iv_to_split);
1499   ivts->insn = insn;
1500   ivts->orig_var = dest;
1501   ivts->base_var = NULL_RTX;
1502   ivts->step = iv.step;
1503   ivts->next = NULL;
1504
1505   return ivts;
1506 }
1507
1508 /* Determines which of insns in LOOP can be optimized.
1509    Return a OPT_INFO struct with the relevant hash tables filled
1510    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1511    is undefined for the return value.  */
1512
1513 static struct opt_info *
1514 analyze_insns_in_loop (struct loop *loop)
1515 {
1516   basic_block *body, bb;
1517   unsigned i;
1518   struct opt_info *opt_info = XCNEW (struct opt_info);
1519   rtx_insn *insn;
1520   struct iv_to_split *ivts = NULL;
1521   struct var_to_expand *ves = NULL;
1522   iv_to_split **slot1;
1523   var_to_expand **slot2;
1524   vec<edge> edges = get_loop_exit_edges (loop);
1525   edge exit;
1526   bool can_apply = false;
1527
1528   iv_analysis_loop_init (loop);
1529
1530   body = get_loop_body (loop);
1531
1532   if (flag_split_ivs_in_unroller)
1533     {
1534       opt_info->insns_to_split
1535         = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1536       opt_info->iv_to_split_head = NULL;
1537       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1538     }
1539
1540   /* Record the loop exit bb and loop preheader before the unrolling.  */
1541   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1542
1543   if (edges.length () == 1)
1544     {
1545       exit = edges[0];
1546       if (!(exit->flags & EDGE_COMPLEX))
1547         {
1548           opt_info->loop_exit = split_edge (exit);
1549           can_apply = true;
1550         }
1551     }
1552
1553   if (flag_variable_expansion_in_unroller
1554       && can_apply)
1555     {
1556       opt_info->insns_with_var_to_expand
1557         = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1558       opt_info->var_to_expand_head = NULL;
1559       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1560     }
1561
1562   for (i = 0; i < loop->num_nodes; i++)
1563     {
1564       bb = body[i];
1565       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1566         continue;
1567
1568       FOR_BB_INSNS (bb, insn)
1569       {
1570         if (!INSN_P (insn))
1571           continue;
1572
1573         if (opt_info->insns_to_split)
1574           ivts = analyze_iv_to_split_insn (insn);
1575
1576         if (ivts)
1577           {
1578             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1579             gcc_assert (*slot1 == NULL);
1580             *slot1 = ivts;
1581             *opt_info->iv_to_split_tail = ivts;
1582             opt_info->iv_to_split_tail = &ivts->next;
1583             continue;
1584           }
1585
1586         if (opt_info->insns_with_var_to_expand)
1587           ves = analyze_insn_to_expand_var (loop, insn);
1588
1589         if (ves)
1590           {
1591             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1592             gcc_assert (*slot2 == NULL);
1593             *slot2 = ves;
1594             *opt_info->var_to_expand_tail = ves;
1595             opt_info->var_to_expand_tail = &ves->next;
1596           }
1597       }
1598     }
1599
1600   edges.release ();
1601   free (body);
1602   return opt_info;
1603 }
1604
1605 /* Called just before loop duplication.  Records start of duplicated area
1606    to OPT_INFO.  */
1607
1608 static void
1609 opt_info_start_duplication (struct opt_info *opt_info)
1610 {
1611   if (opt_info)
1612     opt_info->first_new_block = last_basic_block_for_fn (cfun);
1613 }
1614
1615 /* Determine the number of iterations between initialization of the base
1616    variable and the current copy (N_COPY).  N_COPIES is the total number
1617    of newly created copies.  UNROLLING is true if we are unrolling
1618    (not peeling) the loop.  */
1619
1620 static unsigned
1621 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1622 {
1623   if (unrolling)
1624     {
1625       /* If we are unrolling, initialization is done in the original loop
1626          body (number 0).  */
1627       return n_copy;
1628     }
1629   else
1630     {
1631       /* If we are peeling, the copy in that the initialization occurs has
1632          number 1.  The original loop (number 0) is the last.  */
1633       if (n_copy)
1634         return n_copy - 1;
1635       else
1636         return n_copies;
1637     }
1638 }
1639
1640 /* Allocate basic variable for the induction variable chain.  */
1641
1642 static void
1643 allocate_basic_variable (struct iv_to_split *ivts)
1644 {
1645   rtx expr = SET_SRC (single_set (ivts->insn));
1646
1647   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1648 }
1649
1650 /* Insert initialization of basic variable of IVTS before INSN, taking
1651    the initial value from INSN.  */
1652
1653 static void
1654 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1655 {
1656   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1657   rtx_insn *seq;
1658
1659   start_sequence ();
1660   expr = force_operand (expr, ivts->base_var);
1661   if (expr != ivts->base_var)
1662     emit_move_insn (ivts->base_var, expr);
1663   seq = get_insns ();
1664   end_sequence ();
1665
1666   emit_insn_before (seq, insn);
1667 }
1668
1669 /* Replace the use of induction variable described in IVTS in INSN
1670    by base variable + DELTA * step.  */
1671
1672 static void
1673 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1674 {
1675   rtx expr, *loc, incr, var;
1676   rtx_insn *seq;
1677   machine_mode mode = GET_MODE (ivts->base_var);
1678   rtx src, dest, set;
1679
1680   /* Construct base + DELTA * step.  */
1681   if (!delta)
1682     expr = ivts->base_var;
1683   else
1684     {
1685       incr = simplify_gen_binary (MULT, mode,
1686                                   ivts->step, gen_int_mode (delta, mode));
1687       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1688                                   ivts->base_var, incr);
1689     }
1690
1691   /* Figure out where to do the replacement.  */
1692   loc = &SET_SRC (single_set (insn));
1693
1694   /* If we can make the replacement right away, we're done.  */
1695   if (validate_change (insn, loc, expr, 0))
1696     return;
1697
1698   /* Otherwise, force EXPR into a register and try again.  */
1699   start_sequence ();
1700   var = gen_reg_rtx (mode);
1701   expr = force_operand (expr, var);
1702   if (expr != var)
1703     emit_move_insn (var, expr);
1704   seq = get_insns ();
1705   end_sequence ();
1706   emit_insn_before (seq, insn);
1707
1708   if (validate_change (insn, loc, var, 0))
1709     return;
1710
1711   /* The last chance.  Try recreating the assignment in insn
1712      completely from scratch.  */
1713   set = single_set (insn);
1714   gcc_assert (set);
1715
1716   start_sequence ();
1717   *loc = var;
1718   src = copy_rtx (SET_SRC (set));
1719   dest = copy_rtx (SET_DEST (set));
1720   src = force_operand (src, dest);
1721   if (src != dest)
1722     emit_move_insn (dest, src);
1723   seq = get_insns ();
1724   end_sequence ();
1725
1726   emit_insn_before (seq, insn);
1727   delete_insn (insn);
1728 }
1729
1730
1731 /* Return one expansion of the accumulator recorded in struct VE.  */
1732
1733 static rtx
1734 get_expansion (struct var_to_expand *ve)
1735 {
1736   rtx reg;
1737
1738   if (ve->reuse_expansion == 0)
1739     reg = ve->reg;
1740   else
1741     reg = ve->var_expansions[ve->reuse_expansion - 1];
1742
1743   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1744     ve->reuse_expansion = 0;
1745   else
1746     ve->reuse_expansion++;
1747
1748   return reg;
1749 }
1750
1751
1752 /* Given INSN replace the uses of the accumulator recorded in VE
1753    with a new register.  */
1754
1755 static void
1756 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1757 {
1758   rtx new_reg, set;
1759   bool really_new_expansion = false;
1760
1761   set = single_set (insn);
1762   gcc_assert (set);
1763
1764   /* Generate a new register only if the expansion limit has not been
1765      reached.  Else reuse an already existing expansion.  */
1766   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1767     {
1768       really_new_expansion = true;
1769       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1770     }
1771   else
1772     new_reg = get_expansion (ve);
1773
1774   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1775   if (apply_change_group ())
1776     if (really_new_expansion)
1777       {
1778         ve->var_expansions.safe_push (new_reg);
1779         ve->expansion_count++;
1780       }
1781 }
1782
1783 /* Initialize the variable expansions in loop preheader.  PLACE is the
1784    loop-preheader basic block where the initialization of the
1785    expansions should take place.  The expansions are initialized with
1786    (-0) when the operation is plus or minus to honor sign zero.  This
1787    way we can prevent cases where the sign of the final result is
1788    effected by the sign of the expansion.  Here is an example to
1789    demonstrate this:
1790
1791    for (i = 0 ; i < n; i++)
1792      sum += something;
1793
1794    ==>
1795
1796    sum += something
1797    ....
1798    i = i+1;
1799    sum1 += something
1800    ....
1801    i = i+1
1802    sum2 += something;
1803    ....
1804
1805    When SUM is initialized with -zero and SOMETHING is also -zero; the
1806    final result of sum should be -zero thus the expansions sum1 and sum2
1807    should be initialized with -zero as well (otherwise we will get +zero
1808    as the final result).  */
1809
1810 static void
1811 insert_var_expansion_initialization (struct var_to_expand *ve,
1812                                      basic_block place)
1813 {
1814   rtx_insn *seq;
1815   rtx var, zero_init;
1816   unsigned i;
1817   machine_mode mode = GET_MODE (ve->reg);
1818   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1819
1820   if (ve->var_expansions.length () == 0)
1821     return;
1822
1823   start_sequence ();
1824   switch (ve->op)
1825     {
1826     case FMA:
1827       /* Note that we only accumulate FMA via the ADD operand.  */
1828     case PLUS:
1829     case MINUS:
1830       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1831         {
1832           if (honor_signed_zero_p)
1833             zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1834           else
1835             zero_init = CONST0_RTX (mode);
1836           emit_move_insn (var, zero_init);
1837         }
1838       break;
1839
1840     case MULT:
1841       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1842         {
1843           zero_init = CONST1_RTX (GET_MODE (var));
1844           emit_move_insn (var, zero_init);
1845         }
1846       break;
1847
1848     default:
1849       gcc_unreachable ();
1850     }
1851
1852   seq = get_insns ();
1853   end_sequence ();
1854
1855   emit_insn_after (seq, BB_END (place));
1856 }
1857
1858 /* Combine the variable expansions at the loop exit.  PLACE is the
1859    loop exit basic block where the summation of the expansions should
1860    take place.  */
1861
1862 static void
1863 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1864 {
1865   rtx sum = ve->reg;
1866   rtx expr, var;
1867   rtx_insn *seq, *insn;
1868   unsigned i;
1869
1870   if (ve->var_expansions.length () == 0)
1871     return;
1872
1873   start_sequence ();
1874   switch (ve->op)
1875     {
1876     case FMA:
1877       /* Note that we only accumulate FMA via the ADD operand.  */
1878     case PLUS:
1879     case MINUS:
1880       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1881         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1882       break;
1883
1884     case MULT:
1885       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1886         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1887       break;
1888
1889     default:
1890       gcc_unreachable ();
1891     }
1892
1893   expr = force_operand (sum, ve->reg);
1894   if (expr != ve->reg)
1895     emit_move_insn (ve->reg, expr);
1896   seq = get_insns ();
1897   end_sequence ();
1898
1899   insn = BB_HEAD (place);
1900   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1901     insn = NEXT_INSN (insn);
1902
1903   emit_insn_after (seq, insn);
1904 }
1905
1906 /* Strip away REG_EQUAL notes for IVs we're splitting.
1907
1908    Updating REG_EQUAL notes for IVs we split is tricky: We
1909    cannot tell until after unrolling, DF-rescanning, and liveness
1910    updating, whether an EQ_USE is reached by the split IV while
1911    the IV reg is still live.  See PR55006.
1912
1913    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1914    because RTL loop-iv requires us to defer rescanning insns and
1915    any notes attached to them.  So resort to old techniques...  */
1916
1917 static void
1918 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1919 {
1920   struct iv_to_split *ivts;
1921   rtx note = find_reg_equal_equiv_note (insn);
1922   if (! note)
1923     return;
1924   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1925     if (reg_mentioned_p (ivts->orig_var, note))
1926       {
1927         remove_note (insn, note);
1928         return;
1929       }
1930 }
1931
1932 /* Apply loop optimizations in loop copies using the
1933    data which gathered during the unrolling.  Structure
1934    OPT_INFO record that data.
1935
1936    UNROLLING is true if we unrolled (not peeled) the loop.
1937    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1938    the loop (as it should happen in complete unrolling, but not in ordinary
1939    peeling of the loop).  */
1940
1941 static void
1942 apply_opt_in_copies (struct opt_info *opt_info,
1943                      unsigned n_copies, bool unrolling,
1944                      bool rewrite_original_loop)
1945 {
1946   unsigned i, delta;
1947   basic_block bb, orig_bb;
1948   rtx_insn *insn, *orig_insn, *next;
1949   struct iv_to_split ivts_templ, *ivts;
1950   struct var_to_expand ve_templ, *ves;
1951
1952   /* Sanity check -- we need to put initialization in the original loop
1953      body.  */
1954   gcc_assert (!unrolling || rewrite_original_loop);
1955
1956   /* Allocate the basic variables (i0).  */
1957   if (opt_info->insns_to_split)
1958     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1959       allocate_basic_variable (ivts);
1960
1961   for (i = opt_info->first_new_block;
1962        i < (unsigned) last_basic_block_for_fn (cfun);
1963        i++)
1964     {
1965       bb = BASIC_BLOCK_FOR_FN (cfun, i);
1966       orig_bb = get_bb_original (bb);
1967
1968       /* bb->aux holds position in copy sequence initialized by
1969          duplicate_loop_to_header_edge.  */
1970       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
1971                                         unrolling);
1972       bb->aux = 0;
1973       orig_insn = BB_HEAD (orig_bb);
1974       FOR_BB_INSNS_SAFE (bb, insn, next)
1975         {
1976           if (!INSN_P (insn)
1977               || (DEBUG_INSN_P (insn)
1978                   && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
1979             continue;
1980
1981           while (!INSN_P (orig_insn)
1982                  || (DEBUG_INSN_P (orig_insn)
1983                      && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
1984                          == LABEL_DECL)))
1985             orig_insn = NEXT_INSN (orig_insn);
1986
1987           ivts_templ.insn = orig_insn;
1988           ve_templ.insn = orig_insn;
1989
1990           /* Apply splitting iv optimization.  */
1991           if (opt_info->insns_to_split)
1992             {
1993               maybe_strip_eq_note_for_split_iv (opt_info, insn);
1994
1995               ivts = opt_info->insns_to_split->find (&ivts_templ);
1996
1997               if (ivts)
1998                 {
1999                   gcc_assert (GET_CODE (PATTERN (insn))
2000                               == GET_CODE (PATTERN (orig_insn)));
2001
2002                   if (!delta)
2003                     insert_base_initialization (ivts, insn);
2004                   split_iv (ivts, insn, delta);
2005                 }
2006             }
2007           /* Apply variable expansion optimization.  */
2008           if (unrolling && opt_info->insns_with_var_to_expand)
2009             {
2010               ves = (struct var_to_expand *)
2011                 opt_info->insns_with_var_to_expand->find (&ve_templ);
2012               if (ves)
2013                 {
2014                   gcc_assert (GET_CODE (PATTERN (insn))
2015                               == GET_CODE (PATTERN (orig_insn)));
2016                   expand_var_during_unrolling (ves, insn);
2017                 }
2018             }
2019           orig_insn = NEXT_INSN (orig_insn);
2020         }
2021     }
2022
2023   if (!rewrite_original_loop)
2024     return;
2025
2026   /* Initialize the variable expansions in the loop preheader
2027      and take care of combining them at the loop exit.  */
2028   if (opt_info->insns_with_var_to_expand)
2029     {
2030       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2031         insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2032       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2033         combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2034     }
2035
2036   /* Rewrite also the original loop body.  Find them as originals of the blocks
2037      in the last copied iteration, i.e. those that have
2038      get_bb_copy (get_bb_original (bb)) == bb.  */
2039   for (i = opt_info->first_new_block;
2040        i < (unsigned) last_basic_block_for_fn (cfun);
2041        i++)
2042     {
2043       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2044       orig_bb = get_bb_original (bb);
2045       if (get_bb_copy (orig_bb) != bb)
2046         continue;
2047
2048       delta = determine_split_iv_delta (0, n_copies, unrolling);
2049       for (orig_insn = BB_HEAD (orig_bb);
2050            orig_insn != NEXT_INSN (BB_END (bb));
2051            orig_insn = next)
2052         {
2053           next = NEXT_INSN (orig_insn);
2054
2055           if (!INSN_P (orig_insn))
2056             continue;
2057
2058           ivts_templ.insn = orig_insn;
2059           if (opt_info->insns_to_split)
2060             {
2061               maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2062
2063               ivts = (struct iv_to_split *)
2064                 opt_info->insns_to_split->find (&ivts_templ);
2065               if (ivts)
2066                 {
2067                   if (!delta)
2068                     insert_base_initialization (ivts, orig_insn);
2069                   split_iv (ivts, orig_insn, delta);
2070                   continue;
2071                 }
2072             }
2073
2074         }
2075     }
2076 }
2077
2078 /* Release OPT_INFO.  */
2079
2080 static void
2081 free_opt_info (struct opt_info *opt_info)
2082 {
2083   delete opt_info->insns_to_split;
2084   opt_info->insns_to_split = NULL;
2085   if (opt_info->insns_with_var_to_expand)
2086     {
2087       struct var_to_expand *ves;
2088
2089       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2090         ves->var_expansions.release ();
2091       delete opt_info->insns_with_var_to_expand;
2092       opt_info->insns_with_var_to_expand = NULL;
2093     }
2094   free (opt_info);
2095 }