gcc/gcse.c

   1 /* Partial redundancy elimination / Hoisting for RTL.
   2    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
   3    2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* TODO
  22    - reordering of memory allocation and freeing to be more space efficient
  23    - do rough calc of how many regs are needed in each block, and a rough
  24      calc of how many regs are available in each class and use that to
  25      throttle back the code in cases where RTX_COST is minimal.
  26 */
  27
  28 /* References searched while implementing this.
  29
  30    Compilers Principles, Techniques and Tools
  31    Aho, Sethi, Ullman
  32    Addison-Wesley, 1988
  33
  34    Global Optimization by Suppression of Partial Redundancies
  35    E. Morel, C. Renvoise
  36    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  37
  38    A Portable Machine-Independent Global Optimizer - Design and Measurements
  39    Frederick Chow
  40    Stanford Ph.D. thesis, Dec. 1983
  41
  42    A Fast Algorithm for Code Movement Optimization
  43    D.M. Dhamdhere
  44    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  45
  46    A Solution to a Problem with Morel and Renvoise's
  47    Global Optimization by Suppression of Partial Redundancies
  48    K-H Drechsler, M.P. Stadel
  49    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  50
  51    Practical Adaptation of the Global Optimization
  52    Algorithm of Morel and Renvoise
  53    D.M. Dhamdhere
  54    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  55
  56    Efficiently Computing Static Single Assignment Form and the Control
  57    Dependence Graph
  58    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  59    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  60
  61    Lazy Code Motion
  62    J. Knoop, O. Ruthing, B. Steffen
  63    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  64
  65    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  66    Time for Reducible Flow Control
  67    Thomas Ball
  68    ACM Letters on Programming Languages and Systems,
  69    Vol. 2, Num. 1-4, Mar-Dec 1993
  70
  71    An Efficient Representation for Sparse Sets
  72    Preston Briggs, Linda Torczon
  73    ACM Letters on Programming Languages and Systems,
  74    Vol. 2, Num. 1-4, Mar-Dec 1993
  75
  76    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  77    K-H Drechsler, M.P. Stadel
  78    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  79
  80    Partial Dead Code Elimination
  81    J. Knoop, O. Ruthing, B. Steffen
  82    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  83
  84    Effective Partial Redundancy Elimination
  85    P. Briggs, K.D. Cooper
  86    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  87
  88    The Program Structure Tree: Computing Control Regions in Linear Time
  89    R. Johnson, D. Pearson, K. Pingali
  90    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  91
  92    Optimal Code Motion: Theory and Practice
  93    J. Knoop, O. Ruthing, B. Steffen
  94    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
  95
  96    The power of assignment motion
  97    J. Knoop, O. Ruthing, B. Steffen
  98    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
  99
 100    Global code motion / global value numbering
 101    C. Click
 102    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 103
 104    Value Driven Redundancy Elimination
 105    L.T. Simpson
 106    Rice University Ph.D. thesis, Apr. 1996
 107
 108    Value Numbering
 109    L.T. Simpson
 110    Massively Scalar Compiler Project, Rice University, Sep. 1996
 111
 112    High Performance Compilers for Parallel Computing
 113    Michael Wolfe
 114    Addison-Wesley, 1996
 115
 116    Advanced Compiler Design and Implementation
 117    Steven Muchnick
 118    Morgan Kaufmann, 1997
 119
 120    Building an Optimizing Compiler
 121    Robert Morgan
 122    Digital Press, 1998
 123
 124    People wishing to speed up the code here should read:
 125      Elimination Algorithms for Data Flow Analysis
 126      B.G. Ryder, M.C. Paull
 127      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 128
 129      How to Analyze Large Programs Efficiently and Informatively
 130      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 131      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 132
 133    People wishing to do something different can find various possibilities
 134    in the above papers and elsewhere.
 135 */
 136
 137 #include "config.h"
 138 #include "system.h"
 139 #include "coretypes.h"
 140 #include "tm.h"
 141 #include "diagnostic-core.h"
 142 #include "toplev.h"
 143
 144 #include "rtl.h"
 145 #include "tree.h"
 146 #include "tm_p.h"
 147 #include "regs.h"
 148 #include "hard-reg-set.h"
 149 #include "flags.h"
 150 #include "insn-config.h"
 151 #include "recog.h"
 152 #include "basic-block.h"
 153 #include "function.h"
 154 #include "expr.h"
 155 #include "except.h"
 156 #include "ggc.h"
 157 #include "params.h"
 158 #include "cselib.h"
 159 #include "intl.h"
 160 #include "obstack.h"
 161 #include "tree-pass.h"
 162 #include "hashtab.h"
 163 #include "df.h"
 164 #include "dbgcnt.h"
 165 #include "target.h"
 166 #include "gcse.h"
 167
 168 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 169    are a superset of those done by classic GCSE.
 170
 171    Two passes of copy/constant propagation are done around PRE or hoisting
 172    because the first one enables more GCSE and the second one helps to clean
 173    up the copies that PRE and HOIST create.  This is needed more for PRE than
 174    for HOIST because code hoisting will try to use an existing register
 175    containing the common subexpression rather than create a new one.  This is
 176    harder to do for PRE because of the code motion (which HOIST doesn't do).
 177
 178    Expressions we are interested in GCSE-ing are of the form
 179    (set (pseudo-reg) (expression)).
 180    Function want_to_gcse_p says what these are.
 181
 182    In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
 183    This allows PRE to hoist expressions that are expressed in multiple insns,
 184    such as complex address calculations (e.g. for PIC code, or loads with a
 185    high part and a low part).
 186
 187    PRE handles moving invariant expressions out of loops (by treating them as
 188    partially redundant).
 189
 190    **********************
 191
 192    We used to support multiple passes but there are diminishing returns in
 193    doing so.  The first pass usually makes 90% of the changes that are doable.
 194    A second pass can make a few more changes made possible by the first pass.
 195    Experiments show any further passes don't make enough changes to justify
 196    the expense.
 197
 198    A study of spec92 using an unlimited number of passes:
 199    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 200    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 201    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 202
 203    It was found doing copy propagation between each pass enables further
 204    substitutions.
 205
 206    This study was done before expressions in REG_EQUAL notes were added as
 207    candidate expressions for optimization, and before the GIMPLE optimizers
 208    were added.  Probably, multiple passes is even less efficient now than
 209    at the time when the study was conducted.
 210
 211    PRE is quite expensive in complicated functions because the DFA can take
 212    a while to converge.  Hence we only perform one pass.
 213
 214    **********************
 215
 216    The steps for PRE are:
 217
 218    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 219
 220    2) Perform the data flow analysis for PRE.
 221
 222    3) Delete the redundant instructions
 223
 224    4) Insert the required copies [if any] that make the partially
 225       redundant instructions fully redundant.
 226
 227    5) For other reaching expressions, insert an instruction to copy the value
 228       to a newly created pseudo that will reach the redundant instruction.
 229
 230    The deletion is done first so that when we do insertions we
 231    know which pseudo reg to use.
 232
 233    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 234    argue it is not.  The number of iterations for the algorithm to converge
 235    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 236
 237    PRE GCSE depends heavily on the second CPROP pass to clean up the copies
 238    we create.  To make an expression reach the place where it's redundant,
 239    the result of the expression is copied to a new register, and the redundant
 240    expression is deleted by replacing it with this new register.  Classic GCSE
 241    doesn't have this problem as much as it computes the reaching defs of
 242    each register in each block and thus can try to use an existing
 243    register.  */
 244 \f
 245 /* GCSE global vars.  */
 246
 247 struct target_gcse default_target_gcse;
 248 #if SWITCHABLE_TARGET
 249 struct target_gcse *this_target_gcse = &default_target_gcse;
 250 #endif
 251
 252 /* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
 253 int flag_rerun_cse_after_global_opts;
 254
 255 /* An obstack for our working variables.  */
 256 static struct obstack gcse_obstack;
 257
 258 struct reg_use {rtx reg_rtx; };
 259
 260 /* Hash table of expressions.  */
 261
 262 struct expr
 263 {
 264   /* The expression.  */
 265   rtx expr;
 266   /* Index in the available expression bitmaps.  */
 267   int bitmap_index;
 268   /* Next entry with the same hash.  */
 269   struct expr *next_same_hash;
 270   /* List of anticipatable occurrences in basic blocks in the function.
 271      An "anticipatable occurrence" is one that is the first occurrence in the
 272      basic block, the operands are not modified in the basic block prior
 273      to the occurrence and the output is not used between the start of
 274      the block and the occurrence.  */
 275   struct occr *antic_occr;
 276   /* List of available occurrence in basic blocks in the function.
 277      An "available occurrence" is one that is the last occurrence in the
 278      basic block and the operands are not modified by following statements in
 279      the basic block [including this insn].  */
 280   struct occr *avail_occr;
 281   /* Non-null if the computation is PRE redundant.
 282      The value is the newly created pseudo-reg to record a copy of the
 283      expression in all the places that reach the redundant copy.  */
 284   rtx reaching_reg;
 285   /* Maximum distance in instructions this expression can travel.
 286      We avoid moving simple expressions for more than a few instructions
 287      to keep register pressure under control.
 288      A value of "0" removes restrictions on how far the expression can
 289      travel.  */
 290   int max_distance;
 291 };
 292
 293 /* Occurrence of an expression.
 294    There is one per basic block.  If a pattern appears more than once the
 295    last appearance is used [or first for anticipatable expressions].  */
 296
 297 struct occr
 298 {
 299   /* Next occurrence of this expression.  */
 300   struct occr *next;
 301   /* The insn that computes the expression.  */
 302   rtx insn;
 303   /* Nonzero if this [anticipatable] occurrence has been deleted.  */
 304   char deleted_p;
 305   /* Nonzero if this [available] occurrence has been copied to
 306      reaching_reg.  */
 307   /* ??? This is mutually exclusive with deleted_p, so they could share
 308      the same byte.  */
 309   char copied_p;
 310 };
 311
 312 typedef struct occr *occr_t;
 313 DEF_VEC_P (occr_t);
 314 DEF_VEC_ALLOC_P (occr_t, heap);
 315
 316 /* Expression hash tables.
 317    Each hash table is an array of buckets.
 318    ??? It is known that if it were an array of entries, structure elements
 319    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 320    not clear whether in the final analysis a sufficient amount of memory would
 321    be saved as the size of the available expression bitmaps would be larger
 322    [one could build a mapping table without holes afterwards though].
 323    Someday I'll perform the computation and figure it out.  */
 324
 325 struct hash_table_d
 326 {
 327   /* The table itself.
 328      This is an array of `expr_hash_table_size' elements.  */
 329   struct expr **table;
 330
 331   /* Size of the hash table, in elements.  */
 332   unsigned int size;
 333
 334   /* Number of hash table elements.  */
 335   unsigned int n_elems;
 336 };
 337
 338 /* Expression hash table.  */
 339 static struct hash_table_d expr_hash_table;
 340
 341 /* This is a list of expressions which are MEMs and will be used by load
 342    or store motion.
 343    Load motion tracks MEMs which aren't killed by anything except itself,
 344    i.e. loads and stores to a single location.
 345    We can then allow movement of these MEM refs with a little special
 346    allowance. (all stores copy the same value to the reaching reg used
 347    for the loads).  This means all values used to store into memory must have
 348    no side effects so we can re-issue the setter value.  */
 349
 350 struct ls_expr
 351 {
 352   struct expr * expr;           /* Gcse expression reference for LM.  */
 353   rtx pattern;                  /* Pattern of this mem.  */
 354   rtx pattern_regs;             /* List of registers mentioned by the mem.  */
 355   rtx loads;                    /* INSN list of loads seen.  */
 356   rtx stores;                   /* INSN list of stores seen.  */
 357   struct ls_expr * next;        /* Next in the list.  */
 358   int invalid;                  /* Invalid for some reason.  */
 359   int index;                    /* If it maps to a bitmap index.  */
 360   unsigned int hash_index;      /* Index when in a hash table.  */
 361   rtx reaching_reg;             /* Register to use when re-writing.  */
 362 };
 363
 364 /* Head of the list of load/store memory refs.  */
 365 static struct ls_expr * pre_ldst_mems = NULL;
 366
 367 /* Hashtable for the load/store memory refs.  */
 368 static htab_t pre_ldst_table = NULL;
 369
 370 /* Bitmap containing one bit for each register in the program.
 371    Used when performing GCSE to track which registers have been set since
 372    the start of the basic block.  */
 373 static regset reg_set_bitmap;
 374
 375 /* Array, indexed by basic block number for a list of insns which modify
 376    memory within that block.  */
 377 static VEC (rtx,heap) **modify_mem_list;
 378 static bitmap modify_mem_list_set;
 379
 380 typedef struct modify_pair_s
 381 {
 382   rtx dest;                     /* A MEM.  */
 383   rtx dest_addr;                /* The canonical address of `dest'.  */
 384 } modify_pair;
 385
 386 DEF_VEC_O(modify_pair);
 387 DEF_VEC_ALLOC_O(modify_pair,heap);
 388
 389 /* This array parallels modify_mem_list, except that it stores MEMs
 390    being set and their canonicalized memory addresses.  */
 391 static VEC (modify_pair,heap) **canon_modify_mem_list;
 392
 393 /* Bitmap indexed by block numbers to record which blocks contain
 394    function calls.  */
 395 static bitmap blocks_with_calls;
 396
 397 /* Various variables for statistics gathering.  */
 398
 399 /* Memory used in a pass.
 400    This isn't intended to be absolutely precise.  Its intent is only
 401    to keep an eye on memory usage.  */
 402 static int bytes_used;
 403
 404 /* GCSE substitutions made.  */
 405 static int gcse_subst_count;
 406 /* Number of copy instructions created.  */
 407 static int gcse_create_count;
 408 \f
 409 /* Doing code hoisting.  */
 410 static bool doing_code_hoisting_p = false;
 411 \f
 412 /* For available exprs */
 413 static sbitmap *ae_kill;
 414 \f
 415 static void compute_can_copy (void);
 416 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
 417 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
 418 static void *gcse_alloc (unsigned long);
 419 static void alloc_gcse_mem (void);
 420 static void free_gcse_mem (void);
 421 static void hash_scan_insn (rtx, struct hash_table_d *);
 422 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
 423 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
 424 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
 425 static int want_to_gcse_p (rtx, int *);
 426 static int oprs_unchanged_p (const_rtx, const_rtx, int);
 427 static int oprs_anticipatable_p (const_rtx, const_rtx);
 428 static int oprs_available_p (const_rtx, const_rtx);
 429 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
 430                                   struct hash_table_d *);
 431 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
 432 static int expr_equiv_p (const_rtx, const_rtx);
 433 static void record_last_reg_set_info (rtx, int);
 434 static void record_last_mem_set_info (rtx);
 435 static void record_last_set_info (rtx, const_rtx, void *);
 436 static void compute_hash_table (struct hash_table_d *);
 437 static void alloc_hash_table (struct hash_table_d *);
 438 static void free_hash_table (struct hash_table_d *);
 439 static void compute_hash_table_work (struct hash_table_d *);
 440 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
 441 static void compute_transp (const_rtx, int, sbitmap *);
 442 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
 443                                       struct hash_table_d *);
 444 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
 445 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
 446 static void canon_list_insert (rtx, const_rtx, void *);
 447 static void alloc_pre_mem (int, int);
 448 static void free_pre_mem (void);
 449 static struct edge_list *compute_pre_data (void);
 450 static int pre_expr_reaches_here_p (basic_block, struct expr *,
 451                                     basic_block);
 452 static void insert_insn_end_basic_block (struct expr *, basic_block);
 453 static void pre_insert_copy_insn (struct expr *, rtx);
 454 static void pre_insert_copies (void);
 455 static int pre_delete (void);
 456 static int pre_gcse (struct edge_list *);
 457 static int one_pre_gcse_pass (void);
 458 static void add_label_notes (rtx, rtx);
 459 static void alloc_code_hoist_mem (int, int);
 460 static void free_code_hoist_mem (void);
 461 static void compute_code_hoist_vbeinout (void);
 462 static void compute_code_hoist_data (void);
 463 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
 464                                       int, int *);
 465 static int hoist_code (void);
 466 static int one_code_hoisting_pass (void);
 467 static rtx process_insert_insn (struct expr *);
 468 static int pre_edge_insert (struct edge_list *, struct expr **);
 469 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
 470                                          basic_block, char *);
 471 static struct ls_expr * ldst_entry (rtx);
 472 static void free_ldst_entry (struct ls_expr *);
 473 static void free_ld_motion_mems (void);
 474 static void print_ldst_list (FILE *);
 475 static struct ls_expr * find_rtx_in_ldst (rtx);
 476 static int simple_mem (const_rtx);
 477 static void invalidate_any_buried_refs (rtx);
 478 static void compute_ld_motion_mems (void);
 479 static void trim_ld_motion_mems (void);
 480 static void update_ld_motion_stores (struct expr *);
 481 static void clear_modify_mem_tables (void);
 482 static void free_modify_mem_tables (void);
 483 static rtx gcse_emit_move_after (rtx, rtx, rtx);
 484 static bool is_too_expensive (const char *);
 485
 486 #define GNEW(T)                 ((T *) gmalloc (sizeof (T)))
 487 #define GCNEW(T)                ((T *) gcalloc (1, sizeof (T)))
 488
 489 #define GNEWVEC(T, N)           ((T *) gmalloc (sizeof (T) * (N)))
 490 #define GCNEWVEC(T, N)          ((T *) gcalloc ((N), sizeof (T)))
 491
 492 #define GNEWVAR(T, S)           ((T *) gmalloc ((S)))
 493 #define GCNEWVAR(T, S)          ((T *) gcalloc (1, (S)))
 494
 495 #define GOBNEW(T)               ((T *) gcse_alloc (sizeof (T)))
 496 #define GOBNEWVAR(T, S)         ((T *) gcse_alloc ((S)))
 497 \f
 498 /* Misc. utilities.  */
 499
 500 #define can_copy \
 501   (this_target_gcse->x_can_copy)
 502 #define can_copy_init_p \
 503   (this_target_gcse->x_can_copy_init_p)
 504
 505 /* Compute which modes support reg/reg copy operations.  */
 506
 507 static void
 508 compute_can_copy (void)
 509 {
 510   int i;
 511 #ifndef AVOID_CCMODE_COPIES
 512   rtx reg, insn;
 513 #endif
 514   memset (can_copy, 0, NUM_MACHINE_MODES);
 515
 516   start_sequence ();
 517   for (i = 0; i < NUM_MACHINE_MODES; i++)
 518     if (GET_MODE_CLASS (i) == MODE_CC)
 519       {
 520 #ifdef AVOID_CCMODE_COPIES
 521         can_copy[i] = 0;
 522 #else
 523         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 524         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 525         if (recog (PATTERN (insn), insn, NULL) >= 0)
 526           can_copy[i] = 1;
 527 #endif
 528       }
 529     else
 530       can_copy[i] = 1;
 531
 532   end_sequence ();
 533 }
 534
 535 /* Returns whether the mode supports reg/reg copy operations.  */
 536
 537 bool
 538 can_copy_p (enum machine_mode mode)
 539 {
 540   if (! can_copy_init_p)
 541     {
 542       compute_can_copy ();
 543       can_copy_init_p = true;
 544     }
 545
 546   return can_copy[mode] != 0;
 547 }
 548 \f
 549 /* Cover function to xmalloc to record bytes allocated.  */
 550
 551 static void *
 552 gmalloc (size_t size)
 553 {
 554   bytes_used += size;
 555   return xmalloc (size);
 556 }
 557
 558 /* Cover function to xcalloc to record bytes allocated.  */
 559
 560 static void *
 561 gcalloc (size_t nelem, size_t elsize)
 562 {
 563   bytes_used += nelem * elsize;
 564   return xcalloc (nelem, elsize);
 565 }
 566
 567 /* Cover function to obstack_alloc.  */
 568
 569 static void *
 570 gcse_alloc (unsigned long size)
 571 {
 572   bytes_used += size;
 573   return obstack_alloc (&gcse_obstack, size);
 574 }
 575
 576 /* Allocate memory for the reg/memory set tracking tables.
 577    This is called at the start of each pass.  */
 578
 579 static void
 580 alloc_gcse_mem (void)
 581 {
 582   /* Allocate vars to track sets of regs.  */
 583   reg_set_bitmap = ALLOC_REG_SET (NULL);
 584
 585   /* Allocate array to keep a list of insns which modify memory in each
 586      basic block.  */
 587   modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
 588   canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
 589                                     last_basic_block);
 590   modify_mem_list_set = BITMAP_ALLOC (NULL);
 591   blocks_with_calls = BITMAP_ALLOC (NULL);
 592 }
 593
 594 /* Free memory allocated by alloc_gcse_mem.  */
 595
 596 static void
 597 free_gcse_mem (void)
 598 {
 599   FREE_REG_SET (reg_set_bitmap);
 600
 601   free_modify_mem_tables ();
 602   BITMAP_FREE (modify_mem_list_set);
 603   BITMAP_FREE (blocks_with_calls);
 604 }
 605 \f
 606 /* Compute the local properties of each recorded expression.
 607
 608    Local properties are those that are defined by the block, irrespective of
 609    other blocks.
 610
 611    An expression is transparent in a block if its operands are not modified
 612    in the block.
 613
 614    An expression is computed (locally available) in a block if it is computed
 615    at least once and expression would contain the same value if the
 616    computation was moved to the end of the block.
 617
 618    An expression is locally anticipatable in a block if it is computed at
 619    least once and expression would contain the same value if the computation
 620    was moved to the beginning of the block.
 621
 622    We call this routine for pre and code hoisting.  They all compute
 623    basically the same information and thus can easily share this code.
 624
 625    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 626    properties.  If NULL, then it is not necessary to compute or record that
 627    particular property.
 628
 629    TABLE controls which hash table to look at.  */
 630
 631 static void
 632 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
 633                           struct hash_table_d *table)
 634 {
 635   unsigned int i;
 636
 637   /* Initialize any bitmaps that were passed in.  */
 638   if (transp)
 639     {
 640       sbitmap_vector_ones (transp, last_basic_block);
 641     }
 642
 643   if (comp)
 644     sbitmap_vector_zero (comp, last_basic_block);
 645   if (antloc)
 646     sbitmap_vector_zero (antloc, last_basic_block);
 647
 648   for (i = 0; i < table->size; i++)
 649     {
 650       struct expr *expr;
 651
 652       for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
 653         {
 654           int indx = expr->bitmap_index;
 655           struct occr *occr;
 656
 657           /* The expression is transparent in this block if it is not killed.
 658              We start by assuming all are transparent [none are killed], and
 659              then reset the bits for those that are.  */
 660           if (transp)
 661             compute_transp (expr->expr, indx, transp);
 662
 663           /* The occurrences recorded in antic_occr are exactly those that
 664              we want to set to nonzero in ANTLOC.  */
 665           if (antloc)
 666             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
 667               {
 668                 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
 669
 670                 /* While we're scanning the table, this is a good place to
 671                    initialize this.  */
 672                 occr->deleted_p = 0;
 673               }
 674
 675           /* The occurrences recorded in avail_occr are exactly those that
 676              we want to set to nonzero in COMP.  */
 677           if (comp)
 678             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
 679               {
 680                 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
 681
 682                 /* While we're scanning the table, this is a good place to
 683                    initialize this.  */
 684                 occr->copied_p = 0;
 685               }
 686
 687           /* While we're scanning the table, this is a good place to
 688              initialize this.  */
 689           expr->reaching_reg = 0;
 690         }
 691     }
 692 }
 693 \f
 694 /* Hash table support.  */
 695
 696 struct reg_avail_info
 697 {
 698   basic_block last_bb;
 699   int first_set;
 700   int last_set;
 701 };
 702
 703 static struct reg_avail_info *reg_avail_info;
 704 static basic_block current_bb;
 705
 706 /* See whether X, the source of a set, is something we want to consider for
 707    GCSE.  */
 708
 709 static int
 710 want_to_gcse_p (rtx x, int *max_distance_ptr)
 711 {
 712 #ifdef STACK_REGS
 713   /* On register stack architectures, don't GCSE constants from the
 714      constant pool, as the benefits are often swamped by the overhead
 715      of shuffling the register stack between basic blocks.  */
 716   if (IS_STACK_MODE (GET_MODE (x)))
 717     x = avoid_constant_pool_reference (x);
 718 #endif
 719
 720   /* GCSE'ing constants:
 721
 722      We do not specifically distinguish between constant and non-constant
 723      expressions in PRE and Hoist.  We use set_src_cost below to limit
 724      the maximum distance simple expressions can travel.
 725
 726      Nevertheless, constants are much easier to GCSE, and, hence,
 727      it is easy to overdo the optimizations.  Usually, excessive PRE and
 728      Hoisting of constant leads to increased register pressure.
 729
 730      RA can deal with this by rematerialing some of the constants.
 731      Therefore, it is important that the back-end generates sets of constants
 732      in a way that allows reload rematerialize them under high register
 733      pressure, i.e., a pseudo register with REG_EQUAL to constant
 734      is set only once.  Failing to do so will result in IRA/reload
 735      spilling such constants under high register pressure instead of
 736      rematerializing them.  */
 737
 738   switch (GET_CODE (x))
 739     {
 740     case REG:
 741     case SUBREG:
 742     case CALL:
 743       return 0;
 744
 745     CASE_CONST_ANY:
 746       if (!doing_code_hoisting_p)
 747         /* Do not PRE constants.  */
 748         return 0;
 749
 750       /* FALLTHRU */
 751
 752     default:
 753       if (doing_code_hoisting_p)
 754         /* PRE doesn't implement max_distance restriction.  */
 755         {
 756           int cost;
 757           int max_distance;
 758
 759           gcc_assert (!optimize_function_for_speed_p (cfun)
 760                       && optimize_function_for_size_p (cfun));
 761           cost = set_src_cost (x, 0);
 762
 763           if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
 764             {
 765               max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
 766               if (max_distance == 0)
 767                 return 0;
 768
 769               gcc_assert (max_distance > 0);
 770             }
 771           else
 772             max_distance = 0;
 773
 774           if (max_distance_ptr)
 775             *max_distance_ptr = max_distance;
 776         }
 777
 778       return can_assign_to_reg_without_clobbers_p (x);
 779     }
 780 }
 781
 782 /* Used internally by can_assign_to_reg_without_clobbers_p.  */
 783
 784 static GTY(()) rtx test_insn;
 785
 786 /* Return true if we can assign X to a pseudo register such that the
 787    resulting insn does not result in clobbering a hard register as a
 788    side-effect.
 789
 790    Additionally, if the target requires it, check that the resulting insn
 791    can be copied.  If it cannot, this means that X is special and probably
 792    has hidden side-effects we don't want to mess with.
 793
 794    This function is typically used by code motion passes, to verify
 795    that it is safe to insert an insn without worrying about clobbering
 796    maybe live hard regs.  */
 797
 798 bool
 799 can_assign_to_reg_without_clobbers_p (rtx x)
 800 {
 801   int num_clobbers = 0;
 802   int icode;
 803
 804   /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
 805   if (general_operand (x, GET_MODE (x)))
 806     return 1;
 807   else if (GET_MODE (x) == VOIDmode)
 808     return 0;
 809
 810   /* Otherwise, check if we can make a valid insn from it.  First initialize
 811      our test insn if we haven't already.  */
 812   if (test_insn == 0)
 813     {
 814       test_insn
 815         = make_insn_raw (gen_rtx_SET (VOIDmode,
 816                                       gen_rtx_REG (word_mode,
 817                                                    FIRST_PSEUDO_REGISTER * 2),
 818                                       const0_rtx));
 819       NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
 820     }
 821
 822   /* Now make an insn like the one we would make when GCSE'ing and see if
 823      valid.  */
 824   PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
 825   SET_SRC (PATTERN (test_insn)) = x;
 826
 827   icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
 828   if (icode < 0)
 829     return false;
 830
 831   if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
 832     return false;
 833
 834   if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
 835     return false;
 836
 837   return true;
 838 }
 839
 840 /* Return nonzero if the operands of expression X are unchanged from the
 841    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
 842    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
 843
 844 static int
 845 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
 846 {
 847   int i, j;
 848   enum rtx_code code;
 849   const char *fmt;
 850
 851   if (x == 0)
 852     return 1;
 853
 854   code = GET_CODE (x);
 855   switch (code)
 856     {
 857     case REG:
 858       {
 859         struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
 860
 861         if (info->last_bb != current_bb)
 862           return 1;
 863         if (avail_p)
 864           return info->last_set < DF_INSN_LUID (insn);
 865         else
 866           return info->first_set >= DF_INSN_LUID (insn);
 867       }
 868
 869     case MEM:
 870       if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
 871                                   x, avail_p))
 872         return 0;
 873       else
 874         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
 875
 876     case PRE_DEC:
 877     case PRE_INC:
 878     case POST_DEC:
 879     case POST_INC:
 880     case PRE_MODIFY:
 881     case POST_MODIFY:
 882       return 0;
 883
 884     case PC:
 885     case CC0: /*FIXME*/
 886     case CONST:
 887     CASE_CONST_ANY:
 888     case SYMBOL_REF:
 889     case LABEL_REF:
 890     case ADDR_VEC:
 891     case ADDR_DIFF_VEC:
 892       return 1;
 893
 894     default:
 895       break;
 896     }
 897
 898   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
 899     {
 900       if (fmt[i] == 'e')
 901         {
 902           /* If we are about to do the last recursive call needed at this
 903              level, change it into iteration.  This function is called enough
 904              to be worth it.  */
 905           if (i == 0)
 906             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
 907
 908           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
 909             return 0;
 910         }
 911       else if (fmt[i] == 'E')
 912         for (j = 0; j < XVECLEN (x, i); j++)
 913           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
 914             return 0;
 915     }
 916
 917   return 1;
 918 }
 919
 920 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p.  */
 921
 922 struct mem_conflict_info
 923 {
 924   /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
 925      see if a memory store conflicts with this memory load.  */
 926   const_rtx mem;
 927
 928   /* True if mems_conflict_for_gcse_p finds a conflict between two memory
 929      references.  */
 930   bool conflict;
 931 };
 932
 933 /* DEST is the output of an instruction.  If it is a memory reference and
 934    possibly conflicts with the load found in DATA, then communicate this
 935    information back through DATA.  */
 936
 937 static void
 938 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
 939                           void *data)
 940 {
 941   struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
 942
 943   while (GET_CODE (dest) == SUBREG
 944          || GET_CODE (dest) == ZERO_EXTRACT
 945          || GET_CODE (dest) == STRICT_LOW_PART)
 946     dest = XEXP (dest, 0);
 947
 948   /* If DEST is not a MEM, then it will not conflict with the load.  Note
 949      that function calls are assumed to clobber memory, but are handled
 950      elsewhere.  */
 951   if (! MEM_P (dest))
 952     return;
 953
 954   /* If we are setting a MEM in our list of specially recognized MEMs,
 955      don't mark as killed this time.  */
 956   if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
 957     {
 958       if (!find_rtx_in_ldst (dest))
 959         mci->conflict = true;
 960       return;
 961     }
 962
 963   if (true_dependence (dest, GET_MODE (dest), mci->mem))
 964     mci->conflict = true;
 965 }
 966
 967 /* Return nonzero if the expression in X (a memory reference) is killed
 968    in block BB before or after the insn with the LUID in UID_LIMIT.
 969    AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
 970    before UID_LIMIT.
 971
 972    To check the entire block, set UID_LIMIT to max_uid + 1 and
 973    AVAIL_P to 0.  */
 974
 975 static int
 976 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
 977                         int avail_p)
 978 {
 979   VEC (rtx,heap) *list = modify_mem_list[bb->index];
 980   rtx setter;
 981   unsigned ix;
 982
 983   /* If this is a readonly then we aren't going to be changing it.  */
 984   if (MEM_READONLY_P (x))
 985     return 0;
 986
 987   FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
 988     {
 989       struct mem_conflict_info mci;
 990
 991       /* Ignore entries in the list that do not apply.  */
 992       if ((avail_p
 993            && DF_INSN_LUID (setter) < uid_limit)
 994           || (! avail_p
 995               && DF_INSN_LUID (setter) > uid_limit))
 996         continue;
 997
 998       /* If SETTER is a call everything is clobbered.  Note that calls
 999          to pure functions are never put on the list, so we need not
1000          worry about them.  */
1001       if (CALL_P (setter))
1002         return 1;
1003
1004       /* SETTER must be an INSN of some kind that sets memory.  Call
1005          note_stores to examine each hunk of memory that is modified.  */
1006       mci.mem = x;
1007       mci.conflict = false;
1008       note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1009       if (mci.conflict)
1010         return 1;
1011     }
1012   return 0;
1013 }
1014
1015 /* Return nonzero if the operands of expression X are unchanged from
1016    the start of INSN's basic block up to but not including INSN.  */
1017
1018 static int
1019 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1020 {
1021   return oprs_unchanged_p (x, insn, 0);
1022 }
1023
1024 /* Return nonzero if the operands of expression X are unchanged from
1025    INSN to the end of INSN's basic block.  */
1026
1027 static int
1028 oprs_available_p (const_rtx x, const_rtx insn)
1029 {
1030   return oprs_unchanged_p (x, insn, 1);
1031 }
1032
1033 /* Hash expression X.
1034
1035    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1036    indicating if a volatile operand is found or if the expression contains
1037    something we don't want to insert in the table.  HASH_TABLE_SIZE is
1038    the current size of the hash table to be probed.  */
1039
1040 static unsigned int
1041 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1042            int hash_table_size)
1043 {
1044   unsigned int hash;
1045
1046   *do_not_record_p = 0;
1047
1048   hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1049   return hash % hash_table_size;
1050 }
1051
1052 /* Return nonzero if exp1 is equivalent to exp2.  */
1053
1054 static int
1055 expr_equiv_p (const_rtx x, const_rtx y)
1056 {
1057   return exp_equiv_p (x, y, 0, true);
1058 }
1059
1060 /* Insert expression X in INSN in the hash TABLE.
1061    If it is already present, record it as the last occurrence in INSN's
1062    basic block.
1063
1064    MODE is the mode of the value X is being stored into.
1065    It is only used if X is a CONST_INT.
1066
1067    ANTIC_P is nonzero if X is an anticipatable expression.
1068    AVAIL_P is nonzero if X is an available expression.
1069
1070    MAX_DISTANCE is the maximum distance in instructions this expression can
1071    be moved.  */
1072
1073 static void
1074 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1075                       int avail_p, int max_distance, struct hash_table_d *table)
1076 {
1077   int found, do_not_record_p;
1078   unsigned int hash;
1079   struct expr *cur_expr, *last_expr = NULL;
1080   struct occr *antic_occr, *avail_occr;
1081
1082   hash = hash_expr (x, mode, &do_not_record_p, table->size);
1083
1084   /* Do not insert expression in table if it contains volatile operands,
1085      or if hash_expr determines the expression is something we don't want
1086      to or can't handle.  */
1087   if (do_not_record_p)
1088     return;
1089
1090   cur_expr = table->table[hash];
1091   found = 0;
1092
1093   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1094     {
1095       /* If the expression isn't found, save a pointer to the end of
1096          the list.  */
1097       last_expr = cur_expr;
1098       cur_expr = cur_expr->next_same_hash;
1099     }
1100
1101   if (! found)
1102     {
1103       cur_expr = GOBNEW (struct expr);
1104       bytes_used += sizeof (struct expr);
1105       if (table->table[hash] == NULL)
1106         /* This is the first pattern that hashed to this index.  */
1107         table->table[hash] = cur_expr;
1108       else
1109         /* Add EXPR to end of this hash chain.  */
1110         last_expr->next_same_hash = cur_expr;
1111
1112       /* Set the fields of the expr element.  */
1113       cur_expr->expr = x;
1114       cur_expr->bitmap_index = table->n_elems++;
1115       cur_expr->next_same_hash = NULL;
1116       cur_expr->antic_occr = NULL;
1117       cur_expr->avail_occr = NULL;
1118       gcc_assert (max_distance >= 0);
1119       cur_expr->max_distance = max_distance;
1120     }
1121   else
1122     gcc_assert (cur_expr->max_distance == max_distance);
1123
1124   /* Now record the occurrence(s).  */
1125   if (antic_p)
1126     {
1127       antic_occr = cur_expr->antic_occr;
1128
1129       if (antic_occr
1130           && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1131         antic_occr = NULL;
1132
1133       if (antic_occr)
1134         /* Found another instance of the expression in the same basic block.
1135            Prefer the currently recorded one.  We want the first one in the
1136            block and the block is scanned from start to end.  */
1137         ; /* nothing to do */
1138       else
1139         {
1140           /* First occurrence of this expression in this basic block.  */
1141           antic_occr = GOBNEW (struct occr);
1142           bytes_used += sizeof (struct occr);
1143           antic_occr->insn = insn;
1144           antic_occr->next = cur_expr->antic_occr;
1145           antic_occr->deleted_p = 0;
1146           cur_expr->antic_occr = antic_occr;
1147         }
1148     }
1149
1150   if (avail_p)
1151     {
1152       avail_occr = cur_expr->avail_occr;
1153
1154       if (avail_occr
1155           && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1156         {
1157           /* Found another instance of the expression in the same basic block.
1158              Prefer this occurrence to the currently recorded one.  We want
1159              the last one in the block and the block is scanned from start
1160              to end.  */
1161           avail_occr->insn = insn;
1162         }
1163       else
1164         {
1165           /* First occurrence of this expression in this basic block.  */
1166           avail_occr = GOBNEW (struct occr);
1167           bytes_used += sizeof (struct occr);
1168           avail_occr->insn = insn;
1169           avail_occr->next = cur_expr->avail_occr;
1170           avail_occr->deleted_p = 0;
1171           cur_expr->avail_occr = avail_occr;
1172         }
1173     }
1174 }
1175
1176 /* Scan SET present in INSN and add an entry to the hash TABLE.  */
1177
1178 static void
1179 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1180 {
1181   rtx src = SET_SRC (set);
1182   rtx dest = SET_DEST (set);
1183   rtx note;
1184
1185   if (GET_CODE (src) == CALL)
1186     hash_scan_call (src, insn, table);
1187
1188   else if (REG_P (dest))
1189     {
1190       unsigned int regno = REGNO (dest);
1191       int max_distance = 0;
1192
1193       /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1194
1195          This allows us to do a single GCSE pass and still eliminate
1196          redundant constants, addresses or other expressions that are
1197          constructed with multiple instructions.
1198
1199          However, keep the original SRC if INSN is a simple reg-reg move.
1200          In this case, there will almost always be a REG_EQUAL note on the
1201          insn that sets SRC.  By recording the REG_EQUAL value here as SRC
1202          for INSN, we miss copy propagation opportunities and we perform the
1203          same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1204          do more than one PRE GCSE pass.
1205
1206          Note that this does not impede profitable constant propagations.  We
1207          "look through" reg-reg sets in lookup_avail_set.  */
1208       note = find_reg_equal_equiv_note (insn);
1209       if (note != 0
1210           && REG_NOTE_KIND (note) == REG_EQUAL
1211           && !REG_P (src)
1212           && want_to_gcse_p (XEXP (note, 0), NULL))
1213         src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1214
1215       /* Only record sets of pseudo-regs in the hash table.  */
1216       if (regno >= FIRST_PSEUDO_REGISTER
1217           /* Don't GCSE something if we can't do a reg/reg copy.  */
1218           && can_copy_p (GET_MODE (dest))
1219           /* GCSE commonly inserts instruction after the insn.  We can't
1220              do that easily for EH edges so disable GCSE on these for now.  */
1221           /* ??? We can now easily create new EH landing pads at the
1222              gimple level, for splitting edges; there's no reason we
1223              can't do the same thing at the rtl level.  */
1224           && !can_throw_internal (insn)
1225           /* Is SET_SRC something we want to gcse?  */
1226           && want_to_gcse_p (src, &max_distance)
1227           /* Don't CSE a nop.  */
1228           && ! set_noop_p (set)
1229           /* Don't GCSE if it has attached REG_EQUIV note.
1230              At this point this only function parameters should have
1231              REG_EQUIV notes and if the argument slot is used somewhere
1232              explicitly, it means address of parameter has been taken,
1233              so we should not extend the lifetime of the pseudo.  */
1234           && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1235         {
1236           /* An expression is not anticipatable if its operands are
1237              modified before this insn or if this is not the only SET in
1238              this insn.  The latter condition does not have to mean that
1239              SRC itself is not anticipatable, but we just will not be
1240              able to handle code motion of insns with multiple sets.  */
1241           int antic_p = oprs_anticipatable_p (src, insn)
1242                         && !multiple_sets (insn);
1243           /* An expression is not available if its operands are
1244              subsequently modified, including this insn.  It's also not
1245              available if this is a branch, because we can't insert
1246              a set after the branch.  */
1247           int avail_p = (oprs_available_p (src, insn)
1248                          && ! JUMP_P (insn));
1249
1250           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1251                                 max_distance, table);
1252         }
1253     }
1254   /* In case of store we want to consider the memory value as available in
1255      the REG stored in that memory. This makes it possible to remove
1256      redundant loads from due to stores to the same location.  */
1257   else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1258       {
1259         unsigned int regno = REGNO (src);
1260         int max_distance = 0;
1261
1262         /* Only record sets of pseudo-regs in the hash table.  */
1263         if (regno >= FIRST_PSEUDO_REGISTER
1264            /* Don't GCSE something if we can't do a reg/reg copy.  */
1265            && can_copy_p (GET_MODE (src))
1266            /* GCSE commonly inserts instruction after the insn.  We can't
1267               do that easily for EH edges so disable GCSE on these for now.  */
1268            && !can_throw_internal (insn)
1269            /* Is SET_DEST something we want to gcse?  */
1270            && want_to_gcse_p (dest, &max_distance)
1271            /* Don't CSE a nop.  */
1272            && ! set_noop_p (set)
1273            /* Don't GCSE if it has attached REG_EQUIV note.
1274               At this point this only function parameters should have
1275               REG_EQUIV notes and if the argument slot is used somewhere
1276               explicitly, it means address of parameter has been taken,
1277               so we should not extend the lifetime of the pseudo.  */
1278            && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1279                || ! MEM_P (XEXP (note, 0))))
1280              {
1281                /* Stores are never anticipatable.  */
1282                int antic_p = 0;
1283                /* An expression is not available if its operands are
1284                   subsequently modified, including this insn.  It's also not
1285                   available if this is a branch, because we can't insert
1286                   a set after the branch.  */
1287                int avail_p = oprs_available_p (dest, insn)
1288                              && ! JUMP_P (insn);
1289
1290                /* Record the memory expression (DEST) in the hash table.  */
1291                insert_expr_in_table (dest, GET_MODE (dest), insn,
1292                                      antic_p, avail_p, max_distance, table);
1293              }
1294       }
1295 }
1296
1297 static void
1298 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1299                    struct hash_table_d *table ATTRIBUTE_UNUSED)
1300 {
1301   /* Currently nothing to do.  */
1302 }
1303
1304 static void
1305 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1306                 struct hash_table_d *table ATTRIBUTE_UNUSED)
1307 {
1308   /* Currently nothing to do.  */
1309 }
1310
1311 /* Process INSN and add hash table entries as appropriate.  */
1312
1313 static void
1314 hash_scan_insn (rtx insn, struct hash_table_d *table)
1315 {
1316   rtx pat = PATTERN (insn);
1317   int i;
1318
1319   /* Pick out the sets of INSN and for other forms of instructions record
1320      what's been modified.  */
1321
1322   if (GET_CODE (pat) == SET)
1323     hash_scan_set (pat, insn, table);
1324
1325   else if (GET_CODE (pat) == CLOBBER)
1326     hash_scan_clobber (pat, insn, table);
1327
1328   else if (GET_CODE (pat) == CALL)
1329     hash_scan_call (pat, insn, table);
1330
1331   else if (GET_CODE (pat) == PARALLEL)
1332     for (i = 0; i < XVECLEN (pat, 0); i++)
1333       {
1334         rtx x = XVECEXP (pat, 0, i);
1335
1336         if (GET_CODE (x) == SET)
1337           hash_scan_set (x, insn, table);
1338         else if (GET_CODE (x) == CLOBBER)
1339           hash_scan_clobber (x, insn, table);
1340         else if (GET_CODE (x) == CALL)
1341           hash_scan_call (x, insn, table);
1342       }
1343 }
1344
1345 /* Dump the hash table TABLE to file FILE under the name NAME.  */
1346
1347 static void
1348 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1349 {
1350   int i;
1351   /* Flattened out table, so it's printed in proper order.  */
1352   struct expr **flat_table;
1353   unsigned int *hash_val;
1354   struct expr *expr;
1355
1356   flat_table = XCNEWVEC (struct expr *, table->n_elems);
1357   hash_val = XNEWVEC (unsigned int, table->n_elems);
1358
1359   for (i = 0; i < (int) table->size; i++)
1360     for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1361       {
1362         flat_table[expr->bitmap_index] = expr;
1363         hash_val[expr->bitmap_index] = i;
1364       }
1365
1366   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1367            name, table->size, table->n_elems);
1368
1369   for (i = 0; i < (int) table->n_elems; i++)
1370     if (flat_table[i] != 0)
1371       {
1372         expr = flat_table[i];
1373         fprintf (file, "Index %d (hash value %d; max distance %d)\n  ",
1374                  expr->bitmap_index, hash_val[i], expr->max_distance);
1375         print_rtl (file, expr->expr);
1376         fprintf (file, "\n");
1377       }
1378
1379   fprintf (file, "\n");
1380
1381   free (flat_table);
1382   free (hash_val);
1383 }
1384
1385 /* Record register first/last/block set information for REGNO in INSN.
1386
1387    first_set records the first place in the block where the register
1388    is set and is used to compute "anticipatability".
1389
1390    last_set records the last place in the block where the register
1391    is set and is used to compute "availability".
1392
1393    last_bb records the block for which first_set and last_set are
1394    valid, as a quick test to invalidate them.  */
1395
1396 static void
1397 record_last_reg_set_info (rtx insn, int regno)
1398 {
1399   struct reg_avail_info *info = &reg_avail_info[regno];
1400   int luid = DF_INSN_LUID (insn);
1401
1402   info->last_set = luid;
1403   if (info->last_bb != current_bb)
1404     {
1405       info->last_bb = current_bb;
1406       info->first_set = luid;
1407     }
1408 }
1409
1410 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1411    Note we store a pair of elements in the list, so they have to be
1412    taken off pairwise.  */
1413
1414 static void
1415 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1416                    void * v_insn)
1417 {
1418   rtx dest_addr, insn;
1419   int bb;
1420   modify_pair pair;
1421
1422   while (GET_CODE (dest) == SUBREG
1423       || GET_CODE (dest) == ZERO_EXTRACT
1424       || GET_CODE (dest) == STRICT_LOW_PART)
1425     dest = XEXP (dest, 0);
1426
1427   /* If DEST is not a MEM, then it will not conflict with a load.  Note
1428      that function calls are assumed to clobber memory, but are handled
1429      elsewhere.  */
1430
1431   if (! MEM_P (dest))
1432     return;
1433
1434   dest_addr = get_addr (XEXP (dest, 0));
1435   dest_addr = canon_rtx (dest_addr);
1436   insn = (rtx) v_insn;
1437   bb = BLOCK_FOR_INSN (insn)->index;
1438
1439   pair.dest = dest;
1440   pair.dest_addr = dest_addr;
1441   VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], pair);
1442 }
1443
1444 /* Record memory modification information for INSN.  We do not actually care
1445    about the memory location(s) that are set, or even how they are set (consider
1446    a CALL_INSN).  We merely need to record which insns modify memory.  */
1447
1448 static void
1449 record_last_mem_set_info (rtx insn)
1450 {
1451   int bb = BLOCK_FOR_INSN (insn)->index;
1452
1453   /* load_killed_in_block_p will handle the case of calls clobbering
1454      everything.  */
1455   VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1456   bitmap_set_bit (modify_mem_list_set, bb);
1457
1458   if (CALL_P (insn))
1459     bitmap_set_bit (blocks_with_calls, bb);
1460   else
1461     note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1462 }
1463
1464 /* Called from compute_hash_table via note_stores to handle one
1465    SET or CLOBBER in an insn.  DATA is really the instruction in which
1466    the SET is taking place.  */
1467
1468 static void
1469 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1470 {
1471   rtx last_set_insn = (rtx) data;
1472
1473   if (GET_CODE (dest) == SUBREG)
1474     dest = SUBREG_REG (dest);
1475
1476   if (REG_P (dest))
1477     record_last_reg_set_info (last_set_insn, REGNO (dest));
1478   else if (MEM_P (dest)
1479            /* Ignore pushes, they clobber nothing.  */
1480            && ! push_operand (dest, GET_MODE (dest)))
1481     record_last_mem_set_info (last_set_insn);
1482 }
1483
1484 /* Top level function to create an expression hash table.
1485
1486    Expression entries are placed in the hash table if
1487    - they are of the form (set (pseudo-reg) src),
1488    - src is something we want to perform GCSE on,
1489    - none of the operands are subsequently modified in the block
1490
1491    Currently src must be a pseudo-reg or a const_int.
1492
1493    TABLE is the table computed.  */
1494
1495 static void
1496 compute_hash_table_work (struct hash_table_d *table)
1497 {
1498   int i;
1499
1500   /* re-Cache any INSN_LIST nodes we have allocated.  */
1501   clear_modify_mem_tables ();
1502   /* Some working arrays used to track first and last set in each block.  */
1503   reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1504
1505   for (i = 0; i < max_reg_num (); ++i)
1506     reg_avail_info[i].last_bb = NULL;
1507
1508   FOR_EACH_BB (current_bb)
1509     {
1510       rtx insn;
1511       unsigned int regno;
1512
1513       /* First pass over the instructions records information used to
1514          determine when registers and memory are first and last set.  */
1515       FOR_BB_INSNS (current_bb, insn)
1516         {
1517           if (!NONDEBUG_INSN_P (insn))
1518             continue;
1519
1520           if (CALL_P (insn))
1521             {
1522               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1523                 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1524                   record_last_reg_set_info (insn, regno);
1525
1526               if (! RTL_CONST_OR_PURE_CALL_P (insn))
1527                 record_last_mem_set_info (insn);
1528             }
1529
1530           note_stores (PATTERN (insn), record_last_set_info, insn);
1531         }
1532
1533       /* The next pass builds the hash table.  */
1534       FOR_BB_INSNS (current_bb, insn)
1535         if (NONDEBUG_INSN_P (insn))
1536           hash_scan_insn (insn, table);
1537     }
1538
1539   free (reg_avail_info);
1540   reg_avail_info = NULL;
1541 }
1542
1543 /* Allocate space for the set/expr hash TABLE.
1544    It is used to determine the number of buckets to use.  */
1545
1546 static void
1547 alloc_hash_table (struct hash_table_d *table)
1548 {
1549   int n;
1550
1551   n = get_max_insn_count ();
1552
1553   table->size = n / 4;
1554   if (table->size < 11)
1555     table->size = 11;
1556
1557   /* Attempt to maintain efficient use of hash table.
1558      Making it an odd number is simplest for now.
1559      ??? Later take some measurements.  */
1560   table->size |= 1;
1561   n = table->size * sizeof (struct expr *);
1562   table->table = GNEWVAR (struct expr *, n);
1563 }
1564
1565 /* Free things allocated by alloc_hash_table.  */
1566
1567 static void
1568 free_hash_table (struct hash_table_d *table)
1569 {
1570   free (table->table);
1571 }
1572
1573 /* Compute the expression hash table TABLE.  */
1574
1575 static void
1576 compute_hash_table (struct hash_table_d *table)
1577 {
1578   /* Initialize count of number of entries in hash table.  */
1579   table->n_elems = 0;
1580   memset (table->table, 0, table->size * sizeof (struct expr *));
1581
1582   compute_hash_table_work (table);
1583 }
1584 \f
1585 /* Expression tracking support.  */
1586
1587 /* Clear canon_modify_mem_list and modify_mem_list tables.  */
1588 static void
1589 clear_modify_mem_tables (void)
1590 {
1591   unsigned i;
1592   bitmap_iterator bi;
1593
1594   EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1595     {
1596       VEC_free (rtx, heap, modify_mem_list[i]);
1597       VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1598     }
1599   bitmap_clear (modify_mem_list_set);
1600   bitmap_clear (blocks_with_calls);
1601 }
1602
1603 /* Release memory used by modify_mem_list_set.  */
1604
1605 static void
1606 free_modify_mem_tables (void)
1607 {
1608   clear_modify_mem_tables ();
1609   free (modify_mem_list);
1610   free (canon_modify_mem_list);
1611   modify_mem_list = 0;
1612   canon_modify_mem_list = 0;
1613 }
1614 \f
1615 /* For each block, compute whether X is transparent.  X is either an
1616    expression or an assignment [though we don't care which, for this context
1617    an assignment is treated as an expression].  For each block where an
1618    element of X is modified, reset the INDX bit in BMAP.  */
1619
1620 static void
1621 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1622 {
1623   int i, j;
1624   enum rtx_code code;
1625   const char *fmt;
1626
1627   /* repeat is used to turn tail-recursion into iteration since GCC
1628      can't do it when there's no return value.  */
1629  repeat:
1630
1631   if (x == 0)
1632     return;
1633
1634   code = GET_CODE (x);
1635   switch (code)
1636     {
1637     case REG:
1638         {
1639           df_ref def;
1640           for (def = DF_REG_DEF_CHAIN (REGNO (x));
1641                def;
1642                def = DF_REF_NEXT_REG (def))
1643             RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1644         }
1645
1646       return;
1647
1648     case MEM:
1649       if (! MEM_READONLY_P (x))
1650         {
1651           bitmap_iterator bi;
1652           unsigned bb_index;
1653
1654           /* First handle all the blocks with calls.  We don't need to
1655              do any list walking for them.  */
1656           EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1657             {
1658               RESET_BIT (bmap[bb_index], indx);
1659             }
1660
1661             /* Now iterate over the blocks which have memory modifications
1662                but which do not have any calls.  */
1663             EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1664                                             blocks_with_calls,
1665                                             0, bb_index, bi)
1666               {
1667                 VEC (modify_pair,heap) *list
1668                   = canon_modify_mem_list[bb_index];
1669                 modify_pair *pair;
1670                 unsigned ix;
1671
1672                 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1673                   {
1674                     rtx dest = pair->dest;
1675                     rtx dest_addr = pair->dest_addr;
1676
1677                     if (canon_true_dependence (dest, GET_MODE (dest),
1678                                                dest_addr, x, NULL_RTX))
1679                       RESET_BIT (bmap[bb_index], indx);
1680                   }
1681               }
1682         }
1683
1684       x = XEXP (x, 0);
1685       goto repeat;
1686
1687     case PC:
1688     case CC0: /*FIXME*/
1689     case CONST:
1690     CASE_CONST_ANY:
1691     case SYMBOL_REF:
1692     case LABEL_REF:
1693     case ADDR_VEC:
1694     case ADDR_DIFF_VEC:
1695       return;
1696
1697     default:
1698       break;
1699     }
1700
1701   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1702     {
1703       if (fmt[i] == 'e')
1704         {
1705           /* If we are about to do the last recursive call
1706              needed at this level, change it into iteration.
1707              This function is called enough to be worth it.  */
1708           if (i == 0)
1709             {
1710               x = XEXP (x, i);
1711               goto repeat;
1712             }
1713
1714           compute_transp (XEXP (x, i), indx, bmap);
1715         }
1716       else if (fmt[i] == 'E')
1717         for (j = 0; j < XVECLEN (x, i); j++)
1718           compute_transp (XVECEXP (x, i, j), indx, bmap);
1719     }
1720 }
1721 \f
1722 /* Compute PRE+LCM working variables.  */
1723
1724 /* Local properties of expressions.  */
1725
1726 /* Nonzero for expressions that are transparent in the block.  */
1727 static sbitmap *transp;
1728
1729 /* Nonzero for expressions that are computed (available) in the block.  */
1730 static sbitmap *comp;
1731
1732 /* Nonzero for expressions that are locally anticipatable in the block.  */
1733 static sbitmap *antloc;
1734
1735 /* Nonzero for expressions where this block is an optimal computation
1736    point.  */
1737 static sbitmap *pre_optimal;
1738
1739 /* Nonzero for expressions which are redundant in a particular block.  */
1740 static sbitmap *pre_redundant;
1741
1742 /* Nonzero for expressions which should be inserted on a specific edge.  */
1743 static sbitmap *pre_insert_map;
1744
1745 /* Nonzero for expressions which should be deleted in a specific block.  */
1746 static sbitmap *pre_delete_map;
1747
1748 /* Allocate vars used for PRE analysis.  */
1749
1750 static void
1751 alloc_pre_mem (int n_blocks, int n_exprs)
1752 {
1753   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1754   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1755   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1756
1757   pre_optimal = NULL;
1758   pre_redundant = NULL;
1759   pre_insert_map = NULL;
1760   pre_delete_map = NULL;
1761   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1762
1763   /* pre_insert and pre_delete are allocated later.  */
1764 }
1765
1766 /* Free vars used for PRE analysis.  */
1767
1768 static void
1769 free_pre_mem (void)
1770 {
1771   sbitmap_vector_free (transp);
1772   sbitmap_vector_free (comp);
1773
1774   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
1775
1776   if (pre_optimal)
1777     sbitmap_vector_free (pre_optimal);
1778   if (pre_redundant)
1779     sbitmap_vector_free (pre_redundant);
1780   if (pre_insert_map)
1781     sbitmap_vector_free (pre_insert_map);
1782   if (pre_delete_map)
1783     sbitmap_vector_free (pre_delete_map);
1784
1785   transp = comp = NULL;
1786   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1787 }
1788
1789 /* Remove certain expressions from anticipatable and transparent
1790    sets of basic blocks that have incoming abnormal edge.
1791    For PRE remove potentially trapping expressions to avoid placing
1792    them on abnormal edges.  For hoisting remove memory references that
1793    can be clobbered by calls.  */
1794
1795 static void
1796 prune_expressions (bool pre_p)
1797 {
1798   sbitmap prune_exprs;
1799   struct expr *expr;
1800   unsigned int ui;
1801   basic_block bb;
1802
1803   prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1804   sbitmap_zero (prune_exprs);
1805   for (ui = 0; ui < expr_hash_table.size; ui++)
1806     {
1807       for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1808         {
1809           /* Note potentially trapping expressions.  */
1810           if (may_trap_p (expr->expr))
1811             {
1812               SET_BIT (prune_exprs, expr->bitmap_index);
1813               continue;
1814             }
1815
1816           if (!pre_p && MEM_P (expr->expr))
1817             /* Note memory references that can be clobbered by a call.
1818                We do not split abnormal edges in hoisting, so would
1819                a memory reference get hoisted along an abnormal edge,
1820                it would be placed /before/ the call.  Therefore, only
1821                constant memory references can be hoisted along abnormal
1822                edges.  */
1823             {
1824               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1825                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1826                 continue;
1827
1828               if (MEM_READONLY_P (expr->expr)
1829                   && !MEM_VOLATILE_P (expr->expr)
1830                   && MEM_NOTRAP_P (expr->expr))
1831                 /* Constant memory reference, e.g., a PIC address.  */
1832                 continue;
1833
1834               /* ??? Optimally, we would use interprocedural alias
1835                  analysis to determine if this mem is actually killed
1836                  by this call.  */
1837
1838               SET_BIT (prune_exprs, expr->bitmap_index);
1839             }
1840         }
1841     }
1842
1843   FOR_EACH_BB (bb)
1844     {
1845       edge e;
1846       edge_iterator ei;
1847
1848       /* If the current block is the destination of an abnormal edge, we
1849          kill all trapping (for PRE) and memory (for hoist) expressions
1850          because we won't be able to properly place the instruction on
1851          the edge.  So make them neither anticipatable nor transparent.
1852          This is fairly conservative.
1853
1854          ??? For hoisting it may be necessary to check for set-and-jump
1855          instructions here, not just for abnormal edges.  The general problem
1856          is that when an expression cannot not be placed right at the end of
1857          a basic block we should account for any side-effects of a subsequent
1858          jump instructions that could clobber the expression.  It would
1859          be best to implement this check along the lines of
1860          hoist_expr_reaches_here_p where the target block is already known
1861          and, hence, there's no need to conservatively prune expressions on
1862          "intermediate" set-and-jump instructions.  */
1863       FOR_EACH_EDGE (e, ei, bb->preds)
1864         if ((e->flags & EDGE_ABNORMAL)
1865             && (pre_p || CALL_P (BB_END (e->src))))
1866           {
1867             sbitmap_difference (antloc[bb->index],
1868                                 antloc[bb->index], prune_exprs);
1869             sbitmap_difference (transp[bb->index],
1870                                 transp[bb->index], prune_exprs);
1871             break;
1872           }
1873     }
1874
1875   sbitmap_free (prune_exprs);
1876 }
1877
1878 /* It may be necessary to insert a large number of insns on edges to
1879    make the existing occurrences of expressions fully redundant.  This
1880    routine examines the set of insertions and deletions and if the ratio
1881    of insertions to deletions is too high for a particular expression, then
1882    the expression is removed from the insertion/deletion sets.
1883
1884    N_ELEMS is the number of elements in the hash table.  */
1885
1886 static void
1887 prune_insertions_deletions (int n_elems)
1888 {
1889   sbitmap_iterator sbi;
1890   sbitmap prune_exprs;
1891
1892   /* We always use I to iterate over blocks/edges and J to iterate over
1893      expressions.  */
1894   unsigned int i, j;
1895
1896   /* Counts for the number of times an expression needs to be inserted and
1897      number of times an expression can be removed as a result.  */
1898   int *insertions = GCNEWVEC (int, n_elems);
1899   int *deletions = GCNEWVEC (int, n_elems);
1900
1901   /* Set of expressions which require too many insertions relative to
1902      the number of deletions achieved.  We will prune these out of the
1903      insertion/deletion sets.  */
1904   prune_exprs = sbitmap_alloc (n_elems);
1905   sbitmap_zero (prune_exprs);
1906
1907   /* Iterate over the edges counting the number of times each expression
1908      needs to be inserted.  */
1909   for (i = 0; i < (unsigned) n_edges; i++)
1910     {
1911       EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1912         insertions[j]++;
1913     }
1914
1915   /* Similarly for deletions, but those occur in blocks rather than on
1916      edges.  */
1917   for (i = 0; i < (unsigned) last_basic_block; i++)
1918     {
1919       EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1920         deletions[j]++;
1921     }
1922
1923   /* Now that we have accurate counts, iterate over the elements in the
1924      hash table and see if any need too many insertions relative to the
1925      number of evaluations that can be removed.  If so, mark them in
1926      PRUNE_EXPRS.  */
1927   for (j = 0; j < (unsigned) n_elems; j++)
1928     if (deletions[j]
1929         && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1930       SET_BIT (prune_exprs, j);
1931
1932   /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS.  */
1933   EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1934     {
1935       for (i = 0; i < (unsigned) n_edges; i++)
1936         RESET_BIT (pre_insert_map[i], j);
1937
1938       for (i = 0; i < (unsigned) last_basic_block; i++)
1939         RESET_BIT (pre_delete_map[i], j);
1940     }
1941
1942   sbitmap_free (prune_exprs);
1943   free (insertions);
1944   free (deletions);
1945 }
1946
1947 /* Top level routine to do the dataflow analysis needed by PRE.  */
1948
1949 static struct edge_list *
1950 compute_pre_data (void)
1951 {
1952   struct edge_list *edge_list;
1953   basic_block bb;
1954
1955   compute_local_properties (transp, comp, antloc, &expr_hash_table);
1956   prune_expressions (true);
1957   sbitmap_vector_zero (ae_kill, last_basic_block);
1958
1959   /* Compute ae_kill for each basic block using:
1960
1961      ~(TRANSP | COMP)
1962   */
1963
1964   FOR_EACH_BB (bb)
1965     {
1966       sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1967       sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1968     }
1969
1970   edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1971                             ae_kill, &pre_insert_map, &pre_delete_map);
1972   sbitmap_vector_free (antloc);
1973   antloc = NULL;
1974   sbitmap_vector_free (ae_kill);
1975   ae_kill = NULL;
1976
1977   prune_insertions_deletions (expr_hash_table.n_elems);
1978
1979   return edge_list;
1980 }
1981 \f
1982 /* PRE utilities */
1983
1984 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1985    block BB.
1986
1987    VISITED is a pointer to a working buffer for tracking which BB's have
1988    been visited.  It is NULL for the top-level call.
1989
1990    We treat reaching expressions that go through blocks containing the same
1991    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
1992    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
1993    2 as not reaching.  The intent is to improve the probability of finding
1994    only one reaching expression and to reduce register lifetimes by picking
1995    the closest such expression.  */
1996
1997 static int
1998 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
1999                               basic_block bb, char *visited)
2000 {
2001   edge pred;
2002   edge_iterator ei;
2003
2004   FOR_EACH_EDGE (pred, ei, bb->preds)
2005     {
2006       basic_block pred_bb = pred->src;
2007
2008       if (pred->src == ENTRY_BLOCK_PTR
2009           /* Has predecessor has already been visited?  */
2010           || visited[pred_bb->index])
2011         ;/* Nothing to do.  */
2012
2013       /* Does this predecessor generate this expression?  */
2014       else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2015         {
2016           /* Is this the occurrence we're looking for?
2017              Note that there's only one generating occurrence per block
2018              so we just need to check the block number.  */
2019           if (occr_bb == pred_bb)
2020             return 1;
2021
2022           visited[pred_bb->index] = 1;
2023         }
2024       /* Ignore this predecessor if it kills the expression.  */
2025       else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2026         visited[pred_bb->index] = 1;
2027
2028       /* Neither gen nor kill.  */
2029       else
2030         {
2031           visited[pred_bb->index] = 1;
2032           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2033             return 1;
2034         }
2035     }
2036
2037   /* All paths have been checked.  */
2038   return 0;
2039 }
2040
2041 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2042    memory allocated for that function is returned.  */
2043
2044 static int
2045 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2046 {
2047   int rval;
2048   char *visited = XCNEWVEC (char, last_basic_block);
2049
2050   rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2051
2052   free (visited);
2053   return rval;
2054 }
2055 \f
2056 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it.  */
2057
2058 static rtx
2059 process_insert_insn (struct expr *expr)
2060 {
2061   rtx reg = expr->reaching_reg;
2062   /* Copy the expression to make sure we don't have any sharing issues.  */
2063   rtx exp = copy_rtx (expr->expr);
2064   rtx pat;
2065
2066   start_sequence ();
2067
2068   /* If the expression is something that's an operand, like a constant,
2069      just copy it to a register.  */
2070   if (general_operand (exp, GET_MODE (reg)))
2071     emit_move_insn (reg, exp);
2072
2073   /* Otherwise, make a new insn to compute this expression and make sure the
2074      insn will be recognized (this also adds any needed CLOBBERs).  */
2075   else
2076     {
2077       rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2078
2079       if (insn_invalid_p (insn, false))
2080         gcc_unreachable ();
2081     }
2082
2083   pat = get_insns ();
2084   end_sequence ();
2085
2086   return pat;
2087 }
2088
2089 /* Add EXPR to the end of basic block BB.
2090
2091    This is used by both the PRE and code hoisting.  */
2092
2093 static void
2094 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2095 {
2096   rtx insn = BB_END (bb);
2097   rtx new_insn;
2098   rtx reg = expr->reaching_reg;
2099   int regno = REGNO (reg);
2100   rtx pat, pat_end;
2101
2102   pat = process_insert_insn (expr);
2103   gcc_assert (pat && INSN_P (pat));
2104
2105   pat_end = pat;
2106   while (NEXT_INSN (pat_end) != NULL_RTX)
2107     pat_end = NEXT_INSN (pat_end);
2108
2109   /* If the last insn is a jump, insert EXPR in front [taking care to
2110      handle cc0, etc. properly].  Similarly we need to care trapping
2111      instructions in presence of non-call exceptions.  */
2112
2113   if (JUMP_P (insn)
2114       || (NONJUMP_INSN_P (insn)
2115           && (!single_succ_p (bb)
2116               || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2117     {
2118 #ifdef HAVE_cc0
2119       rtx note;
2120 #endif
2121
2122       /* If this is a jump table, then we can't insert stuff here.  Since
2123          we know the previous real insn must be the tablejump, we insert
2124          the new instruction just before the tablejump.  */
2125       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2126           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2127         insn = prev_active_insn (insn);
2128
2129 #ifdef HAVE_cc0
2130       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2131          if cc0 isn't set.  */
2132       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2133       if (note)
2134         insn = XEXP (note, 0);
2135       else
2136         {
2137           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2138           if (maybe_cc0_setter
2139               && INSN_P (maybe_cc0_setter)
2140               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2141             insn = maybe_cc0_setter;
2142         }
2143 #endif
2144       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
2145       new_insn = emit_insn_before_noloc (pat, insn, bb);
2146     }
2147
2148   /* Likewise if the last insn is a call, as will happen in the presence
2149      of exception handling.  */
2150   else if (CALL_P (insn)
2151            && (!single_succ_p (bb)
2152                || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2153     {
2154       /* Keeping in mind targets with small register classes and parameters
2155          in registers, we search backward and place the instructions before
2156          the first parameter is loaded.  Do this for everyone for consistency
2157          and a presumption that we'll get better code elsewhere as well.  */
2158
2159       /* Since different machines initialize their parameter registers
2160          in different orders, assume nothing.  Collect the set of all
2161          parameter registers.  */
2162       insn = find_first_parameter_load (insn, BB_HEAD (bb));
2163
2164       /* If we found all the parameter loads, then we want to insert
2165          before the first parameter load.
2166
2167          If we did not find all the parameter loads, then we might have
2168          stopped on the head of the block, which could be a CODE_LABEL.
2169          If we inserted before the CODE_LABEL, then we would be putting
2170          the insn in the wrong basic block.  In that case, put the insn
2171          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
2172       while (LABEL_P (insn)
2173              || NOTE_INSN_BASIC_BLOCK_P (insn))
2174         insn = NEXT_INSN (insn);
2175
2176       new_insn = emit_insn_before_noloc (pat, insn, bb);
2177     }
2178   else
2179     new_insn = emit_insn_after_noloc (pat, insn, bb);
2180
2181   while (1)
2182     {
2183       if (INSN_P (pat))
2184         add_label_notes (PATTERN (pat), new_insn);
2185       if (pat == pat_end)
2186         break;
2187       pat = NEXT_INSN (pat);
2188     }
2189
2190   gcse_create_count++;
2191
2192   if (dump_file)
2193     {
2194       fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2195                bb->index, INSN_UID (new_insn));
2196       fprintf (dump_file, "copying expression %d to reg %d\n",
2197                expr->bitmap_index, regno);
2198     }
2199 }
2200
2201 /* Insert partially redundant expressions on edges in the CFG to make
2202    the expressions fully redundant.  */
2203
2204 static int
2205 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2206 {
2207   int e, i, j, num_edges, set_size, did_insert = 0;
2208   sbitmap *inserted;
2209
2210   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2211      if it reaches any of the deleted expressions.  */
2212
2213   set_size = pre_insert_map[0]->size;
2214   num_edges = NUM_EDGES (edge_list);
2215   inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2216   sbitmap_vector_zero (inserted, num_edges);
2217
2218   for (e = 0; e < num_edges; e++)
2219     {
2220       int indx;
2221       basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2222
2223       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2224         {
2225           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2226
2227           for (j = indx;
2228                insert && j < (int) expr_hash_table.n_elems;
2229                j++, insert >>= 1)
2230             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2231               {
2232                 struct expr *expr = index_map[j];
2233                 struct occr *occr;
2234
2235                 /* Now look at each deleted occurrence of this expression.  */
2236                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2237                   {
2238                     if (! occr->deleted_p)
2239                       continue;
2240
2241                     /* Insert this expression on this edge if it would
2242                        reach the deleted occurrence in BB.  */
2243                     if (!TEST_BIT (inserted[e], j))
2244                       {
2245                         rtx insn;
2246                         edge eg = INDEX_EDGE (edge_list, e);
2247
2248                         /* We can't insert anything on an abnormal and
2249                            critical edge, so we insert the insn at the end of
2250                            the previous block. There are several alternatives
2251                            detailed in Morgans book P277 (sec 10.5) for
2252                            handling this situation.  This one is easiest for
2253                            now.  */
2254
2255                         if (eg->flags & EDGE_ABNORMAL)
2256                           insert_insn_end_basic_block (index_map[j], bb);
2257                         else
2258                           {
2259                             insn = process_insert_insn (index_map[j]);
2260                             insert_insn_on_edge (insn, eg);
2261                           }
2262
2263                         if (dump_file)
2264                           {
2265                             fprintf (dump_file, "PRE: edge (%d,%d), ",
2266                                      bb->index,
2267                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2268                             fprintf (dump_file, "copy expression %d\n",
2269                                      expr->bitmap_index);
2270                           }
2271
2272                         update_ld_motion_stores (expr);
2273                         SET_BIT (inserted[e], j);
2274                         did_insert = 1;
2275                         gcse_create_count++;
2276                       }
2277                   }
2278               }
2279         }
2280     }
2281
2282   sbitmap_vector_free (inserted);
2283   return did_insert;
2284 }
2285
2286 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2287    Given "old_reg <- expr" (INSN), instead of adding after it
2288      reaching_reg <- old_reg
2289    it's better to do the following:
2290      reaching_reg <- expr
2291      old_reg      <- reaching_reg
2292    because this way copy propagation can discover additional PRE
2293    opportunities.  But if this fails, we try the old way.
2294    When "expr" is a store, i.e.
2295    given "MEM <- old_reg", instead of adding after it
2296      reaching_reg <- old_reg
2297    it's better to add it before as follows:
2298      reaching_reg <- old_reg
2299      MEM          <- reaching_reg.  */
2300
2301 static void
2302 pre_insert_copy_insn (struct expr *expr, rtx insn)
2303 {
2304   rtx reg = expr->reaching_reg;
2305   int regno = REGNO (reg);
2306   int indx = expr->bitmap_index;
2307   rtx pat = PATTERN (insn);
2308   rtx set, first_set, new_insn;
2309   rtx old_reg;
2310   int i;
2311
2312   /* This block matches the logic in hash_scan_insn.  */
2313   switch (GET_CODE (pat))
2314     {
2315     case SET:
2316       set = pat;
2317       break;
2318
2319     case PARALLEL:
2320       /* Search through the parallel looking for the set whose
2321          source was the expression that we're interested in.  */
2322       first_set = NULL_RTX;
2323       set = NULL_RTX;
2324       for (i = 0; i < XVECLEN (pat, 0); i++)
2325         {
2326           rtx x = XVECEXP (pat, 0, i);
2327           if (GET_CODE (x) == SET)
2328             {
2329               /* If the source was a REG_EQUAL or REG_EQUIV note, we
2330                  may not find an equivalent expression, but in this
2331                  case the PARALLEL will have a single set.  */
2332               if (first_set == NULL_RTX)
2333                 first_set = x;
2334               if (expr_equiv_p (SET_SRC (x), expr->expr))
2335                 {
2336                   set = x;
2337                   break;
2338                 }
2339             }
2340         }
2341
2342       gcc_assert (first_set);
2343       if (set == NULL_RTX)
2344         set = first_set;
2345       break;
2346
2347     default:
2348       gcc_unreachable ();
2349     }
2350
2351   if (REG_P (SET_DEST (set)))
2352     {
2353       old_reg = SET_DEST (set);
2354       /* Check if we can modify the set destination in the original insn.  */
2355       if (validate_change (insn, &SET_DEST (set), reg, 0))
2356         {
2357           new_insn = gen_move_insn (old_reg, reg);
2358           new_insn = emit_insn_after (new_insn, insn);
2359         }
2360       else
2361         {
2362           new_insn = gen_move_insn (reg, old_reg);
2363           new_insn = emit_insn_after (new_insn, insn);
2364         }
2365     }
2366   else /* This is possible only in case of a store to memory.  */
2367     {
2368       old_reg = SET_SRC (set);
2369       new_insn = gen_move_insn (reg, old_reg);
2370
2371       /* Check if we can modify the set source in the original insn.  */
2372       if (validate_change (insn, &SET_SRC (set), reg, 0))
2373         new_insn = emit_insn_before (new_insn, insn);
2374       else
2375         new_insn = emit_insn_after (new_insn, insn);
2376     }
2377
2378   gcse_create_count++;
2379
2380   if (dump_file)
2381     fprintf (dump_file,
2382              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2383               BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2384               INSN_UID (insn), regno);
2385 }
2386
2387 /* Copy available expressions that reach the redundant expression
2388    to `reaching_reg'.  */
2389
2390 static void
2391 pre_insert_copies (void)
2392 {
2393   unsigned int i, added_copy;
2394   struct expr *expr;
2395   struct occr *occr;
2396   struct occr *avail;
2397
2398   /* For each available expression in the table, copy the result to
2399      `reaching_reg' if the expression reaches a deleted one.
2400
2401      ??? The current algorithm is rather brute force.
2402      Need to do some profiling.  */
2403
2404   for (i = 0; i < expr_hash_table.size; i++)
2405     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2406       {
2407         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
2408            we don't want to insert a copy here because the expression may not
2409            really be redundant.  So only insert an insn if the expression was
2410            deleted.  This test also avoids further processing if the
2411            expression wasn't deleted anywhere.  */
2412         if (expr->reaching_reg == NULL)
2413           continue;
2414
2415         /* Set when we add a copy for that expression.  */
2416         added_copy = 0;
2417
2418         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2419           {
2420             if (! occr->deleted_p)
2421               continue;
2422
2423             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2424               {
2425                 rtx insn = avail->insn;
2426
2427                 /* No need to handle this one if handled already.  */
2428                 if (avail->copied_p)
2429                   continue;
2430
2431                 /* Don't handle this one if it's a redundant one.  */
2432                 if (INSN_DELETED_P (insn))
2433                   continue;
2434
2435                 /* Or if the expression doesn't reach the deleted one.  */
2436                 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2437                                                expr,
2438                                                BLOCK_FOR_INSN (occr->insn)))
2439                   continue;
2440
2441                 added_copy = 1;
2442
2443                 /* Copy the result of avail to reaching_reg.  */
2444                 pre_insert_copy_insn (expr, insn);
2445                 avail->copied_p = 1;
2446               }
2447           }
2448
2449           if (added_copy)
2450             update_ld_motion_stores (expr);
2451       }
2452 }
2453
2454 /* Emit move from SRC to DEST noting the equivalence with expression computed
2455    in INSN.  */
2456
2457 static rtx
2458 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2459 {
2460   rtx new_rtx;
2461   rtx set = single_set (insn), set2;
2462   rtx note;
2463   rtx eqv;
2464
2465   /* This should never fail since we're creating a reg->reg copy
2466      we've verified to be valid.  */
2467
2468   new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2469
2470   /* Note the equivalence for local CSE pass.  */
2471   set2 = single_set (new_rtx);
2472   if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2473     return new_rtx;
2474   if ((note = find_reg_equal_equiv_note (insn)))
2475     eqv = XEXP (note, 0);
2476   else
2477     eqv = SET_SRC (set);
2478
2479   set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2480
2481   return new_rtx;
2482 }
2483
2484 /* Delete redundant computations.
2485    Deletion is done by changing the insn to copy the `reaching_reg' of
2486    the expression into the result of the SET.  It is left to later passes
2487    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2488
2489    Return nonzero if a change is made.  */
2490
2491 static int
2492 pre_delete (void)
2493 {
2494   unsigned int i;
2495   int changed;
2496   struct expr *expr;
2497   struct occr *occr;
2498
2499   changed = 0;
2500   for (i = 0; i < expr_hash_table.size; i++)
2501     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2502       {
2503         int indx = expr->bitmap_index;
2504
2505         /* We only need to search antic_occr since we require ANTLOC != 0.  */
2506         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2507           {
2508             rtx insn = occr->insn;
2509             rtx set;
2510             basic_block bb = BLOCK_FOR_INSN (insn);
2511
2512             /* We only delete insns that have a single_set.  */
2513             if (TEST_BIT (pre_delete_map[bb->index], indx)
2514                 && (set = single_set (insn)) != 0
2515                 && dbg_cnt (pre_insn))
2516               {
2517                 /* Create a pseudo-reg to store the result of reaching
2518                    expressions into.  Get the mode for the new pseudo from
2519                    the mode of the original destination pseudo.  */
2520                 if (expr->reaching_reg == NULL)
2521                   expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2522
2523                 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2524                 delete_insn (insn);
2525                 occr->deleted_p = 1;
2526                 changed = 1;
2527                 gcse_subst_count++;
2528
2529                 if (dump_file)
2530                   {
2531                     fprintf (dump_file,
2532                              "PRE: redundant insn %d (expression %d) in ",
2533                                INSN_UID (insn), indx);
2534                     fprintf (dump_file, "bb %d, reaching reg is %d\n",
2535                              bb->index, REGNO (expr->reaching_reg));
2536                   }
2537               }
2538           }
2539       }
2540
2541   return changed;
2542 }
2543
2544 /* Perform GCSE optimizations using PRE.
2545    This is called by one_pre_gcse_pass after all the dataflow analysis
2546    has been done.
2547
2548    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2549    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2550    Compiler Design and Implementation.
2551
2552    ??? A new pseudo reg is created to hold the reaching expression.  The nice
2553    thing about the classical approach is that it would try to use an existing
2554    reg.  If the register can't be adequately optimized [i.e. we introduce
2555    reload problems], one could add a pass here to propagate the new register
2556    through the block.
2557
2558    ??? We don't handle single sets in PARALLELs because we're [currently] not
2559    able to copy the rest of the parallel when we insert copies to create full
2560    redundancies from partial redundancies.  However, there's no reason why we
2561    can't handle PARALLELs in the cases where there are no partial
2562    redundancies.  */
2563
2564 static int
2565 pre_gcse (struct edge_list *edge_list)
2566 {
2567   unsigned int i;
2568   int did_insert, changed;
2569   struct expr **index_map;
2570   struct expr *expr;
2571
2572   /* Compute a mapping from expression number (`bitmap_index') to
2573      hash table entry.  */
2574
2575   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2576   for (i = 0; i < expr_hash_table.size; i++)
2577     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2578       index_map[expr->bitmap_index] = expr;
2579
2580   /* Delete the redundant insns first so that
2581      - we know what register to use for the new insns and for the other
2582        ones with reaching expressions
2583      - we know which insns are redundant when we go to create copies  */
2584
2585   changed = pre_delete ();
2586   did_insert = pre_edge_insert (edge_list, index_map);
2587
2588   /* In other places with reaching expressions, copy the expression to the
2589      specially allocated pseudo-reg that reaches the redundant expr.  */
2590   pre_insert_copies ();
2591   if (did_insert)
2592     {
2593       commit_edge_insertions ();
2594       changed = 1;
2595     }
2596
2597   free (index_map);
2598   return changed;
2599 }
2600
2601 /* Top level routine to perform one PRE GCSE pass.
2602
2603    Return nonzero if a change was made.  */
2604
2605 static int
2606 one_pre_gcse_pass (void)
2607 {
2608   int changed = 0;
2609
2610   gcse_subst_count = 0;
2611   gcse_create_count = 0;
2612
2613   /* Return if there's nothing to do, or it is too expensive.  */
2614   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2615       || is_too_expensive (_("PRE disabled")))
2616     return 0;
2617
2618   /* We need alias.  */
2619   init_alias_analysis ();
2620
2621   bytes_used = 0;
2622   gcc_obstack_init (&gcse_obstack);
2623   alloc_gcse_mem ();
2624
2625   alloc_hash_table (&expr_hash_table);
2626   add_noreturn_fake_exit_edges ();
2627   if (flag_gcse_lm)
2628     compute_ld_motion_mems ();
2629
2630   compute_hash_table (&expr_hash_table);
2631   if (flag_gcse_lm)
2632     trim_ld_motion_mems ();
2633   if (dump_file)
2634     dump_hash_table (dump_file, "Expression", &expr_hash_table);
2635
2636   if (expr_hash_table.n_elems > 0)
2637     {
2638       struct edge_list *edge_list;
2639       alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2640       edge_list = compute_pre_data ();
2641       changed |= pre_gcse (edge_list);
2642       free_edge_list (edge_list);
2643       free_pre_mem ();
2644     }
2645
2646   if (flag_gcse_lm)
2647     free_ld_motion_mems ();
2648   remove_fake_exit_edges ();
2649   free_hash_table (&expr_hash_table);
2650
2651   free_gcse_mem ();
2652   obstack_free (&gcse_obstack, NULL);
2653
2654   /* We are finished with alias.  */
2655   end_alias_analysis ();
2656
2657   if (dump_file)
2658     {
2659       fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2660                current_function_name (), n_basic_blocks, bytes_used);
2661       fprintf (dump_file, "%d substs, %d insns created\n",
2662                gcse_subst_count, gcse_create_count);
2663     }
2664
2665   return changed;
2666 }
2667 \f
2668 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2669    to INSN.  If such notes are added to an insn which references a
2670    CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
2671    that note, because the following loop optimization pass requires
2672    them.  */
2673
2674 /* ??? If there was a jump optimization pass after gcse and before loop,
2675    then we would not need to do this here, because jump would add the
2676    necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
2677
2678 static void
2679 add_label_notes (rtx x, rtx insn)
2680 {
2681   enum rtx_code code = GET_CODE (x);
2682   int i, j;
2683   const char *fmt;
2684
2685   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2686     {
2687       /* This code used to ignore labels that referred to dispatch tables to
2688          avoid flow generating (slightly) worse code.
2689
2690          We no longer ignore such label references (see LABEL_REF handling in
2691          mark_jump_label for additional information).  */
2692
2693       /* There's no reason for current users to emit jump-insns with
2694          such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2695          notes.  */
2696       gcc_assert (!JUMP_P (insn));
2697       add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2698
2699       if (LABEL_P (XEXP (x, 0)))
2700         LABEL_NUSES (XEXP (x, 0))++;
2701
2702       return;
2703     }
2704
2705   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2706     {
2707       if (fmt[i] == 'e')
2708         add_label_notes (XEXP (x, i), insn);
2709       else if (fmt[i] == 'E')
2710         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2711           add_label_notes (XVECEXP (x, i, j), insn);
2712     }
2713 }
2714
2715 /* Code Hoisting variables and subroutines.  */
2716
2717 /* Very busy expressions.  */
2718 static sbitmap *hoist_vbein;
2719 static sbitmap *hoist_vbeout;
2720
2721 /* ??? We could compute post dominators and run this algorithm in
2722    reverse to perform tail merging, doing so would probably be
2723    more effective than the tail merging code in jump.c.
2724
2725    It's unclear if tail merging could be run in parallel with
2726    code hoisting.  It would be nice.  */
2727
2728 /* Allocate vars used for code hoisting analysis.  */
2729
2730 static void
2731 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2732 {
2733   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2734   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2735   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2736
2737   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2738   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2739 }
2740
2741 /* Free vars used for code hoisting analysis.  */
2742
2743 static void
2744 free_code_hoist_mem (void)
2745 {
2746   sbitmap_vector_free (antloc);
2747   sbitmap_vector_free (transp);
2748   sbitmap_vector_free (comp);
2749
2750   sbitmap_vector_free (hoist_vbein);
2751   sbitmap_vector_free (hoist_vbeout);
2752
2753   free_dominance_info (CDI_DOMINATORS);
2754 }
2755
2756 /* Compute the very busy expressions at entry/exit from each block.
2757
2758    An expression is very busy if all paths from a given point
2759    compute the expression.  */
2760
2761 static void
2762 compute_code_hoist_vbeinout (void)
2763 {
2764   int changed, passes;
2765   basic_block bb;
2766
2767   sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2768   sbitmap_vector_zero (hoist_vbein, last_basic_block);
2769
2770   passes = 0;
2771   changed = 1;
2772
2773   while (changed)
2774     {
2775       changed = 0;
2776
2777       /* We scan the blocks in the reverse order to speed up
2778          the convergence.  */
2779       FOR_EACH_BB_REVERSE (bb)
2780         {
2781           if (bb->next_bb != EXIT_BLOCK_PTR)
2782             {
2783               sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2784                                              hoist_vbein, bb);
2785
2786               /* Include expressions in VBEout that are calculated
2787                  in BB and available at its end.  */
2788               sbitmap_a_or_b (hoist_vbeout[bb->index],
2789                               hoist_vbeout[bb->index], comp[bb->index]);
2790             }
2791
2792           changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2793                                               antloc[bb->index],
2794                                               hoist_vbeout[bb->index],
2795                                               transp[bb->index]);
2796         }
2797
2798       passes++;
2799     }
2800
2801   if (dump_file)
2802     {
2803       fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2804
2805       FOR_EACH_BB (bb)
2806         {
2807           fprintf (dump_file, "vbein (%d): ", bb->index);
2808           dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2809           fprintf (dump_file, "vbeout(%d): ", bb->index);
2810           dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2811         }
2812     }
2813 }
2814
2815 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
2816
2817 static void
2818 compute_code_hoist_data (void)
2819 {
2820   compute_local_properties (transp, comp, antloc, &expr_hash_table);
2821   prune_expressions (false);
2822   compute_code_hoist_vbeinout ();
2823   calculate_dominance_info (CDI_DOMINATORS);
2824   if (dump_file)
2825     fprintf (dump_file, "\n");
2826 }
2827
2828 /* Determine if the expression identified by EXPR_INDEX would
2829    reach BB unimpared if it was placed at the end of EXPR_BB.
2830    Stop the search if the expression would need to be moved more
2831    than DISTANCE instructions.
2832
2833    It's unclear exactly what Muchnick meant by "unimpared".  It seems
2834    to me that the expression must either be computed or transparent in
2835    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
2836    would allow the expression to be hoisted out of loops, even if
2837    the expression wasn't a loop invariant.
2838
2839    Contrast this to reachability for PRE where an expression is
2840    considered reachable if *any* path reaches instead of *all*
2841    paths.  */
2842
2843 static int
2844 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2845                            char *visited, int distance, int *bb_size)
2846 {
2847   edge pred;
2848   edge_iterator ei;
2849   int visited_allocated_locally = 0;
2850
2851   /* Terminate the search if distance, for which EXPR is allowed to move,
2852      is exhausted.  */
2853   if (distance > 0)
2854     {
2855       distance -= bb_size[bb->index];
2856
2857       if (distance <= 0)
2858         return 0;
2859     }
2860   else
2861     gcc_assert (distance == 0);
2862
2863   if (visited == NULL)
2864     {
2865       visited_allocated_locally = 1;
2866       visited = XCNEWVEC (char, last_basic_block);
2867     }
2868
2869   FOR_EACH_EDGE (pred, ei, bb->preds)
2870     {
2871       basic_block pred_bb = pred->src;
2872
2873       if (pred->src == ENTRY_BLOCK_PTR)
2874         break;
2875       else if (pred_bb == expr_bb)
2876         continue;
2877       else if (visited[pred_bb->index])
2878         continue;
2879
2880       else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2881         break;
2882
2883       /* Not killed.  */
2884       else
2885         {
2886           visited[pred_bb->index] = 1;
2887           if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2888                                            visited, distance, bb_size))
2889             break;
2890         }
2891     }
2892   if (visited_allocated_locally)
2893     free (visited);
2894
2895   return (pred == NULL);
2896 }
2897 \f
2898 /* Find occurrence in BB.  */
2899
2900 static struct occr *
2901 find_occr_in_bb (struct occr *occr, basic_block bb)
2902 {
2903   /* Find the right occurrence of this expression.  */
2904   while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2905     occr = occr->next;
2906
2907   return occr;
2908 }
2909
2910 /* Actually perform code hoisting.  */
2911
2912 static int
2913 hoist_code (void)
2914 {
2915   basic_block bb, dominated;
2916   VEC (basic_block, heap) *dom_tree_walk;
2917   unsigned int dom_tree_walk_index;
2918   VEC (basic_block, heap) *domby;
2919   unsigned int i,j;
2920   struct expr **index_map;
2921   struct expr *expr;
2922   int *to_bb_head;
2923   int *bb_size;
2924   int changed = 0;
2925
2926   /* Compute a mapping from expression number (`bitmap_index') to
2927      hash table entry.  */
2928
2929   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2930   for (i = 0; i < expr_hash_table.size; i++)
2931     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2932       index_map[expr->bitmap_index] = expr;
2933
2934   /* Calculate sizes of basic blocks and note how far
2935      each instruction is from the start of its block.  We then use this
2936      data to restrict distance an expression can travel.  */
2937
2938   to_bb_head = XCNEWVEC (int, get_max_uid ());
2939   bb_size = XCNEWVEC (int, last_basic_block);
2940
2941   FOR_EACH_BB (bb)
2942     {
2943       rtx insn;
2944       int to_head;
2945
2946       to_head = 0;
2947       FOR_BB_INSNS (bb, insn)
2948         {
2949           /* Don't count debug instructions to avoid them affecting
2950              decision choices.  */
2951           if (NONDEBUG_INSN_P (insn))
2952             to_bb_head[INSN_UID (insn)] = to_head++;
2953         }
2954
2955       bb_size[bb->index] = to_head;
2956     }
2957
2958   gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2959               && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2960                   == ENTRY_BLOCK_PTR->next_bb));
2961
2962   dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2963                                             ENTRY_BLOCK_PTR->next_bb);
2964
2965   /* Walk over each basic block looking for potentially hoistable
2966      expressions, nothing gets hoisted from the entry block.  */
2967   FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2968     {
2969       domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2970
2971       if (VEC_length (basic_block, domby) == 0)
2972         continue;
2973
2974       /* Examine each expression that is very busy at the exit of this
2975          block.  These are the potentially hoistable expressions.  */
2976       for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
2977         {
2978           if (TEST_BIT (hoist_vbeout[bb->index], i))
2979             {
2980               /* Current expression.  */
2981               struct expr *expr = index_map[i];
2982               /* Number of occurrences of EXPR that can be hoisted to BB.  */
2983               int hoistable = 0;
2984               /* Basic blocks that have occurrences reachable from BB.  */
2985               bitmap_head _from_bbs, *from_bbs = &_from_bbs;
2986               /* Occurrences reachable from BB.  */
2987               VEC (occr_t, heap) *occrs_to_hoist = NULL;
2988               /* We want to insert the expression into BB only once, so
2989                  note when we've inserted it.  */
2990               int insn_inserted_p;
2991               occr_t occr;
2992
2993               bitmap_initialize (from_bbs, 0);
2994
2995               /* If an expression is computed in BB and is available at end of
2996                  BB, hoist all occurrences dominated by BB to BB.  */
2997               if (TEST_BIT (comp[bb->index], i))
2998                 {
2999                   occr = find_occr_in_bb (expr->antic_occr, bb);
3000
3001                   if (occr)
3002                     {
3003                       /* An occurrence might've been already deleted
3004                          while processing a dominator of BB.  */
3005                       if (!occr->deleted_p)
3006                         {
3007                           gcc_assert (NONDEBUG_INSN_P (occr->insn));
3008                           hoistable++;
3009                         }
3010                     }
3011                   else
3012                     hoistable++;
3013                 }
3014
3015               /* We've found a potentially hoistable expression, now
3016                  we look at every block BB dominates to see if it
3017                  computes the expression.  */
3018               FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3019                 {
3020                   int max_distance;
3021
3022                   /* Ignore self dominance.  */
3023                   if (bb == dominated)
3024                     continue;
3025                   /* We've found a dominated block, now see if it computes
3026                      the busy expression and whether or not moving that
3027                      expression to the "beginning" of that block is safe.  */
3028                   if (!TEST_BIT (antloc[dominated->index], i))
3029                     continue;
3030
3031                   occr = find_occr_in_bb (expr->antic_occr, dominated);
3032                   gcc_assert (occr);
3033
3034                   /* An occurrence might've been already deleted
3035                      while processing a dominator of BB.  */
3036                   if (occr->deleted_p)
3037                     continue;
3038                   gcc_assert (NONDEBUG_INSN_P (occr->insn));
3039
3040                   max_distance = expr->max_distance;
3041                   if (max_distance > 0)
3042                     /* Adjust MAX_DISTANCE to account for the fact that
3043                        OCCR won't have to travel all of DOMINATED, but
3044                        only part of it.  */
3045                     max_distance += (bb_size[dominated->index]
3046                                      - to_bb_head[INSN_UID (occr->insn)]);
3047
3048                   /* Note if the expression would reach the dominated block
3049                      unimpared if it was placed at the end of BB.
3050
3051                      Keep track of how many times this expression is hoistable
3052                      from a dominated block into BB.  */
3053                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3054                                                  max_distance, bb_size))
3055                     {
3056                       hoistable++;
3057                       VEC_safe_push (occr_t, heap,
3058                                      occrs_to_hoist, occr);
3059                       bitmap_set_bit (from_bbs, dominated->index);
3060                     }
3061                 }
3062
3063               /* If we found more than one hoistable occurrence of this
3064                  expression, then note it in the vector of expressions to
3065                  hoist.  It makes no sense to hoist things which are computed
3066                  in only one BB, and doing so tends to pessimize register
3067                  allocation.  One could increase this value to try harder
3068                  to avoid any possible code expansion due to register
3069                  allocation issues; however experiments have shown that
3070                  the vast majority of hoistable expressions are only movable
3071                  from two successors, so raising this threshold is likely
3072                  to nullify any benefit we get from code hoisting.  */
3073               if (hoistable > 1 && dbg_cnt (hoist_insn))
3074                 {
3075                   /* If (hoistable != VEC_length), then there is
3076                      an occurrence of EXPR in BB itself.  Don't waste
3077                      time looking for LCA in this case.  */
3078                   if ((unsigned) hoistable
3079                       == VEC_length (occr_t, occrs_to_hoist))
3080                     {
3081                       basic_block lca;
3082
3083                       lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3084                                                               from_bbs);
3085                       if (lca != bb)
3086                         /* Punt, it's better to hoist these occurrences to
3087                            LCA.  */
3088                         VEC_free (occr_t, heap, occrs_to_hoist);
3089                     }
3090                 }
3091               else
3092                 /* Punt, no point hoisting a single occurence.  */
3093                 VEC_free (occr_t, heap, occrs_to_hoist);
3094
3095               insn_inserted_p = 0;
3096
3097               /* Walk through occurrences of I'th expressions we want
3098                  to hoist to BB and make the transformations.  */
3099               FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3100                 {
3101                   rtx insn;
3102                   rtx set;
3103
3104                   gcc_assert (!occr->deleted_p);
3105
3106                   insn = occr->insn;
3107                   set = single_set (insn);
3108                   gcc_assert (set);
3109
3110                   /* Create a pseudo-reg to store the result of reaching
3111                      expressions into.  Get the mode for the new pseudo
3112                      from the mode of the original destination pseudo.
3113
3114                      It is important to use new pseudos whenever we
3115                      emit a set.  This will allow reload to use
3116                      rematerialization for such registers.  */
3117                   if (!insn_inserted_p)
3118                     expr->reaching_reg
3119                       = gen_reg_rtx_and_attrs (SET_DEST (set));
3120
3121                   gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3122                                         insn);
3123                   delete_insn (insn);
3124                   occr->deleted_p = 1;
3125                   changed = 1;
3126                   gcse_subst_count++;
3127
3128                   if (!insn_inserted_p)
3129                     {
3130                       insert_insn_end_basic_block (expr, bb);
3131                       insn_inserted_p = 1;
3132                     }
3133                 }
3134
3135               VEC_free (occr_t, heap, occrs_to_hoist);
3136               bitmap_clear (from_bbs);
3137             }
3138         }
3139       VEC_free (basic_block, heap, domby);
3140     }
3141
3142   VEC_free (basic_block, heap, dom_tree_walk);
3143   free (bb_size);
3144   free (to_bb_head);
3145   free (index_map);
3146
3147   return changed;
3148 }
3149
3150 /* Top level routine to perform one code hoisting (aka unification) pass
3151
3152    Return nonzero if a change was made.  */
3153
3154 static int
3155 one_code_hoisting_pass (void)
3156 {
3157   int changed = 0;
3158
3159   gcse_subst_count = 0;
3160   gcse_create_count = 0;
3161
3162   /* Return if there's nothing to do, or it is too expensive.  */
3163   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3164       || is_too_expensive (_("GCSE disabled")))
3165     return 0;
3166
3167   doing_code_hoisting_p = true;
3168
3169   /* We need alias.  */
3170   init_alias_analysis ();
3171
3172   bytes_used = 0;
3173   gcc_obstack_init (&gcse_obstack);
3174   alloc_gcse_mem ();
3175
3176   alloc_hash_table (&expr_hash_table);
3177   compute_hash_table (&expr_hash_table);
3178   if (dump_file)
3179     dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3180
3181   if (expr_hash_table.n_elems > 0)
3182     {
3183       alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3184       compute_code_hoist_data ();
3185       changed = hoist_code ();
3186       free_code_hoist_mem ();
3187     }
3188
3189   free_hash_table (&expr_hash_table);
3190   free_gcse_mem ();
3191   obstack_free (&gcse_obstack, NULL);
3192
3193   /* We are finished with alias.  */
3194   end_alias_analysis ();
3195
3196   if (dump_file)
3197     {
3198       fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3199                current_function_name (), n_basic_blocks, bytes_used);
3200       fprintf (dump_file, "%d substs, %d insns created\n",
3201                gcse_subst_count, gcse_create_count);
3202     }
3203
3204   doing_code_hoisting_p = false;
3205
3206   return changed;
3207 }
3208 \f
3209 /*  Here we provide the things required to do store motion towards the exit.
3210     In order for this to be effective, gcse also needed to be taught how to
3211     move a load when it is killed only by a store to itself.
3212
3213             int i;
3214             float a[10];
3215
3216             void foo(float scale)
3217             {
3218               for (i=0; i<10; i++)
3219                 a[i] *= scale;
3220             }
3221
3222     'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3223     the load out since its live around the loop, and stored at the bottom
3224     of the loop.
3225
3226       The 'Load Motion' referred to and implemented in this file is
3227     an enhancement to gcse which when using edge based LCM, recognizes
3228     this situation and allows gcse to move the load out of the loop.
3229
3230       Once gcse has hoisted the load, store motion can then push this
3231     load towards the exit, and we end up with no loads or stores of 'i'
3232     in the loop.  */
3233
3234 static hashval_t
3235 pre_ldst_expr_hash (const void *p)
3236 {
3237   int do_not_record_p = 0;
3238   const struct ls_expr *const x = (const struct ls_expr *) p;
3239   return
3240     hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3241 }
3242
3243 static int
3244 pre_ldst_expr_eq (const void *p1, const void *p2)
3245 {
3246   const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3247     *const ptr2 = (const struct ls_expr *) p2;
3248   return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3249 }
3250
3251 /* This will search the ldst list for a matching expression. If it
3252    doesn't find one, we create one and initialize it.  */
3253
3254 static struct ls_expr *
3255 ldst_entry (rtx x)
3256 {
3257   int do_not_record_p = 0;
3258   struct ls_expr * ptr;
3259   unsigned int hash;
3260   void **slot;
3261   struct ls_expr e;
3262
3263   hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3264                    NULL,  /*have_reg_qty=*/false);
3265
3266   e.pattern = x;
3267   slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3268   if (*slot)
3269     return (struct ls_expr *)*slot;
3270
3271   ptr = XNEW (struct ls_expr);
3272
3273   ptr->next         = pre_ldst_mems;
3274   ptr->expr         = NULL;
3275   ptr->pattern      = x;
3276   ptr->pattern_regs = NULL_RTX;
3277   ptr->loads        = NULL_RTX;
3278   ptr->stores       = NULL_RTX;
3279   ptr->reaching_reg = NULL_RTX;
3280   ptr->invalid      = 0;
3281   ptr->index        = 0;
3282   ptr->hash_index   = hash;
3283   pre_ldst_mems     = ptr;
3284   *slot = ptr;
3285
3286   return ptr;
3287 }
3288
3289 /* Free up an individual ldst entry.  */
3290
3291 static void
3292 free_ldst_entry (struct ls_expr * ptr)
3293 {
3294   free_INSN_LIST_list (& ptr->loads);
3295   free_INSN_LIST_list (& ptr->stores);
3296
3297   free (ptr);
3298 }
3299
3300 /* Free up all memory associated with the ldst list.  */
3301
3302 static void
3303 free_ld_motion_mems (void)
3304 {
3305   if (pre_ldst_table)
3306     htab_delete (pre_ldst_table);
3307   pre_ldst_table = NULL;
3308
3309   while (pre_ldst_mems)
3310     {
3311       struct ls_expr * tmp = pre_ldst_mems;
3312
3313       pre_ldst_mems = pre_ldst_mems->next;
3314
3315       free_ldst_entry (tmp);
3316     }
3317
3318   pre_ldst_mems = NULL;
3319 }
3320
3321 /* Dump debugging info about the ldst list.  */
3322
3323 static void
3324 print_ldst_list (FILE * file)
3325 {
3326   struct ls_expr * ptr;
3327
3328   fprintf (file, "LDST list: \n");
3329
3330   for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3331     {
3332       fprintf (file, "  Pattern (%3d): ", ptr->index);
3333
3334       print_rtl (file, ptr->pattern);
3335
3336       fprintf (file, "\n         Loads : ");
3337
3338       if (ptr->loads)
3339         print_rtl (file, ptr->loads);
3340       else
3341         fprintf (file, "(nil)");
3342
3343       fprintf (file, "\n        Stores : ");
3344
3345       if (ptr->stores)
3346         print_rtl (file, ptr->stores);
3347       else
3348         fprintf (file, "(nil)");
3349
3350       fprintf (file, "\n\n");
3351     }
3352
3353   fprintf (file, "\n");
3354 }
3355
3356 /* Returns 1 if X is in the list of ldst only expressions.  */
3357
3358 static struct ls_expr *
3359 find_rtx_in_ldst (rtx x)
3360 {
3361   struct ls_expr e;
3362   void **slot;
3363   if (!pre_ldst_table)
3364     return NULL;
3365   e.pattern = x;
3366   slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3367   if (!slot || ((struct ls_expr *)*slot)->invalid)
3368     return NULL;
3369   return (struct ls_expr *) *slot;
3370 }
3371 \f
3372 /* Load Motion for loads which only kill themselves.  */
3373
3374 /* Return true if x, a MEM, is a simple access with no side effects.
3375    These are the types of loads we consider for the ld_motion list,
3376    otherwise we let the usual aliasing take care of it.  */
3377
3378 static int
3379 simple_mem (const_rtx x)
3380 {
3381   if (MEM_VOLATILE_P (x))
3382     return 0;
3383
3384   if (GET_MODE (x) == BLKmode)
3385     return 0;
3386
3387   /* If we are handling exceptions, we must be careful with memory references
3388      that may trap.  If we are not, the behavior is undefined, so we may just
3389      continue.  */
3390   if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3391     return 0;
3392
3393   if (side_effects_p (x))
3394     return 0;
3395
3396   /* Do not consider function arguments passed on stack.  */
3397   if (reg_mentioned_p (stack_pointer_rtx, x))
3398     return 0;
3399
3400   if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3401     return 0;
3402
3403   return 1;
3404 }
3405
3406 /* Make sure there isn't a buried reference in this pattern anywhere.
3407    If there is, invalidate the entry for it since we're not capable
3408    of fixing it up just yet.. We have to be sure we know about ALL
3409    loads since the aliasing code will allow all entries in the
3410    ld_motion list to not-alias itself.  If we miss a load, we will get
3411    the wrong value since gcse might common it and we won't know to
3412    fix it up.  */
3413
3414 static void
3415 invalidate_any_buried_refs (rtx x)
3416 {
3417   const char * fmt;
3418   int i, j;
3419   struct ls_expr * ptr;
3420
3421   /* Invalidate it in the list.  */
3422   if (MEM_P (x) && simple_mem (x))
3423     {
3424       ptr = ldst_entry (x);
3425       ptr->invalid = 1;
3426     }
3427
3428   /* Recursively process the insn.  */
3429   fmt = GET_RTX_FORMAT (GET_CODE (x));
3430
3431   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3432     {
3433       if (fmt[i] == 'e')
3434         invalidate_any_buried_refs (XEXP (x, i));
3435       else if (fmt[i] == 'E')
3436         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3437           invalidate_any_buried_refs (XVECEXP (x, i, j));
3438     }
3439 }
3440
3441 /* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
3442    being defined as MEM loads and stores to symbols, with no side effects
3443    and no registers in the expression.  For a MEM destination, we also
3444    check that the insn is still valid if we replace the destination with a
3445    REG, as is done in update_ld_motion_stores.  If there are any uses/defs
3446    which don't match this criteria, they are invalidated and trimmed out
3447    later.  */
3448
3449 static void
3450 compute_ld_motion_mems (void)
3451 {
3452   struct ls_expr * ptr;
3453   basic_block bb;
3454   rtx insn;
3455
3456   pre_ldst_mems = NULL;
3457   pre_ldst_table
3458     = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3459
3460   FOR_EACH_BB (bb)
3461     {
3462       FOR_BB_INSNS (bb, insn)
3463         {
3464           if (NONDEBUG_INSN_P (insn))
3465             {
3466               if (GET_CODE (PATTERN (insn)) == SET)
3467                 {
3468                   rtx src = SET_SRC (PATTERN (insn));
3469                   rtx dest = SET_DEST (PATTERN (insn));
3470
3471                   /* Check for a simple LOAD...  */
3472                   if (MEM_P (src) && simple_mem (src))
3473                     {
3474                       ptr = ldst_entry (src);
3475                       if (REG_P (dest))
3476                         ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3477                       else
3478                         ptr->invalid = 1;
3479                     }
3480                   else
3481                     {
3482                       /* Make sure there isn't a buried load somewhere.  */
3483                       invalidate_any_buried_refs (src);
3484                     }
3485
3486                   /* Check for stores. Don't worry about aliased ones, they
3487                      will block any movement we might do later. We only care
3488                      about this exact pattern since those are the only
3489                      circumstance that we will ignore the aliasing info.  */
3490                   if (MEM_P (dest) && simple_mem (dest))
3491                     {
3492                       ptr = ldst_entry (dest);
3493
3494                       if (! MEM_P (src)
3495                           && GET_CODE (src) != ASM_OPERANDS
3496                           /* Check for REG manually since want_to_gcse_p
3497                              returns 0 for all REGs.  */
3498                           && can_assign_to_reg_without_clobbers_p (src))
3499                         ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3500                       else
3501                         ptr->invalid = 1;
3502                     }
3503                 }
3504               else
3505                 invalidate_any_buried_refs (PATTERN (insn));
3506             }
3507         }
3508     }
3509 }
3510
3511 /* Remove any references that have been either invalidated or are not in the
3512    expression list for pre gcse.  */
3513
3514 static void
3515 trim_ld_motion_mems (void)
3516 {
3517   struct ls_expr * * last = & pre_ldst_mems;
3518   struct ls_expr * ptr = pre_ldst_mems;
3519
3520   while (ptr != NULL)
3521     {
3522       struct expr * expr;
3523
3524       /* Delete if entry has been made invalid.  */
3525       if (! ptr->invalid)
3526         {
3527           /* Delete if we cannot find this mem in the expression list.  */
3528           unsigned int hash = ptr->hash_index % expr_hash_table.size;
3529
3530           for (expr = expr_hash_table.table[hash];
3531                expr != NULL;
3532                expr = expr->next_same_hash)
3533             if (expr_equiv_p (expr->expr, ptr->pattern))
3534               break;
3535         }
3536       else
3537         expr = (struct expr *) 0;
3538
3539       if (expr)
3540         {
3541           /* Set the expression field if we are keeping it.  */
3542           ptr->expr = expr;
3543           last = & ptr->next;
3544           ptr = ptr->next;
3545         }
3546       else
3547         {
3548           *last = ptr->next;
3549           htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3550           free_ldst_entry (ptr);
3551           ptr = * last;
3552         }
3553     }
3554
3555   /* Show the world what we've found.  */
3556   if (dump_file && pre_ldst_mems != NULL)
3557     print_ldst_list (dump_file);
3558 }
3559
3560 /* This routine will take an expression which we are replacing with
3561    a reaching register, and update any stores that are needed if
3562    that expression is in the ld_motion list.  Stores are updated by
3563    copying their SRC to the reaching register, and then storing
3564    the reaching register into the store location. These keeps the
3565    correct value in the reaching register for the loads.  */
3566
3567 static void
3568 update_ld_motion_stores (struct expr * expr)
3569 {
3570   struct ls_expr * mem_ptr;
3571
3572   if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3573     {
3574       /* We can try to find just the REACHED stores, but is shouldn't
3575          matter to set the reaching reg everywhere...  some might be
3576          dead and should be eliminated later.  */
3577
3578       /* We replace (set mem expr) with (set reg expr) (set mem reg)
3579          where reg is the reaching reg used in the load.  We checked in
3580          compute_ld_motion_mems that we can replace (set mem expr) with
3581          (set reg expr) in that insn.  */
3582       rtx list = mem_ptr->stores;
3583
3584       for ( ; list != NULL_RTX; list = XEXP (list, 1))
3585         {
3586           rtx insn = XEXP (list, 0);
3587           rtx pat = PATTERN (insn);
3588           rtx src = SET_SRC (pat);
3589           rtx reg = expr->reaching_reg;
3590           rtx copy;
3591
3592           /* If we've already copied it, continue.  */
3593           if (expr->reaching_reg == src)
3594             continue;
3595
3596           if (dump_file)
3597             {
3598               fprintf (dump_file, "PRE:  store updated with reaching reg ");
3599               print_rtl (dump_file, reg);
3600               fprintf (dump_file, ":\n  ");
3601               print_inline_rtx (dump_file, insn, 8);
3602               fprintf (dump_file, "\n");
3603             }
3604
3605           copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3606           emit_insn_before (copy, insn);
3607           SET_SRC (pat) = reg;
3608           df_insn_rescan (insn);
3609
3610           /* un-recognize this pattern since it's probably different now.  */
3611           INSN_CODE (insn) = -1;
3612           gcse_create_count++;
3613         }
3614     }
3615 }
3616 \f
3617 /* Return true if the graph is too expensive to optimize. PASS is the
3618    optimization about to be performed.  */
3619
3620 static bool
3621 is_too_expensive (const char *pass)
3622 {
3623   /* Trying to perform global optimizations on flow graphs which have
3624      a high connectivity will take a long time and is unlikely to be
3625      particularly useful.
3626
3627      In normal circumstances a cfg should have about twice as many
3628      edges as blocks.  But we do not want to punish small functions
3629      which have a couple switch statements.  Rather than simply
3630      threshold the number of blocks, uses something with a more
3631      graceful degradation.  */
3632   if (n_edges > 20000 + n_basic_blocks * 4)
3633     {
3634       warning (OPT_Wdisabled_optimization,
3635                "%s: %d basic blocks and %d edges/basic block",
3636                pass, n_basic_blocks, n_edges / n_basic_blocks);
3637
3638       return true;
3639     }
3640
3641   /* If allocating memory for the dataflow bitmaps would take up too much
3642      storage it's better just to disable the optimization.  */
3643   if ((n_basic_blocks
3644        * SBITMAP_SET_SIZE (max_reg_num ())
3645        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3646     {
3647       warning (OPT_Wdisabled_optimization,
3648                "%s: %d basic blocks and %d registers",
3649                pass, n_basic_blocks, max_reg_num ());
3650
3651       return true;
3652     }
3653
3654   return false;
3655 }
3656 \f
3657 /* All the passes implemented in this file.  Each pass has its
3658    own gate and execute function, and at the end of the file a
3659    pass definition for passes.c.
3660
3661    We do not construct an accurate cfg in functions which call
3662    setjmp, so none of these passes runs if the function calls
3663    setjmp.
3664    FIXME: Should just handle setjmp via REG_SETJMP notes.  */
3665
3666 static bool
3667 gate_rtl_pre (void)
3668 {
3669   return optimize > 0 && flag_gcse
3670     && !cfun->calls_setjmp
3671     && optimize_function_for_speed_p (cfun)
3672     && dbg_cnt (pre);
3673 }
3674
3675 static unsigned int
3676 execute_rtl_pre (void)
3677 {
3678   int changed;
3679   delete_unreachable_blocks ();
3680   df_analyze ();
3681   changed = one_pre_gcse_pass ();
3682   flag_rerun_cse_after_global_opts |= changed;
3683   if (changed)
3684     cleanup_cfg (0);
3685   return 0;
3686 }
3687
3688 static bool
3689 gate_rtl_hoist (void)
3690 {
3691   return optimize > 0 && flag_gcse
3692     && !cfun->calls_setjmp
3693     /* It does not make sense to run code hoisting unless we are optimizing
3694        for code size -- it rarely makes programs faster, and can make then
3695        bigger if we did PRE (when optimizing for space, we don't run PRE).  */
3696     && optimize_function_for_size_p (cfun)
3697     && dbg_cnt (hoist);
3698 }
3699
3700 static unsigned int
3701 execute_rtl_hoist (void)
3702 {
3703   int changed;
3704   delete_unreachable_blocks ();
3705   df_analyze ();
3706   changed = one_code_hoisting_pass ();
3707   flag_rerun_cse_after_global_opts |= changed;
3708   if (changed)
3709     cleanup_cfg (0);
3710   return 0;
3711 }
3712
3713 struct rtl_opt_pass pass_rtl_pre =
3714 {
3715  {
3716   RTL_PASS,
3717   "rtl pre",                            /* name */
3718   gate_rtl_pre,                         /* gate */
3719   execute_rtl_pre,                      /* execute */
3720   NULL,                                 /* sub */
3721   NULL,                                 /* next */
3722   0,                                    /* static_pass_number */
3723   TV_PRE,                               /* tv_id */
3724   PROP_cfglayout,                       /* properties_required */
3725   0,                                    /* properties_provided */
3726   0,                                    /* properties_destroyed */
3727   0,                                    /* todo_flags_start */
3728   TODO_df_finish | TODO_verify_rtl_sharing |
3729   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3730  }
3731 };
3732
3733 struct rtl_opt_pass pass_rtl_hoist =
3734 {
3735  {
3736   RTL_PASS,
3737   "hoist",                              /* name */
3738   gate_rtl_hoist,                       /* gate */
3739   execute_rtl_hoist,                    /* execute */
3740   NULL,                                 /* sub */
3741   NULL,                                 /* next */
3742   0,                                    /* static_pass_number */
3743   TV_HOIST,                             /* tv_id */
3744   PROP_cfglayout,                       /* properties_required */
3745   0,                                    /* properties_provided */
3746   0,                                    /* properties_destroyed */
3747   0,                                    /* todo_flags_start */
3748   TODO_df_finish | TODO_verify_rtl_sharing |
3749   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3750  }
3751 };
3752
3753 #include "gt-gcse.h"