gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "errors.h"
 128 #include "ggc.h"
 129 #include "tree.h"
 130 #include "target.h"
 131
 132 #include "rtl.h"
 133 #include "basic-block.h"
 134 #include "diagnostic.h"
 135 #include "tree-flow.h"
 136 #include "tree-dump.h"
 137 #include "timevar.h"
 138 #include "cfgloop.h"
 139 #include "cfglayout.h"
 140 #include "expr.h"
 141 #include "optabs.h"
 142 #include "toplev.h"
 143 #include "tree-chrec.h"
 144 #include "tree-data-ref.h"
 145 #include "tree-scalar-evolution.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148 #include "langhooks.h"
 149
 150
 151 /*************************************************************************
 152   Simple Loop Peeling Utilities
 153  *************************************************************************/
 154
 155 /* Entry point for peeling of simple loops.
 156    Peel the first/last iterations of a loop.
 157    It can be used outside of the vectorizer for loops that are simple enough
 158    (see function documentation).  In the vectorizer it is used to peel the
 159    last few iterations when the loop bound is unknown or does not evenly
 160    divide by the vectorization factor, and to peel the first few iterations
 161    to force the alignment of data references in the loop.  */
 162 struct loop *slpeel_tree_peel_loop_to_edge
 163   (struct loop *, struct loops *, edge, tree, tree, bool);
 164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 165   (struct loop *, struct loops *, edge);
 166 static void slpeel_update_phis_for_duplicate_loop
 167   (struct loop *, struct loop *, bool after);
 168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
 169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
 170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
 172 static void allocate_new_names (bitmap);
 173 static void rename_use_op (use_operand_p);
 174 static void rename_def_op (def_operand_p, tree);
 175 static void rename_variables_in_bb (basic_block);
 176 static void free_new_names (bitmap);
 177 static void rename_variables_in_loop (struct loop *);
 178 #ifdef ENABLE_CHECKING
 179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
 180 #endif
 181
 182
 183 /*************************************************************************
 184   Vectorization Utilities.
 185  *************************************************************************/
 186
 187 /* Main analysis functions.  */
 188 static loop_vec_info vect_analyze_loop (struct loop *);
 189 static loop_vec_info vect_analyze_loop_form (struct loop *);
 190 static bool vect_analyze_data_refs (loop_vec_info);
 191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
 192 static bool vect_analyze_scalar_cycles (loop_vec_info);
 193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
 194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
 195 static bool vect_compute_data_refs_alignment (loop_vec_info);
 196 static bool vect_analyze_operations (loop_vec_info);
 197
 198 /* Main code transformation functions.  */
 199 static void vect_transform_loop (loop_vec_info, struct loops *);
 200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
 201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
 202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
 203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
 204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
 205 static enum dr_alignment_support vect_supportable_dr_alignment
 206   (struct data_reference *);
 207 static void vect_align_data_ref (tree);
 208 static void vect_enhance_data_refs_alignment (loop_vec_info);
 209
 210 /* Utility functions for the analyses.  */
 211 static bool vect_is_simple_use (tree , struct loop *, tree *);
 212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
 213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
 214 static void vect_mark_relevant (varray_type, tree);
 215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
 216 static tree vect_get_loop_niters (struct loop *, tree *);
 217 static bool vect_compute_data_ref_alignment
 218   (struct data_reference *, loop_vec_info);
 219 static bool vect_analyze_data_ref_access (struct data_reference *);
 220 static bool vect_get_first_index (tree, tree *);
 221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
 222 static struct data_reference * vect_analyze_pointer_ref_access
 223   (tree, tree, bool);
 224 static bool vect_can_advance_ivs_p (struct loop *);
 225 static tree vect_get_base_and_bit_offset
 226   (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
 227 static struct data_reference * vect_analyze_pointer_ref_access
 228   (tree, tree, bool);
 229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
 230 static tree vect_compute_array_ref_alignment
 231   (struct data_reference *, loop_vec_info, tree, tree *);
 232 static tree vect_get_ptr_offset (tree, tree, tree *);
 233 static tree vect_get_symbl_and_dr
 234   (tree, tree, bool, loop_vec_info, struct data_reference **);
 235
 236 /* Utility functions for the code transformation.  */
 237 static tree vect_create_destination_var (tree, tree);
 238 static tree vect_create_data_ref_ptr
 239   (tree, block_stmt_iterator *, tree, tree *, bool);
 240 static tree vect_create_index_for_vector_ref
 241   (struct loop *, block_stmt_iterator *);
 242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
 243 static tree get_vectype_for_scalar_type (tree);
 244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 245 static tree vect_get_vec_def_for_operand (tree, tree);
 246 static tree vect_init_vector (tree, tree);
 247 static tree vect_build_symbol_bound (tree, int, struct loop *);
 248 static void vect_finish_stmt_generation
 249   (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
 250
 251 /* Utility function dealing with loop peeling (not peeling itself).  */
 252 static void vect_generate_tmps_on_preheader
 253   (loop_vec_info, tree *, tree *, tree *);
 254 static tree vect_build_loop_niters (loop_vec_info);
 255 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
 256 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
 257 static void vect_update_inits_of_dr
 258   (struct data_reference *, struct loop *, tree niters);
 259 static void vect_update_inits_of_drs (loop_vec_info, tree);
 260 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
 261 static void vect_do_peeling_for_loop_bound
 262   (loop_vec_info, tree *, struct loops *);
 263
 264 /* Utilities for creation and deletion of vec_info structs.  */
 265 loop_vec_info new_loop_vec_info (struct loop *loop);
 266 void destroy_loop_vec_info (loop_vec_info);
 267 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
 268
 269 static bool vect_debug_stats (struct loop *loop);
 270 static bool vect_debug_details (struct loop *loop);
 271
 272 \f
 273 /*************************************************************************
 274   Simple Loop Peeling Utilities
 275
 276   Utilities to support loop peeling for vectorization purposes.
 277  *************************************************************************/
 278
 279
 280 /* For each definition in DEFINITIONS this function allocates
 281    new ssa name.  */
 282
 283 static void
 284 allocate_new_names (bitmap definitions)
 285 {
 286   unsigned ver;
 287   bitmap_iterator bi;
 288
 289   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 290     {
 291       tree def = ssa_name (ver);
 292       tree *new_name_ptr = xmalloc (sizeof (tree));
 293
 294       bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
 295
 296       *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
 297       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
 298
 299       SSA_NAME_AUX (def) = new_name_ptr;
 300     }
 301 }
 302
 303
 304 /* Renames the use *OP_P.  */
 305
 306 static void
 307 rename_use_op (use_operand_p op_p)
 308 {
 309   tree *new_name_ptr;
 310
 311   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 312     return;
 313
 314   new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
 315
 316   /* Something defined outside of the loop.  */
 317   if (!new_name_ptr)
 318     return;
 319
 320   /* An ordinary ssa name defined in the loop.  */
 321
 322   SET_USE (op_p, *new_name_ptr);
 323 }
 324
 325
 326 /* Renames the def *OP_P in statement STMT.  */
 327
 328 static void
 329 rename_def_op (def_operand_p op_p, tree stmt)
 330 {
 331   tree *new_name_ptr;
 332
 333   if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
 334     return;
 335
 336   new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
 337
 338   /* Something defined outside of the loop.  */
 339   if (!new_name_ptr)
 340     return;
 341
 342   /* An ordinary ssa name defined in the loop.  */
 343
 344   SET_DEF (op_p, *new_name_ptr);
 345   SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
 346 }
 347
 348
 349 /* Renames the variables in basic block BB.  */
 350
 351 static void
 352 rename_variables_in_bb (basic_block bb)
 353 {
 354   tree phi;
 355   block_stmt_iterator bsi;
 356   tree stmt;
 357   stmt_ann_t ann;
 358   use_optype uses;
 359   vuse_optype vuses;
 360   def_optype defs;
 361   v_may_def_optype v_may_defs;
 362   v_must_def_optype v_must_defs;
 363   unsigned i;
 364   edge e;
 365   edge_iterator ei;
 366   struct loop *loop = bb->loop_father;
 367
 368   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 369     rename_def_op (PHI_RESULT_PTR (phi), phi);
 370
 371   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 372     {
 373       stmt = bsi_stmt (bsi);
 374       get_stmt_operands (stmt);
 375       ann = stmt_ann (stmt);
 376
 377       uses = USE_OPS (ann);
 378       for (i = 0; i < NUM_USES (uses); i++)
 379         rename_use_op (USE_OP_PTR (uses, i));
 380
 381       defs = DEF_OPS (ann);
 382       for (i = 0; i < NUM_DEFS (defs); i++)
 383         rename_def_op (DEF_OP_PTR (defs, i), stmt);
 384
 385       vuses = VUSE_OPS (ann);
 386       for (i = 0; i < NUM_VUSES (vuses); i++)
 387         rename_use_op (VUSE_OP_PTR (vuses, i));
 388
 389       v_may_defs = V_MAY_DEF_OPS (ann);
 390       for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
 391         {
 392           rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
 393           rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
 394         }
 395
 396       v_must_defs = V_MUST_DEF_OPS (ann);
 397       for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
 398         {
 399           rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
 400           rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
 401         }
 402     }
 403
 404   FOR_EACH_EDGE (e, ei, bb->succs)
 405     {
 406       if (!flow_bb_inside_loop_p (loop, e->dest))
 407         continue;
 408       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 409         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 410     }
 411 }
 412
 413
 414 /* Releases the structures holding the new ssa names.  */
 415
 416 static void
 417 free_new_names (bitmap definitions)
 418 {
 419   unsigned ver;
 420   bitmap_iterator bi;
 421
 422   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 423     {
 424       tree def = ssa_name (ver);
 425
 426       if (SSA_NAME_AUX (def))
 427         {
 428           free (SSA_NAME_AUX (def));
 429           SSA_NAME_AUX (def) = NULL;
 430         }
 431     }
 432 }
 433
 434
 435 /* Renames variables in new generated LOOP.  */
 436
 437 static void
 438 rename_variables_in_loop (struct loop *loop)
 439 {
 440   unsigned i;
 441   basic_block *bbs;
 442
 443   bbs = get_loop_body (loop);
 444
 445   for (i = 0; i < loop->num_nodes; i++)
 446     rename_variables_in_bb (bbs[i]);
 447
 448   free (bbs);
 449 }
 450
 451
 452 /* Update the PHI nodes of NEW_LOOP.
 453
 454    NEW_LOOP is a duplicate of ORIG_LOOP.
 455    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 456    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 457    executes before it.  */
 458
 459 static void
 460 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 461                                        struct loop *new_loop, bool after)
 462 {
 463   tree *new_name_ptr, new_ssa_name;
 464   tree phi_new, phi_orig;
 465   tree def;
 466   edge orig_loop_latch = loop_latch_edge (orig_loop);
 467   edge orig_entry_e = loop_preheader_edge (orig_loop);
 468   edge new_loop_exit_e = new_loop->exit_edges[0];
 469   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 470   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 471
 472   /*
 473      step 1. For each loop-header-phi:
 474              Add the first phi argument for the phi in NEW_LOOP
 475             (the one associated with the entry of NEW_LOOP)
 476
 477      step 2. For each loop-header-phi:
 478              Add the second phi argument for the phi in NEW_LOOP
 479             (the one associated with the latch of NEW_LOOP)
 480
 481      step 3. Update the phis in the successor block of NEW_LOOP.
 482
 483         case 1: NEW_LOOP was placed before ORIG_LOOP:
 484                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 485                 Updating the phis in the successor block can therefore be done
 486                 along with the scanning of the loop header phis, because the
 487                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 488                 phi nodes, organized in the same order.
 489
 490         case 2: NEW_LOOP was placed after ORIG_LOOP:
 491                 The successor block of NEW_LOOP is the original exit block of
 492                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 493                 We postpone updating these phis to a later stage (when
 494                 loop guards are added).
 495    */
 496
 497
 498   /* Scan the phis in the headers of the old and new loops
 499      (they are organized in exactly the same order).  */
 500
 501   for (phi_new = phi_nodes (new_loop->header),
 502        phi_orig = phi_nodes (orig_loop->header);
 503        phi_new && phi_orig;
 504        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 505     {
 506       /* step 1.  */
 507       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 508       add_phi_arg (&phi_new, def, new_loop_entry_e);
 509
 510       /* step 2.  */
 511       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 512       if (TREE_CODE (def) != SSA_NAME)
 513         continue;
 514
 515       new_name_ptr = SSA_NAME_AUX (def);
 516       if (!new_name_ptr)
 517         /* Something defined outside of the loop.  */
 518         continue;
 519
 520       /* An ordinary ssa name defined in the loop.  */
 521       new_ssa_name = *new_name_ptr;
 522       add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge (new_loop));
 523
 524       /* step 3 (case 1).  */
 525       if (!after)
 526         {
 527           gcc_assert (new_loop_exit_e == orig_entry_e);
 528           SET_PHI_ARG_DEF (phi_orig,
 529                            phi_arg_from_edge (phi_orig, new_loop_exit_e),
 530                            new_ssa_name);
 531         }
 532     }
 533 }
 534
 535
 536 /* Update PHI nodes for a guard of the LOOP.
 537
 538    Input:
 539    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 540         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 541         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 542         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 543         We denote this bb NEW_MERGE_BB because it had a single predecessor (the
 544         LOOP header) before the guard code was added, and now it became a merge
 545         point of two paths - the path that ends with the LOOP exit-edge, and
 546         the path that ends with GUARD_EDGE.
 547
 548         This function creates and updates the relevant phi nodes to account for
 549         the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
 550         1. Create phi nodes at NEW_MERGE_BB.
 551         2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 552            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 553            was added:
 554
 555         ===> The CFG before the guard-code was added:
 556         LOOP_header_bb:
 557           if (exit_loop) goto update_bb : LOOP_header_bb
 558         update_bb:
 559
 560         ==> The CFG after the guard-code was added:
 561         guard_bb:
 562           if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
 563         LOOP_header_bb:
 564           if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
 565         new_merge_bb:
 566           goto update_bb
 567         update_bb:
 568
 569    - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
 570         UPDATE_BB are loop entry phis, like the phis in the LOOP header,
 571         organized in the same order.
 572         If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
 573         loop exit phis.
 574
 575    - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
 576         "original" loop).  FALSE if LOOP is an original loop (not a newly
 577         created copy).  The SSA_NAME_AUX fields of the defs in the original
 578         loop are the corresponding new ssa-names used in the new duplicated
 579         loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
 580         nodes in UPDATE_BB takes the original ssa-name, and which takes the
 581         new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
 582         the LOOP-exit-edge takes the new-name, and the phi-arg that is
 583         associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
 584         FALSE, it's the other way around.
 585   */
 586
 587 static void
 588 slpeel_update_phi_nodes_for_guard (edge guard_edge,
 589                                    struct loop *loop,
 590                                    bool entry_phis,
 591                                    bool is_new_loop)
 592 {
 593   tree orig_phi, new_phi, update_phi;
 594   tree guard_arg, loop_arg;
 595   basic_block new_merge_bb = guard_edge->dest;
 596   edge e = EDGE_SUCC (new_merge_bb, 0);
 597   basic_block update_bb = e->dest;
 598   basic_block orig_bb = (entry_phis ? loop->header : update_bb);
 599
 600   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 601        orig_phi && update_phi;
 602        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 603     {
 604       /* 1. Generate new phi node in NEW_MERGE_BB:  */
 605       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 606                                  new_merge_bb);
 607
 608       /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 609             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 610       if (entry_phis)
 611         {
 612           loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
 613                                             EDGE_SUCC (loop->latch, 0));
 614           guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
 615         }
 616       else /* exit phis */
 617         {
 618           tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 619           tree *new_name_ptr = SSA_NAME_AUX (orig_def);
 620           tree new_name;
 621
 622           if (new_name_ptr)
 623             new_name = *new_name_ptr;
 624           else
 625             /* Something defined outside of the loop  */
 626             new_name = orig_def;
 627
 628           if (is_new_loop)
 629             {
 630               guard_arg = orig_def;
 631               loop_arg = new_name;
 632             }
 633           else
 634             {
 635               guard_arg = new_name;
 636               loop_arg = orig_def;
 637             }
 638         }
 639       add_phi_arg (&new_phi, loop_arg, loop->exit_edges[0]);
 640       add_phi_arg (&new_phi, guard_arg, guard_edge);
 641
 642       /* 3. Update phi in successor block.  */
 643       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 644                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 645       SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
 646                        PHI_RESULT (new_phi));
 647     }
 648
 649   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 650 }
 651
 652
 653 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 654    that starts at zero, increases by one and its limit is NITERS.
 655
 656    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 657
 658 static void
 659 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 660 {
 661   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 662   tree orig_cond;
 663   edge exit_edge = loop->exit_edges[0];
 664   block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
 665   tree begin_label = tree_block_label (loop->latch);
 666   tree exit_label = tree_block_label (loop->single_exit->dest);
 667   tree init = build_int_cst (TREE_TYPE (niters), 0);
 668   tree step = build_int_cst (TREE_TYPE (niters), 1);
 669
 670   orig_cond = get_loop_exit_condition (loop);
 671   gcc_assert (orig_cond);
 672   create_iv (init, step, NULL_TREE, loop,
 673              &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
 674
 675   /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
 676      back to the exit condition statement.  */
 677   bsi_next (&loop_exit_bsi);
 678   gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
 679
 680   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 681     cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 682   else /* 'then' edge loops back.  */
 683     cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 684
 685   begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 686   exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 687   cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
 688                      begin_label, exit_label);
 689   bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
 690
 691   /* Remove old loop exit test:  */
 692   bsi_remove (&loop_exit_bsi);
 693
 694   if (vect_debug_stats (loop) || vect_debug_details (loop))
 695     print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 696
 697   loop->nb_iterations = niters;
 698 }
 699
 700
 701 /* Given LOOP this function generates a new copy of it and puts it
 702    on E which is either the entry or exit of LOOP.  */
 703
 704 static struct loop *
 705 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 706                                         edge e)
 707 {
 708   struct loop *new_loop;
 709   basic_block *new_bbs, *bbs;
 710   bool at_exit;
 711   bool was_imm_dom;
 712   basic_block exit_dest;
 713   tree phi, phi_arg;
 714
 715   at_exit = (e == loop->exit_edges[0]);
 716   if (!at_exit && e != loop_preheader_edge (loop))
 717     {
 718       if (dump_file && (dump_flags & TDF_DETAILS))
 719           fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
 720       return NULL;
 721     }
 722
 723   bbs = get_loop_body (loop);
 724
 725   /* Check whether duplication is possible.  */
 726   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 727     {
 728       if (vect_debug_stats (loop) || vect_debug_details (loop))
 729           fprintf (dump_file, "Cannot copy basic blocks.\n");
 730       free (bbs);
 731       return NULL;
 732     }
 733
 734   /* Generate new loop structure.  */
 735   new_loop = duplicate_loop (loops, loop, loop->outer);
 736   if (!new_loop)
 737     {
 738       if (vect_debug_stats (loop) || vect_debug_details (loop))
 739           fprintf (dump_file, "duplicate_loop returns NULL.\n");
 740       free (bbs);
 741       return NULL;
 742     }
 743
 744   exit_dest = loop->exit_edges[0]->dest;
 745   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 746                                           exit_dest) == loop->header ?
 747                  true : false);
 748
 749   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 750
 751   copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
 752
 753   /* Duplicating phi args at exit bbs as coming
 754      also from exit of duplicated loop.  */
 755   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 756     {
 757       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
 758       if (phi_arg)
 759         {
 760           edge new_loop_exit_edge;
 761
 762           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 763             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 764           else
 765             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 766
 767           add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
 768         }
 769     }
 770
 771   if (at_exit) /* Add the loop copy at exit.  */
 772     {
 773       redirect_edge_and_branch_force (e, new_loop->header);
 774       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 775       if (was_imm_dom)
 776         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 777     }
 778   else /* Add the copy at entry.  */
 779     {
 780       edge new_exit_e;
 781       edge entry_e = loop_preheader_edge (loop);
 782       basic_block preheader = entry_e->src;
 783
 784       if (!flow_bb_inside_loop_p (new_loop,
 785                                   EDGE_SUCC (new_loop->header, 0)->dest))
 786         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 787       else
 788         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 789
 790       redirect_edge_and_branch_force (new_exit_e, loop->header);
 791       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 792                                new_exit_e->src);
 793
 794       /* We have to add phi args to the loop->header here as coming
 795          from new_exit_e edge.  */
 796       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 797         {
 798           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 799           if (phi_arg)
 800             add_phi_arg (&phi, phi_arg, new_exit_e);
 801         }
 802
 803       redirect_edge_and_branch_force (entry_e, new_loop->header);
 804       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 805     }
 806
 807   flow_loop_scan (new_loop, LOOP_ALL);
 808   flow_loop_scan (loop, LOOP_ALL);
 809   free (new_bbs);
 810   free (bbs);
 811
 812   return new_loop;
 813 }
 814
 815
 816 /* Given the condition statement COND, put it as the last statement
 817    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 818    Assumes that this is the single exit of the guarded loop.
 819    Returns the skip edge.  */
 820
 821 static edge
 822 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 823                         basic_block dom_bb)
 824 {
 825   block_stmt_iterator bsi;
 826   edge new_e, enter_e;
 827   tree cond_stmt, then_label, else_label;
 828
 829   enter_e = EDGE_SUCC (guard_bb, 0);
 830   enter_e->flags &= ~EDGE_FALLTHRU;
 831   enter_e->flags |= EDGE_FALSE_VALUE;
 832   bsi = bsi_last (guard_bb);
 833
 834   then_label = build1 (GOTO_EXPR, void_type_node,
 835                        tree_block_label (exit_bb));
 836   else_label = build1 (GOTO_EXPR, void_type_node,
 837                        tree_block_label (enter_e->dest));
 838   cond_stmt = build (COND_EXPR, void_type_node, cond,
 839                      then_label, else_label);
 840   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 841   /* Add new edge to connect entry block to the second loop.  */
 842   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 843   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 844   return new_e;
 845 }
 846
 847
 848 /* This function verifies that the following restrictions apply to LOOP:
 849    (1) it is innermost
 850    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 851    (3) it is single entry, single exit
 852    (4) its exit condition is the last stmt in the header
 853    (5) E is the entry/exit edge of LOOP.
 854  */
 855
 856 static bool
 857 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 858 {
 859   edge exit_e = loop->exit_edges [0];
 860   edge entry_e = loop_preheader_edge (loop);
 861   tree orig_cond = get_loop_exit_condition (loop);
 862   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 863
 864   if (any_marked_for_rewrite_p ())
 865     return false;
 866
 867   if (loop->inner
 868       /* All loops have an outer scope; the only case loop->outer is NULL is for
 869          the function itself.  */
 870       || !loop->outer
 871       || loop->num_nodes != 2
 872       || !empty_block_p (loop->latch)
 873       || loop->num_exits != 1
 874       || loop->num_entries != 1
 875       /* Verify that new loop exit condition can be trivially modified.  */
 876       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 877       || (e != exit_e && e != entry_e))
 878     return false;
 879
 880   return true;
 881 }
 882
 883 #ifdef ENABLE_CHECKING
 884 static void
 885 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 886                                  struct loop *second_loop)
 887 {
 888   basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
 889   basic_block loop2_entry_bb = second_loop->pre_header;
 890   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 891
 892   /* A guard that controls whether the second_loop is to be executed or skipped
 893      is placed in first_loop->exit.  first_loopt->exit therefore has two
 894      successors - one is the preheader of second_loop, and the other is a bb
 895      after second_loop.
 896    */
 897   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 898
 899
 900   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 901         of second_loop.  */
 902
 903   /* The preheader of new_loop is expected to have two predessors:
 904      first_loop->exit and the block that precedes first_loop.  */
 905
 906   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 907               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 908                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 909                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 910                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 911
 912   /* Verify that the other successor of first_loopt->exit is after the
 913      second_loop.  */
 914   /* TODO */
 915 }
 916 #endif
 917
 918 /* Function slpeel_tree_peel_loop_to_edge.
 919
 920    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 921    that is placed on the entry (exit) edge E of LOOP. After this transformation
 922    we have two loops one after the other - first-loop iterates FIRST_NITERS
 923    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 924
 925    Input:
 926    - LOOP: the loop to be peeled.
 927    - E: the exit or entry edge of LOOP.
 928         If it is the entry edge, we peel the first iterations of LOOP. In this
 929         case first-loop is LOOP, and second-loop is the newly created loop.
 930         If it is the exit edge, we peel the last iterations of LOOP. In this
 931         case, first-loop is the newly created loop, and second-loop is LOOP.
 932    - NITERS: the number of iterations that LOOP iterates.
 933    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 934    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 935         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 936         is false, the caller of this function may want to take care of this
 937         (this can be useful if we don't want new stmts added to first-loop).
 938
 939    Output:
 940    The function returns a pointer to the new loop-copy, or NULL if it failed
 941    to perform the transformation.
 942
 943    The function generates two if-then-else guards: one before the first loop,
 944    and the other before the second loop:
 945    The first guard is:
 946      if (FIRST_NITERS == 0) then skip the first loop,
 947      and go directly to the second loop.
 948    The second guard is:
 949      if (FIRST_NITERS == NITERS) then skip the second loop.
 950
 951    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
 952    FORNOW the resulting code will not be in loop-closed-ssa form.
 953 */
 954
 955 struct loop*
 956 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
 957                                edge e, tree first_niters,
 958                                tree niters, bool update_first_loop_count)
 959 {
 960   struct loop *new_loop = NULL, *first_loop, *second_loop;
 961   edge skip_e;
 962   tree pre_condition;
 963   bitmap definitions;
 964   basic_block bb_before_second_loop, bb_after_second_loop;
 965   basic_block bb_before_first_loop;
 966   basic_block bb_between_loops;
 967   edge exit_e = loop->exit_edges [0];
 968
 969   if (!slpeel_can_duplicate_loop_p (loop, e))
 970     return NULL;
 971
 972   /* We have to initialize cfg_hooks. Then, when calling
 973    cfg_hooks->split_edge, the function tree_split_edge
 974    is actually called and, when calling cfg_hooks->duplicate_block,
 975    the function tree_duplicate_bb is called.  */
 976   tree_register_cfg_hooks ();
 977
 978
 979   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
 980         Resulting CFG would be:
 981
 982         first_loop:
 983         do {
 984         } while ...
 985
 986         second_loop:
 987         do {
 988         } while ...
 989
 990         orig_exit_bb:
 991    */
 992
 993   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
 994     {
 995       if (vect_debug_stats (loop) || vect_debug_details (loop))
 996         fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
 997       return NULL;
 998     }
 999
1000   if (e == exit_e)
1001     {
1002       /* NEW_LOOP was placed after LOOP.  */
1003       first_loop = loop;
1004       second_loop = new_loop;
1005     }
1006   else
1007     {
1008       /* NEW_LOOP was placed before LOOP.  */
1009       first_loop = new_loop;
1010       second_loop = loop;
1011     }
1012
1013   definitions = marked_ssa_names ();
1014   allocate_new_names (definitions);
1015   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1016   rename_variables_in_loop (new_loop);
1017
1018
1019   /* 2. Add the guard that controls whether the first loop is executed.
1020         Resulting CFG would be:
1021
1022         bb_before_first_loop:
1023         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1024                                GOTO first-loop
1025
1026         first_loop:
1027         do {
1028         } while ...
1029
1030         bb_before_second_loop:
1031
1032         second_loop:
1033         do {
1034         } while ...
1035
1036         orig_exit_bb:
1037    */
1038
1039   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1040   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1041   bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1042   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1043   flow_loop_scan (first_loop, LOOP_ALL);
1044   flow_loop_scan (second_loop, LOOP_ALL);
1045
1046   pre_condition =
1047         build (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1048   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1049                                   bb_before_second_loop, bb_before_first_loop);
1050   slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1051                                      first_loop == new_loop);
1052
1053
1054   /* 3. Add the guard that controls whether the second loop is executed.
1055         Resulting CFG would be:
1056
1057         bb_before_first_loop:
1058         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1059                                GOTO first-loop
1060
1061         first_loop:
1062         do {
1063         } while ...
1064
1065         bb_between_loops:
1066         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1067                                     GOTO bb_before_second_loop
1068
1069         bb_before_second_loop:
1070
1071         second_loop:
1072         do {
1073         } while ...
1074
1075         bb_after_second_loop:
1076
1077         orig_exit_bb:
1078    */
1079
1080   bb_between_loops = split_edge (first_loop->exit_edges[0]);
1081   add_bb_to_loop (bb_between_loops, first_loop->outer);
1082   bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1083   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1084   flow_loop_scan (first_loop, LOOP_ALL);
1085   flow_loop_scan (second_loop, LOOP_ALL);
1086
1087   pre_condition = build (EQ_EXPR, boolean_type_node, first_niters, niters);
1088   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1089                                   bb_after_second_loop, bb_before_first_loop);
1090   slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1091                                      second_loop == new_loop);
1092
1093   /* Flow loop scan does not update loop->single_exit field.  */
1094   first_loop->single_exit = first_loop->exit_edges[0];
1095   second_loop->single_exit = second_loop->exit_edges[0];
1096
1097   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1098    */
1099   if (update_first_loop_count)
1100     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1101
1102   free_new_names (definitions);
1103   BITMAP_XFREE (definitions);
1104   unmark_all_for_rewrite ();
1105
1106   return new_loop;
1107 }
1108
1109 \f
1110 /* Here the proper Vectorizer starts.  */
1111
1112 /*************************************************************************
1113   Vectorization Utilities.
1114  *************************************************************************/
1115
1116 /* Function new_stmt_vec_info.
1117
1118    Create and initialize a new stmt_vec_info struct for STMT.  */
1119
1120 stmt_vec_info
1121 new_stmt_vec_info (tree stmt, struct loop *loop)
1122 {
1123   stmt_vec_info res;
1124   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1125
1126   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1127   STMT_VINFO_STMT (res) = stmt;
1128   STMT_VINFO_LOOP (res) = loop;
1129   STMT_VINFO_RELEVANT_P (res) = 0;
1130   STMT_VINFO_VECTYPE (res) = NULL;
1131   STMT_VINFO_VEC_STMT (res) = NULL;
1132   STMT_VINFO_DATA_REF (res) = NULL;
1133   STMT_VINFO_MEMTAG (res) = NULL;
1134   STMT_VINFO_VECT_DR_BASE (res) = NULL;
1135
1136   return res;
1137 }
1138
1139
1140 /* Function new_loop_vec_info.
1141
1142    Create and initialize a new loop_vec_info struct for LOOP, as well as
1143    stmt_vec_info structs for all the stmts in LOOP.  */
1144
1145 loop_vec_info
1146 new_loop_vec_info (struct loop *loop)
1147 {
1148   loop_vec_info res;
1149   basic_block *bbs;
1150   block_stmt_iterator si;
1151   unsigned int i;
1152
1153   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1154
1155   bbs = get_loop_body (loop);
1156
1157   /* Create stmt_info for all stmts in the loop.  */
1158   for (i = 0; i < loop->num_nodes; i++)
1159     {
1160       basic_block bb = bbs[i];
1161       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1162         {
1163           tree stmt = bsi_stmt (si);
1164           stmt_ann_t ann;
1165
1166           get_stmt_operands (stmt);
1167           ann = stmt_ann (stmt);
1168           set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1169         }
1170     }
1171
1172   LOOP_VINFO_LOOP (res) = loop;
1173   LOOP_VINFO_BBS (res) = bbs;
1174   LOOP_VINFO_EXIT_COND (res) = NULL;
1175   LOOP_VINFO_NITERS (res) = NULL;
1176   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1177   LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1178   LOOP_VINFO_VECT_FACTOR (res) = 0;
1179   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1180                            "loop_write_datarefs");
1181   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1182                            "loop_read_datarefs");
1183   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1184
1185   return res;
1186 }
1187
1188
1189 /* Function destroy_loop_vec_info.
1190
1191    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1192    stmts in the loop.  */
1193
1194 void
1195 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1196 {
1197   struct loop *loop;
1198   basic_block *bbs;
1199   int nbbs;
1200   block_stmt_iterator si;
1201   int j;
1202
1203   if (!loop_vinfo)
1204     return;
1205
1206   loop = LOOP_VINFO_LOOP (loop_vinfo);
1207
1208   bbs = LOOP_VINFO_BBS (loop_vinfo);
1209   nbbs = loop->num_nodes;
1210
1211   for (j = 0; j < nbbs; j++)
1212     {
1213       basic_block bb = bbs[j];
1214       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1215         {
1216           tree stmt = bsi_stmt (si);
1217           stmt_ann_t ann = stmt_ann (stmt);
1218           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1219           free (stmt_info);
1220           set_stmt_info (ann, NULL);
1221         }
1222     }
1223
1224   free (LOOP_VINFO_BBS (loop_vinfo));
1225   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1226   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1227
1228   free (loop_vinfo);
1229 }
1230
1231
1232 /* Function debug_loop_stats.
1233
1234    For vectorization statistics dumps.  */
1235
1236 static bool
1237 vect_debug_stats (struct loop *loop)
1238 {
1239   basic_block bb;
1240   block_stmt_iterator si;
1241   tree node = NULL_TREE;
1242
1243   if (!dump_file || !(dump_flags & TDF_STATS))
1244     return false;
1245
1246   if (!loop)
1247     {
1248       fprintf (dump_file, "\n");
1249       return true;
1250     }
1251
1252   if (!loop->header)
1253     return false;
1254
1255   bb = loop->header;
1256
1257   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1258     {
1259       node = bsi_stmt (si);
1260       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1261         break;
1262     }
1263
1264   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1265       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1266     {
1267       fprintf (dump_file, "\nloop at %s:%d: ",
1268         EXPR_FILENAME (node), EXPR_LINENO (node));
1269       return true;
1270     }
1271
1272   return false;
1273 }
1274
1275
1276 /* Function debug_loop_details.
1277
1278    For vectorization debug dumps.  */
1279
1280 static bool
1281 vect_debug_details (struct loop *loop)
1282 {
1283    basic_block bb;
1284    block_stmt_iterator si;
1285    tree node = NULL_TREE;
1286
1287   if (!dump_file || !(dump_flags & TDF_DETAILS))
1288     return false;
1289
1290   if (!loop)
1291     {
1292       fprintf (dump_file, "\n");
1293       return true;
1294     }
1295
1296   if (!loop->header)
1297     return false;
1298
1299   bb = loop->header;
1300
1301   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1302     {
1303       node = bsi_stmt (si);
1304       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1305         break;
1306     }
1307
1308   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1309       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1310     {
1311       fprintf (dump_file, "\nloop at %s:%d: ",
1312                EXPR_FILENAME (node), EXPR_LINENO (node));
1313       return true;
1314     }
1315
1316   return false;
1317 }
1318
1319
1320 /* Function vect_get_ptr_offset
1321
1322    Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */
1323
1324 static tree
1325 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1326                      tree vectype ATTRIBUTE_UNUSED,
1327                      tree *offset ATTRIBUTE_UNUSED)
1328 {
1329   /* TODO: Use alignment information.  */
1330   return NULL_TREE;
1331 }
1332
1333
1334 /* Function vect_get_base_and_bit_offset
1335
1336    Return the BASE of the data reference EXPR.
1337    If VECTYPE is given, also compute the OFFSET from BASE in bits.
1338    E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1339    bits of 'a.b[i] + 4B' from a.
1340
1341    Input:
1342    EXPR - the memory reference that is being analyzed
1343    DR - the data_reference struct of the _original_ memory reference
1344         (Note: DR_REF (DR) is not necessarily EXPR)
1345    VECTYPE - the type that defines the alignment (i.e, we compute
1346              alignment relative to TYPE_ALIGN(VECTYPE))
1347
1348    Output:
1349    BASE (returned value) - the base of the data reference EXPR.
1350                            E.g, if EXPR is a.b[k].c[i][j] the returned
1351                            base is a.
1352    OFFSET - offset of EXPR from BASE in bits
1353    BASE_ALIGNED_P - indicates if BASE is aligned
1354
1355    If something unexpected is encountered (an unsupported form of data-ref),
1356    or if VECTYPE is given but OFFSET cannot be determined:
1357    then NULL_TREE is returned.  */
1358
1359 static tree
1360 vect_get_base_and_bit_offset (struct data_reference *dr,
1361                               tree expr,
1362                               tree vectype,
1363                               loop_vec_info loop_vinfo,
1364                               tree *offset,
1365                               bool *base_aligned_p)
1366 {
1367   tree this_offset = size_zero_node;
1368   tree base = NULL_TREE;
1369   tree next_ref;
1370   tree oprnd0, oprnd1;
1371   struct data_reference *array_dr;
1372   enum tree_code code = TREE_CODE (expr);
1373
1374   *base_aligned_p = false;
1375
1376   switch (code)
1377     {
1378     /* These cases end the recursion:  */
1379     case VAR_DECL:
1380       *offset = size_zero_node;
1381       if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1382         *base_aligned_p = true;
1383       return expr;
1384
1385     case SSA_NAME:
1386       if (!vectype)
1387         return expr;
1388
1389       if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1390         return NULL_TREE;
1391
1392       if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1393         {
1394           base = vect_get_ptr_offset (expr, vectype, offset);
1395           if (base)
1396             *base_aligned_p = true;
1397         }
1398       else
1399         {
1400           *base_aligned_p = true;
1401           *offset = size_zero_node;
1402           base = expr;
1403         }
1404       return base;
1405
1406     case INTEGER_CST:
1407       *offset = int_const_binop (MULT_EXPR, expr,
1408                                  build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1409       return expr;
1410
1411     /* These cases continue the recursion:  */
1412     case COMPONENT_REF:
1413       oprnd0 = TREE_OPERAND (expr, 0);
1414       oprnd1 = TREE_OPERAND (expr, 1);
1415
1416       this_offset = bit_position (oprnd1);
1417       if (vectype && !host_integerp (this_offset, 1))
1418         return NULL_TREE;
1419       next_ref = oprnd0;
1420       break;
1421
1422     case ADDR_EXPR:
1423       oprnd0 = TREE_OPERAND (expr, 0);
1424       next_ref = oprnd0;
1425       break;
1426
1427     case INDIRECT_REF:
1428       oprnd0 = TREE_OPERAND (expr, 0);
1429       next_ref = oprnd0;
1430       break;
1431
1432     case ARRAY_REF:
1433       if (DR_REF (dr) != expr)
1434         /* Build array data_reference struct if the existing DR_REF
1435            doesn't match EXPR. This happens, for example, when the
1436            EXPR is *T and T is initialized to &arr[indx]. The DR struct
1437            contains information on the access of T, not of arr. In order
1438            to continue  the analysis, we create a new DR struct that
1439            describes the access of arr.
1440         */
1441         array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1442       else
1443         array_dr = dr;
1444
1445       next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1446                                                    vectype, &this_offset);
1447       if (!next_ref)
1448         return NULL_TREE;
1449
1450       if (vectype &&
1451           TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1452         {
1453           *offset = this_offset;
1454           *base_aligned_p = true;
1455           return next_ref;
1456         }
1457       break;
1458
1459     case PLUS_EXPR:
1460     case MINUS_EXPR:
1461       /* In case we have a PLUS_EXPR of the form
1462          (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1463          This is verified in  vect_get_symbl_and_dr.  */
1464       oprnd0 = TREE_OPERAND (expr, 0);
1465       oprnd1 = TREE_OPERAND (expr, 1);
1466
1467       base = vect_get_base_and_bit_offset
1468         (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1469       if (vectype && !base)
1470         return NULL_TREE;
1471
1472       next_ref = oprnd0;
1473       break;
1474
1475     default:
1476       return NULL_TREE;
1477     }
1478
1479   base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1480                                        loop_vinfo, offset, base_aligned_p);
1481
1482   if (vectype && base)
1483     {
1484       *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1485       if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1486         return NULL_TREE;
1487
1488       if (vect_debug_details (NULL))
1489         {
1490           print_generic_expr (dump_file, expr, TDF_SLIM);
1491           fprintf (dump_file, " --> total offset for ref: ");
1492           print_generic_expr (dump_file, *offset, TDF_SLIM);
1493         }
1494     }
1495   return base;
1496 }
1497
1498
1499 /* Function vect_force_dr_alignment_p.
1500
1501    Returns whether the alignment of a DECL can be forced to be aligned
1502    on ALIGNMENT bit boundary.  */
1503
1504 static bool
1505 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1506 {
1507   if (TREE_CODE (decl) != VAR_DECL)
1508     return false;
1509
1510   if (DECL_EXTERNAL (decl))
1511     return false;
1512
1513   if (TREE_STATIC (decl))
1514     return (alignment <= MAX_OFILE_ALIGNMENT);
1515   else
1516     /* This is not 100% correct.  The absolute correct stack alignment
1517        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1518        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1519        However, until someone implements forced stack alignment, SSE
1520        isn't really usable without this.  */
1521     return (alignment <= PREFERRED_STACK_BOUNDARY);
1522 }
1523
1524
1525 /* Function vect_get_new_vect_var.
1526
1527    Returns a name for a new variable. The current naming scheme appends the
1528    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1529    the name of vectorizer generated variables, and appends that to NAME if
1530    provided.  */
1531
1532 static tree
1533 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1534 {
1535   const char *prefix;
1536   int prefix_len;
1537   tree new_vect_var;
1538
1539   if (var_kind == vect_simple_var)
1540     prefix = "vect_";
1541   else
1542     prefix = "vect_p";
1543
1544   prefix_len = strlen (prefix);
1545
1546   if (name)
1547     new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1548   else
1549     new_vect_var = create_tmp_var (type, prefix);
1550
1551   return new_vect_var;
1552 }
1553
1554
1555 /* Function vect_create_index_for_vector_ref.
1556
1557    Create (and return) an index variable, along with it's update chain in the
1558    loop. This variable will be used to access a memory location in a vector
1559    operation.
1560
1561    Input:
1562    LOOP: The loop being vectorized.
1563    BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1564         function can be added here, or in the loop pre-header.
1565
1566    Output:
1567    Return an index that will be used to index a vector array.  It is expected
1568    that a pointer to the first vector will be used as the base address for the
1569    indexed reference.
1570
1571    FORNOW: we are not trying to be efficient, just creating a new index each
1572    time from scratch.  At this time all vector references could use the same
1573    index.
1574
1575    TODO: create only one index to be used by all vector references.  Record
1576    the index in the LOOP_VINFO the first time this procedure is called and
1577    return it on subsequent calls.  The increment of this index must be placed
1578    just before the conditional expression that ends the single block loop.  */
1579
1580 static tree
1581 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1582 {
1583   tree init, step;
1584   tree indx_before_incr, indx_after_incr;
1585
1586   /* It is assumed that the base pointer used for vectorized access contains
1587      the address of the first vector.  Therefore the index used for vectorized
1588      access must be initialized to zero and incremented by 1.  */
1589
1590   init = integer_zero_node;
1591   step = integer_one_node;
1592
1593   /* Assuming that bsi_insert is used with BSI_NEW_STMT  */
1594   create_iv (init, step, NULL_TREE, loop, bsi, false,
1595         &indx_before_incr, &indx_after_incr);
1596
1597   return indx_before_incr;
1598 }
1599
1600
1601 /* Function vect_create_addr_base_for_vector_ref.
1602
1603    Create an expression that computes the address of the first memory location
1604    that will be accessed for a data reference.
1605
1606    Input:
1607    STMT: The statement containing the data reference.
1608    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1609    OFFSET: Optional. If supplied, it is be added to the initial address.
1610
1611    Output:
1612    1. Return an SSA_NAME whose value is the address of the memory location of
1613       the first vector of the data reference.
1614    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1615       these statement(s) which define the returned SSA_NAME.
1616
1617    FORNOW: We are only handling array accesses with step 1.  */
1618
1619 static tree
1620 vect_create_addr_base_for_vector_ref (tree stmt,
1621                                       tree *new_stmt_list,
1622                                       tree offset)
1623 {
1624   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1625   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1626   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1627   tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1628   tree base_name = unshare_expr (DR_BASE_NAME (dr));
1629   tree ref = DR_REF (dr);
1630   tree data_ref_base_type = TREE_TYPE (data_ref_base);
1631   tree scalar_type = TREE_TYPE (ref);
1632   tree scalar_ptr_type = build_pointer_type (scalar_type);
1633   tree access_fn;
1634   tree init_val, step, init_oval;
1635   bool ok;
1636   bool is_ptr_ref, is_array_ref, is_addr_expr;
1637   tree array_base;
1638   tree vec_stmt;
1639   tree new_temp;
1640   tree array_ref;
1641   tree addr_base, addr_expr;
1642   tree dest, new_stmt;
1643
1644   /* Only the access function of the last index is relevant (i_n in
1645      a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
1646   access_fn = DR_ACCESS_FN (dr, 0);
1647   ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1648                                     true);
1649   if (!ok)
1650     init_oval = integer_zero_node;
1651
1652   is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1653                && TREE_CODE (data_ref_base) == SSA_NAME;
1654   is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1655   is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1656                  || TREE_CODE (data_ref_base) == PLUS_EXPR
1657                  || TREE_CODE (data_ref_base) == MINUS_EXPR;
1658   gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1659
1660   /** Create: &(base[init_val])
1661
1662       if data_ref_base is an ARRAY_TYPE:
1663          base = data_ref_base
1664
1665       if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1666          base = *((scalar_array *) data_ref_base)
1667    **/
1668
1669   if (is_array_ref)
1670     array_base = data_ref_base;
1671   else /* is_ptr_ref  or is_addr_expr */
1672     {
1673       /* array_ptr = (scalar_array_ptr_type *) data_ref_base;  */
1674       tree scalar_array_type = build_array_type (scalar_type, 0);
1675       tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1676       tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1677       add_referenced_tmp_var (array_ptr);
1678
1679       dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1680       add_referenced_tmp_var (dest);
1681       data_ref_base =
1682         force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1683       append_to_statement_list_force (new_stmt, new_stmt_list);
1684
1685       vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1686       vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1687       new_temp = make_ssa_name (array_ptr, vec_stmt);
1688       TREE_OPERAND (vec_stmt, 0) = new_temp;
1689       append_to_statement_list_force (vec_stmt, new_stmt_list);
1690
1691       /* (*array_ptr)  */
1692       array_base = build_fold_indirect_ref (new_temp);
1693     }
1694
1695   dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1696   add_referenced_tmp_var (dest);
1697   init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1698   append_to_statement_list_force (new_stmt, new_stmt_list);
1699
1700   if (offset)
1701     {
1702       tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1703       add_referenced_tmp_var (tmp);
1704       vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1705       vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1706       init_val = make_ssa_name (tmp, vec_stmt);
1707       TREE_OPERAND (vec_stmt, 0) = init_val;
1708       append_to_statement_list_force (vec_stmt, new_stmt_list);
1709     }
1710
1711   array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1712                       NULL_TREE, NULL_TREE);
1713   addr_base = build_fold_addr_expr (array_ref);
1714
1715   /* addr_expr = addr_base */
1716   addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1717                                      get_name (base_name));
1718   add_referenced_tmp_var (addr_expr);
1719   vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1720   new_temp = make_ssa_name (addr_expr, vec_stmt);
1721   TREE_OPERAND (vec_stmt, 0) = new_temp;
1722   append_to_statement_list_force (vec_stmt, new_stmt_list);
1723
1724   return new_temp;
1725 }
1726
1727
1728 /* Function get_vectype_for_scalar_type.
1729
1730    Returns the vector type corresponding to SCALAR_TYPE as supported
1731    by the target.  */
1732
1733 static tree
1734 get_vectype_for_scalar_type (tree scalar_type)
1735 {
1736   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1737   int nbytes = GET_MODE_SIZE (inner_mode);
1738   int nunits;
1739   tree vectype;
1740
1741   if (nbytes == 0)
1742     return NULL_TREE;
1743
1744   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1745      is expected.  */
1746   nunits = UNITS_PER_SIMD_WORD / nbytes;
1747
1748   vectype = build_vector_type (scalar_type, nunits);
1749   if (vect_debug_details (NULL))
1750     {
1751       fprintf (dump_file, "get vectype with %d units of type ", nunits);
1752       print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1753     }
1754
1755   if (!vectype)
1756     return NULL_TREE;
1757
1758   if (vect_debug_details (NULL))
1759     {
1760       fprintf (dump_file, "vectype: ");
1761       print_generic_expr (dump_file, vectype, TDF_SLIM);
1762     }
1763
1764   if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1765     {
1766       /* TODO: tree-complex.c sometimes can parallelize operations
1767          on generic vectors.  We can vectorize the loop in that case,
1768          but then we should re-run the lowering pass.  */
1769       if (vect_debug_details (NULL))
1770         fprintf (dump_file, "mode not supported by target.");
1771       return NULL_TREE;
1772     }
1773
1774   return vectype;
1775 }
1776
1777
1778 /* Function vect_align_data_ref.
1779
1780    Handle mislignment of a memory accesses.
1781
1782    FORNOW: Can't handle misaligned accesses.
1783    Make sure that the dataref is aligned.  */
1784
1785 static void
1786 vect_align_data_ref (tree stmt)
1787 {
1788   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1789   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1790
1791   /* FORNOW: can't handle misaligned accesses;
1792              all accesses expected to be aligned.  */
1793   gcc_assert (aligned_access_p (dr));
1794 }
1795
1796
1797 /* Function vect_create_data_ref_ptr.
1798
1799    Create a memory reference expression for vector access, to be used in a
1800    vector load/store stmt. The reference is based on a new pointer to vector
1801    type (vp).
1802
1803    Input:
1804    1. STMT: a stmt that references memory. Expected to be of the form
1805          MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1806    2. BSI: block_stmt_iterator where new stmts can be added.
1807    3. OFFSET (optional): an offset to be added to the initial address accessed
1808         by the data-ref in STMT.
1809    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1810         pointing to the initial address.
1811
1812    Output:
1813    1. Declare a new ptr to vector_type, and have it point to the base of the
1814       data reference (initial addressed accessed by the data reference).
1815       For example, for vector of type V8HI, the following code is generated:
1816
1817       v8hi *vp;
1818       vp = (v8hi *)initial_address;
1819
1820       if OFFSET is not supplied:
1821          initial_address = &a[init];
1822       if OFFSET is supplied:
1823          initial_address = &a[init + OFFSET];
1824
1825       Return the initial_address in INITIAL_ADDRESS.
1826
1827    2. Create a data-reference in the loop based on the new vector pointer vp,
1828       and using a new index variable 'idx' as follows:
1829
1830       vp' = vp + update
1831
1832       where if ONLY_INIT is true:
1833          update = zero
1834       and otherwise
1835          update = idx + vector_type_size
1836
1837       Return the pointer vp'.
1838
1839
1840    FORNOW: handle only aligned and consecutive accesses.  */
1841
1842 static tree
1843 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1844                           tree *initial_address, bool only_init)
1845 {
1846   tree base_name;
1847   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1848   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1849   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1850   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1851   tree vect_ptr_type;
1852   tree vect_ptr;
1853   tree tag;
1854   v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1855   v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1856   vuse_optype vuses = STMT_VUSE_OPS (stmt);
1857   int nvuses, nv_may_defs, nv_must_defs;
1858   int i;
1859   tree new_temp;
1860   tree vec_stmt;
1861   tree new_stmt_list = NULL_TREE;
1862   tree idx;
1863   edge pe = loop_preheader_edge (loop);
1864   basic_block new_bb;
1865   tree vect_ptr_init;
1866   tree vectype_size;
1867   tree ptr_update;
1868   tree data_ref_ptr;
1869
1870   base_name = unshare_expr (DR_BASE_NAME (dr));
1871   if (vect_debug_details (NULL))
1872     {
1873       tree data_ref_base = base_name;
1874       fprintf (dump_file, "create array_ref of type: ");
1875       print_generic_expr (dump_file, vectype, TDF_SLIM);
1876       if (TREE_CODE (data_ref_base) == VAR_DECL)
1877         fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1878       else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1879         fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1880       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1881         fprintf (dump_file, "vectorizing a record based array ref: ");
1882       else if (TREE_CODE (data_ref_base) == SSA_NAME)
1883         fprintf (dump_file, "vectorizing a pointer ref: ");
1884       print_generic_expr (dump_file, base_name, TDF_SLIM);
1885     }
1886
1887   /** (1) Create the new vector-pointer variable:  **/
1888
1889   vect_ptr_type = build_pointer_type (vectype);
1890   vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1891                                     get_name (base_name));
1892   add_referenced_tmp_var (vect_ptr);
1893
1894
1895   /** (2) Handle aliasing information of the new vector-pointer:  **/
1896
1897   tag = STMT_VINFO_MEMTAG (stmt_info);
1898   gcc_assert (tag);
1899   get_var_ann (vect_ptr)->type_mem_tag = tag;
1900
1901   /* Mark for renaming all aliased variables
1902      (i.e, the may-aliases of the type-mem-tag).  */
1903   nvuses = NUM_VUSES (vuses);
1904   nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1905   nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1906   for (i = 0; i < nvuses; i++)
1907     {
1908       tree use = VUSE_OP (vuses, i);
1909       if (TREE_CODE (use) == SSA_NAME)
1910         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1911     }
1912   for (i = 0; i < nv_may_defs; i++)
1913     {
1914       tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1915       if (TREE_CODE (def) == SSA_NAME)
1916         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1917     }
1918   for (i = 0; i < nv_must_defs; i++)
1919     {
1920       tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1921       if (TREE_CODE (def) == SSA_NAME)
1922         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1923     }
1924
1925
1926   /** (3) Calculate the initial address the vector-pointer, and set
1927           the vector-pointer to point to it before the loop:  **/
1928
1929   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
1930   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1931                                                    offset);
1932   pe = loop_preheader_edge (loop);
1933   new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1934   gcc_assert (!new_bb);
1935   *initial_address = new_temp;
1936
1937   /* Create: p = (vectype *) initial_base  */
1938   vec_stmt = fold_convert (vect_ptr_type, new_temp);
1939   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1940   new_temp = make_ssa_name (vect_ptr, vec_stmt);
1941   TREE_OPERAND (vec_stmt, 0) = new_temp;
1942   new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1943   gcc_assert (!new_bb);
1944   vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1945
1946
1947   /** (4) Handle the updating of the vector-pointer inside the loop: **/
1948
1949   if (only_init) /* No update in loop is required.  */
1950     return vect_ptr_init;
1951
1952   idx = vect_create_index_for_vector_ref (loop, bsi);
1953
1954   /* Create: update = idx * vectype_size  */
1955   ptr_update = create_tmp_var (integer_type_node, "update");
1956   add_referenced_tmp_var (ptr_update);
1957   vectype_size = build_int_cst (integer_type_node,
1958                                 GET_MODE_SIZE (TYPE_MODE (vectype)));
1959   vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1960   vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1961   new_temp = make_ssa_name (ptr_update, vec_stmt);
1962   TREE_OPERAND (vec_stmt, 0) = new_temp;
1963   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1964
1965   /* Create: data_ref_ptr = vect_ptr_init + update  */
1966   vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1967   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1968   new_temp = make_ssa_name (vect_ptr, vec_stmt);
1969   TREE_OPERAND (vec_stmt, 0) = new_temp;
1970   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1971   data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1972
1973   return data_ref_ptr;
1974 }
1975
1976
1977 /* Function vect_create_destination_var.
1978
1979    Create a new temporary of type VECTYPE.  */
1980
1981 static tree
1982 vect_create_destination_var (tree scalar_dest, tree vectype)
1983 {
1984   tree vec_dest;
1985   const char *new_name;
1986
1987   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1988
1989   new_name = get_name (scalar_dest);
1990   if (!new_name)
1991     new_name = "var_";
1992   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1993   add_referenced_tmp_var (vec_dest);
1994
1995   return vec_dest;
1996 }
1997
1998
1999 /* Function vect_init_vector.
2000
2001    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2002    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2003    used in the vectorization of STMT.  */
2004
2005 static tree
2006 vect_init_vector (tree stmt, tree vector_var)
2007 {
2008   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2009   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2010   tree new_var;
2011   tree init_stmt;
2012   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2013   tree vec_oprnd;
2014   edge pe;
2015   tree new_temp;
2016   basic_block new_bb;
2017
2018   new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2019   add_referenced_tmp_var (new_var);
2020
2021   init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2022   new_temp = make_ssa_name (new_var, init_stmt);
2023   TREE_OPERAND (init_stmt, 0) = new_temp;
2024
2025   pe = loop_preheader_edge (loop);
2026   new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2027   gcc_assert (!new_bb);
2028
2029   if (vect_debug_details (NULL))
2030     {
2031       fprintf (dump_file, "created new init_stmt: ");
2032       print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2033     }
2034
2035   vec_oprnd = TREE_OPERAND (init_stmt, 0);
2036   return vec_oprnd;
2037 }
2038
2039
2040 /* Function vect_get_vec_def_for_operand.
2041
2042    OP is an operand in STMT. This function returns a (vector) def that will be
2043    used in the vectorized stmt for STMT.
2044
2045    In the case that OP is an SSA_NAME which is defined in the loop, then
2046    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2047
2048    In case OP is an invariant or constant, a new stmt that creates a vector def
2049    needs to be introduced.  */
2050
2051 static tree
2052 vect_get_vec_def_for_operand (tree op, tree stmt)
2053 {
2054   tree vec_oprnd;
2055   tree vec_stmt;
2056   tree def_stmt;
2057   stmt_vec_info def_stmt_info = NULL;
2058   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2059   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2060   int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2061   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2062   basic_block bb;
2063   tree vec_inv;
2064   tree t = NULL_TREE;
2065   tree def;
2066   int i;
2067
2068   if (vect_debug_details (NULL))
2069     {
2070       fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2071       print_generic_expr (dump_file, op, TDF_SLIM);
2072     }
2073
2074   /** ===> Case 1: operand is a constant.  **/
2075
2076   if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2077     {
2078       /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
2079
2080       tree vec_cst;
2081
2082       /* Build a tree with vector elements.  */
2083       if (vect_debug_details (NULL))
2084         fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2085
2086       for (i = nunits - 1; i >= 0; --i)
2087         {
2088           t = tree_cons (NULL_TREE, op, t);
2089         }
2090       vec_cst = build_vector (vectype, t);
2091       return vect_init_vector (stmt, vec_cst);
2092     }
2093
2094   gcc_assert (TREE_CODE (op) == SSA_NAME);
2095
2096   /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/
2097
2098   def_stmt = SSA_NAME_DEF_STMT (op);
2099   def_stmt_info = vinfo_for_stmt (def_stmt);
2100
2101   if (vect_debug_details (NULL))
2102     {
2103       fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2104       print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2105     }
2106
2107
2108   /** ==> Case 2.1: operand is defined inside the loop.  **/
2109
2110   if (def_stmt_info)
2111     {
2112       /* Get the def from the vectorized stmt.  */
2113
2114       vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2115       gcc_assert (vec_stmt);
2116       vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2117       return vec_oprnd;
2118     }
2119
2120
2121   /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2122                     it is a reduction/induction.  **/
2123
2124   bb = bb_for_stmt (def_stmt);
2125   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2126     {
2127       if (vect_debug_details (NULL))
2128         fprintf (dump_file, "reduction/induction - unsupported.");
2129       internal_error ("no support for reduction/induction"); /* FORNOW */
2130     }
2131
2132
2133   /** ==> Case 2.3: operand is defined outside the loop -
2134                     it is a loop invariant.  */
2135
2136   switch (TREE_CODE (def_stmt))
2137     {
2138     case PHI_NODE:
2139       def = PHI_RESULT (def_stmt);
2140       break;
2141     case MODIFY_EXPR:
2142       def = TREE_OPERAND (def_stmt, 0);
2143       break;
2144     case NOP_EXPR:
2145       def = TREE_OPERAND (def_stmt, 0);
2146       gcc_assert (IS_EMPTY_STMT (def_stmt));
2147       def = op;
2148       break;
2149     default:
2150       if (vect_debug_details (NULL))
2151         {
2152           fprintf (dump_file, "unsupported defining stmt: ");
2153           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2154         }
2155       internal_error ("unsupported defining stmt");
2156     }
2157
2158   /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}'  */
2159
2160   if (vect_debug_details (NULL))
2161     fprintf (dump_file, "Create vector_inv.");
2162
2163   for (i = nunits - 1; i >= 0; --i)
2164     {
2165       t = tree_cons (NULL_TREE, def, t);
2166     }
2167
2168   vec_inv = build_constructor (vectype, t);
2169   return vect_init_vector (stmt, vec_inv);
2170 }
2171
2172
2173 /* Function vect_finish_stmt_generation.
2174
2175    Insert a new stmt.  */
2176
2177 static void
2178 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2179 {
2180   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2181
2182   if (vect_debug_details (NULL))
2183     {
2184       fprintf (dump_file, "add new stmt: ");
2185       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2186     }
2187
2188   /* Make sure bsi points to the stmt that is being vectorized.  */
2189
2190   /* Assumption: any stmts created for the vectorization of stmt S were
2191      inserted before S. BSI is expected to point to S or some new stmt before S.
2192    */
2193
2194   while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2195     bsi_next (bsi);
2196   gcc_assert (stmt == bsi_stmt (*bsi));
2197 }
2198
2199
2200 /* Function vectorizable_assignment.
2201
2202    Check if STMT performs an assignment (copy) that can be vectorized.
2203    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2204    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2205    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2206
2207 static bool
2208 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2209 {
2210   tree vec_dest;
2211   tree scalar_dest;
2212   tree op;
2213   tree vec_oprnd;
2214   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2215   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2216   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2217   tree new_temp;
2218
2219   /* Is vectorizable assignment?  */
2220
2221   if (TREE_CODE (stmt) != MODIFY_EXPR)
2222     return false;
2223
2224   scalar_dest = TREE_OPERAND (stmt, 0);
2225   if (TREE_CODE (scalar_dest) != SSA_NAME)
2226     return false;
2227
2228   op = TREE_OPERAND (stmt, 1);
2229   if (!vect_is_simple_use (op, loop, NULL))
2230     {
2231       if (vect_debug_details (NULL))
2232         fprintf (dump_file, "use not simple.");
2233       return false;
2234     }
2235
2236   if (!vec_stmt) /* transformation not required.  */
2237     {
2238       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2239       return true;
2240     }
2241
2242   /** Trasform.  **/
2243   if (vect_debug_details (NULL))
2244     fprintf (dump_file, "transform assignment.");
2245
2246   /* Handle def.  */
2247   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2248
2249   /* Handle use.  */
2250   op = TREE_OPERAND (stmt, 1);
2251   vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2252
2253   /* Arguments are ready. create the new vector stmt.  */
2254   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2255   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2256   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2257   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2258
2259   return true;
2260 }
2261
2262
2263 /* Function vectorizable_operation.
2264
2265    Check if STMT performs a binary or unary operation that can be vectorized.
2266    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2267    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2268    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2269
2270 static bool
2271 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2272 {
2273   tree vec_dest;
2274   tree scalar_dest;
2275   tree operation;
2276   tree op0, op1 = NULL;
2277   tree vec_oprnd0, vec_oprnd1=NULL;
2278   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2279   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2280   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2281   int i;
2282   enum tree_code code;
2283   enum machine_mode vec_mode;
2284   tree new_temp;
2285   int op_type;
2286   tree op;
2287   optab optab;
2288
2289   /* Is STMT a vectorizable binary/unary operation?   */
2290   if (TREE_CODE (stmt) != MODIFY_EXPR)
2291     return false;
2292
2293   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2294     return false;
2295
2296   operation = TREE_OPERAND (stmt, 1);
2297   code = TREE_CODE (operation);
2298   optab = optab_for_tree_code (code, vectype);
2299
2300   /* Support only unary or binary operations.  */
2301   op_type = TREE_CODE_LENGTH (code);
2302   if (op_type != unary_op && op_type != binary_op)
2303     {
2304       if (vect_debug_details (NULL))
2305         fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2306       return false;
2307     }
2308
2309   for (i = 0; i < op_type; i++)
2310     {
2311       op = TREE_OPERAND (operation, i);
2312       if (!vect_is_simple_use (op, loop, NULL))
2313         {
2314           if (vect_debug_details (NULL))
2315             fprintf (dump_file, "use not simple.");
2316           return false;
2317         }
2318     }
2319
2320   /* Supportable by target?  */
2321   if (!optab)
2322     {
2323       if (vect_debug_details (NULL))
2324         fprintf (dump_file, "no optab.");
2325       return false;
2326     }
2327   vec_mode = TYPE_MODE (vectype);
2328   if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2329     {
2330       if (vect_debug_details (NULL))
2331         fprintf (dump_file, "op not supported by target.");
2332       return false;
2333     }
2334
2335   if (!vec_stmt) /* transformation not required.  */
2336     {
2337       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2338       return true;
2339     }
2340
2341   /** Transform.  **/
2342
2343   if (vect_debug_details (NULL))
2344     fprintf (dump_file, "transform binary/unary operation.");
2345
2346   /* Handle def.  */
2347   scalar_dest = TREE_OPERAND (stmt, 0);
2348   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2349
2350   /* Handle uses.  */
2351   op0 = TREE_OPERAND (operation, 0);
2352   vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2353
2354   if (op_type == binary_op)
2355     {
2356       op1 = TREE_OPERAND (operation, 1);
2357       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2358     }
2359
2360   /* Arguments are ready. create the new vector stmt.  */
2361
2362   if (op_type == binary_op)
2363     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2364                 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2365   else
2366     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2367                 build1 (code, vectype, vec_oprnd0));
2368   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2369   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2370   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2371
2372   return true;
2373 }
2374
2375
2376 /* Function vectorizable_store.
2377
2378    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2379    can be vectorized.
2380    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2381    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2382    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2383
2384 static bool
2385 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2386 {
2387   tree scalar_dest;
2388   tree data_ref;
2389   tree op;
2390   tree vec_oprnd1;
2391   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2392   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2393   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2394   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2395   enum machine_mode vec_mode;
2396   tree dummy;
2397   enum dr_alignment_support alignment_support_cheme;
2398
2399   /* Is vectorizable store? */
2400
2401   if (TREE_CODE (stmt) != MODIFY_EXPR)
2402     return false;
2403
2404   scalar_dest = TREE_OPERAND (stmt, 0);
2405   if (TREE_CODE (scalar_dest) != ARRAY_REF
2406       && TREE_CODE (scalar_dest) != INDIRECT_REF)
2407     return false;
2408
2409   op = TREE_OPERAND (stmt, 1);
2410   if (!vect_is_simple_use (op, loop, NULL))
2411     {
2412       if (vect_debug_details (NULL))
2413         fprintf (dump_file, "use not simple.");
2414       return false;
2415     }
2416
2417   vec_mode = TYPE_MODE (vectype);
2418   /* FORNOW. In some cases can vectorize even if data-type not supported
2419      (e.g. - array initialization with 0).  */
2420   if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2421     return false;
2422
2423   if (!STMT_VINFO_DATA_REF (stmt_info))
2424     return false;
2425
2426
2427   if (!vec_stmt) /* transformation not required.  */
2428     {
2429       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2430       return true;
2431     }
2432
2433   /** Trasform.  **/
2434
2435   if (vect_debug_details (NULL))
2436     fprintf (dump_file, "transform store");
2437
2438   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2439   gcc_assert (alignment_support_cheme);
2440   gcc_assert (alignment_support_cheme = dr_aligned);  /* FORNOW */
2441
2442   /* Handle use - get the vectorized def from the defining stmt.  */
2443   vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2444
2445   /* Handle def.  */
2446   /* FORNOW: make sure the data reference is aligned.  */
2447   vect_align_data_ref (stmt);
2448   data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2449   data_ref = build_fold_indirect_ref (data_ref);
2450
2451   /* Arguments are ready. create the new vector stmt.  */
2452   *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2453   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2454
2455   return true;
2456 }
2457
2458
2459 /* vectorizable_load.
2460
2461    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2462    can be vectorized.
2463    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2464    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2465    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2466
2467 static bool
2468 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2469 {
2470   tree scalar_dest;
2471   tree vec_dest = NULL;
2472   tree data_ref = NULL;
2473   tree op;
2474   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2475   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2476   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2477   tree new_temp;
2478   int mode;
2479   tree init_addr;
2480   tree new_stmt;
2481   tree dummy;
2482   basic_block new_bb;
2483   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2484   edge pe = loop_preheader_edge (loop);
2485   enum dr_alignment_support alignment_support_cheme;
2486
2487   /* Is vectorizable load? */
2488
2489   if (TREE_CODE (stmt) != MODIFY_EXPR)
2490     return false;
2491
2492   scalar_dest = TREE_OPERAND (stmt, 0);
2493   if (TREE_CODE (scalar_dest) != SSA_NAME)
2494     return false;
2495
2496   op = TREE_OPERAND (stmt, 1);
2497   if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2498     return false;
2499
2500   if (!STMT_VINFO_DATA_REF (stmt_info))
2501     return false;
2502
2503   mode = (int) TYPE_MODE (vectype);
2504
2505   /* FORNOW. In some cases can vectorize even if data-type not supported
2506     (e.g. - data copies).  */
2507   if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2508     {
2509       if (vect_debug_details (loop))
2510         fprintf (dump_file, "Aligned load, but unsupported type.");
2511       return false;
2512     }
2513
2514   if (!vec_stmt) /* transformation not required.  */
2515     {
2516       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2517       return true;
2518     }
2519
2520   /** Trasform.  **/
2521
2522   if (vect_debug_details (NULL))
2523     fprintf (dump_file, "transform load.");
2524
2525   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2526   gcc_assert (alignment_support_cheme);
2527
2528   if (alignment_support_cheme == dr_aligned
2529       || alignment_support_cheme == dr_unaligned_supported)
2530     {
2531       /* Create:
2532          p = initial_addr;
2533          indx = 0;
2534          loop {
2535            vec_dest = *(p);
2536            indx = indx + 1;
2537          }
2538       */
2539
2540       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2541       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2542       if (aligned_access_p (dr))
2543         data_ref = build_fold_indirect_ref (data_ref);
2544       else
2545         {
2546           int mis = DR_MISALIGNMENT (dr);
2547           tree tmis = (mis == -1 ?
2548                        integer_zero_node :
2549                        build_int_cst (integer_type_node, mis));
2550           tmis = int_const_binop (MULT_EXPR, tmis,
2551                         build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2552           data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2553         }
2554       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2555       new_temp = make_ssa_name (vec_dest, new_stmt);
2556       TREE_OPERAND (new_stmt, 0) = new_temp;
2557       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2558     }
2559   else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2560     {
2561       /* Create:
2562          p1 = initial_addr;
2563          msq_init = *(floor(p1))
2564          p2 = initial_addr + VS - 1;
2565          magic = have_builtin ? builtin_result : initial_address;
2566          indx = 0;
2567          loop {
2568            p2' = p2 + indx * vectype_size
2569            lsq = *(floor(p2'))
2570            vec_dest = realign_load (msq, lsq, magic)
2571            indx = indx + 1;
2572            msq = lsq;
2573          }
2574       */
2575
2576       tree offset;
2577       tree magic;
2578       tree phi_stmt;
2579       tree msq_init;
2580       tree msq, lsq;
2581       tree dataref_ptr;
2582       tree params;
2583
2584       /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
2585       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2586       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2587                                            &init_addr, true);
2588       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2589       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2590       new_temp = make_ssa_name (vec_dest, new_stmt);
2591       TREE_OPERAND (new_stmt, 0) = new_temp;
2592       new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2593       gcc_assert (!new_bb);
2594       msq_init = TREE_OPERAND (new_stmt, 0);
2595
2596
2597       /* <2> Create lsq = *(floor(p2')) in the loop  */
2598       offset = build_int_cst (integer_type_node,
2599                               GET_MODE_NUNITS (TYPE_MODE (vectype)));
2600       offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2601       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2602       dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2603       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2604       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2605       new_temp = make_ssa_name (vec_dest, new_stmt);
2606       TREE_OPERAND (new_stmt, 0) = new_temp;
2607       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2608       lsq = TREE_OPERAND (new_stmt, 0);
2609
2610
2611       /* <3> */
2612       if (targetm.vectorize.builtin_mask_for_load)
2613         {
2614           /* Create permutation mask, if required, in loop preheader.  */
2615           tree builtin_decl;
2616           params = build_tree_list (NULL_TREE, init_addr);
2617           vec_dest = vect_create_destination_var (scalar_dest, vectype);
2618           builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2619           new_stmt = build_function_call_expr (builtin_decl, params);
2620           new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2621           new_temp = make_ssa_name (vec_dest, new_stmt);
2622           TREE_OPERAND (new_stmt, 0) = new_temp;
2623           new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2624           gcc_assert (!new_bb);
2625           magic = TREE_OPERAND (new_stmt, 0);
2626         }
2627       else
2628         {
2629           /* Use current address instead of init_addr for reduced reg pressure.
2630            */
2631           magic = dataref_ptr;
2632         }
2633
2634
2635       /* <4> Create msq = phi <msq_init, lsq> in loop  */
2636       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2637       msq = make_ssa_name (vec_dest, NULL_TREE);
2638       phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2639       SSA_NAME_DEF_STMT (msq) = phi_stmt;
2640       add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2641       add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2642
2643
2644       /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
2645       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2646       new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2647       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2648       new_temp = make_ssa_name (vec_dest, new_stmt);
2649       TREE_OPERAND (new_stmt, 0) = new_temp;
2650       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2651     }
2652   else
2653     gcc_unreachable ();
2654
2655   *vec_stmt = new_stmt;
2656   return true;
2657 }
2658
2659
2660 /* Function vect_supportable_dr_alignment
2661
2662    Return whether the data reference DR is supported with respect to its
2663    alignment.  */
2664
2665 static enum dr_alignment_support
2666 vect_supportable_dr_alignment (struct data_reference *dr)
2667 {
2668   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2669   enum machine_mode mode = (int) TYPE_MODE (vectype);
2670
2671   if (aligned_access_p (dr))
2672     return dr_aligned;
2673
2674   /* Possibly unaligned access.  */
2675
2676   if (DR_IS_READ (dr))
2677     {
2678       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2679           && (!targetm.vectorize.builtin_mask_for_load
2680               || targetm.vectorize.builtin_mask_for_load ()))
2681         return dr_unaligned_software_pipeline;
2682
2683       if (targetm.vectorize.misaligned_mem_ok (mode))
2684         /* Can't software pipeline the loads.  */
2685         return dr_unaligned_supported;
2686     }
2687
2688   /* Unsupported.  */
2689   return dr_unaligned_unsupported;
2690 }
2691
2692
2693 /* Function vect_transform_stmt.
2694
2695    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2696
2697 static bool
2698 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2699 {
2700   bool is_store = false;
2701   tree vec_stmt = NULL_TREE;
2702   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2703   bool done;
2704
2705   switch (STMT_VINFO_TYPE (stmt_info))
2706     {
2707     case op_vec_info_type:
2708       done = vectorizable_operation (stmt, bsi, &vec_stmt);
2709       gcc_assert (done);
2710       break;
2711
2712     case assignment_vec_info_type:
2713       done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2714       gcc_assert (done);
2715       break;
2716
2717     case load_vec_info_type:
2718       done = vectorizable_load (stmt, bsi, &vec_stmt);
2719       gcc_assert (done);
2720       break;
2721
2722     case store_vec_info_type:
2723       done = vectorizable_store (stmt, bsi, &vec_stmt);
2724       gcc_assert (done);
2725       is_store = true;
2726       break;
2727     default:
2728       if (vect_debug_details (NULL))
2729         fprintf (dump_file, "stmt not supported.");
2730       gcc_unreachable ();
2731     }
2732
2733   STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2734
2735   return is_store;
2736 }
2737
2738
2739 /* This function builds ni_name = number of iterations loop executes
2740    on the loop preheader.  */
2741
2742 static tree
2743 vect_build_loop_niters (loop_vec_info loop_vinfo)
2744 {
2745   tree ni_name, stmt, var;
2746   edge pe;
2747   basic_block new_bb = NULL;
2748   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2749   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2750
2751   var = create_tmp_var (TREE_TYPE (ni), "niters");
2752   add_referenced_tmp_var (var);
2753   ni_name = force_gimple_operand (ni, &stmt, false, var);
2754
2755   pe = loop_preheader_edge (loop);
2756   if (stmt)
2757     new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2758   if (new_bb)
2759     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2760
2761   return ni_name;
2762 }
2763
2764
2765 /* This function generates the following statements:
2766
2767  ni_name = number of iterations loop executes
2768  ratio = ni_name / vf
2769  ratio_mult_vf_name = ratio * vf
2770
2771  and places them at the loop preheader edge.  */
2772
2773 static void
2774 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2775                                  tree *ratio_mult_vf_name_p, tree *ratio_p)
2776 {
2777
2778   edge pe;
2779   basic_block new_bb;
2780   tree stmt, ni_name;
2781   tree ratio;
2782   tree ratio_mult_vf_name, ratio_mult_vf;
2783   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2784   tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2785
2786   int vf, i;
2787
2788   /* Generate temporary variable that contains
2789      number of iterations loop executes.  */
2790
2791   ni_name = vect_build_loop_niters (loop_vinfo);
2792
2793   /* ratio = ni / vf.
2794      vf is power of 2; then if ratio =  = n >> log2 (vf).  */
2795   vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2796   ratio = vect_build_symbol_bound (ni_name, vf, loop);
2797
2798   /* Update initial conditions of loop copy.  */
2799
2800   /* ratio_mult_vf = ratio * vf;
2801      then if ratio_mult_vf = ratio << log2 (vf).  */
2802
2803   i = exact_log2 (vf);
2804   ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2805   add_referenced_tmp_var (ratio_mult_vf);
2806
2807   ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2808
2809   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2810                 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2811                        ratio, build_int_cst (unsigned_type_node,
2812                                              i)));
2813
2814   SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2815
2816   pe = loop_preheader_edge (loop);
2817   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2818   if (new_bb)
2819     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2820
2821   *ni_name_p = ni_name;
2822   *ratio_mult_vf_name_p = ratio_mult_vf_name;
2823   *ratio_p = ratio;
2824
2825   return;
2826 }
2827
2828
2829 /* This function generates stmt
2830
2831    tmp = n / vf;
2832
2833    and attaches it to preheader of LOOP.  */
2834
2835 static tree
2836 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2837 {
2838   tree var, stmt, var_name;
2839   edge pe;
2840   basic_block new_bb;
2841   int i;
2842
2843   /* create temporary variable */
2844   var = create_tmp_var (TREE_TYPE (n), "bnd");
2845   add_referenced_tmp_var (var);
2846
2847   var_name = make_ssa_name (var, NULL_TREE);
2848
2849   /* vf is power of 2; then n/vf = n >> log2 (vf).  */
2850
2851   i = exact_log2 (vf);
2852   stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2853                 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2854                        n, build_int_cst (unsigned_type_node,i)));
2855
2856   SSA_NAME_DEF_STMT (var_name) = stmt;
2857
2858   pe = loop_preheader_edge (loop);
2859   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2860   if (new_bb)
2861     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2862   else
2863     if (vect_debug_details (NULL))
2864       fprintf (dump_file, "New bb on preheader edge was not generated.");
2865
2866   return var_name;
2867 }
2868
2869
2870 /*   Function vect_update_ivs_after_vectorizer.
2871
2872      "Advance" the induction variables of LOOP to the value they should take
2873      after the execution of LOOP.  This is currently necessary because the
2874      vectorizer does not handle induction variables that are used after the
2875      loop.  Such a situation occurs when the last iterations of LOOP are
2876      peeled, because:
2877      1. We introduced new uses after LOOP for IVs that were not originally used
2878         after LOOP: the IVs of LOOP are now used by an epilog loop.
2879      2. LOOP is going to be vectorized; this means that it will iterate N/VF
2880         times, whereas the loop IVs should be bumped N times.
2881
2882      Input:
2883      - LOOP - a loop that is going to be vectorized. The last few iterations
2884               of LOOP were peeled.
2885      - NITERS - the number of iterations that LOOP executes (before it is
2886                 vectorized). i.e, the number of times the ivs should be bumped.
2887      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2888                   coming out from LOOP on which there are uses of the LOOP ivs
2889                   (this is the path from LOOP->exit to epilog_loop->preheader).
2890
2891                   The new definitions of the ivs are placed in LOOP->exit.
2892                   The phi args associated with the edge UPDATE_E in the bb
2893                   UPDATE_E->dest are updated accordingly.
2894
2895      Assumption 1: Like the rest of the vectorizer, this function assumes
2896      a single loop exit that has a single predecessor.
2897
2898      Assumption 2: The phi nodes in the LOOP header and in update_bb are
2899      organized in the same order.
2900
2901      Assumption 3: The access function of the ivs is simple enough (see
2902      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
2903
2904      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2905      coming out of LOOP on which the ivs of LOOP are used (this is the path
2906      that leads to the epilog loop; other paths skip the epilog loop).  This
2907      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2908      needs to have its phis updated.
2909  */
2910
2911 static void
2912 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2913 {
2914   basic_block exit_bb = loop->exit_edges[0]->dest;
2915   tree phi, phi1;
2916   basic_block update_bb = update_e->dest;
2917
2918   /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2919
2920   /* Make sure there exists a single-predecessor exit bb:  */
2921   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
2922
2923   for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2924        phi && phi1;
2925        phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2926     {
2927       tree access_fn = NULL;
2928       tree evolution_part;
2929       tree init_expr;
2930       tree step_expr;
2931       tree var, stmt, ni, ni_name;
2932       block_stmt_iterator last_bsi;
2933
2934       /* Skip virtual phi's.  */
2935       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2936         {
2937           if (vect_debug_details (NULL))
2938             fprintf (dump_file, "virtual phi. skip.");
2939           continue;
2940         }
2941
2942       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2943       gcc_assert (access_fn);
2944       evolution_part =
2945          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2946       gcc_assert (evolution_part != NULL_TREE);
2947
2948       /* FORNOW: We do not support IVs whose evolution function is a polynomial
2949          of degree >= 2 or exponential.  */
2950       gcc_assert (!tree_is_chrec (evolution_part));
2951
2952       step_expr = evolution_part;
2953       init_expr = unshare_expr (initial_condition (access_fn));
2954
2955       ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2956                   build2 (MULT_EXPR, TREE_TYPE (niters),
2957                        niters, step_expr), init_expr);
2958
2959       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2960       add_referenced_tmp_var (var);
2961
2962       ni_name = force_gimple_operand (ni, &stmt, false, var);
2963
2964       /* Insert stmt into exit_bb.  */
2965       last_bsi = bsi_last (exit_bb);
2966       if (stmt)
2967         bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2968
2969       /* Fix phi expressions in the successor bb.  */
2970       gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
2971                   PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
2972       SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
2973     }
2974 }
2975
2976
2977 /* Function vect_do_peeling_for_loop_bound
2978
2979    Peel the last iterations of the loop represented by LOOP_VINFO.
2980    The peeled iterations form a new epilog loop.  Given that the loop now
2981    iterates NITERS times, the new epilog loop iterates
2982    NITERS % VECTORIZATION_FACTOR times.
2983
2984    The original loop will later be made to iterate
2985    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
2986
2987 static void
2988 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2989                                 struct loops *loops)
2990 {
2991
2992   tree ni_name, ratio_mult_vf_name;
2993   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2994   struct loop *new_loop;
2995   edge update_e;
2996 #ifdef ENABLE_CHECKING
2997   int loop_num;
2998 #endif
2999
3000   if (vect_debug_details (NULL))
3001     fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3002
3003   /* Generate the following variables on the preheader of original loop:
3004
3005      ni_name = number of iteration the original loop executes
3006      ratio = ni_name / vf
3007      ratio_mult_vf_name = ratio * vf  */
3008   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3009                                    &ratio_mult_vf_name, ratio);
3010
3011   /* Update loop info.  */
3012   loop->pre_header = loop_preheader_edge (loop)->src;
3013   loop->pre_header_edges[0] = loop_preheader_edge (loop);
3014
3015 #ifdef ENABLE_CHECKING
3016   loop_num  = loop->num;
3017 #endif
3018   new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3019                                             ratio_mult_vf_name, ni_name, false);
3020 #ifdef ENABLE_CHECKING
3021   gcc_assert (new_loop);
3022   gcc_assert (loop_num == loop->num);
3023   slpeel_verify_cfg_after_peeling (loop, new_loop);
3024 #endif
3025
3026   /* A guard that controls whether the new_loop is to be executed or skipped
3027      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
3028      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
3029      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
3030      is on the path where the LOOP IVs are used and need to be updated.  */
3031
3032   if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3033     update_e = EDGE_PRED (new_loop->pre_header, 0);
3034   else
3035     update_e = EDGE_PRED (new_loop->pre_header, 1);
3036
3037   /* Update IVs of original loop as if they were advanced
3038      by ratio_mult_vf_name steps.  */
3039   vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3040
3041   /* After peeling we have to reset scalar evolution analyzer.  */
3042   scev_reset ();
3043
3044   return;
3045 }
3046
3047
3048 /* Function vect_gen_niters_for_prolog_loop
3049
3050    Set the number of iterations for the loop represented by LOOP_VINFO
3051    to the minimum between LOOP_NITERS (the original iteration count of the loop)
3052    and the misalignment of DR - the first data reference recorded in
3053    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
3054    this loop, the data reference DR will refer to an aligned location.
3055
3056    The following computation is generated:
3057
3058    compute address misalignment in bytes:
3059    addr_mis = addr & (vectype_size - 1)
3060
3061    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3062
3063    (elem_size = element type size; an element is the scalar element
3064         whose type is the inner type of the vectype)  */
3065
3066 static tree
3067 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3068 {
3069   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3070   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3071   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3072   tree var, stmt;
3073   tree iters, iters_name;
3074   edge pe;
3075   basic_block new_bb;
3076   tree dr_stmt = DR_STMT (dr);
3077   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3078   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3079   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3080   tree elem_misalign;
3081   tree byte_misalign;
3082   tree new_stmts = NULL_TREE;
3083   tree start_addr =
3084         vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3085   tree ptr_type = TREE_TYPE (start_addr);
3086   tree size = TYPE_SIZE (ptr_type);
3087   tree type = lang_hooks.types.type_for_size (TREE_INT_CST_LOW (size), 1);
3088   tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3089   tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3090   tree niters_type = TREE_TYPE (loop_niters);
3091   tree elem_size_log =
3092         build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3093   tree vf_tree = build_int_cst (unsigned_type_node, vf);
3094
3095   pe = loop_preheader_edge (loop);
3096   new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3097   if (new_bb)
3098     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3099
3100   /* Create:  byte_misalign = addr & (vectype_size - 1)  */
3101   byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3102
3103   /* Create:  elem_misalign = byte_misalign / element_size  */
3104   elem_misalign =
3105         build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3106
3107   /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
3108   iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3109   iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3110   iters = fold_convert (niters_type, iters);
3111
3112   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
3113   iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3114   var = create_tmp_var (niters_type, "prolog_loop_niters");
3115   add_referenced_tmp_var (var);
3116   iters_name = force_gimple_operand (iters, &stmt, false, var);
3117
3118   /* Insert stmt on loop preheader edge.  */
3119   pe = loop_preheader_edge (loop);
3120   if (stmt)
3121     new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3122   if (new_bb)
3123     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3124
3125   return iters_name;
3126 }
3127
3128
3129 /* Function vect_update_inits_of_dr
3130
3131    NITERS iterations were peeled from LOOP.  DR represents a data reference
3132    in LOOP.  This function updates the information recorded in DR to
3133    account for the fact that the first NITERS iterations had already been
3134    executed.  Specifically, it updates the initial_condition of the
3135    access_function of DR.  */
3136
3137 static void
3138 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3139                          tree niters)
3140 {
3141   tree access_fn = DR_ACCESS_FN (dr, 0);
3142   tree init, init_new, step;
3143
3144   step = evolution_part_in_loop_num (access_fn, loop->num);
3145   init = initial_condition (access_fn);
3146
3147   init_new = build (PLUS_EXPR, TREE_TYPE (init),
3148                   build (MULT_EXPR, TREE_TYPE (niters),
3149                          niters, step), init);
3150   DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3151
3152   return;
3153 }
3154
3155
3156 /* Function vect_update_inits_of_drs
3157
3158    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3159    This function updates the information recorded for the data references in
3160    the loop to account for the fact that the first NITERS iterations had
3161    already been executed.  Specifically, it updates the initial_condition of the
3162    access_function of all the data_references in the loop.  */
3163
3164 static void
3165 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3166 {
3167   unsigned int i;
3168   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3169   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3170   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3171
3172   if (dump_file && (dump_flags & TDF_DETAILS))
3173     fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3174
3175   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3176     {
3177       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3178       vect_update_inits_of_dr (dr, loop, niters);
3179     }
3180
3181   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3182     {
3183       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3184       vect_update_inits_of_dr (dr, loop, niters);
3185     }
3186 }
3187
3188
3189 /* Function vect_do_peeling_for_alignment
3190
3191    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3192    'niters' is set to the misalignment of one of the data references in the
3193    loop, thereby forcing it to refer to an aligned location at the beginning
3194    of the execution of this loop.  The data reference for which we are
3195    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
3196
3197 static void
3198 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3199 {
3200   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3201   tree niters_of_prolog_loop, ni_name;
3202   tree n_iters;
3203   struct loop *new_loop;
3204
3205   if (vect_debug_details (NULL))
3206     fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3207
3208   ni_name = vect_build_loop_niters (loop_vinfo);
3209   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3210
3211   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
3212   new_loop =
3213         slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3214                                        niters_of_prolog_loop, ni_name, true);
3215 #ifdef ENABLE_CHECKING
3216   gcc_assert (new_loop);
3217   slpeel_verify_cfg_after_peeling (new_loop, loop);
3218 #endif
3219
3220   /* Update number of times loop executes.  */
3221   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3222   LOOP_VINFO_NITERS (loop_vinfo) =
3223     build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3224
3225   /* Update the init conditions of the access functions of all data refs.  */
3226   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3227
3228   /* After peeling we have to reset scalar evolution analyzer.  */
3229   scev_reset ();
3230
3231   return;
3232 }
3233
3234
3235 /* Function vect_transform_loop.
3236
3237    The analysis phase has determined that the loop is vectorizable.
3238    Vectorize the loop - created vectorized stmts to replace the scalar
3239    stmts in the loop, and update the loop exit condition.  */
3240
3241 static void
3242 vect_transform_loop (loop_vec_info loop_vinfo,
3243                      struct loops *loops ATTRIBUTE_UNUSED)
3244 {
3245   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3246   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3247   int nbbs = loop->num_nodes;
3248   block_stmt_iterator si;
3249   int i;
3250   tree ratio = NULL;
3251   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3252
3253   if (vect_debug_details (NULL))
3254     fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3255
3256
3257   /* Peel the loop if there are data refs with unknown alignment.
3258      Only one data ref with unknown store is allowed.  */
3259
3260   if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3261     vect_do_peeling_for_alignment (loop_vinfo, loops);
3262
3263   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3264      compile time constant), or it is a constant that doesn't divide by the
3265      vectorization factor, then an epilog loop needs to be created.
3266      We therefore duplicate the loop: the original loop will be vectorized,
3267      and will compute the first (n/VF) iterations. The second copy of the loop
3268      will remain scalar and will compute the remaining (n%VF) iterations.
3269      (VF is the vectorization factor).  */
3270
3271   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3272       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3273           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3274     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3275   else
3276     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3277                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3278
3279   /* 1) Make sure the loop header has exactly two entries
3280      2) Make sure we have a preheader basic block.  */
3281
3282   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3283
3284   loop_split_edge_with (loop_preheader_edge (loop), NULL);
3285
3286
3287   /* FORNOW: the vectorizer supports only loops which body consist
3288      of one basic block (header + empty latch). When the vectorizer will
3289      support more involved loop forms, the order by which the BBs are
3290      traversed need to be reconsidered.  */
3291
3292   for (i = 0; i < nbbs; i++)
3293     {
3294       basic_block bb = bbs[i];
3295
3296       for (si = bsi_start (bb); !bsi_end_p (si);)
3297         {
3298           tree stmt = bsi_stmt (si);
3299           stmt_vec_info stmt_info;
3300           bool is_store;
3301
3302           if (vect_debug_details (NULL))
3303             {
3304               fprintf (dump_file, "------>vectorizing statement: ");
3305               print_generic_expr (dump_file, stmt, TDF_SLIM);
3306             }
3307           stmt_info = vinfo_for_stmt (stmt);
3308           gcc_assert (stmt_info);
3309           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3310             {
3311               bsi_next (&si);
3312               continue;
3313             }
3314 #ifdef ENABLE_CHECKING
3315           /* FORNOW: Verify that all stmts operate on the same number of
3316                      units and no inner unrolling is necessary.  */
3317           gcc_assert
3318                 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3319                  == vectorization_factor);
3320 #endif
3321           /* -------- vectorize statement ------------ */
3322           if (vect_debug_details (NULL))
3323             fprintf (dump_file, "transform statement.");
3324
3325           is_store = vect_transform_stmt (stmt, &si);
3326           if (is_store)
3327             {
3328               /* free the attached stmt_vec_info and remove the stmt.  */
3329               stmt_ann_t ann = stmt_ann (stmt);
3330               free (stmt_info);
3331               set_stmt_info (ann, NULL);
3332               bsi_remove (&si);
3333               continue;
3334             }
3335
3336           bsi_next (&si);
3337         }                       /* stmts in BB */
3338     }                           /* BBs in loop */
3339
3340   slpeel_make_loop_iterate_ntimes (loop, ratio);
3341
3342   if (vect_debug_details (loop))
3343     fprintf (dump_file,"Success! loop vectorized.");
3344   if (vect_debug_stats (loop))
3345     fprintf (dump_file, "LOOP VECTORIZED.");
3346 }
3347
3348
3349 /* Function vect_is_simple_use.
3350
3351    Input:
3352    LOOP - the loop that is being vectorized.
3353    OPERAND - operand of a stmt in LOOP.
3354    DEF - the defining stmt in case OPERAND is an SSA_NAME.
3355
3356    Returns whether a stmt with OPERAND can be vectorized.
3357    Supportable operands are constants, loop invariants, and operands that are
3358    defined by the current iteration of the loop. Unsupportable operands are
3359    those that are defined by a previous iteration of the loop (as is the case
3360    in reduction/induction computations).  */
3361
3362 static bool
3363 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3364 {
3365   tree def_stmt;
3366   basic_block bb;
3367
3368   if (def)
3369     *def = NULL_TREE;
3370
3371   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3372     return true;
3373
3374   if (TREE_CODE (operand) != SSA_NAME)
3375     return false;
3376
3377   def_stmt = SSA_NAME_DEF_STMT (operand);
3378   if (def_stmt == NULL_TREE )
3379     {
3380       if (vect_debug_details (NULL))
3381         fprintf (dump_file, "no def_stmt.");
3382       return false;
3383     }
3384
3385   /* empty stmt is expected only in case of a function argument.
3386      (Otherwise - we expect a phi_node or a modify_expr).  */
3387   if (IS_EMPTY_STMT (def_stmt))
3388     {
3389       tree arg = TREE_OPERAND (def_stmt, 0);
3390       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3391         return true;
3392       if (vect_debug_details (NULL))
3393         {
3394           fprintf (dump_file, "Unexpected empty stmt: ");
3395           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3396         }
3397       return false;
3398     }
3399
3400   /* phi_node inside the loop indicates an induction/reduction pattern.
3401      This is not supported yet.  */
3402   bb = bb_for_stmt (def_stmt);
3403   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3404     {
3405       if (vect_debug_details (NULL))
3406         fprintf (dump_file, "reduction/induction - unsupported.");
3407       return false; /* FORNOW: not supported yet.  */
3408     }
3409
3410   /* Expecting a modify_expr or a phi_node.  */
3411   if (TREE_CODE (def_stmt) == MODIFY_EXPR
3412       || TREE_CODE (def_stmt) == PHI_NODE)
3413     {
3414       if (def)
3415         *def = def_stmt;
3416       return true;
3417     }
3418
3419   return false;
3420 }
3421
3422
3423 /* Function vect_analyze_operations.
3424
3425    Scan the loop stmts and make sure they are all vectorizable.  */
3426
3427 static bool
3428 vect_analyze_operations (loop_vec_info loop_vinfo)
3429 {
3430   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3431   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3432   int nbbs = loop->num_nodes;
3433   block_stmt_iterator si;
3434   int vectorization_factor = 0;
3435   int i;
3436   bool ok;
3437   tree scalar_type;
3438
3439   if (vect_debug_details (NULL))
3440     fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3441
3442   for (i = 0; i < nbbs; i++)
3443     {
3444       basic_block bb = bbs[i];
3445
3446       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3447         {
3448           tree stmt = bsi_stmt (si);
3449           int nunits;
3450           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3451           tree vectype;
3452
3453           if (vect_debug_details (NULL))
3454             {
3455               fprintf (dump_file, "==> examining statement: ");
3456               print_generic_expr (dump_file, stmt, TDF_SLIM);
3457             }
3458
3459           gcc_assert (stmt_info);
3460
3461           /* skip stmts which do not need to be vectorized.
3462              this is expected to include:
3463              - the COND_EXPR which is the loop exit condition
3464              - any LABEL_EXPRs in the loop
3465              - computations that are used only for array indexing or loop
3466              control  */
3467
3468           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3469             {
3470               if (vect_debug_details (NULL))
3471                 fprintf (dump_file, "irrelevant.");
3472               continue;
3473             }
3474
3475           if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3476             {
3477               if (vect_debug_stats (loop) || vect_debug_details (loop))
3478                 {
3479                   fprintf (dump_file, "not vectorized: vector stmt in loop:");
3480                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3481                 }
3482               return false;
3483             }
3484
3485           if (STMT_VINFO_DATA_REF (stmt_info))
3486             scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3487           else if (TREE_CODE (stmt) == MODIFY_EXPR)
3488             scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3489           else
3490             scalar_type = TREE_TYPE (stmt);
3491
3492           if (vect_debug_details (NULL))
3493             {
3494               fprintf (dump_file, "get vectype for scalar type:  ");
3495               print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3496             }
3497
3498           vectype = get_vectype_for_scalar_type (scalar_type);
3499           if (!vectype)
3500             {
3501               if (vect_debug_stats (loop) || vect_debug_details (loop))
3502                 {
3503                   fprintf (dump_file, "not vectorized: unsupported data-type ");
3504                   print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3505                 }
3506               return false;
3507             }
3508
3509           if (vect_debug_details (NULL))
3510             {
3511               fprintf (dump_file, "vectype: ");
3512               print_generic_expr (dump_file, vectype, TDF_SLIM);
3513             }
3514           STMT_VINFO_VECTYPE (stmt_info) = vectype;
3515
3516           ok = (vectorizable_operation (stmt, NULL, NULL)
3517                 || vectorizable_assignment (stmt, NULL, NULL)
3518                 || vectorizable_load (stmt, NULL, NULL)
3519                 || vectorizable_store (stmt, NULL, NULL));
3520
3521           if (!ok)
3522             {
3523               if (vect_debug_stats (loop) || vect_debug_details (loop))
3524                 {
3525                   fprintf (dump_file, "not vectorized: stmt not supported: ");
3526                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3527                 }
3528               return false;
3529             }
3530
3531           nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3532           if (vect_debug_details (NULL))
3533             fprintf (dump_file, "nunits = %d", nunits);
3534
3535           if (vectorization_factor)
3536             {
3537               /* FORNOW: don't allow mixed units.
3538                  This restriction will be relaxed in the future.  */
3539               if (nunits != vectorization_factor)
3540                 {
3541                   if (vect_debug_stats (loop) || vect_debug_details (loop))
3542                     fprintf (dump_file, "not vectorized: mixed data-types");
3543                   return false;
3544                 }
3545             }
3546           else
3547             vectorization_factor = nunits;
3548
3549 #ifdef ENABLE_CHECKING
3550           gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3551                         * vectorization_factor == UNITS_PER_SIMD_WORD);
3552 #endif
3553         }
3554     }
3555
3556   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
3557
3558   if (vectorization_factor <= 1)
3559     {
3560       if (vect_debug_stats (loop) || vect_debug_details (loop))
3561         fprintf (dump_file, "not vectorized: unsupported data-type");
3562       return false;
3563     }
3564   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3565
3566   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3567     fprintf (dump_file,
3568         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3569         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3570
3571   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3572       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3573     {
3574       if (vect_debug_stats (loop) || vect_debug_details (loop))
3575         fprintf (dump_file, "epilog loop required.");
3576       if (!vect_can_advance_ivs_p (loop))
3577         {
3578           if (vect_debug_stats (loop) || vect_debug_details (loop))
3579             fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3580           return false;
3581         }
3582       if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3583         {
3584           if (vect_debug_stats (loop) || vect_debug_details (loop))
3585             fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3586           return false;
3587         }
3588     }
3589
3590   return true;
3591 }
3592
3593
3594 /* Function exist_non_indexing_operands_for_use_p
3595
3596    USE is one of the uses attached to STMT. Check if USE is
3597    used in STMT for anything other than indexing an array.  */
3598
3599 static bool
3600 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3601 {
3602   tree operand;
3603   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3604
3605   /* USE corresponds to some operand in STMT. If there is no data
3606      reference in STMT, then any operand that corresponds to USE
3607      is not indexing an array.  */
3608   if (!STMT_VINFO_DATA_REF (stmt_info))
3609     return true;
3610
3611   /* STMT has a data_ref. FORNOW this means that its of one of
3612      the following forms:
3613      -1- ARRAY_REF = var
3614      -2- var = ARRAY_REF
3615      (This should have been verified in analyze_data_refs).
3616
3617      'var' in the second case corresponds to a def, not a use,
3618      so USE cannot correspond to any operands that are not used
3619      for array indexing.
3620
3621      Therefore, all we need to check is if STMT falls into the
3622      first case, and whether var corresponds to USE.  */
3623
3624   if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3625     return false;
3626
3627   operand = TREE_OPERAND (stmt, 1);
3628
3629   if (TREE_CODE (operand) != SSA_NAME)
3630     return false;
3631
3632   if (operand == use)
3633     return true;
3634
3635   return false;
3636 }
3637
3638
3639 /* Function vect_is_simple_iv_evolution.
3640
3641    FORNOW: A simple evolution of an induction variables in the loop is
3642    considered a polynomial evolution with constant step.  */
3643
3644 static bool
3645 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3646                                 tree * step, bool strict)
3647 {
3648   tree init_expr;
3649   tree step_expr;
3650
3651   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3652
3653   /* When there is no evolution in this loop, the evolution function
3654      is not "simple".  */
3655   if (evolution_part == NULL_TREE)
3656     return false;
3657
3658   /* When the evolution is a polynomial of degree >= 2
3659      the evolution function is not "simple".  */
3660   if (tree_is_chrec (evolution_part))
3661     return false;
3662
3663   step_expr = evolution_part;
3664   init_expr = unshare_expr (initial_condition (access_fn));
3665
3666   if (vect_debug_details (NULL))
3667     {
3668       fprintf (dump_file, "step: ");
3669       print_generic_expr (dump_file, step_expr, TDF_SLIM);
3670       fprintf (dump_file, ",  init: ");
3671       print_generic_expr (dump_file, init_expr, TDF_SLIM);
3672     }
3673
3674   *init = init_expr;
3675   *step = step_expr;
3676
3677   if (TREE_CODE (step_expr) != INTEGER_CST)
3678     {
3679       if (vect_debug_details (NULL))
3680         fprintf (dump_file, "step unknown.");
3681       return false;
3682     }
3683
3684   if (strict)
3685     if (!integer_onep (step_expr))
3686       {
3687         if (vect_debug_details (NULL))
3688           print_generic_expr (dump_file, step_expr, TDF_SLIM);
3689         return false;
3690       }
3691
3692   return true;
3693 }
3694
3695
3696 /* Function vect_analyze_scalar_cycles.
3697
3698    Examine the cross iteration def-use cycles of scalar variables, by
3699    analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3700    cycles that they represent do not impede vectorization.
3701
3702    FORNOW: Reduction as in the following loop, is not supported yet:
3703               loop1:
3704               for (i=0; i<N; i++)
3705                  sum += a[i];
3706            The cross-iteration cycle corresponding to variable 'sum' will be
3707            considered too complicated and will impede vectorization.
3708
3709    FORNOW: Induction as in the following loop, is not supported yet:
3710               loop2:
3711               for (i=0; i<N; i++)
3712                  a[i] = i;
3713
3714            However, the following loop *is* vectorizable:
3715               loop3:
3716               for (i=0; i<N; i++)
3717                  a[i] = b[i];
3718
3719            In both loops there exists a def-use cycle for the variable i:
3720               loop: i_2 = PHI (i_0, i_1)
3721                     a[i_2] = ...;
3722                     i_1 = i_2 + 1;
3723                     GOTO loop;
3724
3725            The evolution of the above cycle is considered simple enough,
3726            however, we also check that the cycle does not need to be
3727            vectorized, i.e - we check that the variable that this cycle
3728            defines is only used for array indexing or in stmts that do not
3729            need to be vectorized. This is not the case in loop2, but it
3730            *is* the case in loop3.  */
3731
3732 static bool
3733 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3734 {
3735   tree phi;
3736   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3737   basic_block bb = loop->header;
3738   tree dummy;
3739
3740   if (vect_debug_details (NULL))
3741     fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3742
3743   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3744     {
3745       tree access_fn = NULL;
3746
3747       if (vect_debug_details (NULL))
3748         {
3749           fprintf (dump_file, "Analyze phi: ");
3750           print_generic_expr (dump_file, phi, TDF_SLIM);
3751         }
3752
3753       /* Skip virtual phi's. The data dependences that are associated with
3754          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
3755
3756       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3757         {
3758           if (vect_debug_details (NULL))
3759             fprintf (dump_file, "virtual phi. skip.");
3760           continue;
3761         }
3762
3763       /* Analyze the evolution function.  */
3764
3765       /* FORNOW: The only scalar cross-iteration cycles that we allow are
3766          those of loop induction variables; This property is verified here.
3767
3768          Furthermore, if that induction variable is used in an operation
3769          that needs to be vectorized (i.e, is not solely used to index
3770          arrays and check the exit condition) - we do not support its
3771          vectorization yet. This property is verified in vect_is_simple_use,
3772          during vect_analyze_operations.  */
3773
3774       access_fn = /* instantiate_parameters
3775                      (loop,*/
3776          analyze_scalar_evolution (loop, PHI_RESULT (phi));
3777
3778       if (!access_fn)
3779         {
3780           if (vect_debug_stats (loop) || vect_debug_details (loop))
3781             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3782           return false;
3783         }
3784
3785       if (vect_debug_details (NULL))
3786         {
3787            fprintf (dump_file, "Access function of PHI: ");
3788            print_generic_expr (dump_file, access_fn, TDF_SLIM);
3789         }
3790
3791       if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3792                                         &dummy, false))
3793         {
3794           if (vect_debug_stats (loop) || vect_debug_details (loop))
3795             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3796           return false;
3797         }
3798     }
3799
3800   return true;
3801 }
3802
3803
3804 /* Function vect_analyze_data_ref_dependence.
3805
3806    Return TRUE if there (might) exist a dependence between a memory-reference
3807    DRA and a memory-reference DRB.  */
3808
3809 static bool
3810 vect_analyze_data_ref_dependence (struct data_reference *dra,
3811                                   struct data_reference *drb,
3812                                   struct loop *loop)
3813 {
3814   bool differ_p;
3815   struct data_dependence_relation *ddr;
3816
3817   if (!array_base_name_differ_p (dra, drb, &differ_p))
3818     {
3819       if (vect_debug_stats (loop) || vect_debug_details (loop))
3820         {
3821           fprintf (dump_file,
3822                 "not vectorized: can't determine dependence between: ");
3823           print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3824           fprintf (dump_file, " and ");
3825           print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3826         }
3827       return true;
3828     }
3829
3830   if (differ_p)
3831     return false;
3832
3833   ddr = initialize_data_dependence_relation (dra, drb);
3834   compute_affine_dependence (ddr);
3835
3836   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3837     return false;
3838
3839   if (vect_debug_stats (loop) || vect_debug_details (loop))
3840     {
3841       fprintf (dump_file,
3842         "not vectorized: possible dependence between data-refs ");
3843       print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3844       fprintf (dump_file, " and ");
3845       print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3846     }
3847
3848   return true;
3849 }
3850
3851
3852 /* Function vect_analyze_data_ref_dependences.
3853
3854    Examine all the data references in the loop, and make sure there do not
3855    exist any data dependences between them.
3856
3857    TODO: dependences which distance is greater than the vectorization factor
3858          can be ignored.  */
3859
3860 static bool
3861 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3862 {
3863   unsigned int i, j;
3864   varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3865   varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3866   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3867
3868   /* Examine store-store (output) dependences.  */
3869
3870   if (vect_debug_details (NULL))
3871     fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3872
3873   if (vect_debug_details (NULL))
3874     fprintf (dump_file, "compare all store-store pairs.");
3875
3876   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3877     {
3878       for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3879         {
3880           struct data_reference *dra =
3881             VARRAY_GENERIC_PTR (loop_write_refs, i);
3882           struct data_reference *drb =
3883             VARRAY_GENERIC_PTR (loop_write_refs, j);
3884           if (vect_analyze_data_ref_dependence (dra, drb, loop))
3885             return false;
3886         }
3887     }
3888
3889   /* Examine load-store (true/anti) dependences.  */
3890
3891   if (vect_debug_details (NULL))
3892     fprintf (dump_file, "compare all load-store pairs.");
3893
3894   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3895     {
3896       for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3897         {
3898           struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3899           struct data_reference *drb =
3900             VARRAY_GENERIC_PTR (loop_write_refs, j);
3901           if (vect_analyze_data_ref_dependence (dra, drb, loop))
3902             return false;
3903         }
3904     }
3905
3906   return true;
3907 }
3908
3909
3910 /* Function vect_get_first_index.
3911
3912    REF is a data reference.
3913    If it is an ARRAY_REF: if its lower bound is simple enough,
3914    put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3915    If it is not an ARRAY_REF: REF has no "first index";
3916    ARRAY_FIRST_INDEX in zero, and the function returns TRUE.  */
3917
3918 static bool
3919 vect_get_first_index (tree ref, tree *array_first_index)
3920 {
3921   tree array_start;
3922
3923   if (TREE_CODE (ref) != ARRAY_REF)
3924     *array_first_index = size_zero_node;
3925   else
3926     {
3927       array_start = array_ref_low_bound (ref);
3928       if (!host_integerp (array_start,0))
3929         {
3930           if (vect_debug_details (NULL))
3931             {
3932               fprintf (dump_file, "array min val not simple integer cst.");
3933               print_generic_expr (dump_file, array_start, TDF_DETAILS);
3934             }
3935           return false;
3936         }
3937       *array_first_index = array_start;
3938     }
3939
3940   return true;
3941 }
3942
3943
3944 /* Function vect_compute_array_base_alignment.
3945    A utility function of vect_compute_array_ref_alignment.
3946
3947    Compute the misalignment of ARRAY in bits.
3948
3949    Input:
3950    ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3951    VECTYPE - we are interested in the misalignment modulo the size of vectype.
3952              if NULL: don't compute misalignment, just return the base of ARRAY.
3953    PREV_DIMENSIONS - initialized to one.
3954    MISALIGNMENT - the computed misalignment in bits.
3955
3956    Output:
3957    If VECTYPE is not NULL:
3958      Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3959      the base of the array, and put the computed misalignment in MISALIGNMENT.
3960    If VECTYPE is NULL:
3961      Return the base of the array.
3962
3963    For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3964    a[idx_N]...[idx_2][idx_1] is
3965    {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3966     ... + idx_N * dim_0 * ... * dim_N-1}.
3967    (The misalignment of &a is not checked here).
3968    Note, that every term contains dim_0, therefore, if dim_0 is a
3969    multiple of NUNITS, the whole sum is a multiple of NUNITS.
3970    Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3971    NUINTS, we can say that the misalignment of the sum is equal to
3972    the misalignment of {idx_1 * dim_0}.  If idx_1 is not constant,
3973    we can't determine this array misalignment, and we return
3974    false.
3975    We proceed recursively in this manner, accumulating total misalignment
3976    and the multiplication of previous dimensions for correct misalignment
3977    calculation.  */
3978
3979 static tree
3980 vect_compute_array_base_alignment (tree array,
3981                                    tree vectype,
3982                                    tree *prev_dimensions,
3983                                    tree *misalignment)
3984 {
3985   tree index;
3986   tree domain;
3987   tree dimension_size;
3988   tree mis;
3989   tree bits_per_vectype;
3990   tree bits_per_vectype_unit;
3991
3992   /* The 'stop condition' of the recursion.  */
3993   if (TREE_CODE (array) != ARRAY_REF)
3994     return array;
3995
3996   if (!vectype)
3997     /* Just get the base decl.  */
3998     return vect_compute_array_base_alignment
3999                 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4000
4001   if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4002       !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4003     return NULL_TREE;
4004
4005   domain = TYPE_DOMAIN (TREE_TYPE (array));
4006   dimension_size =
4007         int_const_binop (PLUS_EXPR,
4008                 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4009                                              TYPE_MIN_VALUE (domain), 1),
4010                 size_one_node, 1);
4011
4012   /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4013      is a multiple of NUNITS:
4014
4015      dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4016    */
4017   mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4018          build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4019   if (integer_zerop (mis))
4020     /* This array is aligned. Continue just in order to get the base decl.  */
4021     return vect_compute_array_base_alignment
4022                 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4023
4024   index = TREE_OPERAND (array, 1);
4025   if (!host_integerp (index, 1))
4026     /* The current index is not constant.  */
4027     return NULL_TREE;
4028
4029   index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4030
4031   bits_per_vectype = fold_convert (unsigned_type_node,
4032     build_int_cst (NULL_TREE, BITS_PER_UNIT *
4033                  GET_MODE_SIZE (TYPE_MODE (vectype))));
4034   bits_per_vectype_unit =  fold_convert (unsigned_type_node,
4035     build_int_cst (NULL_TREE, BITS_PER_UNIT *
4036                  GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4037
4038   /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4039      earlier:
4040
4041      *misalignment =
4042        (*misalignment + index_val * dimension_size * *prev_dimensions)
4043                                                         % vectype_nunits;
4044    */
4045
4046   mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4047   mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4048   mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4049   mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4050   *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4051
4052
4053   *prev_dimensions = int_const_binop (MULT_EXPR,
4054                                 *prev_dimensions, dimension_size, 1);
4055
4056   return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4057                                             prev_dimensions,
4058                                             misalignment);
4059 }
4060
4061
4062 /* Function vect_compute_data_ref_alignment
4063
4064    Compute the misalignment of the data reference DR.
4065
4066    Output:
4067    1. If during the misalignment computation it is found that the data reference
4068       cannot be vectorized then false is returned.
4069    2. DR_MISALIGNMENT (DR) is defined.
4070
4071    FOR NOW: No analysis is actually performed. Misalignment is calculated
4072    only for trivial cases. TODO.  */
4073
4074 static bool
4075 vect_compute_data_ref_alignment (struct data_reference *dr,
4076                                  loop_vec_info loop_vinfo)
4077 {
4078   tree stmt = DR_STMT (dr);
4079   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4080   tree ref = DR_REF (dr);
4081   tree vectype;
4082   tree scalar_type;
4083   tree offset = size_zero_node;
4084   tree base, bit_offset, alignment;
4085   tree unit_bits = fold_convert (unsigned_type_node,
4086                                  build_int_cst (NULL_TREE, BITS_PER_UNIT));
4087   tree dr_base;
4088   bool base_aligned_p;
4089
4090   if (vect_debug_details (NULL))
4091     fprintf (dump_file, "vect_compute_data_ref_alignment:");
4092
4093   /* Initialize misalignment to unknown.  */
4094   DR_MISALIGNMENT (dr) = -1;
4095
4096   scalar_type = TREE_TYPE (ref);
4097   vectype = get_vectype_for_scalar_type (scalar_type);
4098   if (!vectype)
4099     {
4100       if (vect_debug_details (NULL))
4101         {
4102           fprintf (dump_file, "no vectype for stmt: ");
4103           print_generic_expr (dump_file, stmt, TDF_SLIM);
4104           fprintf (dump_file, " scalar_type: ");
4105           print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4106         }
4107       /* It is not possible to vectorize this data reference.  */
4108       return false;
4109     }
4110   STMT_VINFO_VECTYPE (stmt_info) = vectype;
4111   gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4112
4113   if (TREE_CODE (ref) == ARRAY_REF)
4114     dr_base = ref;
4115   else
4116     dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4117
4118   base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4119                           loop_vinfo, &bit_offset, &base_aligned_p);
4120   if (!base)
4121     {
4122       if (vect_debug_details (NULL))
4123         {
4124           fprintf (dump_file, "Unknown alignment for access: ");
4125           print_generic_expr (dump_file,
4126                               STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4127         }
4128       return true;
4129     }
4130
4131   if (!base_aligned_p)
4132     {
4133       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4134         {
4135           if (vect_debug_details (NULL))
4136             {
4137               fprintf (dump_file, "can't force alignment of ref: ");
4138               print_generic_expr (dump_file, ref, TDF_SLIM);
4139             }
4140           return true;
4141         }
4142
4143       /* Force the alignment of the decl.
4144          NOTE: This is the only change to the code we make during
4145          the analysis phase, before deciding to vectorize the loop.  */
4146       if (vect_debug_details (NULL))
4147         fprintf (dump_file, "force alignment");
4148       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4149       DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4150     }
4151
4152   /* At this point we assume that the base is aligned, and the offset from it
4153      (including index, if relevant) has been computed and is in BIT_OFFSET.  */
4154   gcc_assert (base_aligned_p
4155               || (TREE_CODE (base) == VAR_DECL
4156                   && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4157
4158   /* Convert into bytes.  */
4159   offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4160   /* Check that there is no remainder in bits.  */
4161   bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4162   if (!integer_zerop (bit_offset))
4163     {
4164       if (vect_debug_details (NULL))
4165         {
4166           fprintf (dump_file, "bit offset alignment: ");
4167           print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4168         }
4169       return false;
4170     }
4171
4172   /* Alignment required, in bytes:  */
4173   alignment = fold_convert (unsigned_type_node,
4174             build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4175
4176   /* Modulo alignment.  */
4177   offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4178   if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4179     {
4180       if (vect_debug_details (NULL))
4181         fprintf (dump_file, "unexpected misalign value");
4182       return false;
4183     }
4184
4185   DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4186
4187   if (vect_debug_details (NULL))
4188     fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4189
4190   return true;
4191 }
4192
4193
4194 /* Function vect_compute_array_ref_alignment
4195
4196    Compute the alignment of an array-ref.
4197    The alignment we compute here is relative to
4198    TYPE_ALIGN(VECTYPE) boundary.
4199
4200    Output:
4201    OFFSET - the alignment in bits
4202    Return value - the base of the array-ref. E.g,
4203                   if the array-ref is a.b[k].c[i][j] the returned
4204                   base is a.b[k].c
4205 */
4206
4207 static tree
4208 vect_compute_array_ref_alignment (struct data_reference *dr,
4209                                   loop_vec_info loop_vinfo,
4210                                   tree vectype,
4211                                   tree *offset)
4212 {
4213   tree array_first_index = size_zero_node;
4214   tree init;
4215   tree ref = DR_REF (dr);
4216   tree scalar_type = TREE_TYPE (ref);
4217   tree oprnd0 = TREE_OPERAND (ref, 0);
4218   tree dims = size_one_node;
4219   tree misalign = size_zero_node;
4220   tree next_ref, this_offset = size_zero_node;
4221   tree nunits;
4222   tree nbits;
4223
4224   if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4225     /* The reference is an array without its last index.  */
4226     next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4227                                                   &misalign);
4228   else
4229     next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4230                                                   &misalign);
4231   if (!vectype)
4232     /* Alignment is not requested. Just return the base.  */
4233     return next_ref;
4234
4235   /* Compute alignment.  */
4236   if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4237     return NULL_TREE;
4238   this_offset = misalign;
4239
4240   /* Check the first index accessed.  */
4241   if (!vect_get_first_index (ref, &array_first_index))
4242     {
4243       if (vect_debug_details (NULL))
4244         fprintf (dump_file, "no first_index for array.");
4245       return NULL_TREE;
4246     }
4247
4248   /* Check the index of the array_ref.  */
4249   init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4250                                         LOOP_VINFO_LOOP (loop_vinfo)->num);
4251
4252   /* FORNOW: In order to simplify the handling of alignment, we make sure
4253      that the first location at which the array is accessed ('init') is on an
4254      'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4255      This is too conservative, since we require that
4256      both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4257      NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4258      This should be relaxed in the future.  */
4259
4260   if (!init || !host_integerp (init, 0))
4261     {
4262       if (vect_debug_details (NULL))
4263         fprintf (dump_file, "non constant init. ");
4264       return NULL_TREE;
4265     }
4266
4267   /* bytes per scalar element: */
4268   nunits = fold_convert (unsigned_type_node,
4269         build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4270   nbits = int_const_binop (MULT_EXPR, nunits,
4271                            build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4272
4273   /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4274   misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4275   misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4276   misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4277
4278   /* TODO: allow negative misalign values.  */
4279   if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4280     {
4281       if (vect_debug_details (NULL))
4282         fprintf (dump_file, "unexpected misalign value");
4283       return NULL_TREE;
4284     }
4285   *offset = misalign;
4286   return next_ref;
4287 }
4288
4289
4290 /* Function vect_compute_data_refs_alignment
4291
4292    Compute the misalignment of data references in the loop.
4293    This pass may take place at function granularity instead of at loop
4294    granularity.
4295
4296    FOR NOW: No analysis is actually performed. Misalignment is calculated
4297    only for trivial cases. TODO.  */
4298
4299 static bool
4300 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4301 {
4302   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4303   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4304   unsigned int i;
4305
4306   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4307     {
4308       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4309       if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4310         return false;
4311     }
4312
4313   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4314     {
4315       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4316       if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4317         return false;
4318     }
4319
4320   return true;
4321 }
4322
4323
4324 /* Function vect_enhance_data_refs_alignment
4325
4326    This pass will use loop versioning and loop peeling in order to enhance
4327    the alignment of data references in the loop.
4328
4329    FOR NOW: we assume that whatever versioning/peeling takes place, only the
4330    original loop is to be vectorized; Any other loops that are created by
4331    the transformations performed in this pass - are not supposed to be
4332    vectorized. This restriction will be relaxed.  */
4333
4334 static void
4335 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4336 {
4337   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4338   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4339   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4340   unsigned int i;
4341
4342   /*
4343      This pass will require a cost model to guide it whether to apply peeling
4344      or versioning or a combination of the two. For example, the scheme that
4345      intel uses when given a loop with several memory accesses, is as follows:
4346      choose one memory access ('p') which alignment you want to force by doing
4347      peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4348      other accesses are not necessarily aligned, or (2) use loop versioning to
4349      generate one loop in which all accesses are aligned, and another loop in
4350      which only 'p' is necessarily aligned.
4351
4352      ("Automatic Intra-Register Vectorization for the Intel Architecture",
4353       Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4354       Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4355
4356      Devising a cost model is the most critical aspect of this work. It will
4357      guide us on which access to peel for, whether to use loop versioning, how
4358      many versions to create, etc. The cost model will probably consist of
4359      generic considerations as well as target specific considerations (on
4360      powerpc for example, misaligned stores are more painful than misaligned
4361      loads).
4362
4363      Here is the general steps involved in alignment enhancements:
4364
4365      -- original loop, before alignment analysis:
4366         for (i=0; i<N; i++){
4367           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
4368           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4369         }
4370
4371      -- After vect_compute_data_refs_alignment:
4372         for (i=0; i<N; i++){
4373           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4374           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4375         }
4376
4377      -- Possibility 1: we do loop versioning:
4378      if (p is aligned) {
4379         for (i=0; i<N; i++){    # loop 1A
4380           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4381           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4382         }
4383      }
4384      else {
4385         for (i=0; i<N; i++){    # loop 1B
4386           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4387           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4388         }
4389      }
4390
4391      -- Possibility 2: we do loop peeling:
4392      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4393         x = q[i];
4394         p[i] = y;
4395      }
4396      for (i = 3; i < N; i++){   # loop 2A
4397         x = q[i];                       # DR_MISALIGNMENT(q) = 0
4398         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
4399      }
4400
4401      -- Possibility 3: combination of loop peeling and versioning:
4402      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4403         x = q[i];
4404         p[i] = y;
4405      }
4406      if (p is aligned) {
4407         for (i = 3; i<N; i++){  # loop 3A
4408           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4409           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4410         }
4411      }
4412      else {
4413         for (i = 3; i<N; i++){  # loop 3B
4414           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4415           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4416         }
4417      }
4418
4419      These loops are later passed to loop_transform to be vectorized. The
4420      vectorizer will use the alignment information to guide the transformation
4421      (whether to generate regular loads/stores, or with special handling for
4422      misalignment).
4423    */
4424
4425   /* (1) Peeling to force alignment.  */
4426
4427   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4428      Considerations:
4429      + How many accesses will become aligned due to the peeling
4430      - How many accesses will become unaligned due to the peeling,
4431        and the cost of misaligned accesses.
4432      - The cost of peeling (the extra runtime checks, the increase
4433        in code size).
4434
4435      The scheme we use FORNOW: peel to force the alignment of the first
4436      misaligned store in the loop.
4437      Rationale: misaligned stores are not yet supported.
4438
4439      TODO: Use a better cost model.  */
4440
4441   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4442     {
4443       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4444       if (!aligned_access_p (dr))
4445         {
4446           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4447           LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4448           break;
4449         }
4450     }
4451
4452   if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4453     {
4454       if (vect_debug_details (loop))
4455         fprintf (dump_file, "Peeling for alignment will not be applied.");
4456       return;
4457     }
4458   else
4459     if (vect_debug_details (loop))
4460       fprintf (dump_file, "Peeling for alignment will be applied.");
4461
4462
4463   /* (1.2) Update the alignment info according to the peeling factor.
4464            If the misalignment of the DR we peel for is M, then the
4465            peeling factor is VF - M, and the misalignment of each access DR_i
4466            in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4467            If the misalignment of the DR we peel for is unknown, then the
4468            misalignment of each access DR_i in the loop is also unknown.
4469
4470            FORNOW: set the misalignment of the accesses to unknown even
4471                    if the peeling factor is known at compile time.
4472
4473            TODO: - if the peeling factor is known at compile time, use that
4474                    when updating the misalignment info of the loop DRs.
4475                  - consider accesses that are known to have the same
4476                    alignment, even if that alignment is unknown.  */
4477
4478   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4479     {
4480       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4481       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4482         DR_MISALIGNMENT (dr) = 0;
4483       else
4484         DR_MISALIGNMENT (dr) = -1;
4485     }
4486   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4487     {
4488       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4489       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4490         DR_MISALIGNMENT (dr) = 0;
4491       else
4492         DR_MISALIGNMENT (dr) = -1;
4493     }
4494 }
4495
4496
4497 /* Function vect_analyze_data_refs_alignment
4498
4499    Analyze the alignment of the data-references in the loop.
4500    FOR NOW: Until support for misliagned accesses is in place, only if all
4501    accesses are aligned can the loop be vectorized. This restriction will be
4502    relaxed.  */
4503
4504 static bool
4505 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4506 {
4507   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4508   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4509   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4510   enum dr_alignment_support supportable_dr_alignment;
4511   unsigned int i;
4512
4513   if (vect_debug_details (NULL))
4514     fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4515
4516
4517   /* This pass may take place at function granularity instead of at loop
4518      granularity.  */
4519
4520   if (!vect_compute_data_refs_alignment (loop_vinfo))
4521     {
4522       if (vect_debug_details (loop) || vect_debug_stats (loop))
4523         fprintf (dump_file,
4524                  "not vectorized: can't calculate alignment for data ref.");
4525       return false;
4526     }
4527
4528
4529   /* This pass will decide on using loop versioning and/or loop peeling in
4530      order to enhance the alignment of data references in the loop.  */
4531
4532   vect_enhance_data_refs_alignment (loop_vinfo);
4533
4534
4535   /* Finally, check that all the data references in the loop can be
4536      handled with respect to their alignment.  */
4537
4538   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4539     {
4540       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4541       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4542       if (!supportable_dr_alignment)
4543         {
4544           if (vect_debug_details (loop) || vect_debug_stats (loop))
4545             fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4546           return false;
4547         }
4548     }
4549   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4550     {
4551       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4552       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4553       if (!supportable_dr_alignment)
4554         {
4555           if (vect_debug_details (loop) || vect_debug_stats (loop))
4556             fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4557           return false;
4558         }
4559     }
4560
4561   return true;
4562 }
4563
4564
4565 /* Function vect_analyze_data_ref_access.
4566
4567    Analyze the access pattern of the data-reference DR. For now, a data access
4568    has to consecutive and aligned to be considered vectorizable.  */
4569
4570 static bool
4571 vect_analyze_data_ref_access (struct data_reference *dr)
4572 {
4573   varray_type access_fns = DR_ACCESS_FNS (dr);
4574   tree access_fn;
4575   tree init, step;
4576   unsigned int dimensions, i;
4577
4578   /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4579      i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4580      access is contiguous).  */
4581   dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4582
4583   for (i = 1; i < dimensions; i++) /* Not including the last dimension.  */
4584     {
4585       access_fn = DR_ACCESS_FN (dr, i);
4586
4587       if (evolution_part_in_loop_num (access_fn,
4588                                       loop_containing_stmt (DR_STMT (dr))->num))
4589         {
4590           /* Evolution part is not NULL in this loop (it is neither constant
4591              nor invariant).  */
4592           if (vect_debug_details (NULL))
4593             {
4594               fprintf (dump_file,
4595                        "not vectorized: complicated multidim. array access.");
4596               print_generic_expr (dump_file, access_fn, TDF_SLIM);
4597             }
4598           return false;
4599         }
4600     }
4601
4602   access_fn = DR_ACCESS_FN (dr, 0); /*  The last dimension access function.  */
4603   if (!evolution_function_is_constant_p (access_fn)
4604       && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4605                                        access_fn, &init, &step, true))
4606     {
4607       if (vect_debug_details (NULL))
4608         {
4609           fprintf (dump_file, "not vectorized: complicated access function.");
4610           print_generic_expr (dump_file, access_fn, TDF_SLIM);
4611         }
4612       return false;
4613     }
4614
4615   return true;
4616 }
4617
4618
4619 /* Function vect_analyze_data_ref_accesses.
4620
4621    Analyze the access pattern of all the data references in the loop.
4622
4623    FORNOW: the only access pattern that is considered vectorizable is a
4624            simple step 1 (consecutive) access.
4625
4626    FORNOW: handle only arrays and pointer accesses.  */
4627
4628 static bool
4629 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4630 {
4631   unsigned int i;
4632   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4633   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4634
4635   if (vect_debug_details (NULL))
4636     fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4637
4638   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4639     {
4640       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4641       bool ok = vect_analyze_data_ref_access (dr);
4642       if (!ok)
4643         {
4644           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4645               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4646             fprintf (dump_file, "not vectorized: complicated access pattern.");
4647           return false;
4648         }
4649     }
4650
4651   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4652     {
4653       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4654       bool ok = vect_analyze_data_ref_access (dr);
4655       if (!ok)
4656         {
4657           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4658               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4659             fprintf (dump_file, "not vectorized: complicated access pattern.");
4660           return false;
4661         }
4662     }
4663
4664   return true;
4665 }
4666
4667
4668 /* Function vect_analyze_pointer_ref_access.
4669
4670    Input:
4671    STMT - a stmt that contains a data-ref
4672    MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4673
4674    If the data-ref access is vectorizable, return a data_reference structure
4675    that represents it (DR). Otherwise - return NULL.  */
4676
4677 static struct data_reference *
4678 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4679 {
4680   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4681   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4682   tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4683   tree init, step;
4684   int step_val;
4685   tree reftype, innertype;
4686   enum machine_mode innermode;
4687   tree indx_access_fn;
4688   int loopnum = loop->num;
4689   struct data_reference *dr;
4690
4691   if (!access_fn)
4692     {
4693       if (vect_debug_stats (loop) || vect_debug_details (loop))
4694         fprintf (dump_file, "not vectorized: complicated pointer access.");
4695       return NULL;
4696     }
4697
4698   if (vect_debug_details (NULL))
4699     {
4700       fprintf (dump_file, "Access function of ptr: ");
4701       print_generic_expr (dump_file, access_fn, TDF_SLIM);
4702     }
4703
4704   if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4705     {
4706       if (vect_debug_stats (loop) || vect_debug_details (loop))
4707         fprintf (dump_file, "not vectorized: pointer access is not simple.");
4708       return NULL;
4709     }
4710
4711   STRIP_NOPS (init);
4712
4713   if (!host_integerp (step,0))
4714     {
4715       if (vect_debug_stats (loop) || vect_debug_details (loop))
4716         fprintf (dump_file,
4717                 "not vectorized: non constant step for pointer access.");
4718       return NULL;
4719     }
4720
4721   step_val = TREE_INT_CST_LOW (step);
4722
4723   reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4724   if (TREE_CODE (reftype) != POINTER_TYPE)
4725     {
4726       if (vect_debug_stats (loop) || vect_debug_details (loop))
4727         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4728       return NULL;
4729     }
4730
4731   reftype = TREE_TYPE (init);
4732   if (TREE_CODE (reftype) != POINTER_TYPE)
4733     {
4734       if (vect_debug_stats (loop) || vect_debug_details (loop))
4735         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4736       return NULL;
4737     }
4738
4739   innertype = TREE_TYPE (reftype);
4740   innermode = TYPE_MODE (innertype);
4741   if (GET_MODE_SIZE (innermode) != step_val)
4742     {
4743       /* FORNOW: support only consecutive access */
4744       if (vect_debug_stats (loop) || vect_debug_details (loop))
4745         fprintf (dump_file, "not vectorized: non consecutive access.");
4746       return NULL;
4747     }
4748
4749   indx_access_fn =
4750         build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4751   if (vect_debug_details (NULL))
4752     {
4753       fprintf (dump_file, "Access function of ptr indx: ");
4754       print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4755     }
4756   dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4757   return dr;
4758 }
4759
4760
4761 /* Function vect_get_symbl_and_dr.
4762
4763    The function returns SYMBL - the relevant variable for
4764    memory tag (for aliasing purposes).
4765    Also data reference structure DR is created.
4766
4767    Input:
4768    MEMREF - data reference in STMT
4769    IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4770
4771    Output:
4772    DR - data_reference struct for MEMREF
4773    return value - the relevant variable for memory tag (for aliasing purposes).
4774
4775 */
4776
4777 static tree
4778 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4779                        loop_vec_info loop_vinfo, struct data_reference **dr)
4780 {
4781   tree symbl, oprnd0, oprnd1;
4782   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4783   tree offset;
4784   tree array_base, base;
4785   struct data_reference *new_dr;
4786   bool base_aligned_p;
4787
4788   *dr = NULL;
4789   switch (TREE_CODE (memref))
4790     {
4791     case INDIRECT_REF:
4792       new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4793       if (! new_dr)
4794         return NULL_TREE;
4795       *dr = new_dr;
4796       symbl = DR_BASE_NAME (new_dr);
4797       STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4798
4799       switch (TREE_CODE (symbl))
4800         {
4801         case PLUS_EXPR:
4802         case MINUS_EXPR:
4803           oprnd0 = TREE_OPERAND (symbl, 0);
4804           oprnd1 = TREE_OPERAND (symbl, 1);
4805
4806           STRIP_NOPS(oprnd1);
4807           /* Only {address_base + offset} expressions are supported,
4808              where address_base can be POINTER_TYPE or ARRAY_TYPE and
4809              offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4810              TODO: swap operands if {offset + address_base}.  */
4811           if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4812                && TREE_CODE (oprnd1) != INTEGER_CST)
4813               || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4814             return NULL_TREE;
4815
4816           if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4817             symbl = oprnd0;
4818           else
4819             symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4820                                            loop_vinfo, &new_dr);
4821
4822         case SSA_NAME:
4823         case ADDR_EXPR:
4824           /* symbl remains unchanged.  */
4825           break;
4826
4827         default:
4828           if (vect_debug_details (NULL))
4829             {
4830               fprintf (dump_file, "unhandled data ref: ");
4831               print_generic_expr (dump_file, memref, TDF_SLIM);
4832               fprintf (dump_file, " (symbl ");
4833               print_generic_expr (dump_file, symbl, TDF_SLIM);
4834               fprintf (dump_file, ") in stmt  ");
4835               print_generic_expr (dump_file, stmt, TDF_SLIM);
4836             }
4837           return NULL_TREE;
4838         }
4839       break;
4840
4841     case ARRAY_REF:
4842       offset = size_zero_node;
4843
4844       /* Store the array base in the stmt info.
4845          For one dimensional array ref a[i], the base is a,
4846          for multidimensional a[i1][i2]..[iN], the base is
4847          a[i1][i2]..[iN-1].  */
4848       array_base = TREE_OPERAND (memref, 0);
4849       STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4850
4851       new_dr = analyze_array (stmt, memref, is_read);
4852       *dr = new_dr;
4853
4854       /* Find the relevant symbol for aliasing purposes.  */
4855       base = DR_BASE_NAME (new_dr);
4856       switch (TREE_CODE (base))
4857         {
4858         case VAR_DECL:
4859           symbl = base;
4860           break;
4861
4862         case INDIRECT_REF:
4863           symbl = TREE_OPERAND (base, 0);
4864           break;
4865
4866         case COMPONENT_REF:
4867           /* Could have recorded more accurate information -
4868              i.e, the actual FIELD_DECL that is being referenced -
4869              but later passes expect VAR_DECL as the nmt.  */
4870           symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4871                                         loop_vinfo, &offset, &base_aligned_p);
4872           if (symbl)
4873             break;
4874           /* fall through */
4875         default:
4876           if (vect_debug_details (NULL))
4877             {
4878               fprintf (dump_file, "unhandled struct/class field access ");
4879               print_generic_expr (dump_file, stmt, TDF_SLIM);
4880             }
4881           return NULL_TREE;
4882         }
4883       break;
4884
4885     default:
4886       if (vect_debug_details (NULL))
4887         {
4888           fprintf (dump_file, "unhandled data ref: ");
4889           print_generic_expr (dump_file, memref, TDF_SLIM);
4890           fprintf (dump_file, " in stmt  ");
4891           print_generic_expr (dump_file, stmt, TDF_SLIM);
4892         }
4893       return NULL_TREE;
4894     }
4895   return symbl;
4896 }
4897
4898
4899 /* Function vect_analyze_data_refs.
4900
4901    Find all the data references in the loop.
4902
4903    FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4904            which base is really an array (not a pointer) and which alignment
4905            can be forced. This restriction will be relaxed.  */
4906
4907 static bool
4908 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4909 {
4910   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4911   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4912   int nbbs = loop->num_nodes;
4913   block_stmt_iterator si;
4914   int j;
4915   struct data_reference *dr;
4916   tree tag;
4917   tree address_base;
4918   bool base_aligned_p;
4919   tree offset;
4920
4921   if (vect_debug_details (NULL))
4922     fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4923
4924   for (j = 0; j < nbbs; j++)
4925     {
4926       basic_block bb = bbs[j];
4927       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4928         {
4929           bool is_read = false;
4930           tree stmt = bsi_stmt (si);
4931           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4932           v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4933           v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4934           vuse_optype vuses = STMT_VUSE_OPS (stmt);
4935           varray_type *datarefs = NULL;
4936           int nvuses, nv_may_defs, nv_must_defs;
4937           tree memref = NULL;
4938           tree symbl;
4939
4940           /* Assumption: there exists a data-ref in stmt, if and only if
4941              it has vuses/vdefs.  */
4942
4943           if (!vuses && !v_may_defs && !v_must_defs)
4944             continue;
4945
4946           nvuses = NUM_VUSES (vuses);
4947           nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4948           nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4949
4950           if (nvuses && (nv_may_defs || nv_must_defs))
4951             {
4952               if (vect_debug_details (NULL))
4953                 {
4954                   fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4955                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4956                 }
4957               return false;
4958             }
4959
4960           if (TREE_CODE (stmt) != MODIFY_EXPR)
4961             {
4962               if (vect_debug_details (NULL))
4963                 {
4964                   fprintf (dump_file, "unexpected vops in stmt: ");
4965                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4966                 }
4967               return false;
4968             }
4969
4970           if (vuses)
4971             {
4972               memref = TREE_OPERAND (stmt, 1);
4973               datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4974               is_read = true;
4975             }
4976           else /* vdefs */
4977             {
4978               memref = TREE_OPERAND (stmt, 0);
4979               datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4980               is_read = false;
4981             }
4982
4983           /* Analyze MEMREF. If it is of a supported form, build data_reference
4984              struct for it (DR) and find the relevant symbol for aliasing
4985              purposes.  */
4986           symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4987                                          &dr);
4988           if (!symbl)
4989             {
4990               if (vect_debug_stats (loop) || vect_debug_details (loop))
4991                 {
4992                   fprintf (dump_file, "not vectorized: unhandled data ref: ");
4993                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4994                 }
4995               return false;
4996             }
4997
4998           /* Find and record the memtag assigned to this data-ref.  */
4999            switch (TREE_CODE (symbl))
5000             {
5001             case VAR_DECL:
5002               STMT_VINFO_MEMTAG (stmt_info) = symbl;
5003               break;
5004
5005             case SSA_NAME:
5006               symbl = SSA_NAME_VAR (symbl);
5007               tag = get_var_ann (symbl)->type_mem_tag;
5008               if (!tag)
5009                 {
5010                   tree ptr = TREE_OPERAND (memref, 0);
5011                   if (TREE_CODE (ptr) == SSA_NAME)
5012                     tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5013                 }
5014               if (!tag)
5015                 {
5016                   if (vect_debug_stats (loop) || vect_debug_details (loop))
5017                     fprintf (dump_file, "not vectorized: no memtag for ref.");
5018                   return false;
5019                 }
5020               STMT_VINFO_MEMTAG (stmt_info) = tag;
5021               break;
5022
5023             case ADDR_EXPR:
5024               address_base = TREE_OPERAND (symbl, 0);
5025
5026               switch (TREE_CODE (address_base))
5027                 {
5028                 case ARRAY_REF:
5029                   dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5030                                       DR_IS_READ(dr));
5031                   tag = vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr),
5032                            NULL_TREE, loop_vinfo, &offset, &base_aligned_p);
5033                   if (!tag)
5034                     {
5035                       if (vect_debug_stats (loop) || vect_debug_details (loop))
5036                         fprintf (dump_file, "not vectorized: no memtag for ref.");
5037                       return false;
5038                     }
5039                   STMT_VINFO_MEMTAG (stmt_info) = tag;
5040                   break;
5041
5042                 case VAR_DECL:
5043                   STMT_VINFO_MEMTAG (stmt_info) = address_base;
5044                   break;
5045
5046                 default:
5047                   if (vect_debug_stats (loop) || vect_debug_details (loop))
5048                     {
5049                       fprintf (dump_file,
5050                                "not vectorized: unhandled address expr: ");
5051                       print_generic_expr (dump_file, stmt, TDF_SLIM);
5052                     }
5053                   return false;
5054                 }
5055               break;
5056
5057             default:
5058               if (vect_debug_stats (loop) || vect_debug_details (loop))
5059                 {
5060                   fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5061                   print_generic_expr (dump_file, memref, TDF_SLIM);
5062                 }
5063               return false;
5064             }
5065
5066           VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5067           STMT_VINFO_DATA_REF (stmt_info) = dr;
5068         }
5069     }
5070
5071   return true;
5072 }
5073
5074
5075 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
5076
5077 /* Function vect_mark_relevant.
5078
5079    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
5080
5081 static void
5082 vect_mark_relevant (varray_type worklist, tree stmt)
5083 {
5084   stmt_vec_info stmt_info;
5085
5086   if (vect_debug_details (NULL))
5087     fprintf (dump_file, "mark relevant.");
5088
5089   if (TREE_CODE (stmt) == PHI_NODE)
5090     {
5091       VARRAY_PUSH_TREE (worklist, stmt);
5092       return;
5093     }
5094
5095   stmt_info = vinfo_for_stmt (stmt);
5096
5097   if (!stmt_info)
5098     {
5099       if (vect_debug_details (NULL))
5100         {
5101           fprintf (dump_file, "mark relevant: no stmt info!!.");
5102           print_generic_expr (dump_file, stmt, TDF_SLIM);
5103         }
5104       return;
5105     }
5106
5107   if (STMT_VINFO_RELEVANT_P (stmt_info))
5108     {
5109       if (vect_debug_details (NULL))
5110         fprintf (dump_file, "already marked relevant.");
5111       return;
5112     }
5113
5114   STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5115   VARRAY_PUSH_TREE (worklist, stmt);
5116 }
5117
5118
5119 /* Function vect_stmt_relevant_p.
5120
5121    Return true if STMT in loop that is represented by LOOP_VINFO is
5122    "relevant for vectorization".
5123
5124    A stmt is considered "relevant for vectorization" if:
5125    - it has uses outside the loop.
5126    - it has vdefs (it alters memory).
5127    - control stmts in the loop (except for the exit condition).
5128
5129    CHECKME: what other side effects would the vectorizer allow?  */
5130
5131 static bool
5132 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5133 {
5134   v_may_def_optype v_may_defs;
5135   v_must_def_optype v_must_defs;
5136   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5137   int i;
5138   dataflow_t df;
5139   int num_uses;
5140
5141   /* cond stmt other than loop exit cond.  */
5142   if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5143     return true;
5144
5145   /* changing memory.  */
5146   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5147   v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5148   if (v_may_defs || v_must_defs)
5149     {
5150       if (vect_debug_details (NULL))
5151         fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5152       return true;
5153     }
5154
5155   /* uses outside the loop.  */
5156   df = get_immediate_uses (stmt);
5157   num_uses = num_immediate_uses (df);
5158   for (i = 0; i < num_uses; i++)
5159     {
5160       tree use = immediate_use (df, i);
5161       basic_block bb = bb_for_stmt (use);
5162       if (!flow_bb_inside_loop_p (loop, bb))
5163         {
5164           if (vect_debug_details (NULL))
5165             fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5166           return true;
5167         }
5168     }
5169
5170   return false;
5171 }
5172
5173
5174 /* Function vect_mark_stmts_to_be_vectorized.
5175
5176    Not all stmts in the loop need to be vectorized. For example:
5177
5178      for i...
5179        for j...
5180    1.    T0 = i + j
5181    2.    T1 = a[T0]
5182
5183    3.    j = j + 1
5184
5185    Stmt 1 and 3 do not need to be vectorized, because loop control and
5186    addressing of vectorized data-refs are handled differently.
5187
5188    This pass detects such stmts.  */
5189
5190 static bool
5191 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5192 {
5193   varray_type worklist;
5194   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5195   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5196   unsigned int nbbs = loop->num_nodes;
5197   block_stmt_iterator si;
5198   tree stmt;
5199   stmt_ann_t ann;
5200   unsigned int i;
5201   int j;
5202   use_optype use_ops;
5203   stmt_vec_info stmt_info;
5204
5205   if (vect_debug_details (NULL))
5206     fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5207
5208   VARRAY_TREE_INIT (worklist, 64, "work list");
5209
5210   /* 1. Init worklist.  */
5211
5212   for (i = 0; i < nbbs; i++)
5213     {
5214       basic_block bb = bbs[i];
5215       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5216         {
5217           stmt = bsi_stmt (si);
5218
5219           if (vect_debug_details (NULL))
5220             {
5221               fprintf (dump_file, "init: stmt relevant? ");
5222               print_generic_expr (dump_file, stmt, TDF_SLIM);
5223             }
5224
5225           stmt_info = vinfo_for_stmt (stmt);
5226           STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5227
5228           if (vect_stmt_relevant_p (stmt, loop_vinfo))
5229             vect_mark_relevant (worklist, stmt);
5230         }
5231     }
5232
5233
5234   /* 2. Process_worklist */
5235
5236   while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5237     {
5238       stmt = VARRAY_TOP_TREE (worklist);
5239       VARRAY_POP (worklist);
5240
5241       if (vect_debug_details (NULL))
5242         {
5243           fprintf (dump_file, "worklist: examine stmt: ");
5244           print_generic_expr (dump_file, stmt, TDF_SLIM);
5245         }
5246
5247       /* Examine the USES in this statement. Mark all the statements which
5248          feed this statement's uses as "relevant", unless the USE is used as
5249          an array index.  */
5250
5251       if (TREE_CODE (stmt) == PHI_NODE)
5252         {
5253           /* follow the def-use chain inside the loop.  */
5254           for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5255             {
5256               tree arg = PHI_ARG_DEF (stmt, j);
5257               tree def_stmt = NULL_TREE;
5258               basic_block bb;
5259               if (!vect_is_simple_use (arg, loop, &def_stmt))
5260                 {
5261                   if (vect_debug_details (NULL))
5262                     fprintf (dump_file, "worklist: unsupported use.");
5263                   varray_clear (worklist);
5264                   return false;
5265                 }
5266               if (!def_stmt)
5267                 continue;
5268
5269               if (vect_debug_details (NULL))
5270                 {
5271                   fprintf (dump_file, "worklist: def_stmt: ");
5272                   print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5273                 }
5274
5275               bb = bb_for_stmt (def_stmt);
5276               if (flow_bb_inside_loop_p (loop, bb))
5277                 vect_mark_relevant (worklist, def_stmt);
5278             }
5279         }
5280
5281       ann = stmt_ann (stmt);
5282       use_ops = USE_OPS (ann);
5283
5284       for (i = 0; i < NUM_USES (use_ops); i++)
5285         {
5286           tree use = USE_OP (use_ops, i);
5287
5288           /* We are only interested in uses that need to be vectorized. Uses
5289              that are used for address computation are not considered relevant.
5290            */
5291           if (exist_non_indexing_operands_for_use_p (use, stmt))
5292             {
5293               tree def_stmt = NULL_TREE;
5294               basic_block bb;
5295               if (!vect_is_simple_use (use, loop, &def_stmt))
5296                 {
5297                   if (vect_debug_details (NULL))
5298                     fprintf (dump_file, "worklist: unsupported use.");
5299                   varray_clear (worklist);
5300                   return false;
5301                 }
5302
5303               if (!def_stmt)
5304                 continue;
5305
5306               if (vect_debug_details (NULL))
5307                 {
5308                   fprintf (dump_file, "worklist: examine use %d: ", i);
5309                   print_generic_expr (dump_file, use, TDF_SLIM);
5310                 }
5311
5312               bb = bb_for_stmt (def_stmt);
5313               if (flow_bb_inside_loop_p (loop, bb))
5314                 vect_mark_relevant (worklist, def_stmt);
5315             }
5316         }
5317     }                           /* while worklist */
5318
5319   varray_clear (worklist);
5320   return true;
5321 }
5322
5323
5324 /* Function vect_can_advance_ivs_p
5325
5326    In case the number of iterations that LOOP iterates in unknown at compile
5327    time, an epilog loop will be generated, and the loop induction variables
5328    (IVs) will be "advanced" to the value they are supposed to take just before
5329    the epilog loop.  Here we check that the access function of the loop IVs
5330    and the expression that represents the loop bound are simple enough.
5331    These restrictions will be relaxed in the future.  */
5332
5333 static bool
5334 vect_can_advance_ivs_p (struct loop *loop)
5335 {
5336   basic_block bb = loop->header;
5337   tree phi;
5338
5339   /* Analyze phi functions of the loop header.  */
5340
5341   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5342     {
5343       tree access_fn = NULL;
5344       tree evolution_part;
5345
5346       if (vect_debug_details (NULL))
5347         {
5348           fprintf (dump_file, "Analyze phi: ");
5349           print_generic_expr (dump_file, phi, TDF_SLIM);
5350         }
5351
5352       /* Skip virtual phi's. The data dependences that are associated with
5353          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
5354
5355       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5356         {
5357           if (vect_debug_details (NULL))
5358             fprintf (dump_file, "virtual phi. skip.");
5359           continue;
5360         }
5361
5362       /* Analyze the evolution function.  */
5363
5364       access_fn = instantiate_parameters
5365         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5366
5367       if (!access_fn)
5368         {
5369           if (vect_debug_details (NULL))
5370             fprintf (dump_file, "No Access function.");
5371           return false;
5372         }
5373
5374       if (vect_debug_details (NULL))
5375         {
5376           fprintf (dump_file, "Access function of PHI: ");
5377           print_generic_expr (dump_file, access_fn, TDF_SLIM);
5378         }
5379
5380       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5381
5382       if (evolution_part == NULL_TREE)
5383         return false;
5384
5385       /* FORNOW: We do not transform initial conditions of IVs
5386          which evolution functions are a polynomial of degree >= 2.  */
5387
5388       if (tree_is_chrec (evolution_part))
5389         return false;
5390     }
5391
5392   return true;
5393 }
5394
5395
5396 /* Function vect_get_loop_niters.
5397
5398    Determine how many iterations the loop is executed.
5399    If an expression that represents the number of iterations
5400    can be constructed, place it in NUMBER_OF_ITERATIONS.
5401    Return the loop exit condition.  */
5402
5403 static tree
5404 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5405 {
5406   tree niters;
5407
5408   if (vect_debug_details (NULL))
5409     fprintf (dump_file, "\n<<get_loop_niters>>\n");
5410
5411   niters = number_of_iterations_in_loop (loop);
5412
5413   if (niters != NULL_TREE
5414       && niters != chrec_dont_know)
5415     {
5416       *number_of_iterations = niters;
5417
5418       if (vect_debug_details (NULL))
5419         {
5420           fprintf (dump_file, "==> get_loop_niters:" );
5421           print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5422         }
5423     }
5424
5425   return get_loop_exit_condition (loop);
5426 }
5427
5428
5429 /* Function vect_analyze_loop_form.
5430
5431    Verify the following restrictions (some may be relaxed in the future):
5432    - it's an inner-most loop
5433    - number of BBs = 2 (which are the loop header and the latch)
5434    - the loop has a pre-header
5435    - the loop has a single entry and exit
5436    - the loop exit condition is simple enough, and the number of iterations
5437      can be analyzed (a countable loop).  */
5438
5439 static loop_vec_info
5440 vect_analyze_loop_form (struct loop *loop)
5441 {
5442   loop_vec_info loop_vinfo;
5443   tree loop_cond;
5444   tree number_of_iterations = NULL;
5445   bool rescan = false;
5446
5447   if (vect_debug_details (loop))
5448     fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5449
5450   if (loop->inner
5451       || !loop->single_exit
5452       || loop->num_nodes != 2
5453       || EDGE_COUNT (loop->header->preds) != 2
5454       || loop->num_entries != 1)
5455     {
5456       if (vect_debug_stats (loop) || vect_debug_details (loop))
5457         {
5458           fprintf (dump_file, "not vectorized: bad loop form. ");
5459           if (loop->inner)
5460             fprintf (dump_file, "nested loop.");
5461           else if (!loop->single_exit)
5462             fprintf (dump_file, "multiple exits.");
5463           else if (loop->num_nodes != 2)
5464             fprintf (dump_file, "too many BBs in loop.");
5465           else if (EDGE_COUNT (loop->header->preds) != 2)
5466             fprintf (dump_file, "too many incoming edges.");
5467           else if (loop->num_entries != 1)
5468             fprintf (dump_file, "too many entries.");
5469         }
5470
5471       return NULL;
5472     }
5473
5474   /* We assume that the loop exit condition is at the end of the loop. i.e,
5475      that the loop is represented as a do-while (with a proper if-guard
5476      before the loop if needed), where the loop header contains all the
5477      executable statements, and the latch is empty.  */
5478   if (!empty_block_p (loop->latch))
5479     {
5480       if (vect_debug_stats (loop) || vect_debug_details (loop))
5481         fprintf (dump_file, "not vectorized: unexpectd loop form.");
5482       return NULL;
5483     }
5484
5485   /* Make sure we have a preheader basic block.  */
5486   if (!loop->pre_header)
5487     {
5488       rescan = true;
5489       loop_split_edge_with (loop_preheader_edge (loop), NULL);
5490     }
5491
5492   /* Make sure there exists a single-predecessor exit bb:  */
5493   if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5494     {
5495       rescan = true;
5496       loop_split_edge_with (loop->exit_edges[0], NULL);
5497     }
5498
5499   if (rescan)
5500     {
5501       flow_loop_scan (loop, LOOP_ALL);
5502       /* Flow loop scan does not update loop->single_exit field.  */
5503       loop->single_exit = loop->exit_edges[0];
5504     }
5505
5506   if (empty_block_p (loop->header))
5507     {
5508       if (vect_debug_stats (loop) || vect_debug_details (loop))
5509         fprintf (dump_file, "not vectorized: empty loop.");
5510       return NULL;
5511     }
5512
5513   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5514   if (!loop_cond)
5515     {
5516       if (vect_debug_stats (loop) || vect_debug_details (loop))
5517         fprintf (dump_file, "not vectorized: complicated exit condition.");
5518       return NULL;
5519     }
5520
5521   if (!number_of_iterations)
5522     {
5523       if (vect_debug_stats (loop) || vect_debug_details (loop))
5524         fprintf (dump_file,
5525                  "not vectorized: number of iterations cannot be computed.");
5526       return NULL;
5527     }
5528
5529   if (chrec_contains_undetermined (number_of_iterations))
5530     {
5531       if (vect_debug_details (NULL))
5532         fprintf (dump_file, "Infinite number of iterations.");
5533       return false;
5534     }
5535
5536   loop_vinfo = new_loop_vec_info (loop);
5537   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5538
5539   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5540     {
5541       if (vect_debug_details (loop))
5542         {
5543           fprintf (dump_file, "loop bound unknown.\n");
5544           fprintf (dump_file, "Symbolic number of iterations is ");
5545           print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5546         }
5547     }
5548   else
5549   if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5550     {
5551       if (vect_debug_stats (loop) || vect_debug_details (loop))
5552         fprintf (dump_file, "not vectorized: number of iterations = 0.");
5553       return NULL;
5554     }
5555
5556   LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5557
5558   return loop_vinfo;
5559 }
5560
5561
5562 /* Function vect_analyze_loop.
5563
5564    Apply a set of analyses on LOOP, and create a loop_vec_info struct
5565    for it. The different analyses will record information in the
5566    loop_vec_info struct.  */
5567
5568 static loop_vec_info
5569 vect_analyze_loop (struct loop *loop)
5570 {
5571   bool ok;
5572   loop_vec_info loop_vinfo;
5573
5574   if (vect_debug_details (NULL))
5575     fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5576
5577   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
5578
5579   loop_vinfo = vect_analyze_loop_form (loop);
5580   if (!loop_vinfo)
5581     {
5582       if (vect_debug_details (loop))
5583         fprintf (dump_file, "bad loop form.");
5584       return NULL;
5585     }
5586
5587   /* Find all data references in the loop (which correspond to vdefs/vuses)
5588      and analyze their evolution in the loop.
5589
5590      FORNOW: Handle only simple, array references, which
5591      alignment can be forced, and aligned pointer-references.  */
5592
5593   ok = vect_analyze_data_refs (loop_vinfo);
5594   if (!ok)
5595     {
5596       if (vect_debug_details (loop))
5597         fprintf (dump_file, "bad data references.");
5598       destroy_loop_vec_info (loop_vinfo);
5599       return NULL;
5600     }
5601
5602   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
5603
5604   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5605   if (!ok)
5606     {
5607       if (vect_debug_details (loop))
5608         fprintf (dump_file, "unexpected pattern.");
5609       if (vect_debug_details (loop))
5610         fprintf (dump_file, "not vectorized: unexpected pattern.");
5611       destroy_loop_vec_info (loop_vinfo);
5612       return NULL;
5613     }
5614
5615   /* Check that all cross-iteration scalar data-flow cycles are OK.
5616      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
5617
5618   ok = vect_analyze_scalar_cycles (loop_vinfo);
5619   if (!ok)
5620     {
5621       if (vect_debug_details (loop))
5622         fprintf (dump_file, "bad scalar cycle.");
5623       destroy_loop_vec_info (loop_vinfo);
5624       return NULL;
5625     }
5626
5627   /* Analyze data dependences between the data-refs in the loop.
5628      FORNOW: fail at the first data dependence that we encounter.  */
5629
5630   ok = vect_analyze_data_ref_dependences (loop_vinfo);
5631   if (!ok)
5632     {
5633       if (vect_debug_details (loop))
5634         fprintf (dump_file, "bad data dependence.");
5635       destroy_loop_vec_info (loop_vinfo);
5636       return NULL;
5637     }
5638
5639   /* Analyze the access patterns of the data-refs in the loop (consecutive,
5640      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
5641
5642   ok = vect_analyze_data_ref_accesses (loop_vinfo);
5643   if (!ok)
5644     {
5645       if (vect_debug_details (loop))
5646         fprintf (dump_file, "bad data access.");
5647       destroy_loop_vec_info (loop_vinfo);
5648       return NULL;
5649     }
5650
5651   /* Analyze the alignment of the data-refs in the loop.
5652      FORNOW: Only aligned accesses are handled.  */
5653
5654   ok = vect_analyze_data_refs_alignment (loop_vinfo);
5655   if (!ok)
5656     {
5657       if (vect_debug_details (loop))
5658         fprintf (dump_file, "bad data alignment.");
5659       destroy_loop_vec_info (loop_vinfo);
5660       return NULL;
5661     }
5662
5663   /* Scan all the operations in the loop and make sure they are
5664      vectorizable.  */
5665
5666   ok = vect_analyze_operations (loop_vinfo);
5667   if (!ok)
5668     {
5669       if (vect_debug_details (loop))
5670         fprintf (dump_file, "bad operation or unsupported loop bound.");
5671       destroy_loop_vec_info (loop_vinfo);
5672       return NULL;
5673     }
5674
5675   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5676
5677   return loop_vinfo;
5678 }
5679
5680
5681 /* Function need_imm_uses_for.
5682
5683    Return whether we ought to include information for 'var'
5684    when calculating immediate uses.  For this pass we only want use
5685    information for non-virtual variables.  */
5686
5687 static bool
5688 need_imm_uses_for (tree var)
5689 {
5690   return is_gimple_reg (var);
5691 }
5692
5693
5694 /* Function vectorize_loops.
5695
5696    Entry Point to loop vectorization phase.  */
5697
5698 void
5699 vectorize_loops (struct loops *loops)
5700 {
5701   unsigned int i, loops_num;
5702   unsigned int num_vectorized_loops = 0;
5703
5704   /* Does the target support SIMD?  */
5705   /* FORNOW: until more sophisticated machine modelling is in place.  */
5706   if (!UNITS_PER_SIMD_WORD)
5707     {
5708       if (vect_debug_details (NULL))
5709         fprintf (dump_file, "vectorizer: target vector size is not defined.");
5710       return;
5711     }
5712
5713 #ifdef ENABLE_CHECKING
5714   verify_loop_closed_ssa ();
5715 #endif
5716
5717   compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5718
5719   /*  ----------- Analyze loops. -----------  */
5720
5721   /* If some loop was duplicated, it gets bigger number
5722      than all previously defined loops. This fact allows us to run
5723      only over initial loops skipping newly generated ones.  */
5724   loops_num = loops->num;
5725   for (i = 1; i < loops_num; i++)
5726     {
5727       loop_vec_info loop_vinfo;
5728       struct loop *loop = loops->parray[i];
5729
5730       if (!loop)
5731         continue;
5732
5733       loop_vinfo = vect_analyze_loop (loop);
5734       loop->aux = loop_vinfo;
5735
5736       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5737         continue;
5738
5739       vect_transform_loop (loop_vinfo, loops);
5740       num_vectorized_loops++;
5741     }
5742
5743   if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5744     fprintf (dump_file, "\nvectorized %u loops in function.\n",
5745              num_vectorized_loops);
5746
5747   /*  ----------- Finalize. -----------  */
5748
5749   free_df ();
5750   for (i = 1; i < loops_num; i++)
5751     {
5752       struct loop *loop = loops->parray[i];
5753       loop_vec_info loop_vinfo;
5754
5755       if (!loop)
5756         continue;
5757       loop_vinfo = loop->aux;
5758       destroy_loop_vec_info (loop_vinfo);
5759       loop->aux = NULL;
5760     }
5761
5762   rewrite_into_ssa (false);
5763   rewrite_into_loop_closed_ssa (); /* FORNOW */
5764   bitmap_clear (vars_to_rename);
5765 }