2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
187 /* Main analysis functions. */
188 static loop_vec_info vect_analyze_loop (struct loop *);
189 static loop_vec_info vect_analyze_loop_form (struct loop *);
190 static bool vect_analyze_data_refs (loop_vec_info);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192 static bool vect_analyze_scalar_cycles (loop_vec_info);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
195 static bool vect_compute_data_refs_alignment (loop_vec_info);
196 static bool vect_analyze_operations (loop_vec_info);
198 /* Main code transformation functions. */
199 static void vect_transform_loop (loop_vec_info, struct loops *);
200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
207 static void vect_align_data_ref (tree);
208 static void vect_enhance_data_refs_alignment (loop_vec_info);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree , struct loop *, tree *);
212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
214 static void vect_mark_relevant (varray_type, tree);
215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
216 static tree vect_get_loop_niters (struct loop *, tree *);
217 static bool vect_compute_data_ref_alignment
218 (struct data_reference *, loop_vec_info);
219 static bool vect_analyze_data_ref_access (struct data_reference *);
220 static bool vect_get_first_index (tree, tree *);
221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
222 static struct data_reference * vect_analyze_pointer_ref_access
224 static bool vect_can_advance_ivs_p (struct loop *);
225 static tree vect_get_base_and_bit_offset
226 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
227 static struct data_reference * vect_analyze_pointer_ref_access
229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
230 static tree vect_compute_array_ref_alignment
231 (struct data_reference *, loop_vec_info, tree, tree *);
232 static tree vect_get_ptr_offset (tree, tree, tree *);
233 static tree vect_get_symbl_and_dr
234 (tree, tree, bool, loop_vec_info, struct data_reference **);
236 /* Utility functions for the code transformation. */
237 static tree vect_create_destination_var (tree, tree);
238 static tree vect_create_data_ref_ptr
239 (tree, block_stmt_iterator *, tree, tree *, bool);
240 static tree vect_create_index_for_vector_ref
241 (struct loop *, block_stmt_iterator *);
242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
243 static tree get_vectype_for_scalar_type (tree);
244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
245 static tree vect_get_vec_def_for_operand (tree, tree);
246 static tree vect_init_vector (tree, tree);
247 static tree vect_build_symbol_bound (tree, int, struct loop *);
248 static void vect_finish_stmt_generation
249 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
251 /* Utility function dealing with loop peeling (not peeling itself). */
252 static void vect_generate_tmps_on_preheader
253 (loop_vec_info, tree *, tree *, tree *);
254 static tree vect_build_loop_niters (loop_vec_info);
255 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
256 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
257 static void vect_update_inits_of_dr
258 (struct data_reference *, struct loop *, tree niters);
259 static void vect_update_inits_of_drs (loop_vec_info, tree);
260 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
261 static void vect_do_peeling_for_loop_bound
262 (loop_vec_info, tree *, struct loops *);
264 /* Utilities for creation and deletion of vec_info structs. */
265 loop_vec_info new_loop_vec_info (struct loop *loop);
266 void destroy_loop_vec_info (loop_vec_info);
267 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
269 static bool vect_debug_stats (struct loop *loop);
270 static bool vect_debug_details (struct loop *loop);
273 /*************************************************************************
274 Simple Loop Peeling Utilities
276 Utilities to support loop peeling for vectorization purposes.
277 *************************************************************************/
280 /* For each definition in DEFINITIONS this function allocates
284 allocate_new_names (bitmap definitions)
289 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
291 tree def = ssa_name (ver);
292 tree *new_name_ptr = xmalloc (sizeof (tree));
294 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
296 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
297 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
299 SSA_NAME_AUX (def) = new_name_ptr;
304 /* Renames the use *OP_P. */
307 rename_use_op (use_operand_p op_p)
311 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
314 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
316 /* Something defined outside of the loop. */
320 /* An ordinary ssa name defined in the loop. */
322 SET_USE (op_p, *new_name_ptr);
326 /* Renames the def *OP_P in statement STMT. */
329 rename_def_op (def_operand_p op_p, tree stmt)
333 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
336 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
338 /* Something defined outside of the loop. */
342 /* An ordinary ssa name defined in the loop. */
344 SET_DEF (op_p, *new_name_ptr);
345 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
349 /* Renames the variables in basic block BB. */
352 rename_variables_in_bb (basic_block bb)
355 block_stmt_iterator bsi;
361 v_may_def_optype v_may_defs;
362 v_must_def_optype v_must_defs;
366 struct loop *loop = bb->loop_father;
368 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
369 rename_def_op (PHI_RESULT_PTR (phi), phi);
371 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
373 stmt = bsi_stmt (bsi);
374 get_stmt_operands (stmt);
375 ann = stmt_ann (stmt);
377 uses = USE_OPS (ann);
378 for (i = 0; i < NUM_USES (uses); i++)
379 rename_use_op (USE_OP_PTR (uses, i));
381 defs = DEF_OPS (ann);
382 for (i = 0; i < NUM_DEFS (defs); i++)
383 rename_def_op (DEF_OP_PTR (defs, i), stmt);
385 vuses = VUSE_OPS (ann);
386 for (i = 0; i < NUM_VUSES (vuses); i++)
387 rename_use_op (VUSE_OP_PTR (vuses, i));
389 v_may_defs = V_MAY_DEF_OPS (ann);
390 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
392 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
393 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
396 v_must_defs = V_MUST_DEF_OPS (ann);
397 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
399 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
400 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
404 FOR_EACH_EDGE (e, ei, bb->succs)
406 if (!flow_bb_inside_loop_p (loop, e->dest))
408 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
409 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
414 /* Releases the structures holding the new ssa names. */
417 free_new_names (bitmap definitions)
422 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
424 tree def = ssa_name (ver);
426 if (SSA_NAME_AUX (def))
428 free (SSA_NAME_AUX (def));
429 SSA_NAME_AUX (def) = NULL;
435 /* Renames variables in new generated LOOP. */
438 rename_variables_in_loop (struct loop *loop)
443 bbs = get_loop_body (loop);
445 for (i = 0; i < loop->num_nodes; i++)
446 rename_variables_in_bb (bbs[i]);
452 /* Update the PHI nodes of NEW_LOOP.
454 NEW_LOOP is a duplicate of ORIG_LOOP.
455 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
456 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
457 executes before it. */
460 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
461 struct loop *new_loop, bool after)
463 tree *new_name_ptr, new_ssa_name;
464 tree phi_new, phi_orig;
466 edge orig_loop_latch = loop_latch_edge (orig_loop);
467 edge orig_entry_e = loop_preheader_edge (orig_loop);
468 edge new_loop_exit_e = new_loop->exit_edges[0];
469 edge new_loop_entry_e = loop_preheader_edge (new_loop);
470 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
473 step 1. For each loop-header-phi:
474 Add the first phi argument for the phi in NEW_LOOP
475 (the one associated with the entry of NEW_LOOP)
477 step 2. For each loop-header-phi:
478 Add the second phi argument for the phi in NEW_LOOP
479 (the one associated with the latch of NEW_LOOP)
481 step 3. Update the phis in the successor block of NEW_LOOP.
483 case 1: NEW_LOOP was placed before ORIG_LOOP:
484 The successor block of NEW_LOOP is the header of ORIG_LOOP.
485 Updating the phis in the successor block can therefore be done
486 along with the scanning of the loop header phis, because the
487 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
488 phi nodes, organized in the same order.
490 case 2: NEW_LOOP was placed after ORIG_LOOP:
491 The successor block of NEW_LOOP is the original exit block of
492 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
493 We postpone updating these phis to a later stage (when
494 loop guards are added).
498 /* Scan the phis in the headers of the old and new loops
499 (they are organized in exactly the same order). */
501 for (phi_new = phi_nodes (new_loop->header),
502 phi_orig = phi_nodes (orig_loop->header);
504 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
507 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
508 add_phi_arg (&phi_new, def, new_loop_entry_e);
511 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
512 if (TREE_CODE (def) != SSA_NAME)
515 new_name_ptr = SSA_NAME_AUX (def);
517 /* Something defined outside of the loop. */
520 /* An ordinary ssa name defined in the loop. */
521 new_ssa_name = *new_name_ptr;
522 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge (new_loop));
524 /* step 3 (case 1). */
527 gcc_assert (new_loop_exit_e == orig_entry_e);
528 SET_PHI_ARG_DEF (phi_orig,
529 phi_arg_from_edge (phi_orig, new_loop_exit_e),
536 /* Update PHI nodes for a guard of the LOOP.
539 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
540 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
541 originates from the guard-bb, skips LOOP and reaches the (unique) exit
542 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
543 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
544 LOOP header) before the guard code was added, and now it became a merge
545 point of two paths - the path that ends with the LOOP exit-edge, and
546 the path that ends with GUARD_EDGE.
548 This function creates and updates the relevant phi nodes to account for
549 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
550 1. Create phi nodes at NEW_MERGE_BB.
551 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
552 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
555 ===> The CFG before the guard-code was added:
557 if (exit_loop) goto update_bb : LOOP_header_bb
560 ==> The CFG after the guard-code was added:
562 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
564 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
569 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
570 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
571 organized in the same order.
572 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
575 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
576 "original" loop). FALSE if LOOP is an original loop (not a newly
577 created copy). The SSA_NAME_AUX fields of the defs in the original
578 loop are the corresponding new ssa-names used in the new duplicated
579 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
580 nodes in UPDATE_BB takes the original ssa-name, and which takes the
581 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
582 the LOOP-exit-edge takes the new-name, and the phi-arg that is
583 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
584 FALSE, it's the other way around.
588 slpeel_update_phi_nodes_for_guard (edge guard_edge,
593 tree orig_phi, new_phi, update_phi;
594 tree guard_arg, loop_arg;
595 basic_block new_merge_bb = guard_edge->dest;
596 edge e = EDGE_SUCC (new_merge_bb, 0);
597 basic_block update_bb = e->dest;
598 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
600 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
601 orig_phi && update_phi;
602 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
604 /* 1. Generate new phi node in NEW_MERGE_BB: */
605 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
608 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
609 of LOOP. Set the two phi args in NEW_PHI for these edges: */
612 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
613 EDGE_SUCC (loop->latch, 0));
614 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
618 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
619 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
623 new_name = *new_name_ptr;
625 /* Something defined outside of the loop */
630 guard_arg = orig_def;
635 guard_arg = new_name;
639 add_phi_arg (&new_phi, loop_arg, loop->exit_edges[0]);
640 add_phi_arg (&new_phi, guard_arg, guard_edge);
642 /* 3. Update phi in successor block. */
643 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
644 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
645 SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
646 PHI_RESULT (new_phi));
649 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
653 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
654 that starts at zero, increases by one and its limit is NITERS.
656 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
659 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
661 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
663 edge exit_edge = loop->exit_edges[0];
664 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
665 tree begin_label = tree_block_label (loop->latch);
666 tree exit_label = tree_block_label (loop->single_exit->dest);
667 tree init = build_int_cst (TREE_TYPE (niters), 0);
668 tree step = build_int_cst (TREE_TYPE (niters), 1);
670 orig_cond = get_loop_exit_condition (loop);
671 gcc_assert (orig_cond);
672 create_iv (init, step, NULL_TREE, loop,
673 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
675 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
676 back to the exit condition statement. */
677 bsi_next (&loop_exit_bsi);
678 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
680 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
681 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
682 else /* 'then' edge loops back. */
683 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
685 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
686 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
687 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
688 begin_label, exit_label);
689 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
691 /* Remove old loop exit test: */
692 bsi_remove (&loop_exit_bsi);
694 if (vect_debug_stats (loop) || vect_debug_details (loop))
695 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
697 loop->nb_iterations = niters;
701 /* Given LOOP this function generates a new copy of it and puts it
702 on E which is either the entry or exit of LOOP. */
705 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
708 struct loop *new_loop;
709 basic_block *new_bbs, *bbs;
712 basic_block exit_dest;
715 at_exit = (e == loop->exit_edges[0]);
716 if (!at_exit && e != loop_preheader_edge (loop))
718 if (dump_file && (dump_flags & TDF_DETAILS))
719 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
723 bbs = get_loop_body (loop);
725 /* Check whether duplication is possible. */
726 if (!can_copy_bbs_p (bbs, loop->num_nodes))
728 if (vect_debug_stats (loop) || vect_debug_details (loop))
729 fprintf (dump_file, "Cannot copy basic blocks.\n");
734 /* Generate new loop structure. */
735 new_loop = duplicate_loop (loops, loop, loop->outer);
738 if (vect_debug_stats (loop) || vect_debug_details (loop))
739 fprintf (dump_file, "duplicate_loop returns NULL.\n");
744 exit_dest = loop->exit_edges[0]->dest;
745 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
746 exit_dest) == loop->header ?
749 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
751 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
753 /* Duplicating phi args at exit bbs as coming
754 also from exit of duplicated loop. */
755 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
757 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
760 edge new_loop_exit_edge;
762 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
763 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
765 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
767 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
771 if (at_exit) /* Add the loop copy at exit. */
773 redirect_edge_and_branch_force (e, new_loop->header);
774 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
776 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
778 else /* Add the copy at entry. */
781 edge entry_e = loop_preheader_edge (loop);
782 basic_block preheader = entry_e->src;
784 if (!flow_bb_inside_loop_p (new_loop,
785 EDGE_SUCC (new_loop->header, 0)->dest))
786 new_exit_e = EDGE_SUCC (new_loop->header, 0);
788 new_exit_e = EDGE_SUCC (new_loop->header, 1);
790 redirect_edge_and_branch_force (new_exit_e, loop->header);
791 set_immediate_dominator (CDI_DOMINATORS, loop->header,
794 /* We have to add phi args to the loop->header here as coming
795 from new_exit_e edge. */
796 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
798 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
800 add_phi_arg (&phi, phi_arg, new_exit_e);
803 redirect_edge_and_branch_force (entry_e, new_loop->header);
804 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
807 flow_loop_scan (new_loop, LOOP_ALL);
808 flow_loop_scan (loop, LOOP_ALL);
816 /* Given the condition statement COND, put it as the last statement
817 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
818 Assumes that this is the single exit of the guarded loop.
819 Returns the skip edge. */
822 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
825 block_stmt_iterator bsi;
827 tree cond_stmt, then_label, else_label;
829 enter_e = EDGE_SUCC (guard_bb, 0);
830 enter_e->flags &= ~EDGE_FALLTHRU;
831 enter_e->flags |= EDGE_FALSE_VALUE;
832 bsi = bsi_last (guard_bb);
834 then_label = build1 (GOTO_EXPR, void_type_node,
835 tree_block_label (exit_bb));
836 else_label = build1 (GOTO_EXPR, void_type_node,
837 tree_block_label (enter_e->dest));
838 cond_stmt = build (COND_EXPR, void_type_node, cond,
839 then_label, else_label);
840 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
841 /* Add new edge to connect entry block to the second loop. */
842 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
843 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
848 /* This function verifies that the following restrictions apply to LOOP:
850 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
851 (3) it is single entry, single exit
852 (4) its exit condition is the last stmt in the header
853 (5) E is the entry/exit edge of LOOP.
857 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
859 edge exit_e = loop->exit_edges [0];
860 edge entry_e = loop_preheader_edge (loop);
861 tree orig_cond = get_loop_exit_condition (loop);
862 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
864 if (any_marked_for_rewrite_p ())
868 /* All loops have an outer scope; the only case loop->outer is NULL is for
869 the function itself. */
871 || loop->num_nodes != 2
872 || !empty_block_p (loop->latch)
873 || loop->num_exits != 1
874 || loop->num_entries != 1
875 /* Verify that new loop exit condition can be trivially modified. */
876 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
877 || (e != exit_e && e != entry_e))
883 #ifdef ENABLE_CHECKING
885 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
886 struct loop *second_loop)
888 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
889 basic_block loop2_entry_bb = second_loop->pre_header;
890 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
892 /* A guard that controls whether the second_loop is to be executed or skipped
893 is placed in first_loop->exit. first_loopt->exit therefore has two
894 successors - one is the preheader of second_loop, and the other is a bb
897 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
900 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
903 /* The preheader of new_loop is expected to have two predessors:
904 first_loop->exit and the block that precedes first_loop. */
906 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
907 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
908 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
909 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
910 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
912 /* Verify that the other successor of first_loopt->exit is after the
918 /* Function slpeel_tree_peel_loop_to_edge.
920 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
921 that is placed on the entry (exit) edge E of LOOP. After this transformation
922 we have two loops one after the other - first-loop iterates FIRST_NITERS
923 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
926 - LOOP: the loop to be peeled.
927 - E: the exit or entry edge of LOOP.
928 If it is the entry edge, we peel the first iterations of LOOP. In this
929 case first-loop is LOOP, and second-loop is the newly created loop.
930 If it is the exit edge, we peel the last iterations of LOOP. In this
931 case, first-loop is the newly created loop, and second-loop is LOOP.
932 - NITERS: the number of iterations that LOOP iterates.
933 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
934 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
935 for updating the loop bound of the first-loop to FIRST_NITERS. If it
936 is false, the caller of this function may want to take care of this
937 (this can be useful if we don't want new stmts added to first-loop).
940 The function returns a pointer to the new loop-copy, or NULL if it failed
941 to perform the transformation.
943 The function generates two if-then-else guards: one before the first loop,
944 and the other before the second loop:
946 if (FIRST_NITERS == 0) then skip the first loop,
947 and go directly to the second loop.
949 if (FIRST_NITERS == NITERS) then skip the second loop.
951 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
952 FORNOW the resulting code will not be in loop-closed-ssa form.
956 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
957 edge e, tree first_niters,
958 tree niters, bool update_first_loop_count)
960 struct loop *new_loop = NULL, *first_loop, *second_loop;
964 basic_block bb_before_second_loop, bb_after_second_loop;
965 basic_block bb_before_first_loop;
966 basic_block bb_between_loops;
967 edge exit_e = loop->exit_edges [0];
969 if (!slpeel_can_duplicate_loop_p (loop, e))
972 /* We have to initialize cfg_hooks. Then, when calling
973 cfg_hooks->split_edge, the function tree_split_edge
974 is actually called and, when calling cfg_hooks->duplicate_block,
975 the function tree_duplicate_bb is called. */
976 tree_register_cfg_hooks ();
979 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
980 Resulting CFG would be:
993 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
995 if (vect_debug_stats (loop) || vect_debug_details (loop))
996 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1002 /* NEW_LOOP was placed after LOOP. */
1004 second_loop = new_loop;
1008 /* NEW_LOOP was placed before LOOP. */
1009 first_loop = new_loop;
1013 definitions = marked_ssa_names ();
1014 allocate_new_names (definitions);
1015 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1016 rename_variables_in_loop (new_loop);
1019 /* 2. Add the guard that controls whether the first loop is executed.
1020 Resulting CFG would be:
1022 bb_before_first_loop:
1023 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1030 bb_before_second_loop:
1039 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1040 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1041 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1042 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1043 flow_loop_scan (first_loop, LOOP_ALL);
1044 flow_loop_scan (second_loop, LOOP_ALL);
1047 build (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1048 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1049 bb_before_second_loop, bb_before_first_loop);
1050 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1051 first_loop == new_loop);
1054 /* 3. Add the guard that controls whether the second loop is executed.
1055 Resulting CFG would be:
1057 bb_before_first_loop:
1058 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1066 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1067 GOTO bb_before_second_loop
1069 bb_before_second_loop:
1075 bb_after_second_loop:
1080 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1081 add_bb_to_loop (bb_between_loops, first_loop->outer);
1082 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1083 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1084 flow_loop_scan (first_loop, LOOP_ALL);
1085 flow_loop_scan (second_loop, LOOP_ALL);
1087 pre_condition = build (EQ_EXPR, boolean_type_node, first_niters, niters);
1088 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1089 bb_after_second_loop, bb_before_first_loop);
1090 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1091 second_loop == new_loop);
1093 /* Flow loop scan does not update loop->single_exit field. */
1094 first_loop->single_exit = first_loop->exit_edges[0];
1095 second_loop->single_exit = second_loop->exit_edges[0];
1097 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1099 if (update_first_loop_count)
1100 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1102 free_new_names (definitions);
1103 BITMAP_XFREE (definitions);
1104 unmark_all_for_rewrite ();
1110 /* Here the proper Vectorizer starts. */
1112 /*************************************************************************
1113 Vectorization Utilities.
1114 *************************************************************************/
1116 /* Function new_stmt_vec_info.
1118 Create and initialize a new stmt_vec_info struct for STMT. */
1121 new_stmt_vec_info (tree stmt, struct loop *loop)
1124 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1126 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1127 STMT_VINFO_STMT (res) = stmt;
1128 STMT_VINFO_LOOP (res) = loop;
1129 STMT_VINFO_RELEVANT_P (res) = 0;
1130 STMT_VINFO_VECTYPE (res) = NULL;
1131 STMT_VINFO_VEC_STMT (res) = NULL;
1132 STMT_VINFO_DATA_REF (res) = NULL;
1133 STMT_VINFO_MEMTAG (res) = NULL;
1134 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1140 /* Function new_loop_vec_info.
1142 Create and initialize a new loop_vec_info struct for LOOP, as well as
1143 stmt_vec_info structs for all the stmts in LOOP. */
1146 new_loop_vec_info (struct loop *loop)
1150 block_stmt_iterator si;
1153 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1155 bbs = get_loop_body (loop);
1157 /* Create stmt_info for all stmts in the loop. */
1158 for (i = 0; i < loop->num_nodes; i++)
1160 basic_block bb = bbs[i];
1161 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1163 tree stmt = bsi_stmt (si);
1166 get_stmt_operands (stmt);
1167 ann = stmt_ann (stmt);
1168 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1172 LOOP_VINFO_LOOP (res) = loop;
1173 LOOP_VINFO_BBS (res) = bbs;
1174 LOOP_VINFO_EXIT_COND (res) = NULL;
1175 LOOP_VINFO_NITERS (res) = NULL;
1176 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1177 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1178 LOOP_VINFO_VECT_FACTOR (res) = 0;
1179 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1180 "loop_write_datarefs");
1181 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1182 "loop_read_datarefs");
1183 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1189 /* Function destroy_loop_vec_info.
1191 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1192 stmts in the loop. */
1195 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1200 block_stmt_iterator si;
1206 loop = LOOP_VINFO_LOOP (loop_vinfo);
1208 bbs = LOOP_VINFO_BBS (loop_vinfo);
1209 nbbs = loop->num_nodes;
1211 for (j = 0; j < nbbs; j++)
1213 basic_block bb = bbs[j];
1214 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1216 tree stmt = bsi_stmt (si);
1217 stmt_ann_t ann = stmt_ann (stmt);
1218 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1220 set_stmt_info (ann, NULL);
1224 free (LOOP_VINFO_BBS (loop_vinfo));
1225 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1226 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1232 /* Function debug_loop_stats.
1234 For vectorization statistics dumps. */
1237 vect_debug_stats (struct loop *loop)
1240 block_stmt_iterator si;
1241 tree node = NULL_TREE;
1243 if (!dump_file || !(dump_flags & TDF_STATS))
1248 fprintf (dump_file, "\n");
1257 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1259 node = bsi_stmt (si);
1260 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1264 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1265 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1267 fprintf (dump_file, "\nloop at %s:%d: ",
1268 EXPR_FILENAME (node), EXPR_LINENO (node));
1276 /* Function debug_loop_details.
1278 For vectorization debug dumps. */
1281 vect_debug_details (struct loop *loop)
1284 block_stmt_iterator si;
1285 tree node = NULL_TREE;
1287 if (!dump_file || !(dump_flags & TDF_DETAILS))
1292 fprintf (dump_file, "\n");
1301 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1303 node = bsi_stmt (si);
1304 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1308 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1309 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1311 fprintf (dump_file, "\nloop at %s:%d: ",
1312 EXPR_FILENAME (node), EXPR_LINENO (node));
1320 /* Function vect_get_ptr_offset
1322 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1325 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1326 tree vectype ATTRIBUTE_UNUSED,
1327 tree *offset ATTRIBUTE_UNUSED)
1329 /* TODO: Use alignment information. */
1334 /* Function vect_get_base_and_bit_offset
1336 Return the BASE of the data reference EXPR.
1337 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1338 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1339 bits of 'a.b[i] + 4B' from a.
1342 EXPR - the memory reference that is being analyzed
1343 DR - the data_reference struct of the _original_ memory reference
1344 (Note: DR_REF (DR) is not necessarily EXPR)
1345 VECTYPE - the type that defines the alignment (i.e, we compute
1346 alignment relative to TYPE_ALIGN(VECTYPE))
1349 BASE (returned value) - the base of the data reference EXPR.
1350 E.g, if EXPR is a.b[k].c[i][j] the returned
1352 OFFSET - offset of EXPR from BASE in bits
1353 BASE_ALIGNED_P - indicates if BASE is aligned
1355 If something unexpected is encountered (an unsupported form of data-ref),
1356 or if VECTYPE is given but OFFSET cannot be determined:
1357 then NULL_TREE is returned. */
1360 vect_get_base_and_bit_offset (struct data_reference *dr,
1363 loop_vec_info loop_vinfo,
1365 bool *base_aligned_p)
1367 tree this_offset = size_zero_node;
1368 tree base = NULL_TREE;
1370 tree oprnd0, oprnd1;
1371 struct data_reference *array_dr;
1372 enum tree_code code = TREE_CODE (expr);
1374 *base_aligned_p = false;
1378 /* These cases end the recursion: */
1380 *offset = size_zero_node;
1381 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1382 *base_aligned_p = true;
1389 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1392 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1394 base = vect_get_ptr_offset (expr, vectype, offset);
1396 *base_aligned_p = true;
1400 *base_aligned_p = true;
1401 *offset = size_zero_node;
1407 *offset = int_const_binop (MULT_EXPR, expr,
1408 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1411 /* These cases continue the recursion: */
1413 oprnd0 = TREE_OPERAND (expr, 0);
1414 oprnd1 = TREE_OPERAND (expr, 1);
1416 this_offset = bit_position (oprnd1);
1417 if (vectype && !host_integerp (this_offset, 1))
1423 oprnd0 = TREE_OPERAND (expr, 0);
1428 oprnd0 = TREE_OPERAND (expr, 0);
1433 if (DR_REF (dr) != expr)
1434 /* Build array data_reference struct if the existing DR_REF
1435 doesn't match EXPR. This happens, for example, when the
1436 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1437 contains information on the access of T, not of arr. In order
1438 to continue the analysis, we create a new DR struct that
1439 describes the access of arr.
1441 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1445 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1446 vectype, &this_offset);
1451 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1453 *offset = this_offset;
1454 *base_aligned_p = true;
1461 /* In case we have a PLUS_EXPR of the form
1462 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1463 This is verified in vect_get_symbl_and_dr. */
1464 oprnd0 = TREE_OPERAND (expr, 0);
1465 oprnd1 = TREE_OPERAND (expr, 1);
1467 base = vect_get_base_and_bit_offset
1468 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1469 if (vectype && !base)
1479 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1480 loop_vinfo, offset, base_aligned_p);
1482 if (vectype && base)
1484 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1485 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1488 if (vect_debug_details (NULL))
1490 print_generic_expr (dump_file, expr, TDF_SLIM);
1491 fprintf (dump_file, " --> total offset for ref: ");
1492 print_generic_expr (dump_file, *offset, TDF_SLIM);
1499 /* Function vect_force_dr_alignment_p.
1501 Returns whether the alignment of a DECL can be forced to be aligned
1502 on ALIGNMENT bit boundary. */
1505 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1507 if (TREE_CODE (decl) != VAR_DECL)
1510 if (DECL_EXTERNAL (decl))
1513 if (TREE_STATIC (decl))
1514 return (alignment <= MAX_OFILE_ALIGNMENT);
1516 /* This is not 100% correct. The absolute correct stack alignment
1517 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1518 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1519 However, until someone implements forced stack alignment, SSE
1520 isn't really usable without this. */
1521 return (alignment <= PREFERRED_STACK_BOUNDARY);
1525 /* Function vect_get_new_vect_var.
1527 Returns a name for a new variable. The current naming scheme appends the
1528 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1529 the name of vectorizer generated variables, and appends that to NAME if
1533 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1539 if (var_kind == vect_simple_var)
1544 prefix_len = strlen (prefix);
1547 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1549 new_vect_var = create_tmp_var (type, prefix);
1551 return new_vect_var;
1555 /* Function vect_create_index_for_vector_ref.
1557 Create (and return) an index variable, along with it's update chain in the
1558 loop. This variable will be used to access a memory location in a vector
1562 LOOP: The loop being vectorized.
1563 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1564 function can be added here, or in the loop pre-header.
1567 Return an index that will be used to index a vector array. It is expected
1568 that a pointer to the first vector will be used as the base address for the
1571 FORNOW: we are not trying to be efficient, just creating a new index each
1572 time from scratch. At this time all vector references could use the same
1575 TODO: create only one index to be used by all vector references. Record
1576 the index in the LOOP_VINFO the first time this procedure is called and
1577 return it on subsequent calls. The increment of this index must be placed
1578 just before the conditional expression that ends the single block loop. */
1581 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1584 tree indx_before_incr, indx_after_incr;
1586 /* It is assumed that the base pointer used for vectorized access contains
1587 the address of the first vector. Therefore the index used for vectorized
1588 access must be initialized to zero and incremented by 1. */
1590 init = integer_zero_node;
1591 step = integer_one_node;
1593 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1594 create_iv (init, step, NULL_TREE, loop, bsi, false,
1595 &indx_before_incr, &indx_after_incr);
1597 return indx_before_incr;
1601 /* Function vect_create_addr_base_for_vector_ref.
1603 Create an expression that computes the address of the first memory location
1604 that will be accessed for a data reference.
1607 STMT: The statement containing the data reference.
1608 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1609 OFFSET: Optional. If supplied, it is be added to the initial address.
1612 1. Return an SSA_NAME whose value is the address of the memory location of
1613 the first vector of the data reference.
1614 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1615 these statement(s) which define the returned SSA_NAME.
1617 FORNOW: We are only handling array accesses with step 1. */
1620 vect_create_addr_base_for_vector_ref (tree stmt,
1621 tree *new_stmt_list,
1624 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1625 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1626 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1627 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1628 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1629 tree ref = DR_REF (dr);
1630 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1631 tree scalar_type = TREE_TYPE (ref);
1632 tree scalar_ptr_type = build_pointer_type (scalar_type);
1634 tree init_val, step, init_oval;
1636 bool is_ptr_ref, is_array_ref, is_addr_expr;
1641 tree addr_base, addr_expr;
1642 tree dest, new_stmt;
1644 /* Only the access function of the last index is relevant (i_n in
1645 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1646 access_fn = DR_ACCESS_FN (dr, 0);
1647 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1650 init_oval = integer_zero_node;
1652 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1653 && TREE_CODE (data_ref_base) == SSA_NAME;
1654 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1655 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1656 || TREE_CODE (data_ref_base) == PLUS_EXPR
1657 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1658 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1660 /** Create: &(base[init_val])
1662 if data_ref_base is an ARRAY_TYPE:
1663 base = data_ref_base
1665 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1666 base = *((scalar_array *) data_ref_base)
1670 array_base = data_ref_base;
1671 else /* is_ptr_ref or is_addr_expr */
1673 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1674 tree scalar_array_type = build_array_type (scalar_type, 0);
1675 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1676 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1677 add_referenced_tmp_var (array_ptr);
1679 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1680 add_referenced_tmp_var (dest);
1682 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1683 append_to_statement_list_force (new_stmt, new_stmt_list);
1685 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1686 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1687 new_temp = make_ssa_name (array_ptr, vec_stmt);
1688 TREE_OPERAND (vec_stmt, 0) = new_temp;
1689 append_to_statement_list_force (vec_stmt, new_stmt_list);
1692 array_base = build_fold_indirect_ref (new_temp);
1695 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1696 add_referenced_tmp_var (dest);
1697 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1698 append_to_statement_list_force (new_stmt, new_stmt_list);
1702 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1703 add_referenced_tmp_var (tmp);
1704 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1705 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1706 init_val = make_ssa_name (tmp, vec_stmt);
1707 TREE_OPERAND (vec_stmt, 0) = init_val;
1708 append_to_statement_list_force (vec_stmt, new_stmt_list);
1711 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1712 NULL_TREE, NULL_TREE);
1713 addr_base = build_fold_addr_expr (array_ref);
1715 /* addr_expr = addr_base */
1716 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1717 get_name (base_name));
1718 add_referenced_tmp_var (addr_expr);
1719 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1720 new_temp = make_ssa_name (addr_expr, vec_stmt);
1721 TREE_OPERAND (vec_stmt, 0) = new_temp;
1722 append_to_statement_list_force (vec_stmt, new_stmt_list);
1728 /* Function get_vectype_for_scalar_type.
1730 Returns the vector type corresponding to SCALAR_TYPE as supported
1734 get_vectype_for_scalar_type (tree scalar_type)
1736 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1737 int nbytes = GET_MODE_SIZE (inner_mode);
1744 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1746 nunits = UNITS_PER_SIMD_WORD / nbytes;
1748 vectype = build_vector_type (scalar_type, nunits);
1749 if (vect_debug_details (NULL))
1751 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1752 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1758 if (vect_debug_details (NULL))
1760 fprintf (dump_file, "vectype: ");
1761 print_generic_expr (dump_file, vectype, TDF_SLIM);
1764 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1766 /* TODO: tree-complex.c sometimes can parallelize operations
1767 on generic vectors. We can vectorize the loop in that case,
1768 but then we should re-run the lowering pass. */
1769 if (vect_debug_details (NULL))
1770 fprintf (dump_file, "mode not supported by target.");
1778 /* Function vect_align_data_ref.
1780 Handle mislignment of a memory accesses.
1782 FORNOW: Can't handle misaligned accesses.
1783 Make sure that the dataref is aligned. */
1786 vect_align_data_ref (tree stmt)
1788 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1789 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1791 /* FORNOW: can't handle misaligned accesses;
1792 all accesses expected to be aligned. */
1793 gcc_assert (aligned_access_p (dr));
1797 /* Function vect_create_data_ref_ptr.
1799 Create a memory reference expression for vector access, to be used in a
1800 vector load/store stmt. The reference is based on a new pointer to vector
1804 1. STMT: a stmt that references memory. Expected to be of the form
1805 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1806 2. BSI: block_stmt_iterator where new stmts can be added.
1807 3. OFFSET (optional): an offset to be added to the initial address accessed
1808 by the data-ref in STMT.
1809 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1810 pointing to the initial address.
1813 1. Declare a new ptr to vector_type, and have it point to the base of the
1814 data reference (initial addressed accessed by the data reference).
1815 For example, for vector of type V8HI, the following code is generated:
1818 vp = (v8hi *)initial_address;
1820 if OFFSET is not supplied:
1821 initial_address = &a[init];
1822 if OFFSET is supplied:
1823 initial_address = &a[init + OFFSET];
1825 Return the initial_address in INITIAL_ADDRESS.
1827 2. Create a data-reference in the loop based on the new vector pointer vp,
1828 and using a new index variable 'idx' as follows:
1832 where if ONLY_INIT is true:
1835 update = idx + vector_type_size
1837 Return the pointer vp'.
1840 FORNOW: handle only aligned and consecutive accesses. */
1843 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1844 tree *initial_address, bool only_init)
1847 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1848 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1849 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1850 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1854 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1855 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1856 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1857 int nvuses, nv_may_defs, nv_must_defs;
1861 tree new_stmt_list = NULL_TREE;
1863 edge pe = loop_preheader_edge (loop);
1870 base_name = unshare_expr (DR_BASE_NAME (dr));
1871 if (vect_debug_details (NULL))
1873 tree data_ref_base = base_name;
1874 fprintf (dump_file, "create array_ref of type: ");
1875 print_generic_expr (dump_file, vectype, TDF_SLIM);
1876 if (TREE_CODE (data_ref_base) == VAR_DECL)
1877 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1878 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1879 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1880 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1881 fprintf (dump_file, "vectorizing a record based array ref: ");
1882 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1883 fprintf (dump_file, "vectorizing a pointer ref: ");
1884 print_generic_expr (dump_file, base_name, TDF_SLIM);
1887 /** (1) Create the new vector-pointer variable: **/
1889 vect_ptr_type = build_pointer_type (vectype);
1890 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1891 get_name (base_name));
1892 add_referenced_tmp_var (vect_ptr);
1895 /** (2) Handle aliasing information of the new vector-pointer: **/
1897 tag = STMT_VINFO_MEMTAG (stmt_info);
1899 get_var_ann (vect_ptr)->type_mem_tag = tag;
1901 /* Mark for renaming all aliased variables
1902 (i.e, the may-aliases of the type-mem-tag). */
1903 nvuses = NUM_VUSES (vuses);
1904 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1905 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1906 for (i = 0; i < nvuses; i++)
1908 tree use = VUSE_OP (vuses, i);
1909 if (TREE_CODE (use) == SSA_NAME)
1910 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1912 for (i = 0; i < nv_may_defs; i++)
1914 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1915 if (TREE_CODE (def) == SSA_NAME)
1916 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1918 for (i = 0; i < nv_must_defs; i++)
1920 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1921 if (TREE_CODE (def) == SSA_NAME)
1922 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1926 /** (3) Calculate the initial address the vector-pointer, and set
1927 the vector-pointer to point to it before the loop: **/
1929 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1930 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1932 pe = loop_preheader_edge (loop);
1933 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1934 gcc_assert (!new_bb);
1935 *initial_address = new_temp;
1937 /* Create: p = (vectype *) initial_base */
1938 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1939 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1940 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1941 TREE_OPERAND (vec_stmt, 0) = new_temp;
1942 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1943 gcc_assert (!new_bb);
1944 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1947 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1949 if (only_init) /* No update in loop is required. */
1950 return vect_ptr_init;
1952 idx = vect_create_index_for_vector_ref (loop, bsi);
1954 /* Create: update = idx * vectype_size */
1955 ptr_update = create_tmp_var (integer_type_node, "update");
1956 add_referenced_tmp_var (ptr_update);
1957 vectype_size = build_int_cst (integer_type_node,
1958 GET_MODE_SIZE (TYPE_MODE (vectype)));
1959 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1960 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1961 new_temp = make_ssa_name (ptr_update, vec_stmt);
1962 TREE_OPERAND (vec_stmt, 0) = new_temp;
1963 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1965 /* Create: data_ref_ptr = vect_ptr_init + update */
1966 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1967 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1968 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1969 TREE_OPERAND (vec_stmt, 0) = new_temp;
1970 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1971 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1973 return data_ref_ptr;
1977 /* Function vect_create_destination_var.
1979 Create a new temporary of type VECTYPE. */
1982 vect_create_destination_var (tree scalar_dest, tree vectype)
1985 const char *new_name;
1987 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1989 new_name = get_name (scalar_dest);
1992 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1993 add_referenced_tmp_var (vec_dest);
1999 /* Function vect_init_vector.
2001 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2002 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2003 used in the vectorization of STMT. */
2006 vect_init_vector (tree stmt, tree vector_var)
2008 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2009 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2012 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2018 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2019 add_referenced_tmp_var (new_var);
2021 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2022 new_temp = make_ssa_name (new_var, init_stmt);
2023 TREE_OPERAND (init_stmt, 0) = new_temp;
2025 pe = loop_preheader_edge (loop);
2026 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2027 gcc_assert (!new_bb);
2029 if (vect_debug_details (NULL))
2031 fprintf (dump_file, "created new init_stmt: ");
2032 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2035 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2040 /* Function vect_get_vec_def_for_operand.
2042 OP is an operand in STMT. This function returns a (vector) def that will be
2043 used in the vectorized stmt for STMT.
2045 In the case that OP is an SSA_NAME which is defined in the loop, then
2046 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2048 In case OP is an invariant or constant, a new stmt that creates a vector def
2049 needs to be introduced. */
2052 vect_get_vec_def_for_operand (tree op, tree stmt)
2057 stmt_vec_info def_stmt_info = NULL;
2058 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2059 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2060 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2061 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2068 if (vect_debug_details (NULL))
2070 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2071 print_generic_expr (dump_file, op, TDF_SLIM);
2074 /** ===> Case 1: operand is a constant. **/
2076 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2078 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2082 /* Build a tree with vector elements. */
2083 if (vect_debug_details (NULL))
2084 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2086 for (i = nunits - 1; i >= 0; --i)
2088 t = tree_cons (NULL_TREE, op, t);
2090 vec_cst = build_vector (vectype, t);
2091 return vect_init_vector (stmt, vec_cst);
2094 gcc_assert (TREE_CODE (op) == SSA_NAME);
2096 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2098 def_stmt = SSA_NAME_DEF_STMT (op);
2099 def_stmt_info = vinfo_for_stmt (def_stmt);
2101 if (vect_debug_details (NULL))
2103 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2104 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2108 /** ==> Case 2.1: operand is defined inside the loop. **/
2112 /* Get the def from the vectorized stmt. */
2114 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2115 gcc_assert (vec_stmt);
2116 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2121 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2122 it is a reduction/induction. **/
2124 bb = bb_for_stmt (def_stmt);
2125 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2127 if (vect_debug_details (NULL))
2128 fprintf (dump_file, "reduction/induction - unsupported.");
2129 internal_error ("no support for reduction/induction"); /* FORNOW */
2133 /** ==> Case 2.3: operand is defined outside the loop -
2134 it is a loop invariant. */
2136 switch (TREE_CODE (def_stmt))
2139 def = PHI_RESULT (def_stmt);
2142 def = TREE_OPERAND (def_stmt, 0);
2145 def = TREE_OPERAND (def_stmt, 0);
2146 gcc_assert (IS_EMPTY_STMT (def_stmt));
2150 if (vect_debug_details (NULL))
2152 fprintf (dump_file, "unsupported defining stmt: ");
2153 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2155 internal_error ("unsupported defining stmt");
2158 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2160 if (vect_debug_details (NULL))
2161 fprintf (dump_file, "Create vector_inv.");
2163 for (i = nunits - 1; i >= 0; --i)
2165 t = tree_cons (NULL_TREE, def, t);
2168 vec_inv = build_constructor (vectype, t);
2169 return vect_init_vector (stmt, vec_inv);
2173 /* Function vect_finish_stmt_generation.
2175 Insert a new stmt. */
2178 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2180 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2182 if (vect_debug_details (NULL))
2184 fprintf (dump_file, "add new stmt: ");
2185 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2188 /* Make sure bsi points to the stmt that is being vectorized. */
2190 /* Assumption: any stmts created for the vectorization of stmt S were
2191 inserted before S. BSI is expected to point to S or some new stmt before S.
2194 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2196 gcc_assert (stmt == bsi_stmt (*bsi));
2200 /* Function vectorizable_assignment.
2202 Check if STMT performs an assignment (copy) that can be vectorized.
2203 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2204 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2205 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2208 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2214 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2215 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2216 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2219 /* Is vectorizable assignment? */
2221 if (TREE_CODE (stmt) != MODIFY_EXPR)
2224 scalar_dest = TREE_OPERAND (stmt, 0);
2225 if (TREE_CODE (scalar_dest) != SSA_NAME)
2228 op = TREE_OPERAND (stmt, 1);
2229 if (!vect_is_simple_use (op, loop, NULL))
2231 if (vect_debug_details (NULL))
2232 fprintf (dump_file, "use not simple.");
2236 if (!vec_stmt) /* transformation not required. */
2238 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2243 if (vect_debug_details (NULL))
2244 fprintf (dump_file, "transform assignment.");
2247 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2250 op = TREE_OPERAND (stmt, 1);
2251 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2253 /* Arguments are ready. create the new vector stmt. */
2254 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2255 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2256 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2257 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2263 /* Function vectorizable_operation.
2265 Check if STMT performs a binary or unary operation that can be vectorized.
2266 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2267 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2268 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2271 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2276 tree op0, op1 = NULL;
2277 tree vec_oprnd0, vec_oprnd1=NULL;
2278 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2279 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2280 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2282 enum tree_code code;
2283 enum machine_mode vec_mode;
2289 /* Is STMT a vectorizable binary/unary operation? */
2290 if (TREE_CODE (stmt) != MODIFY_EXPR)
2293 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2296 operation = TREE_OPERAND (stmt, 1);
2297 code = TREE_CODE (operation);
2298 optab = optab_for_tree_code (code, vectype);
2300 /* Support only unary or binary operations. */
2301 op_type = TREE_CODE_LENGTH (code);
2302 if (op_type != unary_op && op_type != binary_op)
2304 if (vect_debug_details (NULL))
2305 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2309 for (i = 0; i < op_type; i++)
2311 op = TREE_OPERAND (operation, i);
2312 if (!vect_is_simple_use (op, loop, NULL))
2314 if (vect_debug_details (NULL))
2315 fprintf (dump_file, "use not simple.");
2320 /* Supportable by target? */
2323 if (vect_debug_details (NULL))
2324 fprintf (dump_file, "no optab.");
2327 vec_mode = TYPE_MODE (vectype);
2328 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2330 if (vect_debug_details (NULL))
2331 fprintf (dump_file, "op not supported by target.");
2335 if (!vec_stmt) /* transformation not required. */
2337 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2343 if (vect_debug_details (NULL))
2344 fprintf (dump_file, "transform binary/unary operation.");
2347 scalar_dest = TREE_OPERAND (stmt, 0);
2348 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2351 op0 = TREE_OPERAND (operation, 0);
2352 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2354 if (op_type == binary_op)
2356 op1 = TREE_OPERAND (operation, 1);
2357 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2360 /* Arguments are ready. create the new vector stmt. */
2362 if (op_type == binary_op)
2363 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2364 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2366 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2367 build1 (code, vectype, vec_oprnd0));
2368 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2369 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2370 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2376 /* Function vectorizable_store.
2378 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2380 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2381 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2382 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2385 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2391 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2392 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2393 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2394 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2395 enum machine_mode vec_mode;
2397 enum dr_alignment_support alignment_support_cheme;
2399 /* Is vectorizable store? */
2401 if (TREE_CODE (stmt) != MODIFY_EXPR)
2404 scalar_dest = TREE_OPERAND (stmt, 0);
2405 if (TREE_CODE (scalar_dest) != ARRAY_REF
2406 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2409 op = TREE_OPERAND (stmt, 1);
2410 if (!vect_is_simple_use (op, loop, NULL))
2412 if (vect_debug_details (NULL))
2413 fprintf (dump_file, "use not simple.");
2417 vec_mode = TYPE_MODE (vectype);
2418 /* FORNOW. In some cases can vectorize even if data-type not supported
2419 (e.g. - array initialization with 0). */
2420 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2423 if (!STMT_VINFO_DATA_REF (stmt_info))
2427 if (!vec_stmt) /* transformation not required. */
2429 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2435 if (vect_debug_details (NULL))
2436 fprintf (dump_file, "transform store");
2438 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2439 gcc_assert (alignment_support_cheme);
2440 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2442 /* Handle use - get the vectorized def from the defining stmt. */
2443 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2446 /* FORNOW: make sure the data reference is aligned. */
2447 vect_align_data_ref (stmt);
2448 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2449 data_ref = build_fold_indirect_ref (data_ref);
2451 /* Arguments are ready. create the new vector stmt. */
2452 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2453 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2459 /* vectorizable_load.
2461 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2463 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2464 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2465 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2468 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2471 tree vec_dest = NULL;
2472 tree data_ref = NULL;
2474 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2475 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2476 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2483 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2484 edge pe = loop_preheader_edge (loop);
2485 enum dr_alignment_support alignment_support_cheme;
2487 /* Is vectorizable load? */
2489 if (TREE_CODE (stmt) != MODIFY_EXPR)
2492 scalar_dest = TREE_OPERAND (stmt, 0);
2493 if (TREE_CODE (scalar_dest) != SSA_NAME)
2496 op = TREE_OPERAND (stmt, 1);
2497 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2500 if (!STMT_VINFO_DATA_REF (stmt_info))
2503 mode = (int) TYPE_MODE (vectype);
2505 /* FORNOW. In some cases can vectorize even if data-type not supported
2506 (e.g. - data copies). */
2507 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2509 if (vect_debug_details (loop))
2510 fprintf (dump_file, "Aligned load, but unsupported type.");
2514 if (!vec_stmt) /* transformation not required. */
2516 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2522 if (vect_debug_details (NULL))
2523 fprintf (dump_file, "transform load.");
2525 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2526 gcc_assert (alignment_support_cheme);
2528 if (alignment_support_cheme == dr_aligned
2529 || alignment_support_cheme == dr_unaligned_supported)
2540 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2541 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2542 if (aligned_access_p (dr))
2543 data_ref = build_fold_indirect_ref (data_ref);
2546 int mis = DR_MISALIGNMENT (dr);
2547 tree tmis = (mis == -1 ?
2549 build_int_cst (integer_type_node, mis));
2550 tmis = int_const_binop (MULT_EXPR, tmis,
2551 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2552 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2554 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2555 new_temp = make_ssa_name (vec_dest, new_stmt);
2556 TREE_OPERAND (new_stmt, 0) = new_temp;
2557 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2559 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2563 msq_init = *(floor(p1))
2564 p2 = initial_addr + VS - 1;
2565 magic = have_builtin ? builtin_result : initial_address;
2568 p2' = p2 + indx * vectype_size
2570 vec_dest = realign_load (msq, lsq, magic)
2584 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2585 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2586 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2588 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2589 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2590 new_temp = make_ssa_name (vec_dest, new_stmt);
2591 TREE_OPERAND (new_stmt, 0) = new_temp;
2592 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2593 gcc_assert (!new_bb);
2594 msq_init = TREE_OPERAND (new_stmt, 0);
2597 /* <2> Create lsq = *(floor(p2')) in the loop */
2598 offset = build_int_cst (integer_type_node,
2599 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2600 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2601 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2602 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2603 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2604 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2605 new_temp = make_ssa_name (vec_dest, new_stmt);
2606 TREE_OPERAND (new_stmt, 0) = new_temp;
2607 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2608 lsq = TREE_OPERAND (new_stmt, 0);
2612 if (targetm.vectorize.builtin_mask_for_load)
2614 /* Create permutation mask, if required, in loop preheader. */
2616 params = build_tree_list (NULL_TREE, init_addr);
2617 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2618 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2619 new_stmt = build_function_call_expr (builtin_decl, params);
2620 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2621 new_temp = make_ssa_name (vec_dest, new_stmt);
2622 TREE_OPERAND (new_stmt, 0) = new_temp;
2623 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2624 gcc_assert (!new_bb);
2625 magic = TREE_OPERAND (new_stmt, 0);
2629 /* Use current address instead of init_addr for reduced reg pressure.
2631 magic = dataref_ptr;
2635 /* <4> Create msq = phi <msq_init, lsq> in loop */
2636 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2637 msq = make_ssa_name (vec_dest, NULL_TREE);
2638 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2639 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2640 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2641 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2644 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2645 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2646 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2647 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2648 new_temp = make_ssa_name (vec_dest, new_stmt);
2649 TREE_OPERAND (new_stmt, 0) = new_temp;
2650 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2655 *vec_stmt = new_stmt;
2660 /* Function vect_supportable_dr_alignment
2662 Return whether the data reference DR is supported with respect to its
2665 static enum dr_alignment_support
2666 vect_supportable_dr_alignment (struct data_reference *dr)
2668 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2669 enum machine_mode mode = (int) TYPE_MODE (vectype);
2671 if (aligned_access_p (dr))
2674 /* Possibly unaligned access. */
2676 if (DR_IS_READ (dr))
2678 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2679 && (!targetm.vectorize.builtin_mask_for_load
2680 || targetm.vectorize.builtin_mask_for_load ()))
2681 return dr_unaligned_software_pipeline;
2683 if (targetm.vectorize.misaligned_mem_ok (mode))
2684 /* Can't software pipeline the loads. */
2685 return dr_unaligned_supported;
2689 return dr_unaligned_unsupported;
2693 /* Function vect_transform_stmt.
2695 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2698 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2700 bool is_store = false;
2701 tree vec_stmt = NULL_TREE;
2702 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2705 switch (STMT_VINFO_TYPE (stmt_info))
2707 case op_vec_info_type:
2708 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2712 case assignment_vec_info_type:
2713 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2717 case load_vec_info_type:
2718 done = vectorizable_load (stmt, bsi, &vec_stmt);
2722 case store_vec_info_type:
2723 done = vectorizable_store (stmt, bsi, &vec_stmt);
2728 if (vect_debug_details (NULL))
2729 fprintf (dump_file, "stmt not supported.");
2733 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2739 /* This function builds ni_name = number of iterations loop executes
2740 on the loop preheader. */
2743 vect_build_loop_niters (loop_vec_info loop_vinfo)
2745 tree ni_name, stmt, var;
2747 basic_block new_bb = NULL;
2748 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2749 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2751 var = create_tmp_var (TREE_TYPE (ni), "niters");
2752 add_referenced_tmp_var (var);
2753 ni_name = force_gimple_operand (ni, &stmt, false, var);
2755 pe = loop_preheader_edge (loop);
2757 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2759 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2765 /* This function generates the following statements:
2767 ni_name = number of iterations loop executes
2768 ratio = ni_name / vf
2769 ratio_mult_vf_name = ratio * vf
2771 and places them at the loop preheader edge. */
2774 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2775 tree *ratio_mult_vf_name_p, tree *ratio_p)
2782 tree ratio_mult_vf_name, ratio_mult_vf;
2783 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2784 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2788 /* Generate temporary variable that contains
2789 number of iterations loop executes. */
2791 ni_name = vect_build_loop_niters (loop_vinfo);
2794 vf is power of 2; then if ratio = = n >> log2 (vf). */
2795 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2796 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2798 /* Update initial conditions of loop copy. */
2800 /* ratio_mult_vf = ratio * vf;
2801 then if ratio_mult_vf = ratio << log2 (vf). */
2803 i = exact_log2 (vf);
2804 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2805 add_referenced_tmp_var (ratio_mult_vf);
2807 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2809 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2810 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2811 ratio, build_int_cst (unsigned_type_node,
2814 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2816 pe = loop_preheader_edge (loop);
2817 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2819 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2821 *ni_name_p = ni_name;
2822 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2829 /* This function generates stmt
2833 and attaches it to preheader of LOOP. */
2836 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2838 tree var, stmt, var_name;
2843 /* create temporary variable */
2844 var = create_tmp_var (TREE_TYPE (n), "bnd");
2845 add_referenced_tmp_var (var);
2847 var_name = make_ssa_name (var, NULL_TREE);
2849 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2851 i = exact_log2 (vf);
2852 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2853 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2854 n, build_int_cst (unsigned_type_node,i)));
2856 SSA_NAME_DEF_STMT (var_name) = stmt;
2858 pe = loop_preheader_edge (loop);
2859 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2861 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2863 if (vect_debug_details (NULL))
2864 fprintf (dump_file, "New bb on preheader edge was not generated.");
2870 /* Function vect_update_ivs_after_vectorizer.
2872 "Advance" the induction variables of LOOP to the value they should take
2873 after the execution of LOOP. This is currently necessary because the
2874 vectorizer does not handle induction variables that are used after the
2875 loop. Such a situation occurs when the last iterations of LOOP are
2877 1. We introduced new uses after LOOP for IVs that were not originally used
2878 after LOOP: the IVs of LOOP are now used by an epilog loop.
2879 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2880 times, whereas the loop IVs should be bumped N times.
2883 - LOOP - a loop that is going to be vectorized. The last few iterations
2884 of LOOP were peeled.
2885 - NITERS - the number of iterations that LOOP executes (before it is
2886 vectorized). i.e, the number of times the ivs should be bumped.
2887 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2888 coming out from LOOP on which there are uses of the LOOP ivs
2889 (this is the path from LOOP->exit to epilog_loop->preheader).
2891 The new definitions of the ivs are placed in LOOP->exit.
2892 The phi args associated with the edge UPDATE_E in the bb
2893 UPDATE_E->dest are updated accordingly.
2895 Assumption 1: Like the rest of the vectorizer, this function assumes
2896 a single loop exit that has a single predecessor.
2898 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2899 organized in the same order.
2901 Assumption 3: The access function of the ivs is simple enough (see
2902 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2904 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2905 coming out of LOOP on which the ivs of LOOP are used (this is the path
2906 that leads to the epilog loop; other paths skip the epilog loop). This
2907 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2908 needs to have its phis updated.
2912 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2914 basic_block exit_bb = loop->exit_edges[0]->dest;
2916 basic_block update_bb = update_e->dest;
2918 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2920 /* Make sure there exists a single-predecessor exit bb: */
2921 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
2923 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2925 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2927 tree access_fn = NULL;
2928 tree evolution_part;
2931 tree var, stmt, ni, ni_name;
2932 block_stmt_iterator last_bsi;
2934 /* Skip virtual phi's. */
2935 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2937 if (vect_debug_details (NULL))
2938 fprintf (dump_file, "virtual phi. skip.");
2942 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2943 gcc_assert (access_fn);
2945 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2946 gcc_assert (evolution_part != NULL_TREE);
2948 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2949 of degree >= 2 or exponential. */
2950 gcc_assert (!tree_is_chrec (evolution_part));
2952 step_expr = evolution_part;
2953 init_expr = unshare_expr (initial_condition (access_fn));
2955 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2956 build2 (MULT_EXPR, TREE_TYPE (niters),
2957 niters, step_expr), init_expr);
2959 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2960 add_referenced_tmp_var (var);
2962 ni_name = force_gimple_operand (ni, &stmt, false, var);
2964 /* Insert stmt into exit_bb. */
2965 last_bsi = bsi_last (exit_bb);
2967 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2969 /* Fix phi expressions in the successor bb. */
2970 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
2971 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
2972 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
2977 /* Function vect_do_peeling_for_loop_bound
2979 Peel the last iterations of the loop represented by LOOP_VINFO.
2980 The peeled iterations form a new epilog loop. Given that the loop now
2981 iterates NITERS times, the new epilog loop iterates
2982 NITERS % VECTORIZATION_FACTOR times.
2984 The original loop will later be made to iterate
2985 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2988 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2989 struct loops *loops)
2992 tree ni_name, ratio_mult_vf_name;
2993 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2994 struct loop *new_loop;
2996 #ifdef ENABLE_CHECKING
3000 if (vect_debug_details (NULL))
3001 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3003 /* Generate the following variables on the preheader of original loop:
3005 ni_name = number of iteration the original loop executes
3006 ratio = ni_name / vf
3007 ratio_mult_vf_name = ratio * vf */
3008 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3009 &ratio_mult_vf_name, ratio);
3011 /* Update loop info. */
3012 loop->pre_header = loop_preheader_edge (loop)->src;
3013 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3015 #ifdef ENABLE_CHECKING
3016 loop_num = loop->num;
3018 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3019 ratio_mult_vf_name, ni_name, false);
3020 #ifdef ENABLE_CHECKING
3021 gcc_assert (new_loop);
3022 gcc_assert (loop_num == loop->num);
3023 slpeel_verify_cfg_after_peeling (loop, new_loop);
3026 /* A guard that controls whether the new_loop is to be executed or skipped
3027 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3028 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3029 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3030 is on the path where the LOOP IVs are used and need to be updated. */
3032 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3033 update_e = EDGE_PRED (new_loop->pre_header, 0);
3035 update_e = EDGE_PRED (new_loop->pre_header, 1);
3037 /* Update IVs of original loop as if they were advanced
3038 by ratio_mult_vf_name steps. */
3039 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3041 /* After peeling we have to reset scalar evolution analyzer. */
3048 /* Function vect_gen_niters_for_prolog_loop
3050 Set the number of iterations for the loop represented by LOOP_VINFO
3051 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3052 and the misalignment of DR - the first data reference recorded in
3053 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3054 this loop, the data reference DR will refer to an aligned location.
3056 The following computation is generated:
3058 compute address misalignment in bytes:
3059 addr_mis = addr & (vectype_size - 1)
3061 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3063 (elem_size = element type size; an element is the scalar element
3064 whose type is the inner type of the vectype) */
3067 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3069 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3070 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3071 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3073 tree iters, iters_name;
3076 tree dr_stmt = DR_STMT (dr);
3077 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3078 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3079 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3082 tree new_stmts = NULL_TREE;
3084 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3085 tree ptr_type = TREE_TYPE (start_addr);
3086 tree size = TYPE_SIZE (ptr_type);
3087 tree type = lang_hooks.types.type_for_size (TREE_INT_CST_LOW (size), 1);
3088 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3089 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3090 tree niters_type = TREE_TYPE (loop_niters);
3091 tree elem_size_log =
3092 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3093 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3095 pe = loop_preheader_edge (loop);
3096 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3098 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3100 /* Create: byte_misalign = addr & (vectype_size - 1) */
3101 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3103 /* Create: elem_misalign = byte_misalign / element_size */
3105 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3107 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3108 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3109 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3110 iters = fold_convert (niters_type, iters);
3112 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3113 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3114 var = create_tmp_var (niters_type, "prolog_loop_niters");
3115 add_referenced_tmp_var (var);
3116 iters_name = force_gimple_operand (iters, &stmt, false, var);
3118 /* Insert stmt on loop preheader edge. */
3119 pe = loop_preheader_edge (loop);
3121 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3123 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3129 /* Function vect_update_inits_of_dr
3131 NITERS iterations were peeled from LOOP. DR represents a data reference
3132 in LOOP. This function updates the information recorded in DR to
3133 account for the fact that the first NITERS iterations had already been
3134 executed. Specifically, it updates the initial_condition of the
3135 access_function of DR. */
3138 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3141 tree access_fn = DR_ACCESS_FN (dr, 0);
3142 tree init, init_new, step;
3144 step = evolution_part_in_loop_num (access_fn, loop->num);
3145 init = initial_condition (access_fn);
3147 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3148 build (MULT_EXPR, TREE_TYPE (niters),
3149 niters, step), init);
3150 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3156 /* Function vect_update_inits_of_drs
3158 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3159 This function updates the information recorded for the data references in
3160 the loop to account for the fact that the first NITERS iterations had
3161 already been executed. Specifically, it updates the initial_condition of the
3162 access_function of all the data_references in the loop. */
3165 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3168 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3169 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3170 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3172 if (dump_file && (dump_flags & TDF_DETAILS))
3173 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3175 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3177 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3178 vect_update_inits_of_dr (dr, loop, niters);
3181 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3183 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3184 vect_update_inits_of_dr (dr, loop, niters);
3189 /* Function vect_do_peeling_for_alignment
3191 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3192 'niters' is set to the misalignment of one of the data references in the
3193 loop, thereby forcing it to refer to an aligned location at the beginning
3194 of the execution of this loop. The data reference for which we are
3195 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3198 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3200 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3201 tree niters_of_prolog_loop, ni_name;
3203 struct loop *new_loop;
3205 if (vect_debug_details (NULL))
3206 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3208 ni_name = vect_build_loop_niters (loop_vinfo);
3209 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3211 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3213 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3214 niters_of_prolog_loop, ni_name, true);
3215 #ifdef ENABLE_CHECKING
3216 gcc_assert (new_loop);
3217 slpeel_verify_cfg_after_peeling (new_loop, loop);
3220 /* Update number of times loop executes. */
3221 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3222 LOOP_VINFO_NITERS (loop_vinfo) =
3223 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3225 /* Update the init conditions of the access functions of all data refs. */
3226 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3228 /* After peeling we have to reset scalar evolution analyzer. */
3235 /* Function vect_transform_loop.
3237 The analysis phase has determined that the loop is vectorizable.
3238 Vectorize the loop - created vectorized stmts to replace the scalar
3239 stmts in the loop, and update the loop exit condition. */
3242 vect_transform_loop (loop_vec_info loop_vinfo,
3243 struct loops *loops ATTRIBUTE_UNUSED)
3245 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3246 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3247 int nbbs = loop->num_nodes;
3248 block_stmt_iterator si;
3251 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3253 if (vect_debug_details (NULL))
3254 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3257 /* Peel the loop if there are data refs with unknown alignment.
3258 Only one data ref with unknown store is allowed. */
3260 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3261 vect_do_peeling_for_alignment (loop_vinfo, loops);
3263 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3264 compile time constant), or it is a constant that doesn't divide by the
3265 vectorization factor, then an epilog loop needs to be created.
3266 We therefore duplicate the loop: the original loop will be vectorized,
3267 and will compute the first (n/VF) iterations. The second copy of the loop
3268 will remain scalar and will compute the remaining (n%VF) iterations.
3269 (VF is the vectorization factor). */
3271 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3272 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3273 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3274 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3276 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3277 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3279 /* 1) Make sure the loop header has exactly two entries
3280 2) Make sure we have a preheader basic block. */
3282 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3284 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3287 /* FORNOW: the vectorizer supports only loops which body consist
3288 of one basic block (header + empty latch). When the vectorizer will
3289 support more involved loop forms, the order by which the BBs are
3290 traversed need to be reconsidered. */
3292 for (i = 0; i < nbbs; i++)
3294 basic_block bb = bbs[i];
3296 for (si = bsi_start (bb); !bsi_end_p (si);)
3298 tree stmt = bsi_stmt (si);
3299 stmt_vec_info stmt_info;
3302 if (vect_debug_details (NULL))
3304 fprintf (dump_file, "------>vectorizing statement: ");
3305 print_generic_expr (dump_file, stmt, TDF_SLIM);
3307 stmt_info = vinfo_for_stmt (stmt);
3308 gcc_assert (stmt_info);
3309 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3314 #ifdef ENABLE_CHECKING
3315 /* FORNOW: Verify that all stmts operate on the same number of
3316 units and no inner unrolling is necessary. */
3318 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3319 == vectorization_factor);
3321 /* -------- vectorize statement ------------ */
3322 if (vect_debug_details (NULL))
3323 fprintf (dump_file, "transform statement.");
3325 is_store = vect_transform_stmt (stmt, &si);
3328 /* free the attached stmt_vec_info and remove the stmt. */
3329 stmt_ann_t ann = stmt_ann (stmt);
3331 set_stmt_info (ann, NULL);
3340 slpeel_make_loop_iterate_ntimes (loop, ratio);
3342 if (vect_debug_details (loop))
3343 fprintf (dump_file,"Success! loop vectorized.");
3344 if (vect_debug_stats (loop))
3345 fprintf (dump_file, "LOOP VECTORIZED.");
3349 /* Function vect_is_simple_use.
3352 LOOP - the loop that is being vectorized.
3353 OPERAND - operand of a stmt in LOOP.
3354 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3356 Returns whether a stmt with OPERAND can be vectorized.
3357 Supportable operands are constants, loop invariants, and operands that are
3358 defined by the current iteration of the loop. Unsupportable operands are
3359 those that are defined by a previous iteration of the loop (as is the case
3360 in reduction/induction computations). */
3363 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3371 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3374 if (TREE_CODE (operand) != SSA_NAME)
3377 def_stmt = SSA_NAME_DEF_STMT (operand);
3378 if (def_stmt == NULL_TREE )
3380 if (vect_debug_details (NULL))
3381 fprintf (dump_file, "no def_stmt.");
3385 /* empty stmt is expected only in case of a function argument.
3386 (Otherwise - we expect a phi_node or a modify_expr). */
3387 if (IS_EMPTY_STMT (def_stmt))
3389 tree arg = TREE_OPERAND (def_stmt, 0);
3390 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3392 if (vect_debug_details (NULL))
3394 fprintf (dump_file, "Unexpected empty stmt: ");
3395 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3400 /* phi_node inside the loop indicates an induction/reduction pattern.
3401 This is not supported yet. */
3402 bb = bb_for_stmt (def_stmt);
3403 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3405 if (vect_debug_details (NULL))
3406 fprintf (dump_file, "reduction/induction - unsupported.");
3407 return false; /* FORNOW: not supported yet. */
3410 /* Expecting a modify_expr or a phi_node. */
3411 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3412 || TREE_CODE (def_stmt) == PHI_NODE)
3423 /* Function vect_analyze_operations.
3425 Scan the loop stmts and make sure they are all vectorizable. */
3428 vect_analyze_operations (loop_vec_info loop_vinfo)
3430 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3431 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3432 int nbbs = loop->num_nodes;
3433 block_stmt_iterator si;
3434 int vectorization_factor = 0;
3439 if (vect_debug_details (NULL))
3440 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3442 for (i = 0; i < nbbs; i++)
3444 basic_block bb = bbs[i];
3446 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3448 tree stmt = bsi_stmt (si);
3450 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3453 if (vect_debug_details (NULL))
3455 fprintf (dump_file, "==> examining statement: ");
3456 print_generic_expr (dump_file, stmt, TDF_SLIM);
3459 gcc_assert (stmt_info);
3461 /* skip stmts which do not need to be vectorized.
3462 this is expected to include:
3463 - the COND_EXPR which is the loop exit condition
3464 - any LABEL_EXPRs in the loop
3465 - computations that are used only for array indexing or loop
3468 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3470 if (vect_debug_details (NULL))
3471 fprintf (dump_file, "irrelevant.");
3475 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3477 if (vect_debug_stats (loop) || vect_debug_details (loop))
3479 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3480 print_generic_expr (dump_file, stmt, TDF_SLIM);
3485 if (STMT_VINFO_DATA_REF (stmt_info))
3486 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3487 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3488 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3490 scalar_type = TREE_TYPE (stmt);
3492 if (vect_debug_details (NULL))
3494 fprintf (dump_file, "get vectype for scalar type: ");
3495 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3498 vectype = get_vectype_for_scalar_type (scalar_type);
3501 if (vect_debug_stats (loop) || vect_debug_details (loop))
3503 fprintf (dump_file, "not vectorized: unsupported data-type ");
3504 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3509 if (vect_debug_details (NULL))
3511 fprintf (dump_file, "vectype: ");
3512 print_generic_expr (dump_file, vectype, TDF_SLIM);
3514 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3516 ok = (vectorizable_operation (stmt, NULL, NULL)
3517 || vectorizable_assignment (stmt, NULL, NULL)
3518 || vectorizable_load (stmt, NULL, NULL)
3519 || vectorizable_store (stmt, NULL, NULL));
3523 if (vect_debug_stats (loop) || vect_debug_details (loop))
3525 fprintf (dump_file, "not vectorized: stmt not supported: ");
3526 print_generic_expr (dump_file, stmt, TDF_SLIM);
3531 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3532 if (vect_debug_details (NULL))
3533 fprintf (dump_file, "nunits = %d", nunits);
3535 if (vectorization_factor)
3537 /* FORNOW: don't allow mixed units.
3538 This restriction will be relaxed in the future. */
3539 if (nunits != vectorization_factor)
3541 if (vect_debug_stats (loop) || vect_debug_details (loop))
3542 fprintf (dump_file, "not vectorized: mixed data-types");
3547 vectorization_factor = nunits;
3549 #ifdef ENABLE_CHECKING
3550 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3551 * vectorization_factor == UNITS_PER_SIMD_WORD);
3556 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3558 if (vectorization_factor <= 1)
3560 if (vect_debug_stats (loop) || vect_debug_details (loop))
3561 fprintf (dump_file, "not vectorized: unsupported data-type");
3564 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3566 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3568 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3569 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3571 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3572 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3574 if (vect_debug_stats (loop) || vect_debug_details (loop))
3575 fprintf (dump_file, "epilog loop required.");
3576 if (!vect_can_advance_ivs_p (loop))
3578 if (vect_debug_stats (loop) || vect_debug_details (loop))
3579 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3582 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3584 if (vect_debug_stats (loop) || vect_debug_details (loop))
3585 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3594 /* Function exist_non_indexing_operands_for_use_p
3596 USE is one of the uses attached to STMT. Check if USE is
3597 used in STMT for anything other than indexing an array. */
3600 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3603 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3605 /* USE corresponds to some operand in STMT. If there is no data
3606 reference in STMT, then any operand that corresponds to USE
3607 is not indexing an array. */
3608 if (!STMT_VINFO_DATA_REF (stmt_info))
3611 /* STMT has a data_ref. FORNOW this means that its of one of
3612 the following forms:
3615 (This should have been verified in analyze_data_refs).
3617 'var' in the second case corresponds to a def, not a use,
3618 so USE cannot correspond to any operands that are not used
3621 Therefore, all we need to check is if STMT falls into the
3622 first case, and whether var corresponds to USE. */
3624 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3627 operand = TREE_OPERAND (stmt, 1);
3629 if (TREE_CODE (operand) != SSA_NAME)
3639 /* Function vect_is_simple_iv_evolution.
3641 FORNOW: A simple evolution of an induction variables in the loop is
3642 considered a polynomial evolution with constant step. */
3645 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3646 tree * step, bool strict)
3651 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3653 /* When there is no evolution in this loop, the evolution function
3655 if (evolution_part == NULL_TREE)
3658 /* When the evolution is a polynomial of degree >= 2
3659 the evolution function is not "simple". */
3660 if (tree_is_chrec (evolution_part))
3663 step_expr = evolution_part;
3664 init_expr = unshare_expr (initial_condition (access_fn));
3666 if (vect_debug_details (NULL))
3668 fprintf (dump_file, "step: ");
3669 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3670 fprintf (dump_file, ", init: ");
3671 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3677 if (TREE_CODE (step_expr) != INTEGER_CST)
3679 if (vect_debug_details (NULL))
3680 fprintf (dump_file, "step unknown.");
3685 if (!integer_onep (step_expr))
3687 if (vect_debug_details (NULL))
3688 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3696 /* Function vect_analyze_scalar_cycles.
3698 Examine the cross iteration def-use cycles of scalar variables, by
3699 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3700 cycles that they represent do not impede vectorization.
3702 FORNOW: Reduction as in the following loop, is not supported yet:
3706 The cross-iteration cycle corresponding to variable 'sum' will be
3707 considered too complicated and will impede vectorization.
3709 FORNOW: Induction as in the following loop, is not supported yet:
3714 However, the following loop *is* vectorizable:
3719 In both loops there exists a def-use cycle for the variable i:
3720 loop: i_2 = PHI (i_0, i_1)
3725 The evolution of the above cycle is considered simple enough,
3726 however, we also check that the cycle does not need to be
3727 vectorized, i.e - we check that the variable that this cycle
3728 defines is only used for array indexing or in stmts that do not
3729 need to be vectorized. This is not the case in loop2, but it
3730 *is* the case in loop3. */
3733 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3736 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3737 basic_block bb = loop->header;
3740 if (vect_debug_details (NULL))
3741 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3743 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3745 tree access_fn = NULL;
3747 if (vect_debug_details (NULL))
3749 fprintf (dump_file, "Analyze phi: ");
3750 print_generic_expr (dump_file, phi, TDF_SLIM);
3753 /* Skip virtual phi's. The data dependences that are associated with
3754 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3756 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3758 if (vect_debug_details (NULL))
3759 fprintf (dump_file, "virtual phi. skip.");
3763 /* Analyze the evolution function. */
3765 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3766 those of loop induction variables; This property is verified here.
3768 Furthermore, if that induction variable is used in an operation
3769 that needs to be vectorized (i.e, is not solely used to index
3770 arrays and check the exit condition) - we do not support its
3771 vectorization yet. This property is verified in vect_is_simple_use,
3772 during vect_analyze_operations. */
3774 access_fn = /* instantiate_parameters
3776 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3780 if (vect_debug_stats (loop) || vect_debug_details (loop))
3781 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3785 if (vect_debug_details (NULL))
3787 fprintf (dump_file, "Access function of PHI: ");
3788 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3791 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3794 if (vect_debug_stats (loop) || vect_debug_details (loop))
3795 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3804 /* Function vect_analyze_data_ref_dependence.
3806 Return TRUE if there (might) exist a dependence between a memory-reference
3807 DRA and a memory-reference DRB. */
3810 vect_analyze_data_ref_dependence (struct data_reference *dra,
3811 struct data_reference *drb,
3815 struct data_dependence_relation *ddr;
3817 if (!array_base_name_differ_p (dra, drb, &differ_p))
3819 if (vect_debug_stats (loop) || vect_debug_details (loop))
3822 "not vectorized: can't determine dependence between: ");
3823 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3824 fprintf (dump_file, " and ");
3825 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3833 ddr = initialize_data_dependence_relation (dra, drb);
3834 compute_affine_dependence (ddr);
3836 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3839 if (vect_debug_stats (loop) || vect_debug_details (loop))
3842 "not vectorized: possible dependence between data-refs ");
3843 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3844 fprintf (dump_file, " and ");
3845 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3852 /* Function vect_analyze_data_ref_dependences.
3854 Examine all the data references in the loop, and make sure there do not
3855 exist any data dependences between them.
3857 TODO: dependences which distance is greater than the vectorization factor
3861 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3864 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3865 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3866 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3868 /* Examine store-store (output) dependences. */
3870 if (vect_debug_details (NULL))
3871 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3873 if (vect_debug_details (NULL))
3874 fprintf (dump_file, "compare all store-store pairs.");
3876 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3878 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3880 struct data_reference *dra =
3881 VARRAY_GENERIC_PTR (loop_write_refs, i);
3882 struct data_reference *drb =
3883 VARRAY_GENERIC_PTR (loop_write_refs, j);
3884 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3889 /* Examine load-store (true/anti) dependences. */
3891 if (vect_debug_details (NULL))
3892 fprintf (dump_file, "compare all load-store pairs.");
3894 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3896 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3898 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3899 struct data_reference *drb =
3900 VARRAY_GENERIC_PTR (loop_write_refs, j);
3901 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3910 /* Function vect_get_first_index.
3912 REF is a data reference.
3913 If it is an ARRAY_REF: if its lower bound is simple enough,
3914 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3915 If it is not an ARRAY_REF: REF has no "first index";
3916 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3919 vect_get_first_index (tree ref, tree *array_first_index)
3923 if (TREE_CODE (ref) != ARRAY_REF)
3924 *array_first_index = size_zero_node;
3927 array_start = array_ref_low_bound (ref);
3928 if (!host_integerp (array_start,0))
3930 if (vect_debug_details (NULL))
3932 fprintf (dump_file, "array min val not simple integer cst.");
3933 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3937 *array_first_index = array_start;
3944 /* Function vect_compute_array_base_alignment.
3945 A utility function of vect_compute_array_ref_alignment.
3947 Compute the misalignment of ARRAY in bits.
3950 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3951 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3952 if NULL: don't compute misalignment, just return the base of ARRAY.
3953 PREV_DIMENSIONS - initialized to one.
3954 MISALIGNMENT - the computed misalignment in bits.
3957 If VECTYPE is not NULL:
3958 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3959 the base of the array, and put the computed misalignment in MISALIGNMENT.
3961 Return the base of the array.
3963 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3964 a[idx_N]...[idx_2][idx_1] is
3965 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3966 ... + idx_N * dim_0 * ... * dim_N-1}.
3967 (The misalignment of &a is not checked here).
3968 Note, that every term contains dim_0, therefore, if dim_0 is a
3969 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3970 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3971 NUINTS, we can say that the misalignment of the sum is equal to
3972 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3973 we can't determine this array misalignment, and we return
3975 We proceed recursively in this manner, accumulating total misalignment
3976 and the multiplication of previous dimensions for correct misalignment
3980 vect_compute_array_base_alignment (tree array,
3982 tree *prev_dimensions,
3987 tree dimension_size;
3989 tree bits_per_vectype;
3990 tree bits_per_vectype_unit;
3992 /* The 'stop condition' of the recursion. */
3993 if (TREE_CODE (array) != ARRAY_REF)
3997 /* Just get the base decl. */
3998 return vect_compute_array_base_alignment
3999 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4001 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4002 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4005 domain = TYPE_DOMAIN (TREE_TYPE (array));
4007 int_const_binop (PLUS_EXPR,
4008 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4009 TYPE_MIN_VALUE (domain), 1),
4012 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4013 is a multiple of NUNITS:
4015 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4017 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4018 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4019 if (integer_zerop (mis))
4020 /* This array is aligned. Continue just in order to get the base decl. */
4021 return vect_compute_array_base_alignment
4022 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4024 index = TREE_OPERAND (array, 1);
4025 if (!host_integerp (index, 1))
4026 /* The current index is not constant. */
4029 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4031 bits_per_vectype = fold_convert (unsigned_type_node,
4032 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4033 GET_MODE_SIZE (TYPE_MODE (vectype))));
4034 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4035 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4036 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4038 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4042 (*misalignment + index_val * dimension_size * *prev_dimensions)
4046 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4047 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4048 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4049 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4050 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4053 *prev_dimensions = int_const_binop (MULT_EXPR,
4054 *prev_dimensions, dimension_size, 1);
4056 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4062 /* Function vect_compute_data_ref_alignment
4064 Compute the misalignment of the data reference DR.
4067 1. If during the misalignment computation it is found that the data reference
4068 cannot be vectorized then false is returned.
4069 2. DR_MISALIGNMENT (DR) is defined.
4071 FOR NOW: No analysis is actually performed. Misalignment is calculated
4072 only for trivial cases. TODO. */
4075 vect_compute_data_ref_alignment (struct data_reference *dr,
4076 loop_vec_info loop_vinfo)
4078 tree stmt = DR_STMT (dr);
4079 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4080 tree ref = DR_REF (dr);
4083 tree offset = size_zero_node;
4084 tree base, bit_offset, alignment;
4085 tree unit_bits = fold_convert (unsigned_type_node,
4086 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4088 bool base_aligned_p;
4090 if (vect_debug_details (NULL))
4091 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4093 /* Initialize misalignment to unknown. */
4094 DR_MISALIGNMENT (dr) = -1;
4096 scalar_type = TREE_TYPE (ref);
4097 vectype = get_vectype_for_scalar_type (scalar_type);
4100 if (vect_debug_details (NULL))
4102 fprintf (dump_file, "no vectype for stmt: ");
4103 print_generic_expr (dump_file, stmt, TDF_SLIM);
4104 fprintf (dump_file, " scalar_type: ");
4105 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4107 /* It is not possible to vectorize this data reference. */
4110 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4111 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4113 if (TREE_CODE (ref) == ARRAY_REF)
4116 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4118 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4119 loop_vinfo, &bit_offset, &base_aligned_p);
4122 if (vect_debug_details (NULL))
4124 fprintf (dump_file, "Unknown alignment for access: ");
4125 print_generic_expr (dump_file,
4126 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4131 if (!base_aligned_p)
4133 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4135 if (vect_debug_details (NULL))
4137 fprintf (dump_file, "can't force alignment of ref: ");
4138 print_generic_expr (dump_file, ref, TDF_SLIM);
4143 /* Force the alignment of the decl.
4144 NOTE: This is the only change to the code we make during
4145 the analysis phase, before deciding to vectorize the loop. */
4146 if (vect_debug_details (NULL))
4147 fprintf (dump_file, "force alignment");
4148 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4149 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4152 /* At this point we assume that the base is aligned, and the offset from it
4153 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4154 gcc_assert (base_aligned_p
4155 || (TREE_CODE (base) == VAR_DECL
4156 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4158 /* Convert into bytes. */
4159 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4160 /* Check that there is no remainder in bits. */
4161 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4162 if (!integer_zerop (bit_offset))
4164 if (vect_debug_details (NULL))
4166 fprintf (dump_file, "bit offset alignment: ");
4167 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4172 /* Alignment required, in bytes: */
4173 alignment = fold_convert (unsigned_type_node,
4174 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4176 /* Modulo alignment. */
4177 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4178 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4180 if (vect_debug_details (NULL))
4181 fprintf (dump_file, "unexpected misalign value");
4185 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4187 if (vect_debug_details (NULL))
4188 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4194 /* Function vect_compute_array_ref_alignment
4196 Compute the alignment of an array-ref.
4197 The alignment we compute here is relative to
4198 TYPE_ALIGN(VECTYPE) boundary.
4201 OFFSET - the alignment in bits
4202 Return value - the base of the array-ref. E.g,
4203 if the array-ref is a.b[k].c[i][j] the returned
4208 vect_compute_array_ref_alignment (struct data_reference *dr,
4209 loop_vec_info loop_vinfo,
4213 tree array_first_index = size_zero_node;
4215 tree ref = DR_REF (dr);
4216 tree scalar_type = TREE_TYPE (ref);
4217 tree oprnd0 = TREE_OPERAND (ref, 0);
4218 tree dims = size_one_node;
4219 tree misalign = size_zero_node;
4220 tree next_ref, this_offset = size_zero_node;
4224 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4225 /* The reference is an array without its last index. */
4226 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4229 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4232 /* Alignment is not requested. Just return the base. */
4235 /* Compute alignment. */
4236 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4238 this_offset = misalign;
4240 /* Check the first index accessed. */
4241 if (!vect_get_first_index (ref, &array_first_index))
4243 if (vect_debug_details (NULL))
4244 fprintf (dump_file, "no first_index for array.");
4248 /* Check the index of the array_ref. */
4249 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4250 LOOP_VINFO_LOOP (loop_vinfo)->num);
4252 /* FORNOW: In order to simplify the handling of alignment, we make sure
4253 that the first location at which the array is accessed ('init') is on an
4254 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4255 This is too conservative, since we require that
4256 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4257 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4258 This should be relaxed in the future. */
4260 if (!init || !host_integerp (init, 0))
4262 if (vect_debug_details (NULL))
4263 fprintf (dump_file, "non constant init. ");
4267 /* bytes per scalar element: */
4268 nunits = fold_convert (unsigned_type_node,
4269 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4270 nbits = int_const_binop (MULT_EXPR, nunits,
4271 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4273 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4274 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4275 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4276 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4278 /* TODO: allow negative misalign values. */
4279 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4281 if (vect_debug_details (NULL))
4282 fprintf (dump_file, "unexpected misalign value");
4290 /* Function vect_compute_data_refs_alignment
4292 Compute the misalignment of data references in the loop.
4293 This pass may take place at function granularity instead of at loop
4296 FOR NOW: No analysis is actually performed. Misalignment is calculated
4297 only for trivial cases. TODO. */
4300 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4302 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4303 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4306 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4308 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4309 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4313 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4315 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4316 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4324 /* Function vect_enhance_data_refs_alignment
4326 This pass will use loop versioning and loop peeling in order to enhance
4327 the alignment of data references in the loop.
4329 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4330 original loop is to be vectorized; Any other loops that are created by
4331 the transformations performed in this pass - are not supposed to be
4332 vectorized. This restriction will be relaxed. */
4335 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4337 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4338 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4339 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4343 This pass will require a cost model to guide it whether to apply peeling
4344 or versioning or a combination of the two. For example, the scheme that
4345 intel uses when given a loop with several memory accesses, is as follows:
4346 choose one memory access ('p') which alignment you want to force by doing
4347 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4348 other accesses are not necessarily aligned, or (2) use loop versioning to
4349 generate one loop in which all accesses are aligned, and another loop in
4350 which only 'p' is necessarily aligned.
4352 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4353 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4354 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4356 Devising a cost model is the most critical aspect of this work. It will
4357 guide us on which access to peel for, whether to use loop versioning, how
4358 many versions to create, etc. The cost model will probably consist of
4359 generic considerations as well as target specific considerations (on
4360 powerpc for example, misaligned stores are more painful than misaligned
4363 Here is the general steps involved in alignment enhancements:
4365 -- original loop, before alignment analysis:
4366 for (i=0; i<N; i++){
4367 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4368 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4371 -- After vect_compute_data_refs_alignment:
4372 for (i=0; i<N; i++){
4373 x = q[i]; # DR_MISALIGNMENT(q) = 3
4374 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4377 -- Possibility 1: we do loop versioning:
4379 for (i=0; i<N; i++){ # loop 1A
4380 x = q[i]; # DR_MISALIGNMENT(q) = 3
4381 p[i] = y; # DR_MISALIGNMENT(p) = 0
4385 for (i=0; i<N; i++){ # loop 1B
4386 x = q[i]; # DR_MISALIGNMENT(q) = 3
4387 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4391 -- Possibility 2: we do loop peeling:
4392 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4396 for (i = 3; i < N; i++){ # loop 2A
4397 x = q[i]; # DR_MISALIGNMENT(q) = 0
4398 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4401 -- Possibility 3: combination of loop peeling and versioning:
4402 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4407 for (i = 3; i<N; i++){ # loop 3A
4408 x = q[i]; # DR_MISALIGNMENT(q) = 0
4409 p[i] = y; # DR_MISALIGNMENT(p) = 0
4413 for (i = 3; i<N; i++){ # loop 3B
4414 x = q[i]; # DR_MISALIGNMENT(q) = 0
4415 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4419 These loops are later passed to loop_transform to be vectorized. The
4420 vectorizer will use the alignment information to guide the transformation
4421 (whether to generate regular loads/stores, or with special handling for
4425 /* (1) Peeling to force alignment. */
4427 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4429 + How many accesses will become aligned due to the peeling
4430 - How many accesses will become unaligned due to the peeling,
4431 and the cost of misaligned accesses.
4432 - The cost of peeling (the extra runtime checks, the increase
4435 The scheme we use FORNOW: peel to force the alignment of the first
4436 misaligned store in the loop.
4437 Rationale: misaligned stores are not yet supported.
4439 TODO: Use a better cost model. */
4441 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4443 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4444 if (!aligned_access_p (dr))
4446 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4447 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4452 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4454 if (vect_debug_details (loop))
4455 fprintf (dump_file, "Peeling for alignment will not be applied.");
4459 if (vect_debug_details (loop))
4460 fprintf (dump_file, "Peeling for alignment will be applied.");
4463 /* (1.2) Update the alignment info according to the peeling factor.
4464 If the misalignment of the DR we peel for is M, then the
4465 peeling factor is VF - M, and the misalignment of each access DR_i
4466 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4467 If the misalignment of the DR we peel for is unknown, then the
4468 misalignment of each access DR_i in the loop is also unknown.
4470 FORNOW: set the misalignment of the accesses to unknown even
4471 if the peeling factor is known at compile time.
4473 TODO: - if the peeling factor is known at compile time, use that
4474 when updating the misalignment info of the loop DRs.
4475 - consider accesses that are known to have the same
4476 alignment, even if that alignment is unknown. */
4478 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4480 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4481 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4482 DR_MISALIGNMENT (dr) = 0;
4484 DR_MISALIGNMENT (dr) = -1;
4486 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4488 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4489 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4490 DR_MISALIGNMENT (dr) = 0;
4492 DR_MISALIGNMENT (dr) = -1;
4497 /* Function vect_analyze_data_refs_alignment
4499 Analyze the alignment of the data-references in the loop.
4500 FOR NOW: Until support for misliagned accesses is in place, only if all
4501 accesses are aligned can the loop be vectorized. This restriction will be
4505 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4507 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4508 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4509 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4510 enum dr_alignment_support supportable_dr_alignment;
4513 if (vect_debug_details (NULL))
4514 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4517 /* This pass may take place at function granularity instead of at loop
4520 if (!vect_compute_data_refs_alignment (loop_vinfo))
4522 if (vect_debug_details (loop) || vect_debug_stats (loop))
4524 "not vectorized: can't calculate alignment for data ref.");
4529 /* This pass will decide on using loop versioning and/or loop peeling in
4530 order to enhance the alignment of data references in the loop. */
4532 vect_enhance_data_refs_alignment (loop_vinfo);
4535 /* Finally, check that all the data references in the loop can be
4536 handled with respect to their alignment. */
4538 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4540 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4541 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4542 if (!supportable_dr_alignment)
4544 if (vect_debug_details (loop) || vect_debug_stats (loop))
4545 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4549 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4551 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4552 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4553 if (!supportable_dr_alignment)
4555 if (vect_debug_details (loop) || vect_debug_stats (loop))
4556 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4565 /* Function vect_analyze_data_ref_access.
4567 Analyze the access pattern of the data-reference DR. For now, a data access
4568 has to consecutive and aligned to be considered vectorizable. */
4571 vect_analyze_data_ref_access (struct data_reference *dr)
4573 varray_type access_fns = DR_ACCESS_FNS (dr);
4576 unsigned int dimensions, i;
4578 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4579 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4580 access is contiguous). */
4581 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4583 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4585 access_fn = DR_ACCESS_FN (dr, i);
4587 if (evolution_part_in_loop_num (access_fn,
4588 loop_containing_stmt (DR_STMT (dr))->num))
4590 /* Evolution part is not NULL in this loop (it is neither constant
4592 if (vect_debug_details (NULL))
4595 "not vectorized: complicated multidim. array access.");
4596 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4602 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4603 if (!evolution_function_is_constant_p (access_fn)
4604 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4605 access_fn, &init, &step, true))
4607 if (vect_debug_details (NULL))
4609 fprintf (dump_file, "not vectorized: complicated access function.");
4610 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4619 /* Function vect_analyze_data_ref_accesses.
4621 Analyze the access pattern of all the data references in the loop.
4623 FORNOW: the only access pattern that is considered vectorizable is a
4624 simple step 1 (consecutive) access.
4626 FORNOW: handle only arrays and pointer accesses. */
4629 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4632 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4633 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4635 if (vect_debug_details (NULL))
4636 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4638 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4640 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4641 bool ok = vect_analyze_data_ref_access (dr);
4644 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4645 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4646 fprintf (dump_file, "not vectorized: complicated access pattern.");
4651 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4653 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4654 bool ok = vect_analyze_data_ref_access (dr);
4657 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4658 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4659 fprintf (dump_file, "not vectorized: complicated access pattern.");
4668 /* Function vect_analyze_pointer_ref_access.
4671 STMT - a stmt that contains a data-ref
4672 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4674 If the data-ref access is vectorizable, return a data_reference structure
4675 that represents it (DR). Otherwise - return NULL. */
4677 static struct data_reference *
4678 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4680 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4681 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4682 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4685 tree reftype, innertype;
4686 enum machine_mode innermode;
4687 tree indx_access_fn;
4688 int loopnum = loop->num;
4689 struct data_reference *dr;
4693 if (vect_debug_stats (loop) || vect_debug_details (loop))
4694 fprintf (dump_file, "not vectorized: complicated pointer access.");
4698 if (vect_debug_details (NULL))
4700 fprintf (dump_file, "Access function of ptr: ");
4701 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4704 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4706 if (vect_debug_stats (loop) || vect_debug_details (loop))
4707 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4713 if (!host_integerp (step,0))
4715 if (vect_debug_stats (loop) || vect_debug_details (loop))
4717 "not vectorized: non constant step for pointer access.");
4721 step_val = TREE_INT_CST_LOW (step);
4723 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4724 if (TREE_CODE (reftype) != POINTER_TYPE)
4726 if (vect_debug_stats (loop) || vect_debug_details (loop))
4727 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4731 reftype = TREE_TYPE (init);
4732 if (TREE_CODE (reftype) != POINTER_TYPE)
4734 if (vect_debug_stats (loop) || vect_debug_details (loop))
4735 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4739 innertype = TREE_TYPE (reftype);
4740 innermode = TYPE_MODE (innertype);
4741 if (GET_MODE_SIZE (innermode) != step_val)
4743 /* FORNOW: support only consecutive access */
4744 if (vect_debug_stats (loop) || vect_debug_details (loop))
4745 fprintf (dump_file, "not vectorized: non consecutive access.");
4750 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4751 if (vect_debug_details (NULL))
4753 fprintf (dump_file, "Access function of ptr indx: ");
4754 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4756 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4761 /* Function vect_get_symbl_and_dr.
4763 The function returns SYMBL - the relevant variable for
4764 memory tag (for aliasing purposes).
4765 Also data reference structure DR is created.
4768 MEMREF - data reference in STMT
4769 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4772 DR - data_reference struct for MEMREF
4773 return value - the relevant variable for memory tag (for aliasing purposes).
4778 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4779 loop_vec_info loop_vinfo, struct data_reference **dr)
4781 tree symbl, oprnd0, oprnd1;
4782 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4784 tree array_base, base;
4785 struct data_reference *new_dr;
4786 bool base_aligned_p;
4789 switch (TREE_CODE (memref))
4792 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4796 symbl = DR_BASE_NAME (new_dr);
4797 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4799 switch (TREE_CODE (symbl))
4803 oprnd0 = TREE_OPERAND (symbl, 0);
4804 oprnd1 = TREE_OPERAND (symbl, 1);
4807 /* Only {address_base + offset} expressions are supported,
4808 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4809 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4810 TODO: swap operands if {offset + address_base}. */
4811 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4812 && TREE_CODE (oprnd1) != INTEGER_CST)
4813 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4816 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4819 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4820 loop_vinfo, &new_dr);
4824 /* symbl remains unchanged. */
4828 if (vect_debug_details (NULL))
4830 fprintf (dump_file, "unhandled data ref: ");
4831 print_generic_expr (dump_file, memref, TDF_SLIM);
4832 fprintf (dump_file, " (symbl ");
4833 print_generic_expr (dump_file, symbl, TDF_SLIM);
4834 fprintf (dump_file, ") in stmt ");
4835 print_generic_expr (dump_file, stmt, TDF_SLIM);
4842 offset = size_zero_node;
4844 /* Store the array base in the stmt info.
4845 For one dimensional array ref a[i], the base is a,
4846 for multidimensional a[i1][i2]..[iN], the base is
4847 a[i1][i2]..[iN-1]. */
4848 array_base = TREE_OPERAND (memref, 0);
4849 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4851 new_dr = analyze_array (stmt, memref, is_read);
4854 /* Find the relevant symbol for aliasing purposes. */
4855 base = DR_BASE_NAME (new_dr);
4856 switch (TREE_CODE (base))
4863 symbl = TREE_OPERAND (base, 0);
4867 /* Could have recorded more accurate information -
4868 i.e, the actual FIELD_DECL that is being referenced -
4869 but later passes expect VAR_DECL as the nmt. */
4870 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4871 loop_vinfo, &offset, &base_aligned_p);
4876 if (vect_debug_details (NULL))
4878 fprintf (dump_file, "unhandled struct/class field access ");
4879 print_generic_expr (dump_file, stmt, TDF_SLIM);
4886 if (vect_debug_details (NULL))
4888 fprintf (dump_file, "unhandled data ref: ");
4889 print_generic_expr (dump_file, memref, TDF_SLIM);
4890 fprintf (dump_file, " in stmt ");
4891 print_generic_expr (dump_file, stmt, TDF_SLIM);
4899 /* Function vect_analyze_data_refs.
4901 Find all the data references in the loop.
4903 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4904 which base is really an array (not a pointer) and which alignment
4905 can be forced. This restriction will be relaxed. */
4908 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4910 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4911 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4912 int nbbs = loop->num_nodes;
4913 block_stmt_iterator si;
4915 struct data_reference *dr;
4918 bool base_aligned_p;
4921 if (vect_debug_details (NULL))
4922 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4924 for (j = 0; j < nbbs; j++)
4926 basic_block bb = bbs[j];
4927 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4929 bool is_read = false;
4930 tree stmt = bsi_stmt (si);
4931 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4932 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4933 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4934 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4935 varray_type *datarefs = NULL;
4936 int nvuses, nv_may_defs, nv_must_defs;
4940 /* Assumption: there exists a data-ref in stmt, if and only if
4941 it has vuses/vdefs. */
4943 if (!vuses && !v_may_defs && !v_must_defs)
4946 nvuses = NUM_VUSES (vuses);
4947 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4948 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4950 if (nvuses && (nv_may_defs || nv_must_defs))
4952 if (vect_debug_details (NULL))
4954 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4955 print_generic_expr (dump_file, stmt, TDF_SLIM);
4960 if (TREE_CODE (stmt) != MODIFY_EXPR)
4962 if (vect_debug_details (NULL))
4964 fprintf (dump_file, "unexpected vops in stmt: ");
4965 print_generic_expr (dump_file, stmt, TDF_SLIM);
4972 memref = TREE_OPERAND (stmt, 1);
4973 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4978 memref = TREE_OPERAND (stmt, 0);
4979 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4983 /* Analyze MEMREF. If it is of a supported form, build data_reference
4984 struct for it (DR) and find the relevant symbol for aliasing
4986 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4990 if (vect_debug_stats (loop) || vect_debug_details (loop))
4992 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4993 print_generic_expr (dump_file, stmt, TDF_SLIM);
4998 /* Find and record the memtag assigned to this data-ref. */
4999 switch (TREE_CODE (symbl))
5002 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5006 symbl = SSA_NAME_VAR (symbl);
5007 tag = get_var_ann (symbl)->type_mem_tag;
5010 tree ptr = TREE_OPERAND (memref, 0);
5011 if (TREE_CODE (ptr) == SSA_NAME)
5012 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5016 if (vect_debug_stats (loop) || vect_debug_details (loop))
5017 fprintf (dump_file, "not vectorized: no memtag for ref.");
5020 STMT_VINFO_MEMTAG (stmt_info) = tag;
5024 address_base = TREE_OPERAND (symbl, 0);
5026 switch (TREE_CODE (address_base))
5029 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5031 tag = vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr),
5032 NULL_TREE, loop_vinfo, &offset, &base_aligned_p);
5035 if (vect_debug_stats (loop) || vect_debug_details (loop))
5036 fprintf (dump_file, "not vectorized: no memtag for ref.");
5039 STMT_VINFO_MEMTAG (stmt_info) = tag;
5043 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5047 if (vect_debug_stats (loop) || vect_debug_details (loop))
5050 "not vectorized: unhandled address expr: ");
5051 print_generic_expr (dump_file, stmt, TDF_SLIM);
5058 if (vect_debug_stats (loop) || vect_debug_details (loop))
5060 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5061 print_generic_expr (dump_file, memref, TDF_SLIM);
5066 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5067 STMT_VINFO_DATA_REF (stmt_info) = dr;
5075 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5077 /* Function vect_mark_relevant.
5079 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5082 vect_mark_relevant (varray_type worklist, tree stmt)
5084 stmt_vec_info stmt_info;
5086 if (vect_debug_details (NULL))
5087 fprintf (dump_file, "mark relevant.");
5089 if (TREE_CODE (stmt) == PHI_NODE)
5091 VARRAY_PUSH_TREE (worklist, stmt);
5095 stmt_info = vinfo_for_stmt (stmt);
5099 if (vect_debug_details (NULL))
5101 fprintf (dump_file, "mark relevant: no stmt info!!.");
5102 print_generic_expr (dump_file, stmt, TDF_SLIM);
5107 if (STMT_VINFO_RELEVANT_P (stmt_info))
5109 if (vect_debug_details (NULL))
5110 fprintf (dump_file, "already marked relevant.");
5114 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5115 VARRAY_PUSH_TREE (worklist, stmt);
5119 /* Function vect_stmt_relevant_p.
5121 Return true if STMT in loop that is represented by LOOP_VINFO is
5122 "relevant for vectorization".
5124 A stmt is considered "relevant for vectorization" if:
5125 - it has uses outside the loop.
5126 - it has vdefs (it alters memory).
5127 - control stmts in the loop (except for the exit condition).
5129 CHECKME: what other side effects would the vectorizer allow? */
5132 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5134 v_may_def_optype v_may_defs;
5135 v_must_def_optype v_must_defs;
5136 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5141 /* cond stmt other than loop exit cond. */
5142 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5145 /* changing memory. */
5146 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5147 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5148 if (v_may_defs || v_must_defs)
5150 if (vect_debug_details (NULL))
5151 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5155 /* uses outside the loop. */
5156 df = get_immediate_uses (stmt);
5157 num_uses = num_immediate_uses (df);
5158 for (i = 0; i < num_uses; i++)
5160 tree use = immediate_use (df, i);
5161 basic_block bb = bb_for_stmt (use);
5162 if (!flow_bb_inside_loop_p (loop, bb))
5164 if (vect_debug_details (NULL))
5165 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5174 /* Function vect_mark_stmts_to_be_vectorized.
5176 Not all stmts in the loop need to be vectorized. For example:
5185 Stmt 1 and 3 do not need to be vectorized, because loop control and
5186 addressing of vectorized data-refs are handled differently.
5188 This pass detects such stmts. */
5191 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5193 varray_type worklist;
5194 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5195 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5196 unsigned int nbbs = loop->num_nodes;
5197 block_stmt_iterator si;
5203 stmt_vec_info stmt_info;
5205 if (vect_debug_details (NULL))
5206 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5208 VARRAY_TREE_INIT (worklist, 64, "work list");
5210 /* 1. Init worklist. */
5212 for (i = 0; i < nbbs; i++)
5214 basic_block bb = bbs[i];
5215 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5217 stmt = bsi_stmt (si);
5219 if (vect_debug_details (NULL))
5221 fprintf (dump_file, "init: stmt relevant? ");
5222 print_generic_expr (dump_file, stmt, TDF_SLIM);
5225 stmt_info = vinfo_for_stmt (stmt);
5226 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5228 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5229 vect_mark_relevant (worklist, stmt);
5234 /* 2. Process_worklist */
5236 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5238 stmt = VARRAY_TOP_TREE (worklist);
5239 VARRAY_POP (worklist);
5241 if (vect_debug_details (NULL))
5243 fprintf (dump_file, "worklist: examine stmt: ");
5244 print_generic_expr (dump_file, stmt, TDF_SLIM);
5247 /* Examine the USES in this statement. Mark all the statements which
5248 feed this statement's uses as "relevant", unless the USE is used as
5251 if (TREE_CODE (stmt) == PHI_NODE)
5253 /* follow the def-use chain inside the loop. */
5254 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5256 tree arg = PHI_ARG_DEF (stmt, j);
5257 tree def_stmt = NULL_TREE;
5259 if (!vect_is_simple_use (arg, loop, &def_stmt))
5261 if (vect_debug_details (NULL))
5262 fprintf (dump_file, "worklist: unsupported use.");
5263 varray_clear (worklist);
5269 if (vect_debug_details (NULL))
5271 fprintf (dump_file, "worklist: def_stmt: ");
5272 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5275 bb = bb_for_stmt (def_stmt);
5276 if (flow_bb_inside_loop_p (loop, bb))
5277 vect_mark_relevant (worklist, def_stmt);
5281 ann = stmt_ann (stmt);
5282 use_ops = USE_OPS (ann);
5284 for (i = 0; i < NUM_USES (use_ops); i++)
5286 tree use = USE_OP (use_ops, i);
5288 /* We are only interested in uses that need to be vectorized. Uses
5289 that are used for address computation are not considered relevant.
5291 if (exist_non_indexing_operands_for_use_p (use, stmt))
5293 tree def_stmt = NULL_TREE;
5295 if (!vect_is_simple_use (use, loop, &def_stmt))
5297 if (vect_debug_details (NULL))
5298 fprintf (dump_file, "worklist: unsupported use.");
5299 varray_clear (worklist);
5306 if (vect_debug_details (NULL))
5308 fprintf (dump_file, "worklist: examine use %d: ", i);
5309 print_generic_expr (dump_file, use, TDF_SLIM);
5312 bb = bb_for_stmt (def_stmt);
5313 if (flow_bb_inside_loop_p (loop, bb))
5314 vect_mark_relevant (worklist, def_stmt);
5317 } /* while worklist */
5319 varray_clear (worklist);
5324 /* Function vect_can_advance_ivs_p
5326 In case the number of iterations that LOOP iterates in unknown at compile
5327 time, an epilog loop will be generated, and the loop induction variables
5328 (IVs) will be "advanced" to the value they are supposed to take just before
5329 the epilog loop. Here we check that the access function of the loop IVs
5330 and the expression that represents the loop bound are simple enough.
5331 These restrictions will be relaxed in the future. */
5334 vect_can_advance_ivs_p (struct loop *loop)
5336 basic_block bb = loop->header;
5339 /* Analyze phi functions of the loop header. */
5341 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5343 tree access_fn = NULL;
5344 tree evolution_part;
5346 if (vect_debug_details (NULL))
5348 fprintf (dump_file, "Analyze phi: ");
5349 print_generic_expr (dump_file, phi, TDF_SLIM);
5352 /* Skip virtual phi's. The data dependences that are associated with
5353 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5355 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5357 if (vect_debug_details (NULL))
5358 fprintf (dump_file, "virtual phi. skip.");
5362 /* Analyze the evolution function. */
5364 access_fn = instantiate_parameters
5365 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5369 if (vect_debug_details (NULL))
5370 fprintf (dump_file, "No Access function.");
5374 if (vect_debug_details (NULL))
5376 fprintf (dump_file, "Access function of PHI: ");
5377 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5380 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5382 if (evolution_part == NULL_TREE)
5385 /* FORNOW: We do not transform initial conditions of IVs
5386 which evolution functions are a polynomial of degree >= 2. */
5388 if (tree_is_chrec (evolution_part))
5396 /* Function vect_get_loop_niters.
5398 Determine how many iterations the loop is executed.
5399 If an expression that represents the number of iterations
5400 can be constructed, place it in NUMBER_OF_ITERATIONS.
5401 Return the loop exit condition. */
5404 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5408 if (vect_debug_details (NULL))
5409 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5411 niters = number_of_iterations_in_loop (loop);
5413 if (niters != NULL_TREE
5414 && niters != chrec_dont_know)
5416 *number_of_iterations = niters;
5418 if (vect_debug_details (NULL))
5420 fprintf (dump_file, "==> get_loop_niters:" );
5421 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5425 return get_loop_exit_condition (loop);
5429 /* Function vect_analyze_loop_form.
5431 Verify the following restrictions (some may be relaxed in the future):
5432 - it's an inner-most loop
5433 - number of BBs = 2 (which are the loop header and the latch)
5434 - the loop has a pre-header
5435 - the loop has a single entry and exit
5436 - the loop exit condition is simple enough, and the number of iterations
5437 can be analyzed (a countable loop). */
5439 static loop_vec_info
5440 vect_analyze_loop_form (struct loop *loop)
5442 loop_vec_info loop_vinfo;
5444 tree number_of_iterations = NULL;
5445 bool rescan = false;
5447 if (vect_debug_details (loop))
5448 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5451 || !loop->single_exit
5452 || loop->num_nodes != 2
5453 || EDGE_COUNT (loop->header->preds) != 2
5454 || loop->num_entries != 1)
5456 if (vect_debug_stats (loop) || vect_debug_details (loop))
5458 fprintf (dump_file, "not vectorized: bad loop form. ");
5460 fprintf (dump_file, "nested loop.");
5461 else if (!loop->single_exit)
5462 fprintf (dump_file, "multiple exits.");
5463 else if (loop->num_nodes != 2)
5464 fprintf (dump_file, "too many BBs in loop.");
5465 else if (EDGE_COUNT (loop->header->preds) != 2)
5466 fprintf (dump_file, "too many incoming edges.");
5467 else if (loop->num_entries != 1)
5468 fprintf (dump_file, "too many entries.");
5474 /* We assume that the loop exit condition is at the end of the loop. i.e,
5475 that the loop is represented as a do-while (with a proper if-guard
5476 before the loop if needed), where the loop header contains all the
5477 executable statements, and the latch is empty. */
5478 if (!empty_block_p (loop->latch))
5480 if (vect_debug_stats (loop) || vect_debug_details (loop))
5481 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5485 /* Make sure we have a preheader basic block. */
5486 if (!loop->pre_header)
5489 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5492 /* Make sure there exists a single-predecessor exit bb: */
5493 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5496 loop_split_edge_with (loop->exit_edges[0], NULL);
5501 flow_loop_scan (loop, LOOP_ALL);
5502 /* Flow loop scan does not update loop->single_exit field. */
5503 loop->single_exit = loop->exit_edges[0];
5506 if (empty_block_p (loop->header))
5508 if (vect_debug_stats (loop) || vect_debug_details (loop))
5509 fprintf (dump_file, "not vectorized: empty loop.");
5513 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5516 if (vect_debug_stats (loop) || vect_debug_details (loop))
5517 fprintf (dump_file, "not vectorized: complicated exit condition.");
5521 if (!number_of_iterations)
5523 if (vect_debug_stats (loop) || vect_debug_details (loop))
5525 "not vectorized: number of iterations cannot be computed.");
5529 if (chrec_contains_undetermined (number_of_iterations))
5531 if (vect_debug_details (NULL))
5532 fprintf (dump_file, "Infinite number of iterations.");
5536 loop_vinfo = new_loop_vec_info (loop);
5537 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5539 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5541 if (vect_debug_details (loop))
5543 fprintf (dump_file, "loop bound unknown.\n");
5544 fprintf (dump_file, "Symbolic number of iterations is ");
5545 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5549 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5551 if (vect_debug_stats (loop) || vect_debug_details (loop))
5552 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5556 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5562 /* Function vect_analyze_loop.
5564 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5565 for it. The different analyses will record information in the
5566 loop_vec_info struct. */
5568 static loop_vec_info
5569 vect_analyze_loop (struct loop *loop)
5572 loop_vec_info loop_vinfo;
5574 if (vect_debug_details (NULL))
5575 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5577 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5579 loop_vinfo = vect_analyze_loop_form (loop);
5582 if (vect_debug_details (loop))
5583 fprintf (dump_file, "bad loop form.");
5587 /* Find all data references in the loop (which correspond to vdefs/vuses)
5588 and analyze their evolution in the loop.
5590 FORNOW: Handle only simple, array references, which
5591 alignment can be forced, and aligned pointer-references. */
5593 ok = vect_analyze_data_refs (loop_vinfo);
5596 if (vect_debug_details (loop))
5597 fprintf (dump_file, "bad data references.");
5598 destroy_loop_vec_info (loop_vinfo);
5602 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5604 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5607 if (vect_debug_details (loop))
5608 fprintf (dump_file, "unexpected pattern.");
5609 if (vect_debug_details (loop))
5610 fprintf (dump_file, "not vectorized: unexpected pattern.");
5611 destroy_loop_vec_info (loop_vinfo);
5615 /* Check that all cross-iteration scalar data-flow cycles are OK.
5616 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5618 ok = vect_analyze_scalar_cycles (loop_vinfo);
5621 if (vect_debug_details (loop))
5622 fprintf (dump_file, "bad scalar cycle.");
5623 destroy_loop_vec_info (loop_vinfo);
5627 /* Analyze data dependences between the data-refs in the loop.
5628 FORNOW: fail at the first data dependence that we encounter. */
5630 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5633 if (vect_debug_details (loop))
5634 fprintf (dump_file, "bad data dependence.");
5635 destroy_loop_vec_info (loop_vinfo);
5639 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5640 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5642 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5645 if (vect_debug_details (loop))
5646 fprintf (dump_file, "bad data access.");
5647 destroy_loop_vec_info (loop_vinfo);
5651 /* Analyze the alignment of the data-refs in the loop.
5652 FORNOW: Only aligned accesses are handled. */
5654 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5657 if (vect_debug_details (loop))
5658 fprintf (dump_file, "bad data alignment.");
5659 destroy_loop_vec_info (loop_vinfo);
5663 /* Scan all the operations in the loop and make sure they are
5666 ok = vect_analyze_operations (loop_vinfo);
5669 if (vect_debug_details (loop))
5670 fprintf (dump_file, "bad operation or unsupported loop bound.");
5671 destroy_loop_vec_info (loop_vinfo);
5675 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5681 /* Function need_imm_uses_for.
5683 Return whether we ought to include information for 'var'
5684 when calculating immediate uses. For this pass we only want use
5685 information for non-virtual variables. */
5688 need_imm_uses_for (tree var)
5690 return is_gimple_reg (var);
5694 /* Function vectorize_loops.
5696 Entry Point to loop vectorization phase. */
5699 vectorize_loops (struct loops *loops)
5701 unsigned int i, loops_num;
5702 unsigned int num_vectorized_loops = 0;
5704 /* Does the target support SIMD? */
5705 /* FORNOW: until more sophisticated machine modelling is in place. */
5706 if (!UNITS_PER_SIMD_WORD)
5708 if (vect_debug_details (NULL))
5709 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5713 #ifdef ENABLE_CHECKING
5714 verify_loop_closed_ssa ();
5717 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5719 /* ----------- Analyze loops. ----------- */
5721 /* If some loop was duplicated, it gets bigger number
5722 than all previously defined loops. This fact allows us to run
5723 only over initial loops skipping newly generated ones. */
5724 loops_num = loops->num;
5725 for (i = 1; i < loops_num; i++)
5727 loop_vec_info loop_vinfo;
5728 struct loop *loop = loops->parray[i];
5733 loop_vinfo = vect_analyze_loop (loop);
5734 loop->aux = loop_vinfo;
5736 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5739 vect_transform_loop (loop_vinfo, loops);
5740 num_vectorized_loops++;
5743 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5744 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5745 num_vectorized_loops);
5747 /* ----------- Finalize. ----------- */
5750 for (i = 1; i < loops_num; i++)
5752 struct loop *loop = loops->parray[i];
5753 loop_vec_info loop_vinfo;
5757 loop_vinfo = loop->aux;
5758 destroy_loop_vec_info (loop_vinfo);
5762 rewrite_into_ssa (false);
5763 rewrite_into_loop_closed_ssa (); /* FORNOW */
5764 bitmap_clear (vars_to_rename);