2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
147 #include "tree-vectorizer.h"
148 #include "tree-pass.h"
149 #include "langhooks.h"
152 /*************************************************************************
153 Simple Loop Peeling Utilities
154 *************************************************************************/
156 /* Entry point for peeling of simple loops.
157 Peel the first/last iterations of a loop.
158 It can be used outside of the vectorizer for loops that are simple enough
159 (see function documentation). In the vectorizer it is used to peel the
160 last few iterations when the loop bound is unknown or does not evenly
161 divide by the vectorization factor, and to peel the first few iterations
162 to force the alignment of data references in the loop. */
163 struct loop *slpeel_tree_peel_loop_to_edge
164 (struct loop *, struct loops *, edge, tree, tree, bool);
165 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
166 (struct loop *, struct loops *, edge);
167 static void slpeel_update_phis_for_duplicate_loop
168 (struct loop *, struct loop *, bool after);
169 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
170 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
171 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
172 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
173 static void allocate_new_names (bitmap);
174 static void rename_use_op (use_operand_p);
175 static void rename_def_op (def_operand_p, tree);
176 static void rename_variables_in_bb (basic_block);
177 static void free_new_names (bitmap);
178 static void rename_variables_in_loop (struct loop *);
179 #ifdef ENABLE_CHECKING
180 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
182 static LOC find_loop_location (struct loop *);
185 /*************************************************************************
186 Vectorization Utilities.
187 *************************************************************************/
189 /* Main analysis functions. */
190 static loop_vec_info vect_analyze_loop (struct loop *);
191 static loop_vec_info vect_analyze_loop_form (struct loop *);
192 static bool vect_analyze_data_refs (loop_vec_info);
193 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
194 static bool vect_analyze_scalar_cycles (loop_vec_info);
195 static bool vect_analyze_data_ref_accesses (loop_vec_info);
196 static bool vect_analyze_data_ref_dependence
197 (struct data_reference *, struct data_reference *, loop_vec_info);
198 static bool vect_analyze_data_ref_dependences (loop_vec_info);
199 static bool vect_analyze_data_refs_alignment (loop_vec_info);
200 static bool vect_compute_data_refs_alignment (loop_vec_info);
201 static bool vect_analyze_operations (loop_vec_info);
203 /* Main code transformation functions. */
204 static void vect_transform_loop (loop_vec_info, struct loops *);
205 static bool vect_transform_stmt (tree, block_stmt_iterator *);
206 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
207 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
208 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
209 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
210 static enum dr_alignment_support vect_supportable_dr_alignment
211 (struct data_reference *);
212 static void vect_align_data_ref (tree);
213 static void vect_enhance_data_refs_alignment (loop_vec_info);
215 /* Utility functions for the analyses. */
216 static bool vect_is_simple_use (tree , loop_vec_info, tree *);
217 static bool exist_non_indexing_operands_for_use_p (tree, tree);
218 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
219 static void vect_mark_relevant (varray_type *, tree);
220 static bool vect_stmt_relevant_p (tree, loop_vec_info);
221 static tree vect_get_loop_niters (struct loop *, tree *);
222 static bool vect_compute_data_ref_alignment (struct data_reference *);
223 static bool vect_analyze_data_ref_access (struct data_reference *);
224 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
225 static struct data_reference * vect_analyze_pointer_ref_access
227 static bool vect_can_advance_ivs_p (loop_vec_info);
228 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
229 loop_vec_info, tree *, tree *, tree *,
231 static struct data_reference * vect_analyze_pointer_ref_access
233 static tree vect_get_ptr_offset (tree, tree, tree *);
234 static tree vect_get_memtag_and_dr
235 (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
236 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
238 static tree vect_strip_conversion (tree);
240 /* Utility functions for the code transformation. */
241 static tree vect_create_destination_var (tree, tree);
242 static tree vect_create_data_ref_ptr
243 (tree, block_stmt_iterator *, tree, tree *, bool);
244 static tree vect_create_index_for_vector_ref (loop_vec_info);
245 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
246 static tree get_vectype_for_scalar_type (tree);
247 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
248 static tree vect_get_vec_def_for_operand (tree, tree);
249 static tree vect_init_vector (tree, tree);
250 static void vect_finish_stmt_generation
251 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
253 /* Utility function dealing with loop peeling (not peeling itself). */
254 static void vect_generate_tmps_on_preheader
255 (loop_vec_info, tree *, tree *, tree *);
256 static tree vect_build_loop_niters (loop_vec_info);
257 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
258 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
259 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
260 static void vect_update_inits_of_drs (loop_vec_info, tree);
261 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
262 static void vect_do_peeling_for_loop_bound
263 (loop_vec_info, tree *, struct loops *);
265 /* Utilities for creation and deletion of vec_info structs. */
266 loop_vec_info new_loop_vec_info (struct loop *loop);
267 void destroy_loop_vec_info (loop_vec_info);
268 stmt_vec_info new_stmt_vec_info (tree, loop_vec_info);
270 /*************************************************************************
271 Vectorization Debug Information.
272 *************************************************************************/
274 /* vect_verbosity_level set to invalid verbosity level to mark that it's
276 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
278 /* vect_dump will be set to stderr or dump_file if exist. */
281 /* Utilities for output formatting. */
282 static bool vect_print_dump_info (enum verbosity_levels, LOC);
283 static void vect_set_dump_settings (void);
284 void vect_set_verbosity_level (const char *);
288 /*************************************************************************
289 Simple Loop Peeling Utilities
291 Utilities to support loop peeling for vectorization purposes.
292 *************************************************************************/
295 /* For each definition in DEFINITIONS this function allocates
299 allocate_new_names (bitmap definitions)
304 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
306 tree def = ssa_name (ver);
307 tree *new_name_ptr = xmalloc (sizeof (tree));
309 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
311 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
312 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
314 SSA_NAME_AUX (def) = new_name_ptr;
319 /* Renames the use *OP_P. */
322 rename_use_op (use_operand_p op_p)
326 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
329 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
331 /* Something defined outside of the loop. */
335 /* An ordinary ssa name defined in the loop. */
337 SET_USE (op_p, *new_name_ptr);
341 /* Renames the def *OP_P in statement STMT. */
344 rename_def_op (def_operand_p op_p, tree stmt)
348 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
351 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
353 /* Something defined outside of the loop. */
357 /* An ordinary ssa name defined in the loop. */
359 SET_DEF (op_p, *new_name_ptr);
360 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
364 /* Renames the variables in basic block BB. */
367 rename_variables_in_bb (basic_block bb)
370 block_stmt_iterator bsi;
376 v_may_def_optype v_may_defs;
377 v_must_def_optype v_must_defs;
381 struct loop *loop = bb->loop_father;
383 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
384 rename_def_op (PHI_RESULT_PTR (phi), phi);
386 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
388 stmt = bsi_stmt (bsi);
389 get_stmt_operands (stmt);
390 ann = stmt_ann (stmt);
392 uses = USE_OPS (ann);
393 for (i = 0; i < NUM_USES (uses); i++)
394 rename_use_op (USE_OP_PTR (uses, i));
396 defs = DEF_OPS (ann);
397 for (i = 0; i < NUM_DEFS (defs); i++)
398 rename_def_op (DEF_OP_PTR (defs, i), stmt);
400 vuses = VUSE_OPS (ann);
401 for (i = 0; i < NUM_VUSES (vuses); i++)
402 rename_use_op (VUSE_OP_PTR (vuses, i));
404 v_may_defs = V_MAY_DEF_OPS (ann);
405 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
407 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
408 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
411 v_must_defs = V_MUST_DEF_OPS (ann);
412 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
414 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
415 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
419 FOR_EACH_EDGE (e, ei, bb->succs)
421 if (!flow_bb_inside_loop_p (loop, e->dest))
423 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
424 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
429 /* Releases the structures holding the new ssa names. */
432 free_new_names (bitmap definitions)
437 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
439 tree def = ssa_name (ver);
441 if (SSA_NAME_AUX (def))
443 free (SSA_NAME_AUX (def));
444 SSA_NAME_AUX (def) = NULL;
450 /* Renames variables in new generated LOOP. */
453 rename_variables_in_loop (struct loop *loop)
458 bbs = get_loop_body (loop);
460 for (i = 0; i < loop->num_nodes; i++)
461 rename_variables_in_bb (bbs[i]);
467 /* Update the PHI nodes of NEW_LOOP.
469 NEW_LOOP is a duplicate of ORIG_LOOP.
470 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
471 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
472 executes before it. */
475 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
476 struct loop *new_loop, bool after)
478 tree *new_name_ptr, new_ssa_name;
479 tree phi_new, phi_orig;
481 edge orig_loop_latch = loop_latch_edge (orig_loop);
482 edge orig_entry_e = loop_preheader_edge (orig_loop);
483 edge new_loop_exit_e = new_loop->exit_edges[0];
484 edge new_loop_entry_e = loop_preheader_edge (new_loop);
485 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
488 step 1. For each loop-header-phi:
489 Add the first phi argument for the phi in NEW_LOOP
490 (the one associated with the entry of NEW_LOOP)
492 step 2. For each loop-header-phi:
493 Add the second phi argument for the phi in NEW_LOOP
494 (the one associated with the latch of NEW_LOOP)
496 step 3. Update the phis in the successor block of NEW_LOOP.
498 case 1: NEW_LOOP was placed before ORIG_LOOP:
499 The successor block of NEW_LOOP is the header of ORIG_LOOP.
500 Updating the phis in the successor block can therefore be done
501 along with the scanning of the loop header phis, because the
502 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
503 phi nodes, organized in the same order.
505 case 2: NEW_LOOP was placed after ORIG_LOOP:
506 The successor block of NEW_LOOP is the original exit block of
507 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
508 We postpone updating these phis to a later stage (when
509 loop guards are added).
513 /* Scan the phis in the headers of the old and new loops
514 (they are organized in exactly the same order). */
516 for (phi_new = phi_nodes (new_loop->header),
517 phi_orig = phi_nodes (orig_loop->header);
519 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
522 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
523 add_phi_arg (phi_new, def, new_loop_entry_e);
526 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
527 if (TREE_CODE (def) != SSA_NAME)
530 new_name_ptr = SSA_NAME_AUX (def);
532 /* Something defined outside of the loop. */
535 /* An ordinary ssa name defined in the loop. */
536 new_ssa_name = *new_name_ptr;
537 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
539 /* step 3 (case 1). */
542 gcc_assert (new_loop_exit_e == orig_entry_e);
543 SET_PHI_ARG_DEF (phi_orig,
544 new_loop_exit_e->dest_idx,
551 /* Update PHI nodes for a guard of the LOOP.
554 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
555 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
556 originates from the guard-bb, skips LOOP and reaches the (unique) exit
557 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
558 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
559 LOOP header) before the guard code was added, and now it became a merge
560 point of two paths - the path that ends with the LOOP exit-edge, and
561 the path that ends with GUARD_EDGE.
563 This function creates and updates the relevant phi nodes to account for
564 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
565 1. Create phi nodes at NEW_MERGE_BB.
566 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
567 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
570 ===> The CFG before the guard-code was added:
572 if (exit_loop) goto update_bb : LOOP_header_bb
575 ==> The CFG after the guard-code was added:
577 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
579 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
584 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
585 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
586 organized in the same order.
587 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
590 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
591 "original" loop). FALSE if LOOP is an original loop (not a newly
592 created copy). The SSA_NAME_AUX fields of the defs in the original
593 loop are the corresponding new ssa-names used in the new duplicated
594 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
595 nodes in UPDATE_BB takes the original ssa-name, and which takes the
596 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
597 the LOOP-exit-edge takes the new-name, and the phi-arg that is
598 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
599 FALSE, it's the other way around.
603 slpeel_update_phi_nodes_for_guard (edge guard_edge,
608 tree orig_phi, new_phi, update_phi;
609 tree guard_arg, loop_arg;
610 basic_block new_merge_bb = guard_edge->dest;
611 edge e = EDGE_SUCC (new_merge_bb, 0);
612 basic_block update_bb = e->dest;
613 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
615 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
616 orig_phi && update_phi;
617 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
619 /* 1. Generate new phi node in NEW_MERGE_BB: */
620 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
623 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
624 of LOOP. Set the two phi args in NEW_PHI for these edges: */
627 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
628 EDGE_SUCC (loop->latch, 0));
629 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
633 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
634 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
638 new_name = *new_name_ptr;
640 /* Something defined outside of the loop */
645 guard_arg = orig_def;
650 guard_arg = new_name;
654 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
655 add_phi_arg (new_phi, guard_arg, guard_edge);
657 /* 3. Update phi in successor block. */
658 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
659 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
660 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
663 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
667 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
668 that starts at zero, increases by one and its limit is NITERS.
670 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
673 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
675 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
677 edge exit_edge = loop->exit_edges[0];
678 block_stmt_iterator loop_cond_bsi;
679 block_stmt_iterator incr_bsi;
681 tree begin_label = tree_block_label (loop->latch);
682 tree exit_label = tree_block_label (loop->single_exit->dest);
683 tree init = build_int_cst (TREE_TYPE (niters), 0);
684 tree step = build_int_cst (TREE_TYPE (niters), 1);
689 orig_cond = get_loop_exit_condition (loop);
690 #ifdef ENABLE_CHECKING
691 gcc_assert (orig_cond);
693 loop_cond_bsi = bsi_for_stmt (orig_cond);
695 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
696 create_iv (init, step, NULL_TREE, loop,
697 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
699 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
701 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
702 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
703 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
705 else /* 'then' edge loops back. */
707 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
708 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
709 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
712 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
713 then_label, else_label);
714 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
716 /* Remove old loop exit test: */
717 bsi_remove (&loop_cond_bsi);
719 loop_loc = find_loop_location (loop);
720 if (dump_file && (dump_flags & TDF_DETAILS))
722 if (loop_loc != UNKNOWN_LOC)
723 fprintf (dump_file, "\nloop at %s:%d: ",
724 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
725 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
728 loop->nb_iterations = niters;
732 /* Given LOOP this function generates a new copy of it and puts it
733 on E which is either the entry or exit of LOOP. */
736 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
739 struct loop *new_loop;
740 basic_block *new_bbs, *bbs;
743 basic_block exit_dest;
746 at_exit = (e == loop->exit_edges[0]);
747 if (!at_exit && e != loop_preheader_edge (loop))
750 bbs = get_loop_body (loop);
752 /* Check whether duplication is possible. */
753 if (!can_copy_bbs_p (bbs, loop->num_nodes))
759 /* Generate new loop structure. */
760 new_loop = duplicate_loop (loops, loop, loop->outer);
767 exit_dest = loop->exit_edges[0]->dest;
768 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
769 exit_dest) == loop->header ?
772 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
774 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
776 /* Duplicating phi args at exit bbs as coming
777 also from exit of duplicated loop. */
778 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
780 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
783 edge new_loop_exit_edge;
785 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
786 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
788 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
790 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
794 if (at_exit) /* Add the loop copy at exit. */
796 redirect_edge_and_branch_force (e, new_loop->header);
797 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
799 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
801 else /* Add the copy at entry. */
804 edge entry_e = loop_preheader_edge (loop);
805 basic_block preheader = entry_e->src;
807 if (!flow_bb_inside_loop_p (new_loop,
808 EDGE_SUCC (new_loop->header, 0)->dest))
809 new_exit_e = EDGE_SUCC (new_loop->header, 0);
811 new_exit_e = EDGE_SUCC (new_loop->header, 1);
813 redirect_edge_and_branch_force (new_exit_e, loop->header);
814 set_immediate_dominator (CDI_DOMINATORS, loop->header,
817 /* We have to add phi args to the loop->header here as coming
818 from new_exit_e edge. */
819 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
821 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
823 add_phi_arg (phi, phi_arg, new_exit_e);
826 redirect_edge_and_branch_force (entry_e, new_loop->header);
827 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
830 flow_loop_scan (new_loop, LOOP_ALL);
831 flow_loop_scan (loop, LOOP_ALL);
839 /* Given the condition statement COND, put it as the last statement
840 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
841 Assumes that this is the single exit of the guarded loop.
842 Returns the skip edge. */
845 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
848 block_stmt_iterator bsi;
850 tree cond_stmt, then_label, else_label;
852 enter_e = EDGE_SUCC (guard_bb, 0);
853 enter_e->flags &= ~EDGE_FALLTHRU;
854 enter_e->flags |= EDGE_FALSE_VALUE;
855 bsi = bsi_last (guard_bb);
857 then_label = build1 (GOTO_EXPR, void_type_node,
858 tree_block_label (exit_bb));
859 else_label = build1 (GOTO_EXPR, void_type_node,
860 tree_block_label (enter_e->dest));
861 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
862 then_label, else_label);
863 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
864 /* Add new edge to connect entry block to the second loop. */
865 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
866 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
871 /* This function verifies that the following restrictions apply to LOOP:
873 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
874 (3) it is single entry, single exit
875 (4) its exit condition is the last stmt in the header
876 (5) E is the entry/exit edge of LOOP.
880 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
882 edge exit_e = loop->exit_edges [0];
883 edge entry_e = loop_preheader_edge (loop);
884 tree orig_cond = get_loop_exit_condition (loop);
885 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
887 if (any_marked_for_rewrite_p ())
891 /* All loops have an outer scope; the only case loop->outer is NULL is for
892 the function itself. */
894 || loop->num_nodes != 2
895 || !empty_block_p (loop->latch)
896 || loop->num_exits != 1
897 || loop->num_entries != 1
898 /* Verify that new loop exit condition can be trivially modified. */
899 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
900 || (e != exit_e && e != entry_e))
906 #ifdef ENABLE_CHECKING
908 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
909 struct loop *second_loop)
911 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
912 basic_block loop2_entry_bb = second_loop->pre_header;
913 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
915 /* A guard that controls whether the second_loop is to be executed or skipped
916 is placed in first_loop->exit. first_loopt->exit therefore has two
917 successors - one is the preheader of second_loop, and the other is a bb
920 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
923 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
926 /* The preheader of new_loop is expected to have two predessors:
927 first_loop->exit and the block that precedes first_loop. */
929 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
930 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
931 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
932 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
933 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
935 /* Verify that the other successor of first_loopt->exit is after the
941 /* Function slpeel_tree_peel_loop_to_edge.
943 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
944 that is placed on the entry (exit) edge E of LOOP. After this transformation
945 we have two loops one after the other - first-loop iterates FIRST_NITERS
946 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
949 - LOOP: the loop to be peeled.
950 - E: the exit or entry edge of LOOP.
951 If it is the entry edge, we peel the first iterations of LOOP. In this
952 case first-loop is LOOP, and second-loop is the newly created loop.
953 If it is the exit edge, we peel the last iterations of LOOP. In this
954 case, first-loop is the newly created loop, and second-loop is LOOP.
955 - NITERS: the number of iterations that LOOP iterates.
956 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
957 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
958 for updating the loop bound of the first-loop to FIRST_NITERS. If it
959 is false, the caller of this function may want to take care of this
960 (this can be useful if we don't want new stmts added to first-loop).
963 The function returns a pointer to the new loop-copy, or NULL if it failed
964 to perform the transformation.
966 The function generates two if-then-else guards: one before the first loop,
967 and the other before the second loop:
969 if (FIRST_NITERS == 0) then skip the first loop,
970 and go directly to the second loop.
972 if (FIRST_NITERS == NITERS) then skip the second loop.
974 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
975 FORNOW the resulting code will not be in loop-closed-ssa form.
979 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
980 edge e, tree first_niters,
981 tree niters, bool update_first_loop_count)
983 struct loop *new_loop = NULL, *first_loop, *second_loop;
987 basic_block bb_before_second_loop, bb_after_second_loop;
988 basic_block bb_before_first_loop;
989 basic_block bb_between_loops;
990 edge exit_e = loop->exit_edges [0];
993 if (!slpeel_can_duplicate_loop_p (loop, e))
996 /* We have to initialize cfg_hooks. Then, when calling
997 cfg_hooks->split_edge, the function tree_split_edge
998 is actually called and, when calling cfg_hooks->duplicate_block,
999 the function tree_duplicate_bb is called. */
1000 tree_register_cfg_hooks ();
1003 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1004 Resulting CFG would be:
1017 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1019 loop_loc = find_loop_location (loop);
1020 if (dump_file && (dump_flags & TDF_DETAILS))
1022 if (loop_loc != UNKNOWN_LOC)
1023 fprintf (dump_file, "\n%s:%d: note: ",
1024 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1025 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1032 /* NEW_LOOP was placed after LOOP. */
1034 second_loop = new_loop;
1038 /* NEW_LOOP was placed before LOOP. */
1039 first_loop = new_loop;
1043 definitions = marked_ssa_names ();
1044 allocate_new_names (definitions);
1045 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1046 rename_variables_in_loop (new_loop);
1049 /* 2. Add the guard that controls whether the first loop is executed.
1050 Resulting CFG would be:
1052 bb_before_first_loop:
1053 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1060 bb_before_second_loop:
1069 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1070 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1071 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1072 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1073 flow_loop_scan (first_loop, LOOP_ALL);
1074 flow_loop_scan (second_loop, LOOP_ALL);
1077 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1078 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1079 bb_before_second_loop, bb_before_first_loop);
1080 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1081 first_loop == new_loop);
1084 /* 3. Add the guard that controls whether the second loop is executed.
1085 Resulting CFG would be:
1087 bb_before_first_loop:
1088 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1096 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1097 GOTO bb_before_second_loop
1099 bb_before_second_loop:
1105 bb_after_second_loop:
1110 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1111 add_bb_to_loop (bb_between_loops, first_loop->outer);
1112 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1113 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1114 flow_loop_scan (first_loop, LOOP_ALL);
1115 flow_loop_scan (second_loop, LOOP_ALL);
1117 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1118 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1119 bb_after_second_loop, bb_before_first_loop);
1120 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1121 second_loop == new_loop);
1123 /* Flow loop scan does not update loop->single_exit field. */
1124 first_loop->single_exit = first_loop->exit_edges[0];
1125 second_loop->single_exit = second_loop->exit_edges[0];
1127 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1129 if (update_first_loop_count)
1130 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1132 free_new_names (definitions);
1133 BITMAP_XFREE (definitions);
1134 unmark_all_for_rewrite ();
1139 /* Function vect_get_loop_location.
1141 Extract the location of the loop in the source code.
1142 If the loop is not well formed for vectorization, an estimated
1143 location is calculated.
1144 Return the loop location if succeed and NULL if not. */
1147 find_loop_location (struct loop *loop)
1149 tree node = NULL_TREE;
1151 block_stmt_iterator si;
1156 node = get_loop_exit_condition (loop);
1158 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1159 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1160 return EXPR_LOC (node);
1162 /* If we got here the loop is probably not "well formed",
1163 try to estimate the loop location */
1170 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1172 node = bsi_stmt (si);
1173 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1174 return EXPR_LOC (node);
1181 /*************************************************************************
1182 Vectorization Debug Information.
1183 *************************************************************************/
1185 /* Function vect_set_verbosity_level.
1187 Called from toplev.c upon detection of the
1188 -ftree-vectorizer-verbose=N option. */
1191 vect_set_verbosity_level (const char *val)
1196 if (vl < MAX_VERBOSITY_LEVEL)
1197 vect_verbosity_level = vl;
1199 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1203 /* Function vect_set_dump_settings.
1205 Fix the verbosity level of the vectorizer if the
1206 requested level was not set explicitly using the flag
1207 -ftree-vectorizer-verbose=N.
1208 Decide where to print the debugging information (dump_file/stderr).
1209 If the user defined the verbosity level, but there is no dump file,
1210 print to stderr, otherwise print to the dump file. */
1213 vect_set_dump_settings (void)
1215 vect_dump = dump_file;
1217 /* Check if the verbosity level was defined by the user: */
1218 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1220 /* If there is no dump file, print to stderr. */
1226 /* User didn't specify verbosity level: */
1227 if (dump_flags & TDF_DETAILS)
1228 vect_verbosity_level = REPORT_DETAILS;
1229 else if (dump_flags & TDF_STATS)
1230 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1232 vect_verbosity_level = REPORT_NONE;
1236 /* Function debug_loop_details.
1238 For vectorization debug dumps. */
1241 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1243 if (vl > vect_verbosity_level)
1246 if (loc == UNKNOWN_LOC)
1247 fprintf (vect_dump, "\n%s:%d: note: ",
1248 DECL_SOURCE_FILE (current_function_decl),
1249 DECL_SOURCE_LINE (current_function_decl));
1251 fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1259 /* Here the proper Vectorizer starts. */
1261 /*************************************************************************
1262 Vectorization Utilities.
1263 *************************************************************************/
1265 /* Function new_stmt_vec_info.
1267 Create and initialize a new stmt_vec_info struct for STMT. */
1270 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1273 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1275 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1276 STMT_VINFO_STMT (res) = stmt;
1277 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1278 STMT_VINFO_RELEVANT_P (res) = 0;
1279 STMT_VINFO_VECTYPE (res) = NULL;
1280 STMT_VINFO_VEC_STMT (res) = NULL;
1281 STMT_VINFO_DATA_REF (res) = NULL;
1282 STMT_VINFO_MEMTAG (res) = NULL;
1283 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1284 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1285 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1286 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1287 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1293 /* Function new_loop_vec_info.
1295 Create and initialize a new loop_vec_info struct for LOOP, as well as
1296 stmt_vec_info structs for all the stmts in LOOP. */
1299 new_loop_vec_info (struct loop *loop)
1303 block_stmt_iterator si;
1306 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1308 bbs = get_loop_body (loop);
1310 /* Create stmt_info for all stmts in the loop. */
1311 for (i = 0; i < loop->num_nodes; i++)
1313 basic_block bb = bbs[i];
1314 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1316 tree stmt = bsi_stmt (si);
1319 get_stmt_operands (stmt);
1320 ann = stmt_ann (stmt);
1321 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1325 LOOP_VINFO_LOOP (res) = loop;
1326 LOOP_VINFO_BBS (res) = bbs;
1327 LOOP_VINFO_EXIT_COND (res) = NULL;
1328 LOOP_VINFO_NITERS (res) = NULL;
1329 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1330 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1331 LOOP_VINFO_VECT_FACTOR (res) = 0;
1332 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1333 "loop_write_datarefs");
1334 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1335 "loop_read_datarefs");
1336 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1337 LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1343 /* Function destroy_loop_vec_info.
1345 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1346 stmts in the loop. */
1349 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1354 block_stmt_iterator si;
1360 loop = LOOP_VINFO_LOOP (loop_vinfo);
1362 bbs = LOOP_VINFO_BBS (loop_vinfo);
1363 nbbs = loop->num_nodes;
1365 for (j = 0; j < nbbs; j++)
1367 basic_block bb = bbs[j];
1368 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1370 tree stmt = bsi_stmt (si);
1371 stmt_ann_t ann = stmt_ann (stmt);
1372 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1374 set_stmt_info (ann, NULL);
1378 free (LOOP_VINFO_BBS (loop_vinfo));
1379 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1380 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1386 /* Function vect_get_ptr_offset
1388 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1391 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1392 tree vectype ATTRIBUTE_UNUSED,
1393 tree *offset ATTRIBUTE_UNUSED)
1395 /* TODO: Use alignment information. */
1400 /* Function vect_strip_conversions
1402 Strip conversions that don't narrow the mode. */
1405 vect_strip_conversion (tree expr)
1407 tree to, ti, oprnd0;
1409 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1411 to = TREE_TYPE (expr);
1412 oprnd0 = TREE_OPERAND (expr, 0);
1413 ti = TREE_TYPE (oprnd0);
1415 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1417 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1426 /* Function vect_analyze_offset_expr
1428 Given an offset expression EXPR received from get_inner_reference, analyze
1429 it and create an expression for INITIAL_OFFSET by substituting the variables
1430 of EXPR with initial_condition of the corresponding access_fn in the loop.
1433 for (j = 3; j < N; j++)
1436 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1437 substituted, since its access_fn in the inner loop is i. 'j' will be
1438 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1441 Compute MISALIGN (the misalignment of the data reference initial access from
1442 its base) if possible. Misalignment can be calculated only if all the
1443 variables can be substituted with constants, or if a variable is multiplied
1444 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1445 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1446 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1447 VECTYPE_ALIGNMENT computation in the caller of this function).
1449 STEP is an evolution of the data reference in this loop in bytes.
1450 In the above example, STEP is C_j.
1452 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1453 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1454 are NULL_TREEs. Otherwise, return TRUE.
1459 vect_analyze_offset_expr (tree expr,
1461 tree vectype_alignment,
1462 tree *initial_offset,
1468 tree left_offset = ssize_int (0);
1469 tree right_offset = ssize_int (0);
1470 tree left_misalign = ssize_int (0);
1471 tree right_misalign = ssize_int (0);
1472 tree left_step = ssize_int (0);
1473 tree right_step = ssize_int (0);
1474 enum tree_code code;
1475 tree init, evolution;
1478 *misalign = NULL_TREE;
1479 *initial_offset = NULL_TREE;
1481 /* Strip conversions that don't narrow the mode. */
1482 expr = vect_strip_conversion (expr);
1488 if (TREE_CODE (expr) == INTEGER_CST)
1490 *initial_offset = fold_convert (ssizetype, expr);
1491 *misalign = fold_convert (ssizetype, expr);
1492 *step = ssize_int (0);
1496 /* 2. Variable. Try to substitute with initial_condition of the corresponding
1497 access_fn in the current loop. */
1498 if (SSA_VAR_P (expr))
1500 tree access_fn = analyze_scalar_evolution (loop, expr);
1502 if (access_fn == chrec_dont_know)
1506 init = initial_condition_in_loop_num (access_fn, loop->num);
1507 if (init == expr && !expr_invariant_in_loop_p (loop, init))
1508 /* Not enough information: may be not loop invariant.
1509 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1510 initial_condition is D, but it depends on i - loop's induction
1514 evolution = evolution_part_in_loop_num (access_fn, loop->num);
1515 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1516 /* Evolution is not constant. */
1519 if (TREE_CODE (init) == INTEGER_CST)
1520 *misalign = fold_convert (ssizetype, init);
1522 /* Not constant, misalignment cannot be calculated. */
1523 *misalign = NULL_TREE;
1525 *initial_offset = fold_convert (ssizetype, init);
1527 *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
1531 /* Recursive computation. */
1532 if (!BINARY_CLASS_P (expr))
1534 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
1535 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1537 fprintf (vect_dump, "Not binary expression ");
1538 print_generic_expr (vect_dump, expr, TDF_SLIM);
1542 oprnd0 = TREE_OPERAND (expr, 0);
1543 oprnd1 = TREE_OPERAND (expr, 1);
1545 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1546 &left_misalign, &left_step)
1547 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1548 &right_offset, &right_misalign, &right_step))
1551 /* The type of the operation: plus, minus or mult. */
1552 code = TREE_CODE (expr);
1556 if (TREE_CODE (right_offset) != INTEGER_CST)
1557 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1559 FORNOW: We don't support such cases. */
1562 /* Strip conversions that don't narrow the mode. */
1563 left_offset = vect_strip_conversion (left_offset);
1566 /* Misalignment computation. */
1567 if (SSA_VAR_P (left_offset))
1569 /* If the left side contains variables that can't be substituted with
1570 constants, we check if the right side is a multiple of ALIGNMENT.
1572 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1573 fold_convert (ssizetype, vectype_alignment))))
1574 *misalign = ssize_int (0);
1576 /* If the remainder is not zero or the right side isn't constant,
1577 we can't compute misalignment. */
1578 *misalign = NULL_TREE;
1582 /* The left operand was successfully substituted with constant. */
1584 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1586 *misalign = size_binop (code, left_misalign, right_misalign);
1588 *misalign = NULL_TREE;
1591 /* Step calculation. */
1592 /* Multiply the step by the right operand. */
1593 *step = size_binop (MULT_EXPR, left_step, right_offset);
1598 /* Combine the recursive calculations for step and misalignment. */
1599 *step = size_binop (code, left_step, right_step);
1601 if (left_misalign && right_misalign)
1602 *misalign = size_binop (code, left_misalign, right_misalign);
1604 *misalign = NULL_TREE;
1612 /* Compute offset. */
1613 *initial_offset = fold_convert (ssizetype,
1614 fold (build2 (code, TREE_TYPE (left_offset),
1621 /* Function vect_get_base_and_offset
1623 Return the BASE of the data reference EXPR.
1624 If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1626 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1627 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1628 instantiated with initial_conditions of access_functions of variables,
1629 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1631 Function get_inner_reference is used for the above in case of ARRAY_REF and
1635 EXPR - the memory reference that is being analyzed
1636 DR - the data_reference struct of the _original_ memory reference
1637 (Note: DR_REF (DR) is not necessarily EXPR)
1638 VECTYPE - the type that defines the alignment (i.e, we compute
1639 alignment relative to TYPE_ALIGN(VECTYPE))
1642 BASE (returned value) - the base of the data reference EXPR.
1643 E.g, if EXPR is a.b[k].c[i][j] the returned
1645 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1646 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1647 computation is impossible
1648 STEP - evolution of the DR_REF in the loop
1649 BASE_ALIGNED_P - indicates if BASE is aligned
1651 If something unexpected is encountered (an unsupported form of data-ref),
1652 then NULL_TREE is returned. */
1655 vect_get_base_and_offset (struct data_reference *dr,
1658 loop_vec_info loop_vinfo,
1659 tree *initial_offset,
1662 bool *base_aligned_p)
1664 tree this_offset = ssize_int (0);
1665 tree this_misalign = ssize_int (0);
1666 tree this_step = ssize_int (0);
1667 tree base = NULL_TREE;
1669 tree oprnd0, oprnd1;
1670 enum tree_code code = TREE_CODE (expr);
1671 HOST_WIDE_INT pbitsize;
1672 HOST_WIDE_INT pbitpos;
1674 enum machine_mode pmode;
1675 int punsignedp, pvolatilep;
1676 tree bit_pos_in_bytes;
1677 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1679 *base_aligned_p = false;
1683 /* These cases end the recursion: */
1686 *initial_offset = ssize_int (0);
1687 *step = ssize_int (0);
1688 *misalign = ssize_int (0);
1689 if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1690 *base_aligned_p = true;
1694 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1697 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1699 base = vect_get_ptr_offset (expr, vectype, misalign);
1701 *base_aligned_p = true;
1705 *base_aligned_p = true;
1706 *misalign = ssize_int (0);
1708 *initial_offset = ssize_int (0);
1709 *step = ssize_int (0);
1713 *initial_offset = fold_convert (ssizetype, expr);
1714 *misalign = fold_convert (ssizetype, expr);
1715 *step = ssize_int (0);
1718 /* These cases continue the recursion: */
1720 oprnd0 = TREE_OPERAND (expr, 0);
1725 oprnd0 = TREE_OPERAND (expr, 0);
1731 oprnd0 = TREE_OPERAND (expr, 0);
1732 oprnd1 = TREE_OPERAND (expr, 1);
1734 /* In case we have a PLUS_EXPR of the form
1735 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1736 This is verified in vect_get_memtag_and_dr. */
1737 base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1738 &this_offset, &this_misalign,
1739 &this_step, base_aligned_p);
1740 /* Offset was already computed in vect_analyze_pointer_ref_access. */
1741 this_offset = ssize_int (0);
1744 this_misalign = NULL_TREE;
1746 this_misalign = size_binop (TREE_CODE (expr), ssize_int (0),
1752 if (!handled_component_p (expr))
1753 /* Unsupported expression. */
1756 /* Find the base and the offset from it. */
1757 next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1758 &pmode, &punsignedp, &pvolatilep, false);
1763 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1764 &this_offset, &this_misalign,
1767 /* Failed to compute offset or step. */
1769 *initial_offset = NULL_TREE;
1770 *misalign = NULL_TREE;
1774 /* Add bit position to OFFSET and MISALIGN. */
1776 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1777 /* Check that there is no remainder in bits. */
1778 if (pbitpos%BITS_PER_UNIT)
1780 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1781 fprintf (vect_dump, "bit offset alignment.");
1784 this_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes,
1785 fold_convert (ssizetype, this_offset));
1787 this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1789 /* Continue the recursion to refine the base (get_inner_reference returns
1790 &a for &a[i], and not a). */
1794 base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1795 initial_offset, misalign, step,
1799 /* Combine the results. */
1800 if (this_misalign && *misalign)
1801 *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1803 *misalign = NULL_TREE;
1805 *step = size_binop (PLUS_EXPR, *step, this_step);
1807 *initial_offset = size_binop (PLUS_EXPR, *initial_offset, this_offset);
1809 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1811 print_generic_expr (vect_dump, expr, TDF_SLIM);
1812 fprintf (vect_dump, "\n --> total offset for ref: ");
1813 print_generic_expr (vect_dump, *initial_offset, TDF_SLIM);
1814 fprintf (vect_dump, "\n --> total misalign for ref: ");
1815 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1816 fprintf (vect_dump, "\n --> total step for ref: ");
1817 print_generic_expr (vect_dump, *step, TDF_SLIM);
1824 /* Function vect_force_dr_alignment_p.
1826 Returns whether the alignment of a DECL can be forced to be aligned
1827 on ALIGNMENT bit boundary. */
1830 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1832 if (TREE_CODE (decl) != VAR_DECL)
1835 if (DECL_EXTERNAL (decl))
1838 if (TREE_ASM_WRITTEN (decl))
1841 if (TREE_STATIC (decl))
1842 return (alignment <= MAX_OFILE_ALIGNMENT);
1844 /* This is not 100% correct. The absolute correct stack alignment
1845 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1846 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1847 However, until someone implements forced stack alignment, SSE
1848 isn't really usable without this. */
1849 return (alignment <= PREFERRED_STACK_BOUNDARY);
1853 /* Function vect_get_new_vect_var.
1855 Returns a name for a new variable. The current naming scheme appends the
1856 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1857 the name of vectorizer generated variables, and appends that to NAME if
1861 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1867 if (var_kind == vect_simple_var)
1872 prefix_len = strlen (prefix);
1875 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1877 new_vect_var = create_tmp_var (type, prefix);
1879 return new_vect_var;
1883 /* Function vect_create_index_for_vector_ref.
1885 Create (and return) an index variable, along with it's update chain in the
1886 loop. This variable will be used to access a memory location in a vector
1890 LOOP: The loop being vectorized.
1891 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1892 function can be added here, or in the loop pre-header.
1895 Return an index that will be used to index a vector array. It is expected
1896 that a pointer to the first vector will be used as the base address for the
1899 FORNOW: we are not trying to be efficient, just creating a new index each
1900 time from scratch. At this time all vector references could use the same
1903 TODO: create only one index to be used by all vector references. Record
1904 the index in the LOOP_VINFO the first time this procedure is called and
1905 return it on subsequent calls. The increment of this index must be placed
1906 just before the conditional expression that ends the single block loop. */
1909 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
1912 block_stmt_iterator incr_bsi;
1914 tree indx_before_incr, indx_after_incr;
1915 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1918 /* It is assumed that the base pointer used for vectorized access contains
1919 the address of the first vector. Therefore the index used for vectorized
1920 access must be initialized to zero and incremented by 1. */
1922 init = integer_zero_node;
1923 step = integer_one_node;
1925 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
1926 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
1927 &indx_before_incr, &indx_after_incr);
1928 incr = bsi_stmt (incr_bsi);
1929 get_stmt_operands (incr);
1930 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
1932 return indx_before_incr;
1936 /* Function vect_create_addr_base_for_vector_ref.
1938 Create an expression that computes the address of the first memory location
1939 that will be accessed for a data reference.
1942 STMT: The statement containing the data reference.
1943 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1944 OFFSET: Optional. If supplied, it is be added to the initial address.
1947 1. Return an SSA_NAME whose value is the address of the memory location of
1948 the first vector of the data reference.
1949 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1950 these statement(s) which define the returned SSA_NAME.
1952 FORNOW: We are only handling array accesses with step 1. */
1955 vect_create_addr_base_for_vector_ref (tree stmt,
1956 tree *new_stmt_list,
1959 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1960 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1961 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1962 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1963 tree ref = DR_REF (dr);
1964 tree scalar_type = TREE_TYPE (ref);
1965 tree scalar_ptr_type = build_pointer_type (scalar_type);
1968 tree addr_base, addr_expr;
1969 tree dest, new_stmt;
1970 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
1972 if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1973 /* After the analysis stage, we expect to get here only with RECORD_TYPE
1975 /* Add '&' to ref_base. */
1976 data_ref_base = build_fold_addr_expr (data_ref_base);
1979 /* Create '(scalar_type*) base' for pointers. */
1980 tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1981 tree scalar_array_type = build_array_type (scalar_type, 0);
1982 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1983 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1984 add_referenced_tmp_var (array_ptr);
1986 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1987 add_referenced_tmp_var (dest);
1988 tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1989 append_to_statement_list_force (new_stmt, new_stmt_list);
1991 vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1992 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1993 new_temp = make_ssa_name (array_ptr, vec_stmt);
1994 TREE_OPERAND (vec_stmt, 0) = new_temp;
1995 append_to_statement_list_force (vec_stmt, new_stmt_list);
1996 data_ref_base = new_temp;
1999 /* Create base_offset */
2000 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
2001 add_referenced_tmp_var (dest);
2002 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
2003 append_to_statement_list_force (new_stmt, new_stmt_list);
2007 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
2008 add_referenced_tmp_var (tmp);
2009 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
2010 STMT_VINFO_VECT_STEP (stmt_info)));
2011 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
2012 base_offset, offset));
2013 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
2014 append_to_statement_list_force (new_stmt, new_stmt_list);
2017 /* base + base_offset */
2018 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
2021 /* addr_expr = addr_base */
2022 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
2023 get_name (base_name));
2024 add_referenced_tmp_var (addr_expr);
2025 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
2026 new_temp = make_ssa_name (addr_expr, vec_stmt);
2027 TREE_OPERAND (vec_stmt, 0) = new_temp;
2028 append_to_statement_list_force (vec_stmt, new_stmt_list);
2030 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2032 fprintf (vect_dump, "created ");
2033 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2039 /* Function get_vectype_for_scalar_type.
2041 Returns the vector type corresponding to SCALAR_TYPE as supported
2045 get_vectype_for_scalar_type (tree scalar_type)
2047 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
2048 int nbytes = GET_MODE_SIZE (inner_mode);
2055 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
2057 nunits = UNITS_PER_SIMD_WORD / nbytes;
2059 vectype = build_vector_type (scalar_type, nunits);
2060 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2062 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
2063 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2069 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2071 fprintf (vect_dump, "vectype: ");
2072 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2075 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2077 /* TODO: tree-complex.c sometimes can parallelize operations
2078 on generic vectors. We can vectorize the loop in that case,
2079 but then we should re-run the lowering pass. */
2080 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2081 fprintf (vect_dump, "mode not supported by target.");
2089 /* Function vect_align_data_ref.
2091 Handle mislignment of a memory accesses.
2093 FORNOW: Can't handle misaligned accesses.
2094 Make sure that the dataref is aligned. */
2097 vect_align_data_ref (tree stmt)
2099 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2100 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2102 /* FORNOW: can't handle misaligned accesses;
2103 all accesses expected to be aligned. */
2104 gcc_assert (aligned_access_p (dr));
2108 /* Function vect_create_data_ref_ptr.
2110 Create a memory reference expression for vector access, to be used in a
2111 vector load/store stmt. The reference is based on a new pointer to vector
2115 1. STMT: a stmt that references memory. Expected to be of the form
2116 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2117 2. BSI: block_stmt_iterator where new stmts can be added.
2118 3. OFFSET (optional): an offset to be added to the initial address accessed
2119 by the data-ref in STMT.
2120 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2121 pointing to the initial address.
2124 1. Declare a new ptr to vector_type, and have it point to the base of the
2125 data reference (initial addressed accessed by the data reference).
2126 For example, for vector of type V8HI, the following code is generated:
2129 vp = (v8hi *)initial_address;
2131 if OFFSET is not supplied:
2132 initial_address = &a[init];
2133 if OFFSET is supplied:
2134 initial_address = &a[init + OFFSET];
2136 Return the initial_address in INITIAL_ADDRESS.
2138 2. Create a data-reference in the loop based on the new vector pointer vp,
2139 and using a new index variable 'idx' as follows:
2143 where if ONLY_INIT is true:
2146 update = idx + vector_type_size
2148 Return the pointer vp'.
2151 FORNOW: handle only aligned and consecutive accesses. */
2154 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2155 tree *initial_address, bool only_init)
2158 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2159 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2160 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2161 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2162 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2166 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2167 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2168 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2169 int nvuses, nv_may_defs, nv_must_defs;
2173 tree new_stmt_list = NULL_TREE;
2175 edge pe = loop_preheader_edge (loop);
2181 tree type, tmp, size;
2183 base_name = unshare_expr (DR_BASE_NAME (dr));
2184 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2186 tree data_ref_base = base_name;
2187 fprintf (vect_dump, "create array_ref of type: ");
2188 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2189 if (TREE_CODE (data_ref_base) == VAR_DECL)
2190 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
2191 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2192 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
2193 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2194 fprintf (vect_dump, " vectorizing a record based array ref: ");
2195 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2196 fprintf (vect_dump, " vectorizing a pointer ref: ");
2197 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2200 /** (1) Create the new vector-pointer variable: **/
2202 vect_ptr_type = build_pointer_type (vectype);
2203 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2204 get_name (base_name));
2205 add_referenced_tmp_var (vect_ptr);
2208 /** (2) Handle aliasing information of the new vector-pointer: **/
2210 tag = STMT_VINFO_MEMTAG (stmt_info);
2212 get_var_ann (vect_ptr)->type_mem_tag = tag;
2214 /* Mark for renaming all aliased variables
2215 (i.e, the may-aliases of the type-mem-tag). */
2216 nvuses = NUM_VUSES (vuses);
2217 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2218 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2219 for (i = 0; i < nvuses; i++)
2221 tree use = VUSE_OP (vuses, i);
2222 if (TREE_CODE (use) == SSA_NAME)
2223 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2225 for (i = 0; i < nv_may_defs; i++)
2227 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2228 if (TREE_CODE (def) == SSA_NAME)
2229 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2231 for (i = 0; i < nv_must_defs; i++)
2233 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2234 if (TREE_CODE (def) == SSA_NAME)
2235 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2239 /** (3) Calculate the initial address the vector-pointer, and set
2240 the vector-pointer to point to it before the loop: **/
2242 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2243 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2245 pe = loop_preheader_edge (loop);
2246 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2247 gcc_assert (!new_bb);
2248 *initial_address = new_temp;
2250 /* Create: p = (vectype *) initial_base */
2251 vec_stmt = fold_convert (vect_ptr_type, new_temp);
2252 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2253 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2254 TREE_OPERAND (vec_stmt, 0) = new_temp;
2255 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2256 gcc_assert (!new_bb);
2257 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2260 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2262 if (only_init) /* No update in loop is required. */
2263 return vect_ptr_init;
2265 idx = vect_create_index_for_vector_ref (loop_vinfo);
2267 /* Create: update = idx * vectype_size */
2268 tmp = create_tmp_var (integer_type_node, "update");
2269 add_referenced_tmp_var (tmp);
2270 size = TYPE_SIZE (vect_ptr_type);
2271 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2272 ptr_update = create_tmp_var (type, "update");
2273 add_referenced_tmp_var (ptr_update);
2274 vectype_size = TYPE_SIZE_UNIT (vectype);
2275 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2276 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2277 new_temp = make_ssa_name (tmp, vec_stmt);
2278 TREE_OPERAND (vec_stmt, 0) = new_temp;
2279 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2280 vec_stmt = fold_convert (type, new_temp);
2281 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2282 new_temp = make_ssa_name (ptr_update, vec_stmt);
2283 TREE_OPERAND (vec_stmt, 0) = new_temp;
2284 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2286 /* Create: data_ref_ptr = vect_ptr_init + update */
2287 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2288 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2289 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2290 TREE_OPERAND (vec_stmt, 0) = new_temp;
2291 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2292 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2294 return data_ref_ptr;
2298 /* Function vect_create_destination_var.
2300 Create a new temporary of type VECTYPE. */
2303 vect_create_destination_var (tree scalar_dest, tree vectype)
2306 const char *new_name;
2308 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2310 new_name = get_name (scalar_dest);
2313 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2314 add_referenced_tmp_var (vec_dest);
2320 /* Function vect_init_vector.
2322 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2323 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2324 used in the vectorization of STMT. */
2327 vect_init_vector (tree stmt, tree vector_var)
2329 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2330 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2331 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2334 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2340 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2341 add_referenced_tmp_var (new_var);
2343 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2344 new_temp = make_ssa_name (new_var, init_stmt);
2345 TREE_OPERAND (init_stmt, 0) = new_temp;
2347 pe = loop_preheader_edge (loop);
2348 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2349 gcc_assert (!new_bb);
2351 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2353 fprintf (vect_dump, "created new init_stmt: ");
2354 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
2357 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2362 /* Function vect_get_vec_def_for_operand.
2364 OP is an operand in STMT. This function returns a (vector) def that will be
2365 used in the vectorized stmt for STMT.
2367 In the case that OP is an SSA_NAME which is defined in the loop, then
2368 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2370 In case OP is an invariant or constant, a new stmt that creates a vector def
2371 needs to be introduced. */
2374 vect_get_vec_def_for_operand (tree op, tree stmt)
2379 stmt_vec_info def_stmt_info = NULL;
2380 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2381 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2382 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2383 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2384 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2391 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2393 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
2394 print_generic_expr (vect_dump, op, TDF_SLIM);
2397 /** ===> Case 1: operand is a constant. **/
2399 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2401 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2405 /* Build a tree with vector elements. */
2406 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2407 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
2409 for (i = nunits - 1; i >= 0; --i)
2411 t = tree_cons (NULL_TREE, op, t);
2413 vec_cst = build_vector (vectype, t);
2414 return vect_init_vector (stmt, vec_cst);
2417 gcc_assert (TREE_CODE (op) == SSA_NAME);
2419 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2421 def_stmt = SSA_NAME_DEF_STMT (op);
2422 def_stmt_info = vinfo_for_stmt (def_stmt);
2424 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2426 fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: ");
2427 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2431 /** ==> Case 2.1: operand is defined inside the loop. **/
2435 /* Get the def from the vectorized stmt. */
2437 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2438 gcc_assert (vec_stmt);
2439 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2444 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2445 it is a reduction/induction. **/
2447 bb = bb_for_stmt (def_stmt);
2448 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2450 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2451 fprintf (vect_dump, "reduction/induction - unsupported.");
2452 internal_error ("no support for reduction/induction"); /* FORNOW */
2456 /** ==> Case 2.3: operand is defined outside the loop -
2457 it is a loop invariant. */
2459 switch (TREE_CODE (def_stmt))
2462 def = PHI_RESULT (def_stmt);
2465 def = TREE_OPERAND (def_stmt, 0);
2468 def = TREE_OPERAND (def_stmt, 0);
2469 gcc_assert (IS_EMPTY_STMT (def_stmt));
2473 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2475 fprintf (vect_dump, "unsupported defining stmt: ");
2476 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2478 internal_error ("unsupported defining stmt");
2481 /* Build a tree with vector elements.
2482 Create 'vec_inv = {inv,inv,..,inv}' */
2484 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2485 fprintf (vect_dump, "Create vector_inv.");
2487 for (i = nunits - 1; i >= 0; --i)
2489 t = tree_cons (NULL_TREE, def, t);
2492 vec_inv = build_constructor (vectype, t);
2493 return vect_init_vector (stmt, vec_inv);
2497 /* Function vect_finish_stmt_generation.
2499 Insert a new stmt. */
2502 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2504 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2506 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2508 fprintf (vect_dump, "add new stmt: ");
2509 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2512 #ifdef ENABLE_CHECKING
2513 /* Make sure bsi points to the stmt that is being vectorized. */
2514 gcc_assert (stmt == bsi_stmt (*bsi));
2517 #ifdef USE_MAPPED_LOCATION
2518 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCUS (stmt));
2520 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
2525 /* Function vectorizable_assignment.
2527 Check if STMT performs an assignment (copy) that can be vectorized.
2528 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2529 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2530 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2533 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2539 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2540 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2541 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2544 /* Is vectorizable assignment? */
2546 if (TREE_CODE (stmt) != MODIFY_EXPR)
2549 scalar_dest = TREE_OPERAND (stmt, 0);
2550 if (TREE_CODE (scalar_dest) != SSA_NAME)
2553 op = TREE_OPERAND (stmt, 1);
2554 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2556 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2557 fprintf (vect_dump, "use not simple.");
2561 if (!vec_stmt) /* transformation not required. */
2563 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2568 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2569 fprintf (vect_dump, "transform assignment.");
2572 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2575 op = TREE_OPERAND (stmt, 1);
2576 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2578 /* Arguments are ready. create the new vector stmt. */
2579 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2580 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2581 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2582 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2588 /* Function vectorizable_operation.
2590 Check if STMT performs a binary or unary operation that can be vectorized.
2591 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2592 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2593 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2596 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2601 tree op0, op1 = NULL;
2602 tree vec_oprnd0, vec_oprnd1=NULL;
2603 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2604 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2605 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2607 enum tree_code code;
2608 enum machine_mode vec_mode;
2614 /* Is STMT a vectorizable binary/unary operation? */
2615 if (TREE_CODE (stmt) != MODIFY_EXPR)
2618 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2621 operation = TREE_OPERAND (stmt, 1);
2622 code = TREE_CODE (operation);
2623 optab = optab_for_tree_code (code, vectype);
2625 /* Support only unary or binary operations. */
2626 op_type = TREE_CODE_LENGTH (code);
2627 if (op_type != unary_op && op_type != binary_op)
2629 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2630 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
2634 for (i = 0; i < op_type; i++)
2636 op = TREE_OPERAND (operation, i);
2637 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2639 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2640 fprintf (vect_dump, "use not simple.");
2645 /* Supportable by target? */
2648 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2649 fprintf (vect_dump, "no optab.");
2652 vec_mode = TYPE_MODE (vectype);
2653 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2655 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2656 fprintf (vect_dump, "op not supported by target.");
2660 if (!vec_stmt) /* transformation not required. */
2662 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2668 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2669 fprintf (vect_dump, "transform binary/unary operation.");
2672 scalar_dest = TREE_OPERAND (stmt, 0);
2673 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2676 op0 = TREE_OPERAND (operation, 0);
2677 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2679 if (op_type == binary_op)
2681 op1 = TREE_OPERAND (operation, 1);
2682 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2685 /* Arguments are ready. create the new vector stmt. */
2687 if (op_type == binary_op)
2688 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2689 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2691 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2692 build1 (code, vectype, vec_oprnd0));
2693 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2694 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2695 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2701 /* Function vectorizable_store.
2703 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2705 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2706 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2707 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2710 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2716 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2717 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2718 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2719 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2720 enum machine_mode vec_mode;
2722 enum dr_alignment_support alignment_support_cheme;
2724 /* Is vectorizable store? */
2726 if (TREE_CODE (stmt) != MODIFY_EXPR)
2729 scalar_dest = TREE_OPERAND (stmt, 0);
2730 if (TREE_CODE (scalar_dest) != ARRAY_REF
2731 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2734 op = TREE_OPERAND (stmt, 1);
2735 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2737 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2738 fprintf (vect_dump, "use not simple.");
2742 vec_mode = TYPE_MODE (vectype);
2743 /* FORNOW. In some cases can vectorize even if data-type not supported
2744 (e.g. - array initialization with 0). */
2745 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2748 if (!STMT_VINFO_DATA_REF (stmt_info))
2752 if (!vec_stmt) /* transformation not required. */
2754 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2760 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2761 fprintf (vect_dump, "transform store");
2763 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2764 gcc_assert (alignment_support_cheme);
2765 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2767 /* Handle use - get the vectorized def from the defining stmt. */
2768 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2771 /* FORNOW: make sure the data reference is aligned. */
2772 vect_align_data_ref (stmt);
2773 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2774 data_ref = build_fold_indirect_ref (data_ref);
2776 /* Arguments are ready. create the new vector stmt. */
2777 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2778 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2784 /* vectorizable_load.
2786 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2788 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2789 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2790 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2793 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2796 tree vec_dest = NULL;
2797 tree data_ref = NULL;
2799 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2800 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2801 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2808 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2809 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2810 edge pe = loop_preheader_edge (loop);
2811 enum dr_alignment_support alignment_support_cheme;
2813 /* Is vectorizable load? */
2815 if (TREE_CODE (stmt) != MODIFY_EXPR)
2818 scalar_dest = TREE_OPERAND (stmt, 0);
2819 if (TREE_CODE (scalar_dest) != SSA_NAME)
2822 op = TREE_OPERAND (stmt, 1);
2823 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2826 if (!STMT_VINFO_DATA_REF (stmt_info))
2829 mode = (int) TYPE_MODE (vectype);
2831 /* FORNOW. In some cases can vectorize even if data-type not supported
2832 (e.g. - data copies). */
2833 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2835 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2836 fprintf (vect_dump, "Aligned load, but unsupported type.");
2840 if (!vec_stmt) /* transformation not required. */
2842 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2848 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2849 fprintf (vect_dump, "transform load.");
2851 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2852 gcc_assert (alignment_support_cheme);
2854 if (alignment_support_cheme == dr_aligned
2855 || alignment_support_cheme == dr_unaligned_supported)
2866 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2867 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2868 if (aligned_access_p (dr))
2869 data_ref = build_fold_indirect_ref (data_ref);
2872 int mis = DR_MISALIGNMENT (dr);
2873 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2874 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2875 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2877 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2878 new_temp = make_ssa_name (vec_dest, new_stmt);
2879 TREE_OPERAND (new_stmt, 0) = new_temp;
2880 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2882 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2886 msq_init = *(floor(p1))
2887 p2 = initial_addr + VS - 1;
2888 magic = have_builtin ? builtin_result : initial_address;
2891 p2' = p2 + indx * vectype_size
2893 vec_dest = realign_load (msq, lsq, magic)
2907 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2908 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2909 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2911 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2912 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2913 new_temp = make_ssa_name (vec_dest, new_stmt);
2914 TREE_OPERAND (new_stmt, 0) = new_temp;
2915 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2916 gcc_assert (!new_bb);
2917 msq_init = TREE_OPERAND (new_stmt, 0);
2920 /* <2> Create lsq = *(floor(p2')) in the loop */
2921 offset = build_int_cst (integer_type_node,
2922 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2923 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2924 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2925 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2926 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2927 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2928 new_temp = make_ssa_name (vec_dest, new_stmt);
2929 TREE_OPERAND (new_stmt, 0) = new_temp;
2930 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2931 lsq = TREE_OPERAND (new_stmt, 0);
2935 if (targetm.vectorize.builtin_mask_for_load)
2937 /* Create permutation mask, if required, in loop preheader. */
2939 params = build_tree_list (NULL_TREE, init_addr);
2940 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2941 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2942 new_stmt = build_function_call_expr (builtin_decl, params);
2943 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2944 new_temp = make_ssa_name (vec_dest, new_stmt);
2945 TREE_OPERAND (new_stmt, 0) = new_temp;
2946 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2947 gcc_assert (!new_bb);
2948 magic = TREE_OPERAND (new_stmt, 0);
2950 /* Since we have just created a CALL_EXPR, we may need to
2951 rename call-clobbered variables. */
2952 mark_call_clobbered_vars_to_rename ();
2956 /* Use current address instead of init_addr for reduced reg pressure.
2958 magic = dataref_ptr;
2962 /* <4> Create msq = phi <msq_init, lsq> in loop */
2963 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2964 msq = make_ssa_name (vec_dest, NULL_TREE);
2965 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2966 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2967 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2968 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2971 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2972 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2973 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2974 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2975 new_temp = make_ssa_name (vec_dest, new_stmt);
2976 TREE_OPERAND (new_stmt, 0) = new_temp;
2977 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2982 *vec_stmt = new_stmt;
2987 /* Function vect_supportable_dr_alignment
2989 Return whether the data reference DR is supported with respect to its
2992 static enum dr_alignment_support
2993 vect_supportable_dr_alignment (struct data_reference *dr)
2995 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2996 enum machine_mode mode = (int) TYPE_MODE (vectype);
2998 if (aligned_access_p (dr))
3001 /* Possibly unaligned access. */
3003 if (DR_IS_READ (dr))
3005 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
3006 && (!targetm.vectorize.builtin_mask_for_load
3007 || targetm.vectorize.builtin_mask_for_load ()))
3008 return dr_unaligned_software_pipeline;
3010 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
3011 /* Can't software pipeline the loads, but can at least do them. */
3012 return dr_unaligned_supported;
3016 return dr_unaligned_unsupported;
3020 /* Function vect_transform_stmt.
3022 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3025 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
3027 bool is_store = false;
3028 tree vec_stmt = NULL_TREE;
3029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3032 switch (STMT_VINFO_TYPE (stmt_info))
3034 case op_vec_info_type:
3035 done = vectorizable_operation (stmt, bsi, &vec_stmt);
3039 case assignment_vec_info_type:
3040 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
3044 case load_vec_info_type:
3045 done = vectorizable_load (stmt, bsi, &vec_stmt);
3049 case store_vec_info_type:
3050 done = vectorizable_store (stmt, bsi, &vec_stmt);
3055 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3056 fprintf (vect_dump, "stmt not supported.");
3060 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
3066 /* This function builds ni_name = number of iterations loop executes
3067 on the loop preheader. */
3070 vect_build_loop_niters (loop_vec_info loop_vinfo)
3072 tree ni_name, stmt, var;
3074 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3075 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3077 var = create_tmp_var (TREE_TYPE (ni), "niters");
3078 add_referenced_tmp_var (var);
3079 ni_name = force_gimple_operand (ni, &stmt, false, var);
3081 pe = loop_preheader_edge (loop);
3084 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3085 gcc_assert (!new_bb);
3092 /* This function generates the following statements:
3094 ni_name = number of iterations loop executes
3095 ratio = ni_name / vf
3096 ratio_mult_vf_name = ratio * vf
3098 and places them at the loop preheader edge. */
3101 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3103 tree *ratio_mult_vf_name_ptr,
3104 tree *ratio_name_ptr)
3112 tree ratio_mult_vf_name;
3113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3114 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3115 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3116 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3118 pe = loop_preheader_edge (loop);
3120 /* Generate temporary variable that contains
3121 number of iterations loop executes. */
3123 ni_name = vect_build_loop_niters (loop_vinfo);
3125 /* Create: ratio = ni >> log2(vf) */
3127 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3128 add_referenced_tmp_var (var);
3129 ratio_name = make_ssa_name (var, NULL_TREE);
3130 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3131 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3132 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3134 pe = loop_preheader_edge (loop);
3135 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3136 gcc_assert (!new_bb);
3138 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3140 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3141 add_referenced_tmp_var (var);
3142 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3143 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3144 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3145 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3147 pe = loop_preheader_edge (loop);
3148 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3149 gcc_assert (!new_bb);
3151 *ni_name_ptr = ni_name;
3152 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3153 *ratio_name_ptr = ratio_name;
3159 /* Function vect_update_ivs_after_vectorizer.
3161 "Advance" the induction variables of LOOP to the value they should take
3162 after the execution of LOOP. This is currently necessary because the
3163 vectorizer does not handle induction variables that are used after the
3164 loop. Such a situation occurs when the last iterations of LOOP are
3166 1. We introduced new uses after LOOP for IVs that were not originally used
3167 after LOOP: the IVs of LOOP are now used by an epilog loop.
3168 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3169 times, whereas the loop IVs should be bumped N times.
3172 - LOOP - a loop that is going to be vectorized. The last few iterations
3173 of LOOP were peeled.
3174 - NITERS - the number of iterations that LOOP executes (before it is
3175 vectorized). i.e, the number of times the ivs should be bumped.
3176 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3177 coming out from LOOP on which there are uses of the LOOP ivs
3178 (this is the path from LOOP->exit to epilog_loop->preheader).
3180 The new definitions of the ivs are placed in LOOP->exit.
3181 The phi args associated with the edge UPDATE_E in the bb
3182 UPDATE_E->dest are updated accordingly.
3184 Assumption 1: Like the rest of the vectorizer, this function assumes
3185 a single loop exit that has a single predecessor.
3187 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3188 organized in the same order.
3190 Assumption 3: The access function of the ivs is simple enough (see
3191 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3193 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3194 coming out of LOOP on which the ivs of LOOP are used (this is the path
3195 that leads to the epilog loop; other paths skip the epilog loop). This
3196 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3197 needs to have its phis updated.
3201 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
3204 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3205 basic_block exit_bb = loop->exit_edges[0]->dest;
3207 basic_block update_bb = update_e->dest;
3209 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
3211 /* Make sure there exists a single-predecessor exit bb: */
3212 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
3214 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3216 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3218 tree access_fn = NULL;
3219 tree evolution_part;
3222 tree var, stmt, ni, ni_name;
3223 block_stmt_iterator last_bsi;
3225 /* Skip virtual phi's. */
3226 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3228 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3229 fprintf (vect_dump, "virtual phi. skip.");
3233 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3234 gcc_assert (access_fn);
3236 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3237 gcc_assert (evolution_part != NULL_TREE);
3239 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3240 of degree >= 2 or exponential. */
3241 gcc_assert (!tree_is_chrec (evolution_part));
3243 step_expr = evolution_part;
3244 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
3247 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3248 build2 (MULT_EXPR, TREE_TYPE (niters),
3249 niters, step_expr), init_expr);
3251 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3252 add_referenced_tmp_var (var);
3254 ni_name = force_gimple_operand (ni, &stmt, false, var);
3256 /* Insert stmt into exit_bb. */
3257 last_bsi = bsi_last (exit_bb);
3259 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3261 /* Fix phi expressions in the successor bb. */
3262 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3263 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3264 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
3269 /* Function vect_do_peeling_for_loop_bound
3271 Peel the last iterations of the loop represented by LOOP_VINFO.
3272 The peeled iterations form a new epilog loop. Given that the loop now
3273 iterates NITERS times, the new epilog loop iterates
3274 NITERS % VECTORIZATION_FACTOR times.
3276 The original loop will later be made to iterate
3277 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3280 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3281 struct loops *loops)
3284 tree ni_name, ratio_mult_vf_name;
3285 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3286 struct loop *new_loop;
3288 #ifdef ENABLE_CHECKING
3292 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3293 fprintf (vect_dump, "=== vect_transtorm_for_unknown_loop_bound ===");
3295 /* Generate the following variables on the preheader of original loop:
3297 ni_name = number of iteration the original loop executes
3298 ratio = ni_name / vf
3299 ratio_mult_vf_name = ratio * vf */
3300 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3301 &ratio_mult_vf_name, ratio);
3303 /* Update loop info. */
3304 loop->pre_header = loop_preheader_edge (loop)->src;
3305 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3307 #ifdef ENABLE_CHECKING
3308 loop_num = loop->num;
3310 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3311 ratio_mult_vf_name, ni_name, false);
3312 #ifdef ENABLE_CHECKING
3313 gcc_assert (new_loop);
3314 gcc_assert (loop_num == loop->num);
3315 slpeel_verify_cfg_after_peeling (loop, new_loop);
3318 /* A guard that controls whether the new_loop is to be executed or skipped
3319 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3320 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3321 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3322 is on the path where the LOOP IVs are used and need to be updated. */
3324 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3325 update_e = EDGE_PRED (new_loop->pre_header, 0);
3327 update_e = EDGE_PRED (new_loop->pre_header, 1);
3329 /* Update IVs of original loop as if they were advanced
3330 by ratio_mult_vf_name steps. */
3331 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
3333 /* After peeling we have to reset scalar evolution analyzer. */
3340 /* Function vect_gen_niters_for_prolog_loop
3342 Set the number of iterations for the loop represented by LOOP_VINFO
3343 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3344 and the misalignment of DR - the first data reference recorded in
3345 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3346 this loop, the data reference DR will refer to an aligned location.
3348 The following computation is generated:
3350 compute address misalignment in bytes:
3351 addr_mis = addr & (vectype_size - 1)
3353 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3355 (elem_size = element type size; an element is the scalar element
3356 whose type is the inner type of the vectype) */
3359 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3361 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3362 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3363 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3365 tree iters, iters_name;
3368 tree dr_stmt = DR_STMT (dr);
3369 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3370 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3371 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3374 tree new_stmts = NULL_TREE;
3376 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3377 tree ptr_type = TREE_TYPE (start_addr);
3378 tree size = TYPE_SIZE (ptr_type);
3379 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3380 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3381 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3382 tree niters_type = TREE_TYPE (loop_niters);
3383 tree elem_size_log =
3384 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3385 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3387 pe = loop_preheader_edge (loop);
3388 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3389 gcc_assert (!new_bb);
3391 /* Create: byte_misalign = addr & (vectype_size - 1) */
3392 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3394 /* Create: elem_misalign = byte_misalign / element_size */
3396 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3398 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3399 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3400 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3401 iters = fold_convert (niters_type, iters);
3403 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3404 /* If the loop bound is known at compile time we already verified that it is
3405 greater than vf; since the misalignment ('iters') is at most vf, there's
3406 no need to generate the MIN_EXPR in this case. */
3407 if (TREE_CODE (loop_niters) != INTEGER_CST)
3408 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3410 var = create_tmp_var (niters_type, "prolog_loop_niters");
3411 add_referenced_tmp_var (var);
3412 iters_name = force_gimple_operand (iters, &stmt, false, var);
3414 /* Insert stmt on loop preheader edge. */
3415 pe = loop_preheader_edge (loop);
3418 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3419 gcc_assert (!new_bb);
3426 /* Function vect_update_inits_of_dr
3428 NITERS iterations were peeled from LOOP. DR represents a data reference
3429 in LOOP. This function updates the information recorded in DR to
3430 account for the fact that the first NITERS iterations had already been
3431 executed. Specifically, it updates the OFFSET field of stmt_info. */
3434 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3436 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3437 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
3439 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3440 STMT_VINFO_VECT_STEP (stmt_info)));
3441 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3442 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3446 /* Function vect_update_inits_of_drs
3448 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3449 This function updates the information recorded for the data references in
3450 the loop to account for the fact that the first NITERS iterations had
3451 already been executed. Specifically, it updates the initial_condition of the
3452 access_function of all the data_references in the loop. */
3455 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3458 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3459 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3461 if (vect_dump && (dump_flags & TDF_DETAILS))
3462 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
3464 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3466 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3467 vect_update_inits_of_dr (dr, niters);
3470 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3472 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3473 vect_update_inits_of_dr (dr, niters);
3478 /* Function vect_do_peeling_for_alignment
3480 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3481 'niters' is set to the misalignment of one of the data references in the
3482 loop, thereby forcing it to refer to an aligned location at the beginning
3483 of the execution of this loop. The data reference for which we are
3484 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3487 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3489 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3490 tree niters_of_prolog_loop, ni_name;
3492 struct loop *new_loop;
3494 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3495 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
3497 ni_name = vect_build_loop_niters (loop_vinfo);
3498 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3500 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3502 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3503 niters_of_prolog_loop, ni_name, true);
3504 #ifdef ENABLE_CHECKING
3505 gcc_assert (new_loop);
3506 slpeel_verify_cfg_after_peeling (new_loop, loop);
3509 /* Update number of times loop executes. */
3510 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3511 LOOP_VINFO_NITERS (loop_vinfo) =
3512 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3514 /* Update the init conditions of the access functions of all data refs. */
3515 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3517 /* After peeling we have to reset scalar evolution analyzer. */
3524 /* Function vect_transform_loop.
3526 The analysis phase has determined that the loop is vectorizable.
3527 Vectorize the loop - created vectorized stmts to replace the scalar
3528 stmts in the loop, and update the loop exit condition. */
3531 vect_transform_loop (loop_vec_info loop_vinfo,
3532 struct loops *loops ATTRIBUTE_UNUSED)
3534 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3535 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3536 int nbbs = loop->num_nodes;
3537 block_stmt_iterator si;
3540 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3542 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3543 fprintf (vect_dump, "=== vec_transform_loop ===");
3546 /* Peel the loop if there are data refs with unknown alignment.
3547 Only one data ref with unknown store is allowed. */
3549 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3550 vect_do_peeling_for_alignment (loop_vinfo, loops);
3552 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3553 compile time constant), or it is a constant that doesn't divide by the
3554 vectorization factor, then an epilog loop needs to be created.
3555 We therefore duplicate the loop: the original loop will be vectorized,
3556 and will compute the first (n/VF) iterations. The second copy of the loop
3557 will remain scalar and will compute the remaining (n%VF) iterations.
3558 (VF is the vectorization factor). */
3560 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3561 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3562 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3563 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3565 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3566 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3568 /* 1) Make sure the loop header has exactly two entries
3569 2) Make sure we have a preheader basic block. */
3571 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3573 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3576 /* FORNOW: the vectorizer supports only loops which body consist
3577 of one basic block (header + empty latch). When the vectorizer will
3578 support more involved loop forms, the order by which the BBs are
3579 traversed need to be reconsidered. */
3581 for (i = 0; i < nbbs; i++)
3583 basic_block bb = bbs[i];
3585 for (si = bsi_start (bb); !bsi_end_p (si);)
3587 tree stmt = bsi_stmt (si);
3588 stmt_vec_info stmt_info;
3591 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3593 fprintf (vect_dump, "------>vectorizing statement: ");
3594 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3596 stmt_info = vinfo_for_stmt (stmt);
3597 gcc_assert (stmt_info);
3598 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3603 #ifdef ENABLE_CHECKING
3604 /* FORNOW: Verify that all stmts operate on the same number of
3605 units and no inner unrolling is necessary. */
3607 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3608 == vectorization_factor);
3610 /* -------- vectorize statement ------------ */
3611 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3612 fprintf (vect_dump, "transform statement.");
3614 is_store = vect_transform_stmt (stmt, &si);
3617 /* free the attached stmt_vec_info and remove the stmt. */
3618 stmt_ann_t ann = stmt_ann (stmt);
3620 set_stmt_info (ann, NULL);
3629 slpeel_make_loop_iterate_ntimes (loop, ratio);
3631 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
3632 fprintf (vect_dump, "LOOP VECTORIZED.");
3636 /* Function vect_is_simple_use.
3639 LOOP - the loop that is being vectorized.
3640 OPERAND - operand of a stmt in LOOP.
3641 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3643 Returns whether a stmt with OPERAND can be vectorized.
3644 Supportable operands are constants, loop invariants, and operands that are
3645 defined by the current iteration of the loop. Unsupportable operands are
3646 those that are defined by a previous iteration of the loop (as is the case
3647 in reduction/induction computations). */
3650 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
3654 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3659 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3662 if (TREE_CODE (operand) != SSA_NAME)
3665 def_stmt = SSA_NAME_DEF_STMT (operand);
3666 if (def_stmt == NULL_TREE )
3668 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3669 fprintf (vect_dump, "no def_stmt.");
3673 /* empty stmt is expected only in case of a function argument.
3674 (Otherwise - we expect a phi_node or a modify_expr). */
3675 if (IS_EMPTY_STMT (def_stmt))
3677 tree arg = TREE_OPERAND (def_stmt, 0);
3678 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3680 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3682 fprintf (vect_dump, "Unexpected empty stmt: ");
3683 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
3688 /* phi_node inside the loop indicates an induction/reduction pattern.
3689 This is not supported yet. */
3690 bb = bb_for_stmt (def_stmt);
3691 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3693 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3694 fprintf (vect_dump, "reduction/induction - unsupported.");
3695 return false; /* FORNOW: not supported yet. */
3698 /* Expecting a modify_expr or a phi_node. */
3699 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3700 || TREE_CODE (def_stmt) == PHI_NODE)
3711 /* Function vect_analyze_operations.
3713 Scan the loop stmts and make sure they are all vectorizable. */
3716 vect_analyze_operations (loop_vec_info loop_vinfo)
3718 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3719 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3720 int nbbs = loop->num_nodes;
3721 block_stmt_iterator si;
3722 unsigned int vectorization_factor = 0;
3727 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3728 fprintf (vect_dump, "=== vect_analyze_operations ===");
3730 for (i = 0; i < nbbs; i++)
3732 basic_block bb = bbs[i];
3734 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3736 tree stmt = bsi_stmt (si);
3737 unsigned int nunits;
3738 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3741 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3743 fprintf (vect_dump, "==> examining statement: ");
3744 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3747 gcc_assert (stmt_info);
3749 /* skip stmts which do not need to be vectorized.
3750 this is expected to include:
3751 - the COND_EXPR which is the loop exit condition
3752 - any LABEL_EXPRs in the loop
3753 - computations that are used only for array indexing or loop
3756 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3758 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3759 fprintf (vect_dump, "irrelevant.");
3763 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3765 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3766 LOOP_LOC (loop_vinfo)))
3768 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
3769 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3774 if (STMT_VINFO_DATA_REF (stmt_info))
3775 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3776 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3777 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3779 scalar_type = TREE_TYPE (stmt);
3781 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3783 fprintf (vect_dump, "get vectype for scalar type: ");
3784 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3787 vectype = get_vectype_for_scalar_type (scalar_type);
3790 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3791 LOOP_LOC (loop_vinfo)))
3794 "not vectorized: unsupported data-type ");
3795 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3800 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3802 fprintf (vect_dump, "vectype: ");
3803 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3805 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3807 ok = (vectorizable_operation (stmt, NULL, NULL)
3808 || vectorizable_assignment (stmt, NULL, NULL)
3809 || vectorizable_load (stmt, NULL, NULL)
3810 || vectorizable_store (stmt, NULL, NULL));
3814 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3815 LOOP_LOC (loop_vinfo)))
3817 fprintf (vect_dump, "not vectorized: stmt not supported: ");
3818 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3823 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3824 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3825 fprintf (vect_dump, "nunits = %d", nunits);
3827 if (vectorization_factor)
3829 /* FORNOW: don't allow mixed units.
3830 This restriction will be relaxed in the future. */
3831 if (nunits != vectorization_factor)
3833 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3834 LOOP_LOC (loop_vinfo)))
3835 fprintf (vect_dump, "not vectorized: mixed data-types");
3840 vectorization_factor = nunits;
3842 #ifdef ENABLE_CHECKING
3843 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3844 * vectorization_factor == UNITS_PER_SIMD_WORD);
3849 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3851 if (vectorization_factor <= 1)
3853 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3854 LOOP_LOC (loop_vinfo)))
3855 fprintf (vect_dump, "not vectorized: unsupported data-type");
3858 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3860 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3861 && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3863 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3864 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3866 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3867 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3869 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3870 LOOP_LOC (loop_vinfo)))
3871 fprintf (vect_dump, "not vectorized: iteration count too small.");
3875 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3876 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3878 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
3879 fprintf (vect_dump, "epilog loop required.");
3880 if (!vect_can_advance_ivs_p (loop_vinfo))
3882 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3883 LOOP_LOC (loop_vinfo)))
3885 "not vectorized: can't create epilog loop 1.");
3888 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3890 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3891 LOOP_LOC (loop_vinfo)))
3893 "not vectorized: can't create epilog loop 2.");
3902 /* Function exist_non_indexing_operands_for_use_p
3904 USE is one of the uses attached to STMT. Check if USE is
3905 used in STMT for anything other than indexing an array. */
3908 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3911 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3913 /* USE corresponds to some operand in STMT. If there is no data
3914 reference in STMT, then any operand that corresponds to USE
3915 is not indexing an array. */
3916 if (!STMT_VINFO_DATA_REF (stmt_info))
3919 /* STMT has a data_ref. FORNOW this means that its of one of
3920 the following forms:
3923 (This should have been verified in analyze_data_refs).
3925 'var' in the second case corresponds to a def, not a use,
3926 so USE cannot correspond to any operands that are not used
3929 Therefore, all we need to check is if STMT falls into the
3930 first case, and whether var corresponds to USE. */
3932 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3935 operand = TREE_OPERAND (stmt, 1);
3937 if (TREE_CODE (operand) != SSA_NAME)
3947 /* Function vect_is_simple_iv_evolution.
3949 FORNOW: A simple evolution of an induction variables in the loop is
3950 considered a polynomial evolution with constant step. */
3953 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3954 tree * step, bool strict)
3959 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3961 /* When there is no evolution in this loop, the evolution function
3963 if (evolution_part == NULL_TREE)
3966 /* When the evolution is a polynomial of degree >= 2
3967 the evolution function is not "simple". */
3968 if (tree_is_chrec (evolution_part))
3971 step_expr = evolution_part;
3972 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
3975 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3977 fprintf (vect_dump, "step: ");
3978 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
3979 fprintf (vect_dump, ", init: ");
3980 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
3986 if (TREE_CODE (step_expr) != INTEGER_CST)
3988 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3989 fprintf (vect_dump, "step unknown.");
3994 if (!integer_onep (step_expr))
3996 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3997 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
4005 /* Function vect_analyze_scalar_cycles.
4007 Examine the cross iteration def-use cycles of scalar variables, by
4008 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
4009 cycles that they represent do not impede vectorization.
4011 FORNOW: Reduction as in the following loop, is not supported yet:
4015 The cross-iteration cycle corresponding to variable 'sum' will be
4016 considered too complicated and will impede vectorization.
4018 FORNOW: Induction as in the following loop, is not supported yet:
4023 However, the following loop *is* vectorizable:
4028 In both loops there exists a def-use cycle for the variable i:
4029 loop: i_2 = PHI (i_0, i_1)
4034 The evolution of the above cycle is considered simple enough,
4035 however, we also check that the cycle does not need to be
4036 vectorized, i.e - we check that the variable that this cycle
4037 defines is only used for array indexing or in stmts that do not
4038 need to be vectorized. This is not the case in loop2, but it
4039 *is* the case in loop3. */
4042 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
4045 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4046 basic_block bb = loop->header;
4049 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4050 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
4052 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4054 tree access_fn = NULL;
4056 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4058 fprintf (vect_dump, "Analyze phi: ");
4059 print_generic_expr (vect_dump, phi, TDF_SLIM);
4062 /* Skip virtual phi's. The data dependences that are associated with
4063 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
4065 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
4067 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4068 fprintf (vect_dump, "virtual phi. skip.");
4072 /* Analyze the evolution function. */
4074 /* FORNOW: The only scalar cross-iteration cycles that we allow are
4075 those of loop induction variables; This property is verified here.
4077 Furthermore, if that induction variable is used in an operation
4078 that needs to be vectorized (i.e, is not solely used to index
4079 arrays and check the exit condition) - we do not support its
4080 vectorization yet. This property is verified in vect_is_simple_use,
4081 during vect_analyze_operations. */
4083 access_fn = /* instantiate_parameters
4085 analyze_scalar_evolution (loop, PHI_RESULT (phi));
4089 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4090 LOOP_LOC (loop_vinfo)))
4091 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
4095 if (vect_print_dump_info (REPORT_DETAILS,
4096 LOOP_LOC (loop_vinfo)))
4098 fprintf (vect_dump, "Access function of PHI: ");
4099 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
4102 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
4105 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4106 LOOP_LOC (loop_vinfo)))
4107 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
4116 /* Function vect_analyze_data_ref_dependence.
4118 Return TRUE if there (might) exist a dependence between a memory-reference
4119 DRA and a memory-reference DRB. */
4122 vect_analyze_data_ref_dependence (struct data_reference *dra,
4123 struct data_reference *drb,
4124 loop_vec_info loop_vinfo)
4127 struct data_dependence_relation *ddr;
4129 if (!array_base_name_differ_p (dra, drb, &differ_p))
4131 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4132 LOOP_LOC (loop_vinfo)))
4135 "not vectorized: can't determine dependence between: ");
4136 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
4137 fprintf (vect_dump, " and ");
4138 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
4146 ddr = initialize_data_dependence_relation (dra, drb);
4147 compute_affine_dependence (ddr);
4149 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4152 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4153 LOOP_LOC (loop_vinfo)))
4156 "not vectorized: possible dependence between data-refs ");
4157 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
4158 fprintf (vect_dump, " and ");
4159 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
4166 /* Function vect_analyze_data_ref_dependences.
4168 Examine all the data references in the loop, and make sure there do not
4169 exist any data dependences between them.
4171 TODO: dependences which distance is greater than the vectorization factor
4175 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4178 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4179 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4181 /* Examine store-store (output) dependences. */
4183 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4184 fprintf (vect_dump, "=== vect_analyze_dependences ===");
4186 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4187 fprintf (vect_dump, "compare all store-store pairs.");
4189 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4191 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4193 struct data_reference *dra =
4194 VARRAY_GENERIC_PTR (loop_write_refs, i);
4195 struct data_reference *drb =
4196 VARRAY_GENERIC_PTR (loop_write_refs, j);
4197 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4202 /* Examine load-store (true/anti) dependences. */
4204 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4205 fprintf (vect_dump, "compare all load-store pairs.");
4207 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4209 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4211 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4212 struct data_reference *drb =
4213 VARRAY_GENERIC_PTR (loop_write_refs, j);
4214 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4223 /* Function vect_compute_data_ref_alignment
4225 Compute the misalignment of the data reference DR.
4228 1. If during the misalignment computation it is found that the data reference
4229 cannot be vectorized then false is returned.
4230 2. DR_MISALIGNMENT (DR) is defined.
4232 FOR NOW: No analysis is actually performed. Misalignment is calculated
4233 only for trivial cases. TODO. */
4236 vect_compute_data_ref_alignment (struct data_reference *dr)
4238 tree stmt = DR_STMT (dr);
4239 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4240 tree ref = DR_REF (dr);
4242 tree base, alignment;
4243 bool base_aligned_p;
4246 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4247 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
4249 /* Initialize misalignment to unknown. */
4250 DR_MISALIGNMENT (dr) = -1;
4252 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4253 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4254 base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4255 vectype = STMT_VINFO_VECTYPE (stmt_info);
4259 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4261 fprintf (vect_dump, "Unknown alignment for access: ");
4262 print_generic_expr (vect_dump, base, TDF_SLIM);
4267 if (!base_aligned_p)
4269 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4271 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4273 fprintf (vect_dump, "can't force alignment of ref: ");
4274 print_generic_expr (vect_dump, ref, TDF_SLIM);
4279 /* Force the alignment of the decl.
4280 NOTE: This is the only change to the code we make during
4281 the analysis phase, before deciding to vectorize the loop. */
4282 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4283 fprintf (vect_dump, "force alignment");
4284 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4285 DECL_USER_ALIGN (base) = 1;
4288 /* At this point we assume that the base is aligned. */
4289 gcc_assert (base_aligned_p
4290 || (TREE_CODE (base) == VAR_DECL
4291 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4293 /* Alignment required, in bytes: */
4294 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4296 /* Modulo alignment. */
4297 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4298 if (tree_int_cst_sgn (misalign) < 0)
4300 /* Negative misalignment value. */
4301 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4302 fprintf (vect_dump, "unexpected misalign value");
4306 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4308 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4309 fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));
4315 /* Function vect_compute_data_refs_alignment
4317 Compute the misalignment of data references in the loop.
4318 This pass may take place at function granularity instead of at loop
4321 FOR NOW: No analysis is actually performed. Misalignment is calculated
4322 only for trivial cases. TODO. */
4325 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4327 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4328 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4331 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4333 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4334 if (!vect_compute_data_ref_alignment (dr))
4338 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4340 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4341 if (!vect_compute_data_ref_alignment (dr))
4349 /* Function vect_enhance_data_refs_alignment
4351 This pass will use loop versioning and loop peeling in order to enhance
4352 the alignment of data references in the loop.
4354 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4355 original loop is to be vectorized; Any other loops that are created by
4356 the transformations performed in this pass - are not supposed to be
4357 vectorized. This restriction will be relaxed. */
4360 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4362 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4363 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4367 This pass will require a cost model to guide it whether to apply peeling
4368 or versioning or a combination of the two. For example, the scheme that
4369 intel uses when given a loop with several memory accesses, is as follows:
4370 choose one memory access ('p') which alignment you want to force by doing
4371 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4372 other accesses are not necessarily aligned, or (2) use loop versioning to
4373 generate one loop in which all accesses are aligned, and another loop in
4374 which only 'p' is necessarily aligned.
4376 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4377 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4378 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4380 Devising a cost model is the most critical aspect of this work. It will
4381 guide us on which access to peel for, whether to use loop versioning, how
4382 many versions to create, etc. The cost model will probably consist of
4383 generic considerations as well as target specific considerations (on
4384 powerpc for example, misaligned stores are more painful than misaligned
4387 Here is the general steps involved in alignment enhancements:
4389 -- original loop, before alignment analysis:
4390 for (i=0; i<N; i++){
4391 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4392 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4395 -- After vect_compute_data_refs_alignment:
4396 for (i=0; i<N; i++){
4397 x = q[i]; # DR_MISALIGNMENT(q) = 3
4398 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4401 -- Possibility 1: we do loop versioning:
4403 for (i=0; i<N; i++){ # loop 1A
4404 x = q[i]; # DR_MISALIGNMENT(q) = 3
4405 p[i] = y; # DR_MISALIGNMENT(p) = 0
4409 for (i=0; i<N; i++){ # loop 1B
4410 x = q[i]; # DR_MISALIGNMENT(q) = 3
4411 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4415 -- Possibility 2: we do loop peeling:
4416 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4420 for (i = 3; i < N; i++){ # loop 2A
4421 x = q[i]; # DR_MISALIGNMENT(q) = 0
4422 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4425 -- Possibility 3: combination of loop peeling and versioning:
4426 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4431 for (i = 3; i<N; i++){ # loop 3A
4432 x = q[i]; # DR_MISALIGNMENT(q) = 0
4433 p[i] = y; # DR_MISALIGNMENT(p) = 0
4437 for (i = 3; i<N; i++){ # loop 3B
4438 x = q[i]; # DR_MISALIGNMENT(q) = 0
4439 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4443 These loops are later passed to loop_transform to be vectorized. The
4444 vectorizer will use the alignment information to guide the transformation
4445 (whether to generate regular loads/stores, or with special handling for
4449 /* (1) Peeling to force alignment. */
4451 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4453 + How many accesses will become aligned due to the peeling
4454 - How many accesses will become unaligned due to the peeling,
4455 and the cost of misaligned accesses.
4456 - The cost of peeling (the extra runtime checks, the increase
4459 The scheme we use FORNOW: peel to force the alignment of the first
4460 misaligned store in the loop.
4461 Rationale: misaligned stores are not yet supported.
4463 TODO: Use a better cost model. */
4465 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4467 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4468 if (!aligned_access_p (dr))
4470 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4471 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4476 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4478 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
4479 fprintf (vect_dump, "Peeling for alignment will not be applied.");
4483 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
4484 fprintf (vect_dump, "Peeling for alignment will be applied.");
4487 /* (1.2) Update the alignment info according to the peeling factor.
4488 If the misalignment of the DR we peel for is M, then the
4489 peeling factor is VF - M, and the misalignment of each access DR_i
4490 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4491 If the misalignment of the DR we peel for is unknown, then the
4492 misalignment of each access DR_i in the loop is also unknown.
4494 FORNOW: set the misalignment of the accesses to unknown even
4495 if the peeling factor is known at compile time.
4497 TODO: - if the peeling factor is known at compile time, use that
4498 when updating the misalignment info of the loop DRs.
4499 - consider accesses that are known to have the same
4500 alignment, even if that alignment is unknown. */
4502 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4504 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4505 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4507 DR_MISALIGNMENT (dr) = 0;
4508 if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
4509 fprintf (vect_dump, "Alignment of access forced using peeling.");
4512 DR_MISALIGNMENT (dr) = -1;
4514 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4516 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4517 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4519 DR_MISALIGNMENT (dr) = 0;
4520 if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
4521 fprintf (vect_dump, "Alignment of access forced using peeling.");
4524 DR_MISALIGNMENT (dr) = -1;
4529 /* Function vect_analyze_data_refs_alignment
4531 Analyze the alignment of the data-references in the loop.
4532 FOR NOW: Until support for misliagned accesses is in place, only if all
4533 accesses are aligned can the loop be vectorized. This restriction will be
4537 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4539 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4540 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4541 enum dr_alignment_support supportable_dr_alignment;
4544 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4545 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
4548 /* This pass may take place at function granularity instead of at loop
4551 if (!vect_compute_data_refs_alignment (loop_vinfo))
4553 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4554 LOOP_LOC (loop_vinfo)))
4556 "not vectorized: can't calculate alignment for data ref.");
4561 /* This pass will decide on using loop versioning and/or loop peeling in
4562 order to enhance the alignment of data references in the loop. */
4564 vect_enhance_data_refs_alignment (loop_vinfo);
4567 /* Finally, check that all the data references in the loop can be
4568 handled with respect to their alignment. */
4570 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4572 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4573 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4574 if (!supportable_dr_alignment)
4576 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4577 LOOP_LOC (loop_vinfo)))
4578 fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
4581 if (supportable_dr_alignment != dr_aligned
4582 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
4583 fprintf (vect_dump, "Vectorizing an unaligned access.");
4585 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4587 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4588 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4589 if (!supportable_dr_alignment)
4591 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4592 LOOP_LOC (loop_vinfo)))
4593 fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
4596 if (supportable_dr_alignment != dr_aligned
4597 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
4598 fprintf (vect_dump, "Vectorizing an unaligned access.");
4605 /* Function vect_analyze_data_ref_access.
4607 Analyze the access pattern of the data-reference DR. For now, a data access
4608 has to consecutive to be considered vectorizable. */
4611 vect_analyze_data_ref_access (struct data_reference *dr)
4613 tree stmt = DR_STMT (dr);
4614 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4615 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4616 tree scalar_type = TREE_TYPE (DR_REF (dr));
4618 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4620 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4621 fprintf (vect_dump, "not consecutive access");
4628 /* Function vect_analyze_data_ref_accesses.
4630 Analyze the access pattern of all the data references in the loop.
4632 FORNOW: the only access pattern that is considered vectorizable is a
4633 simple step 1 (consecutive) access.
4635 FORNOW: handle only arrays and pointer accesses. */
4638 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4641 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4642 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4644 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4645 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
4647 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4649 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4650 bool ok = vect_analyze_data_ref_access (dr);
4653 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4654 LOOP_LOC (loop_vinfo)))
4655 fprintf (vect_dump, "not vectorized: complicated access pattern.");
4660 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4662 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4663 bool ok = vect_analyze_data_ref_access (dr);
4666 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4667 LOOP_LOC (loop_vinfo)))
4668 fprintf (vect_dump, "not vectorized: complicated access pattern.");
4677 /* Function vect_analyze_pointer_ref_access.
4680 STMT - a stmt that contains a data-ref
4681 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4683 If the data-ref access is vectorizable, return a data_reference structure
4684 that represents it (DR). Otherwise - return NULL. */
4686 static struct data_reference *
4687 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4689 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4690 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4691 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4692 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4694 tree reftype, innertype;
4695 tree indx_access_fn;
4696 int loopnum = loop->num;
4697 struct data_reference *dr;
4701 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4702 LOOP_LOC (loop_vinfo)))
4703 fprintf (vect_dump, "not vectorized: complicated pointer access.");
4707 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4709 fprintf (vect_dump, "Access function of ptr: ");
4710 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
4713 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4715 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4716 LOOP_LOC (loop_vinfo)))
4717 fprintf (vect_dump, "not vectorized: pointer access is not simple.");
4723 if (!expr_invariant_in_loop_p (loop, init))
4725 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4726 LOOP_LOC (loop_vinfo)))
4728 "not vectorized: initial condition is not loop invariant.");
4732 if (TREE_CODE (step) != INTEGER_CST)
4734 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4735 LOOP_LOC (loop_vinfo)))
4737 "not vectorized: non constant step for pointer access.");
4741 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4742 if (TREE_CODE (reftype) != POINTER_TYPE)
4744 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4745 LOOP_LOC (loop_vinfo)))
4746 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
4750 reftype = TREE_TYPE (init);
4751 if (TREE_CODE (reftype) != POINTER_TYPE)
4753 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4754 LOOP_LOC (loop_vinfo)))
4755 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
4759 innertype = TREE_TYPE (reftype);
4760 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4762 /* FORNOW: support only consecutive access */
4763 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4764 LOOP_LOC (loop_vinfo)))
4765 fprintf (vect_dump, "not vectorized: non consecutive access.");
4769 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (ssizetype, step);
4770 if (TREE_CODE (init) == PLUS_EXPR
4771 || TREE_CODE (init) == MINUS_EXPR)
4772 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4773 size_binop (TREE_CODE (init), ssize_int (0),
4774 fold_convert (ssizetype, TREE_OPERAND (init, 1)));
4776 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = ssize_int (0);
4779 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4780 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4782 fprintf (vect_dump, "Access function of ptr indx: ");
4783 print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
4785 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4790 /* Function vect_get_memtag_and_dr.
4792 The function returns the relevant variable for memory tag (for aliasing
4793 purposes). Also data reference structure DR is created.
4795 This function handles three kinds of MEMREF:
4797 It is called from vect_analyze_data_refs with a MEMREF that is either an
4798 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4799 It builds a DR for them using vect_get_base_and_offset, and calls itself
4800 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4801 MEMREF along the way. During the recursive calls, the function may be called
4802 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4803 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4804 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4805 and SSA_NAME (this is category 3 - "recursion stop condition").
4807 When the MEMREF falls into category 1 there is still no data reference struct
4808 (DR) available. It is created by this function, and then, along the
4809 recursion, MEMREF will fall into category 2 or 3, in which case a DR will
4810 have already been created, but the analysis continues to retrieve the MEMTAG.
4813 MEMREF - data reference in STMT
4814 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4817 DR - data_reference struct for MEMREF
4818 return value - the relevant variable for memory tag (for aliasing purposes).
4823 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4824 loop_vec_info loop_vinfo,
4825 tree vectype, struct data_reference **dr)
4827 tree symbl, oprnd0, oprnd1;
4828 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4829 tree offset, misalign, step;
4830 tree ref_to_be_analyzed, tag, dr_base;
4831 struct data_reference *new_dr;
4832 bool base_aligned_p;
4836 /* Category 3: recursion stop condition. */
4837 /* (1) A DR already exists. We only need to get the relevant memtag for
4838 MEMREF, the rest of the data was already initialized. */
4840 switch (TREE_CODE (memref))
4842 /* (1.1) Stop condition: find the relevant memtag and return. */
4844 symbl = SSA_NAME_VAR (memref);
4845 tag = get_var_ann (symbl)->type_mem_tag;
4848 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4849 if (TREE_CODE (ptr) == SSA_NAME)
4850 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4854 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4856 fprintf (vect_dump, "not vectorized: no memtag for ref.");
4865 /* Category 2: recursion continues. */
4866 /* (1.2) A recursive call to find the relevant memtag is required. */
4868 symbl = TREE_OPERAND (memref, 0);
4869 break; /* For recursive call. */
4872 /* Could have recorded more accurate information -
4873 i.e, the actual FIELD_DECL that is being referenced -
4874 but later passes expect VAR_DECL as the nmt. */
4878 symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4879 break; /* For recursive call. */
4883 /* Although DR exists, we have to call the function recursively to
4884 build MEMTAG for such expression. This is handled below. */
4885 oprnd0 = TREE_OPERAND (memref, 0);
4886 oprnd1 = TREE_OPERAND (memref, 1);
4888 STRIP_NOPS (oprnd1);
4889 /* Supported plus/minus expressions are of the form
4890 {address_base + offset}, such that address_base is of type
4891 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4892 or it's not of type POINTER/ARRAY.
4893 TODO: swap operands if {offset + address_base}. */
4894 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4895 && TREE_CODE (oprnd1) != INTEGER_CST)
4896 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4900 break; /* For recursive call. */
4908 /* Category 1: recursion begins. */
4909 /* (2) A DR does not exist yet and must be built, followed by a
4910 recursive call to get the relevant memtag for MEMREF. */
4912 switch (TREE_CODE (memref))
4915 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4919 symbl = DR_BASE_NAME (new_dr);
4920 ref_to_be_analyzed = DR_BASE_NAME (new_dr);
4924 new_dr = analyze_array (stmt, memref, is_read);
4926 symbl = DR_BASE_NAME (new_dr);
4927 ref_to_be_analyzed = memref;
4931 /* TODO: Support data-refs of form a[i].p for unions and single
4932 field structures. */
4936 offset = ssize_int (0);
4937 misalign = ssize_int (0);
4938 step = ssize_int (0);
4940 /* Analyze data-ref, find its base, initial offset from the base, step,
4942 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4943 vectype, loop_vinfo, &offset,
4944 &misalign, &step, &base_aligned_p);
4948 /* Initialize information according to above analysis. */
4949 /* Since offset and step of a pointer can be also set in
4950 vect_analyze_pointer_ref_access, we combine the values here. */
4951 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4952 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4953 size_binop (PLUS_EXPR, offset,
4954 STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
4956 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4958 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4959 STMT_VINFO_VECT_STEP (stmt_info) =
4960 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4962 STMT_VINFO_VECT_STEP (stmt_info) = step;
4964 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4965 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4966 STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4971 /* Recursive call to retrieve the relevant memtag. */
4972 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4977 /* Function vect_analyze_data_refs.
4979 Find all the data references in the loop.
4981 The general structure of the analysis of data refs in the vectorizer is as
4983 1- vect_analyze_data_refs(loop):
4984 Find and analyze all data-refs in the loop:
4986 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
4987 1.1- vect_get_memtag_and_dr(ref):
4988 Analyze ref, and build a DR (data_referece struct) for it;
4989 call vect_get_base_and_offset to compute base, initial_offset,
4990 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4991 ref_stmt.alignment, and ref_stmt.step accordingly.
4992 1.1.1- vect_get_base_and_offset():
4993 Calculate base, initial_offset, step and alignment.
4994 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4995 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
4996 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4997 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4999 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
5000 which base is really an array (not a pointer) and which alignment
5001 can be forced. This restriction will be relaxed. */
5004 vect_analyze_data_refs (loop_vec_info loop_vinfo)
5006 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5007 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5008 int nbbs = loop->num_nodes;
5009 block_stmt_iterator si;
5011 struct data_reference *dr;
5013 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5014 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
5016 for (j = 0; j < nbbs; j++)
5018 basic_block bb = bbs[j];
5019 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5021 bool is_read = false;
5022 tree stmt = bsi_stmt (si);
5023 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5024 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5025 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5026 vuse_optype vuses = STMT_VUSE_OPS (stmt);
5027 varray_type *datarefs = NULL;
5028 int nvuses, nv_may_defs, nv_must_defs;
5031 tree scalar_type, vectype;
5033 /* Assumption: there exists a data-ref in stmt, if and only if
5034 it has vuses/vdefs. */
5036 if (!vuses && !v_may_defs && !v_must_defs)
5039 nvuses = NUM_VUSES (vuses);
5040 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
5041 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
5043 if (nvuses && (nv_may_defs || nv_must_defs))
5045 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5047 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
5048 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5053 if (TREE_CODE (stmt) != MODIFY_EXPR)
5055 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5057 fprintf (vect_dump, "unexpected vops in stmt: ");
5058 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5065 memref = TREE_OPERAND (stmt, 1);
5066 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
5071 memref = TREE_OPERAND (stmt, 0);
5072 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
5076 scalar_type = TREE_TYPE (memref);
5077 vectype = get_vectype_for_scalar_type (scalar_type);
5080 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5082 fprintf (vect_dump, "no vectype for stmt: ");
5083 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5084 fprintf (vect_dump, " scalar_type: ");
5085 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
5087 /* It is not possible to vectorize this data reference. */
5090 /* Analyze MEMREF. If it is of a supported form, build data_reference
5091 struct for it (DR) and find memtag for aliasing purposes. */
5093 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
5097 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5098 LOOP_LOC (loop_vinfo)))
5100 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
5101 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5105 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5106 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5107 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5108 STMT_VINFO_DATA_REF (stmt_info) = dr;
5116 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5118 /* Function vect_mark_relevant.
5120 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5123 vect_mark_relevant (varray_type *worklist, tree stmt)
5125 stmt_vec_info stmt_info;
5127 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5128 fprintf (vect_dump, "mark relevant.");
5130 if (TREE_CODE (stmt) == PHI_NODE)
5132 VARRAY_PUSH_TREE (*worklist, stmt);
5136 stmt_info = vinfo_for_stmt (stmt);
5140 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5142 fprintf (vect_dump, "mark relevant: no stmt info!!.");
5143 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5148 if (STMT_VINFO_RELEVANT_P (stmt_info))
5150 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5151 fprintf (vect_dump, "already marked relevant.");
5155 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5156 VARRAY_PUSH_TREE (*worklist, stmt);
5160 /* Function vect_stmt_relevant_p.
5162 Return true if STMT in loop that is represented by LOOP_VINFO is
5163 "relevant for vectorization".
5165 A stmt is considered "relevant for vectorization" if:
5166 - it has uses outside the loop.
5167 - it has vdefs (it alters memory).
5168 - control stmts in the loop (except for the exit condition).
5170 CHECKME: what other side effects would the vectorizer allow? */
5173 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5175 v_may_def_optype v_may_defs;
5176 v_must_def_optype v_must_defs;
5177 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5182 /* cond stmt other than loop exit cond. */
5183 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5186 /* changing memory. */
5187 if (TREE_CODE (stmt) != PHI_NODE)
5189 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5190 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5191 if (v_may_defs || v_must_defs)
5193 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5194 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
5199 /* uses outside the loop. */
5200 df = get_immediate_uses (stmt);
5201 num_uses = num_immediate_uses (df);
5202 for (i = 0; i < num_uses; i++)
5204 tree use = immediate_use (df, i);
5205 basic_block bb = bb_for_stmt (use);
5206 if (!flow_bb_inside_loop_p (loop, bb))
5208 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5209 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
5218 /* Function vect_mark_stmts_to_be_vectorized.
5220 Not all stmts in the loop need to be vectorized. For example:
5229 Stmt 1 and 3 do not need to be vectorized, because loop control and
5230 addressing of vectorized data-refs are handled differently.
5232 This pass detects such stmts. */
5235 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5237 varray_type worklist;
5238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5239 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5240 unsigned int nbbs = loop->num_nodes;
5241 block_stmt_iterator si;
5247 stmt_vec_info stmt_info;
5251 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5252 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
5255 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5257 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5259 fprintf (vect_dump, "init: phi relevant? ");
5260 print_generic_expr (vect_dump, phi, TDF_SLIM);
5263 if (vect_stmt_relevant_p (phi, loop_vinfo))
5265 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5266 LOOP_LOC (loop_vinfo)))
5267 fprintf (vect_dump, "unsupported reduction/induction.");
5272 VARRAY_TREE_INIT (worklist, 64, "work list");
5274 /* 1. Init worklist. */
5276 for (i = 0; i < nbbs; i++)
5279 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5281 stmt = bsi_stmt (si);
5283 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5285 fprintf (vect_dump, "init: stmt relevant? ");
5286 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5289 stmt_info = vinfo_for_stmt (stmt);
5290 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5292 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5293 vect_mark_relevant (&worklist, stmt);
5298 /* 2. Process_worklist */
5300 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5302 stmt = VARRAY_TOP_TREE (worklist);
5303 VARRAY_POP (worklist);
5305 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5307 fprintf (vect_dump, "worklist: examine stmt: ");
5308 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5311 /* Examine the USES in this statement. Mark all the statements which
5312 feed this statement's uses as "relevant", unless the USE is used as
5315 if (TREE_CODE (stmt) == PHI_NODE)
5317 /* follow the def-use chain inside the loop. */
5318 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5320 tree arg = PHI_ARG_DEF (stmt, j);
5321 tree def_stmt = NULL_TREE;
5323 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
5325 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5326 LOOP_LOC (loop_vinfo)))
5327 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
5328 varray_clear (worklist);
5334 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5336 fprintf (vect_dump, "worklist: def_stmt: ");
5337 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
5340 bb = bb_for_stmt (def_stmt);
5341 if (flow_bb_inside_loop_p (loop, bb))
5342 vect_mark_relevant (&worklist, def_stmt);
5346 ann = stmt_ann (stmt);
5347 use_ops = USE_OPS (ann);
5349 for (i = 0; i < NUM_USES (use_ops); i++)
5351 tree use = USE_OP (use_ops, i);
5353 /* We are only interested in uses that need to be vectorized. Uses
5354 that are used for address computation are not considered relevant.
5356 if (exist_non_indexing_operands_for_use_p (use, stmt))
5358 tree def_stmt = NULL_TREE;
5360 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
5362 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5363 LOOP_LOC (loop_vinfo)))
5364 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
5365 varray_clear (worklist);
5372 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5374 fprintf (vect_dump, "worklist: examine use %d: ", i);
5375 print_generic_expr (vect_dump, use, TDF_SLIM);
5378 bb = bb_for_stmt (def_stmt);
5379 if (flow_bb_inside_loop_p (loop, bb))
5380 vect_mark_relevant (&worklist, def_stmt);
5383 } /* while worklist */
5385 varray_clear (worklist);
5390 /* Function vect_can_advance_ivs_p
5392 In case the number of iterations that LOOP iterates in unknown at compile
5393 time, an epilog loop will be generated, and the loop induction variables
5394 (IVs) will be "advanced" to the value they are supposed to take just before
5395 the epilog loop. Here we check that the access function of the loop IVs
5396 and the expression that represents the loop bound are simple enough.
5397 These restrictions will be relaxed in the future. */
5400 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
5402 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5403 basic_block bb = loop->header;
5406 /* Analyze phi functions of the loop header. */
5408 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5410 tree access_fn = NULL;
5411 tree evolution_part;
5413 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5415 fprintf (vect_dump, "Analyze phi: ");
5416 print_generic_expr (vect_dump, phi, TDF_SLIM);
5419 /* Skip virtual phi's. The data dependences that are associated with
5420 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5422 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5424 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5425 fprintf (vect_dump, "virtual phi. skip.");
5429 /* Analyze the evolution function. */
5431 access_fn = instantiate_parameters
5432 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5436 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5437 fprintf (vect_dump, "No Access function.");
5441 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5443 fprintf (vect_dump, "Access function of PHI: ");
5444 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
5447 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5449 if (evolution_part == NULL_TREE)
5452 /* FORNOW: We do not transform initial conditions of IVs
5453 which evolution functions are a polynomial of degree >= 2. */
5455 if (tree_is_chrec (evolution_part))
5463 /* Function vect_get_loop_niters.
5465 Determine how many iterations the loop is executed.
5466 If an expression that represents the number of iterations
5467 can be constructed, place it in NUMBER_OF_ITERATIONS.
5468 Return the loop exit condition. */
5471 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5475 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5476 fprintf (vect_dump, "=== get_loop_niters ===");
5478 niters = number_of_iterations_in_loop (loop);
5480 if (niters != NULL_TREE
5481 && niters != chrec_dont_know)
5483 *number_of_iterations = niters;
5485 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5487 fprintf (vect_dump, "==> get_loop_niters:" );
5488 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
5492 return get_loop_exit_condition (loop);
5496 /* Function vect_analyze_loop_form.
5498 Verify the following restrictions (some may be relaxed in the future):
5499 - it's an inner-most loop
5500 - number of BBs = 2 (which are the loop header and the latch)
5501 - the loop has a pre-header
5502 - the loop has a single entry and exit
5503 - the loop exit condition is simple enough, and the number of iterations
5504 can be analyzed (a countable loop). */
5506 static loop_vec_info
5507 vect_analyze_loop_form (struct loop *loop)
5509 loop_vec_info loop_vinfo;
5511 tree number_of_iterations = NULL;
5512 bool rescan = false;
5515 loop_loc = find_loop_location (loop);
5517 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
5518 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
5522 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
5523 fprintf (vect_dump, "not vectorized: nested loop.");
5527 if (!loop->single_exit
5528 || loop->num_nodes != 2
5529 || EDGE_COUNT (loop->header->preds) != 2
5530 || loop->num_entries != 1)
5532 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5534 if (!loop->single_exit)
5535 fprintf (vect_dump, "not vectorized: multiple exits.");
5536 else if (loop->num_nodes != 2)
5537 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
5538 else if (EDGE_COUNT (loop->header->preds) != 2)
5539 fprintf (vect_dump, "not vectorized: too many incoming edges.");
5540 else if (loop->num_entries != 1)
5541 fprintf (vect_dump, "not vectorized: too many entries.");
5547 /* We assume that the loop exit condition is at the end of the loop. i.e,
5548 that the loop is represented as a do-while (with a proper if-guard
5549 before the loop if needed), where the loop header contains all the
5550 executable statements, and the latch is empty. */
5551 if (!empty_block_p (loop->latch))
5553 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5554 fprintf (vect_dump, "not vectorized: unexpectd loop form.");
5558 /* Make sure we have a preheader basic block. */
5559 if (!loop->pre_header)
5562 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5565 /* Make sure there exists a single-predecessor exit bb: */
5566 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5569 loop_split_edge_with (loop->exit_edges[0], NULL);
5574 flow_loop_scan (loop, LOOP_ALL);
5575 /* Flow loop scan does not update loop->single_exit field. */
5576 loop->single_exit = loop->exit_edges[0];
5579 if (empty_block_p (loop->header))
5581 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5582 fprintf (vect_dump, "not vectorized: empty loop.");
5586 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5589 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5590 fprintf (vect_dump, "not vectorized: complicated exit condition.");
5594 if (!number_of_iterations)
5596 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5598 "not vectorized: number of iterations cannot be computed.");
5602 if (chrec_contains_undetermined (number_of_iterations))
5604 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5605 fprintf (vect_dump, "Infinite number of iterations.");
5609 loop_vinfo = new_loop_vec_info (loop);
5610 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5612 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5614 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
5616 fprintf (vect_dump, "Symbolic number of iterations is ");
5617 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
5621 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5623 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
5624 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
5628 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5629 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
5635 /* Function vect_analyze_loop.
5637 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5638 for it. The different analyses will record information in the
5639 loop_vec_info struct. */
5641 static loop_vec_info
5642 vect_analyze_loop (struct loop *loop)
5645 loop_vec_info loop_vinfo;
5647 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5648 fprintf (vect_dump, "===== analyze_loop_nest =====");
5650 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5652 loop_vinfo = vect_analyze_loop_form (loop);
5655 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5656 fprintf (vect_dump, "bad loop form.");
5660 /* Find all data references in the loop (which correspond to vdefs/vuses)
5661 and analyze their evolution in the loop.
5663 FORNOW: Handle only simple, array references, which
5664 alignment can be forced, and aligned pointer-references. */
5666 ok = vect_analyze_data_refs (loop_vinfo);
5669 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5670 fprintf (vect_dump, "bad data references.");
5671 destroy_loop_vec_info (loop_vinfo);
5675 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5677 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5680 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5681 fprintf (vect_dump, "unexpected pattern.");
5682 destroy_loop_vec_info (loop_vinfo);
5686 /* Check that all cross-iteration scalar data-flow cycles are OK.
5687 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5689 ok = vect_analyze_scalar_cycles (loop_vinfo);
5692 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5693 fprintf (vect_dump, "bad scalar cycle.");
5694 destroy_loop_vec_info (loop_vinfo);
5698 /* Analyze data dependences between the data-refs in the loop.
5699 FORNOW: fail at the first data dependence that we encounter. */
5701 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5704 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5705 fprintf (vect_dump, "bad data dependence.");
5706 destroy_loop_vec_info (loop_vinfo);
5710 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5711 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5713 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5716 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5717 fprintf (vect_dump, "bad data access.");
5718 destroy_loop_vec_info (loop_vinfo);
5722 /* Analyze the alignment of the data-refs in the loop.
5723 FORNOW: Only aligned accesses are handled. */
5725 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5728 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5729 fprintf (vect_dump, "bad data alignment.");
5730 destroy_loop_vec_info (loop_vinfo);
5734 /* Scan all the operations in the loop and make sure they are
5737 ok = vect_analyze_operations (loop_vinfo);
5740 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5741 fprintf (vect_dump, "bad operation or unsupported loop bound.");
5742 destroy_loop_vec_info (loop_vinfo);
5746 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5752 /* Function need_imm_uses_for.
5754 Return whether we ought to include information for 'var'
5755 when calculating immediate uses. For this pass we only want use
5756 information for non-virtual variables. */
5759 need_imm_uses_for (tree var)
5761 return is_gimple_reg (var);
5765 /* Function vectorize_loops.
5767 Entry Point to loop vectorization phase. */
5770 vectorize_loops (struct loops *loops)
5772 unsigned int i, loops_num;
5773 unsigned int num_vectorized_loops = 0;
5775 /* Fix the verbosity level if not defined explicitly by the user. */
5776 vect_set_dump_settings ();
5778 /* Does the target support SIMD? */
5779 /* FORNOW: until more sophisticated machine modelling is in place. */
5780 if (!UNITS_PER_SIMD_WORD)
5782 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5783 fprintf (vect_dump, "vectorizer: target vector size is not defined.");
5787 #ifdef ENABLE_CHECKING
5788 verify_loop_closed_ssa ();
5791 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5793 /* ----------- Analyze loops. ----------- */
5795 /* If some loop was duplicated, it gets bigger number
5796 than all previously defined loops. This fact allows us to run
5797 only over initial loops skipping newly generated ones. */
5798 loops_num = loops->num;
5799 for (i = 1; i < loops_num; i++)
5801 loop_vec_info loop_vinfo;
5802 struct loop *loop = loops->parray[i];
5807 loop_vinfo = vect_analyze_loop (loop);
5808 loop->aux = loop_vinfo;
5810 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5813 vect_transform_loop (loop_vinfo, loops);
5814 num_vectorized_loops++;
5817 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
5818 fprintf (vect_dump, "vectorized %u loops in function.\n",
5819 num_vectorized_loops);
5821 /* ----------- Finalize. ----------- */
5824 for (i = 1; i < loops_num; i++)
5826 struct loop *loop = loops->parray[i];
5827 loop_vec_info loop_vinfo;
5831 loop_vinfo = loop->aux;
5832 destroy_loop_vec_info (loop_vinfo);
5836 rewrite_into_ssa (false);
5837 rewrite_into_loop_closed_ssa (); /* FORNOW */
5838 bitmap_clear (vars_to_rename);