1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
44 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
46 /* Function vect_mark_relevant.
48 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* Record STMT's relevance/liveness for vectorization and push STMT on
   WORKLIST, but only if the recorded info actually changed.  NOTE: this
   view of the file is gapped; some lines of the body are not visible.  */
51 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
52 enum vect_relevant relevant, bool live_p)
54 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Snapshot the current values so we can detect a no-op update below.  */
55 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
56 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
58 if (vect_print_dump_info (REPORT_DETAILS))
59 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
61 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
65 /* This is the last stmt in a sequence that was detected as a
66 pattern that can potentially be vectorized. Don't mark the stmt
67 as relevant/live because it's not going to be vectorized.
68 Instead mark the pattern-stmt that replaces it. */
70 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
72 if (vect_print_dump_info (REPORT_DETAILS))
73 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
/* Redirect all further updates to the pattern stmt's vinfo and
   re-snapshot its saved state.  */
74 stmt_info = vinfo_for_stmt (pattern_stmt);
75 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
76 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
77 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
/* Liveness only ever accumulates; relevance is a max-lattice (only
   raised, never lowered).  */
81 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
82 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
83 STMT_VINFO_RELEVANT (stmt_info) = relevant;
/* Nothing changed: avoid re-queuing the stmt on the worklist.  */
85 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
86 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
88 if (vect_print_dump_info (REPORT_DETAILS))
89 fprintf (vect_dump, "already marked relevant/live.");
93 VEC_safe_push (gimple, heap, *worklist, stmt);
97 /* Function vect_stmt_relevant_p.
99 Return true if STMT in loop that is represented by LOOP_VINFO is
100 "relevant for vectorization".
102 A stmt is considered "relevant for vectorization" if:
103 - it has uses outside the loop.
104 - it has vdefs (it alters memory).
105 - control stmts in the loop (except for the exit condition).
107 CHECKME: what other side effects would the vectorizer allow? */
/* Decide whether STMT is relevant for vectorization in the loop described
   by LOOP_VINFO, setting *RELEVANT and *LIVE_P as output.  A stmt is
   relevant if it is a non-exit control stmt, alters memory (has vdefs),
   or has uses outside the loop.  (View is gapped; some declarations and
   braces are not visible here.)  */
110 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
111 enum vect_relevant *relevant, bool *live_p)
113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
115 imm_use_iterator imm_iter;
/* Start from the bottom of the lattice; raise as evidence is found.  */
119 *relevant = vect_unused_in_loop;
122 /* cond stmt other than loop exit cond. */
123 if (is_ctrl_stmt (stmt)
124 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
125 *relevant = vect_used_in_loop;
127 /* changing memory. */
128 if (gimple_code (stmt) != GIMPLE_PHI)
129 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
131 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
133 *relevant = vect_used_in_loop;
136 /* uses outside the loop. */
/* Walk every SSA def of STMT and every immediate use of each def.  */
137 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
139 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
141 basic_block bb = gimple_bb (USE_STMT (use_p));
142 if (!flow_bb_inside_loop_p (loop, bb))
144 if (vect_print_dump_info (REPORT_DETAILS))
145 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
147 /* We expect all such uses to be in the loop exit phis
148 (because of loop closed form) */
149 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
150 gcc_assert (bb == single_exit (loop)->dest);
/* Relevant iff it is live outside the loop or marked relevant above.  */
157 return (*live_p || *relevant);
161 /* Function exist_non_indexing_operands_for_use_p
163 USE is one of the uses attached to STMT. Check if USE is
164 used in STMT for anything other than indexing an array. */
/* Return whether USE appears in STMT for something other than array
   indexing / address computation.  (View is gapped; the returns at the
   end of the function are not visible here.)  */
167 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
172 /* USE corresponds to some operand in STMT. If there is no data
173 reference in STMT, then any operand that corresponds to USE
174 is not indexing an array. */
175 if (!STMT_VINFO_DATA_REF (stmt_info))
178 /* STMT has a data_ref. FORNOW this means that its of one of
182 (This should have been verified in analyze_data_refs).
184 'var' in the second case corresponds to a def, not a use,
185 so USE cannot correspond to any operands that are not used
188 Therefore, all we need to check is if STMT falls into the
189 first case, and whether var corresponds to USE. */
/* A store (lhs not an SSA name) has no non-indexing scalar use.  */
191 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
194 if (!gimple_assign_copy_p (stmt))
196 operand = gimple_assign_rhs1 (stmt);
198 if (TREE_CODE (operand) != SSA_NAME)
209 Function process_use.
212 - a USE in STMT in a loop represented by LOOP_VINFO
213 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
214 that defined USE. This is done by calling mark_relevant and passing it
215 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
218 Generally, LIVE_P and RELEVANT are used to define the liveness and
219 relevance info of the DEF_STMT of this USE:
220 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
221 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
223 - case 1: If USE is used only for address computations (e.g. array indexing),
224 which does not need to be directly vectorized, then the liveness/relevance
225 of the respective DEF_STMT is left unchanged.
226 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
227 skip DEF_STMT cause it had already been processed.
228 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
229 be modified accordingly.
231 Return true if everything is as expected. Return false otherwise. */
/* Propagate liveness/relevance from STMT to the def-stmt of USE, possibly
   adjusting RELEVANT when STMT and its def-stmt are in different loop
   nests, and add the def-stmt to WORKLIST via vect_mark_relevant.
   Returns false on an unsupported use.  (View is gapped; switch `break'
   statements and some braces are not visible here.)  */
234 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
235 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
237 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
238 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
239 stmt_vec_info dstmt_vinfo;
240 basic_block bb, def_bb;
243 enum vect_def_type dt;
245 /* case 1: we are only interested in uses that need to be vectorized. Uses
246 that are used for address computation are not considered relevant. */
247 if (!exist_non_indexing_operands_for_use_p (use, stmt))
250 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
252 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
253 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
/* A missing or nop def (e.g. a default definition) needs no marking.  */
257 if (!def_stmt || gimple_nop_p (def_stmt))
260 def_bb = gimple_bb (def_stmt);
261 if (!flow_bb_inside_loop_p (loop, def_bb))
263 if (vect_print_dump_info (REPORT_DETAILS))
264 fprintf (vect_dump, "def_stmt is out of loop.");
268 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
269 DEF_STMT must have already been processed, because this should be the
270 only way that STMT, which is a reduction-phi, was put in the worklist,
271 as there should be no other uses for DEF_STMT in the loop. So we just
272 check that everything is as expected, and we are done. */
273 dstmt_vinfo = vinfo_for_stmt (def_stmt);
274 bb = gimple_bb (stmt);
275 if (gimple_code (stmt) == GIMPLE_PHI
276 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
277 && gimple_code (def_stmt) != GIMPLE_PHI
278 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
279 && bb->loop_father == def_bb->loop_father)
281 if (vect_print_dump_info (REPORT_DETAILS))
282 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
/* If the def is part of a pattern, check the pattern stmt instead.  */
283 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
284 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
285 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
286 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
287 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
291 /* case 3a: outer-loop stmt defining an inner-loop stmt:
292 outer-loop-header-bb:
/* Def is in an outer loop relative to STMT: translate the outer-loop
   relevance values into their inner-loop counterparts.  */
298 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
300 if (vect_print_dump_info (REPORT_DETAILS))
301 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
304 case vect_unused_in_loop:
305 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
306 vect_used_by_reduction : vect_unused_in_loop;
308 case vect_used_in_outer_by_reduction:
309 relevant = vect_used_by_reduction;
311 case vect_used_in_outer:
312 relevant = vect_used_in_loop;
314 case vect_used_by_reduction:
315 case vect_used_in_loop:
323 /* case 3b: inner-loop stmt defining an outer-loop stmt:
324 outer-loop-header-bb:
/* Def is in an inner loop relative to STMT: translate inner-loop
   relevance values into outer-loop ones.  */
330 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
332 if (vect_print_dump_info (REPORT_DETAILS))
333 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
336 case vect_unused_in_loop:
337 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
338 vect_used_in_outer_by_reduction : vect_unused_in_loop;
341 case vect_used_in_outer_by_reduction:
342 case vect_used_in_outer:
345 case vect_used_by_reduction:
346 relevant = vect_used_in_outer_by_reduction;
349 case vect_used_in_loop:
350 relevant = vect_used_in_outer;
/* Finally record the (possibly adjusted) relevance on the def-stmt.  */
358 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
363 /* Function vect_mark_stmts_to_be_vectorized.
365 Not all stmts in the loop need to be vectorized. For example:
374 Stmt 1 and 3 do not need to be vectorized, because loop control and
375 addressing of vectorized data-refs are handled differently.
377 This pass detects such stmts. */
/* Entry point of the relevance analysis: seed a worklist with the stmts
   that are inherently relevant (per vect_stmt_relevant_p), then propagate
   relevance/liveness backwards through uses via process_use until a
   fixed point.  (View is gapped; several declarations, braces and return
   statements are not visible here.)  */
380 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
382 VEC(gimple,heap) *worklist;
383 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
384 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
385 unsigned int nbbs = loop->num_nodes;
386 gimple_stmt_iterator si;
389 stmt_vec_info stmt_vinfo;
393 enum vect_relevant relevant;
395 if (vect_print_dump_info (REPORT_DETAILS))
396 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
398 worklist = VEC_alloc (gimple, heap, 64);
400 /* 1. Init worklist. */
/* Scan all phis and stmts in every basic block of the loop.  */
401 for (i = 0; i < nbbs; i++)
404 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
407 if (vect_print_dump_info (REPORT_DETAILS))
409 fprintf (vect_dump, "init: phi relevant? ");
410 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
413 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
414 vect_mark_relevant (&worklist, phi, relevant, live_p);
416 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
418 stmt = gsi_stmt (si);
419 if (vect_print_dump_info (REPORT_DETAILS))
421 fprintf (vect_dump, "init: stmt relevant? ");
422 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
425 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
426 vect_mark_relevant (&worklist, stmt, relevant, live_p);
430 /* 2. Process_worklist */
431 while (VEC_length (gimple, worklist) > 0)
436 stmt = VEC_pop (gimple, worklist);
437 if (vect_print_dump_info (REPORT_DETAILS))
439 fprintf (vect_dump, "worklist: examine stmt: ");
440 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
443 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
444 (DEF_STMT) as relevant/irrelevant and live/dead according to the
445 liveness and relevance properties of STMT. */
446 stmt_vinfo = vinfo_for_stmt (stmt);
447 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
448 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
450 /* Generally, the liveness and relevance properties of STMT are
451 propagated as is to the DEF_STMTs of its USEs:
452 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
453 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
455 One exception is when STMT has been identified as defining a reduction
456 variable; in this case we set the liveness/relevance as follows:
458 relevant = vect_used_by_reduction
459 This is because we distinguish between two kinds of relevant stmts -
460 those that are used by a reduction computation, and those that are
461 (also) used by a regular computation. This allows us later on to
462 identify stmts that are used solely by a reduction, and therefore the
463 order of the results that they produce does not have to be kept.
465 Reduction phis are expected to be used by a reduction stmt, or by
466 in an outer loop; Other reduction stmts are expected to be
467 in the loop, and possibly used by a stmt in an outer loop.
468 Here are the expected values of "relevant" for reduction phis/stmts:
471 vect_unused_in_loop ok
472 vect_used_in_outer_by_reduction ok ok
473 vect_used_in_outer ok ok
474 vect_used_by_reduction ok
/* Reduction stmts: validate the relevance value against the table
   above and rewrite it to the reduction-specific relevance before
   propagating; anything else is an unsupported reduction use.  */
477 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
479 enum vect_relevant tmp_relevant = relevant;
480 switch (tmp_relevant)
482 case vect_unused_in_loop:
483 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
484 relevant = vect_used_by_reduction;
487 case vect_used_in_outer_by_reduction:
488 case vect_used_in_outer:
489 gcc_assert (gimple_code (stmt) != GIMPLE_ASSIGN
490 || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
491 && (gimple_assign_rhs_code (stmt)
495 case vect_used_by_reduction:
496 if (gimple_code (stmt) == GIMPLE_PHI)
499 case vect_used_in_loop:
501 if (vect_print_dump_info (REPORT_DETAILS))
502 fprintf (vect_dump, "unsupported use of reduction.");
/* Bail out: release the worklist before reporting failure.  */
503 VEC_free (gimple, heap, worklist);
/* Propagate to the def-stmt of every use; abort on unsupported uses.  */
509 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
511 tree op = USE_FROM_PTR (use_p);
512 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
514 VEC_free (gimple, heap, worklist);
518 } /* while worklist */
520 VEC_free (gimple, heap, worklist);
/* Return the scalar cost estimate for STMT, keyed off its vectorization
   info type: loads, stores, and all other vectorizable stmt kinds each
   map to a target-provided scalar cost constant.  (View is gapped; the
   default/undef handling tail is not visible here.)  */
526 cost_for_stmt (gimple stmt)
528 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
530 switch (STMT_VINFO_TYPE (stmt_info))
532 case load_vec_info_type:
533 return TARG_SCALAR_LOAD_COST;
534 case store_vec_info_type:
535 return TARG_SCALAR_STORE_COST;
/* All remaining vectorizable stmt kinds share the generic stmt cost.  */
536 case op_vec_info_type:
537 case condition_vec_info_type:
538 case assignment_vec_info_type:
539 case reduc_vec_info_type:
540 case induc_vec_info_type:
541 case type_promotion_vec_info_type:
542 case type_demotion_vec_info_type:
543 case type_conversion_vec_info_type:
544 case call_vec_info_type:
545 return TARG_SCALAR_STMT_COST;
546 case undef_vec_info_type:
552 /* Function vect_model_simple_cost.
554 Models cost for simple operations, i.e. those that only emit ncopies of a
555 single op. Right now, this does not account for multiple insns that could
556 be generated for the single vector op. We will handle that shortly. */
/* Model the cost of a simple (single-op) vector stmt: NCOPIES vector
   stmts inside the loop, plus one scalar-to-vector broadcast outside the
   loop for each constant/invariant operand in DT.  Results are recorded
   in STMT_INFO or SLP_NODE.  */
559 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
560 enum vect_def_type *dt, slp_tree slp_node)
563 int inside_cost = 0, outside_cost = 0;
565 /* The SLP costs were already calculated during SLP tree build. */
566 if (PURE_SLP_STMT (stmt_info))
569 inside_cost = ncopies * TARG_VEC_STMT_COST;
571 /* FORNOW: Assuming maximum 2 args per stmts. */
/* Constant/invariant operands need a one-time vector construction
   outside the loop.  */
572 for (i = 0; i < 2; i++)
574 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
575 outside_cost += TARG_SCALAR_TO_VEC_COST;
578 if (vect_print_dump_info (REPORT_COST))
579 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
580 "outside_cost = %d .", inside_cost, outside_cost);
582 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
583 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
584 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
588 /* Function vect_cost_strided_group_size
590 For strided load or store, return the group_size only if it is the first
591 load or store of a group, else return 1. This ensures that group size is
592 only returned once per group. */
/* For a strided access, return the group size only when STMT_INFO is the
   first access of its group, so the group cost is attributed once.
   (View is gapped; the fall-through `return 1' is not visible here.)  */
595 vect_cost_strided_group_size (stmt_vec_info stmt_info)
597 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
599 if (first_stmt == STMT_VINFO_STMT (stmt_info))
600 return DR_GROUP_SIZE (stmt_info);
606 /* Function vect_model_store_cost
608 Models cost for stores. In the case of strided accesses, one access
609 has the overhead of the strided access attributed to it. */
/* Model the cost of a vector store: interleave permutes for strided
   groups (charged once per group), plus the stores themselves, plus a
   one-time scalar-to-vector cost when the stored value is constant or
   invariant.  Results go into STMT_INFO or SLP_NODE.  */
612 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
613 enum vect_def_type dt, slp_tree slp_node)
616 int inside_cost = 0, outside_cost = 0;
618 /* The SLP costs were already calculated during SLP tree build. */
619 if (PURE_SLP_STMT (stmt_info))
622 if (dt == vect_constant_def || dt == vect_invariant_def)
623 outside_cost = TARG_SCALAR_TO_VEC_COST;
625 /* Strided access? */
626 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
627 group_size = vect_cost_strided_group_size (stmt_info);
628 /* Not a strided access. */
632 /* Is this an access in a group of stores, which provide strided access?
633 If so, add in the cost of the permutes. */
636 /* Uses a high and low interleave operation for each needed permute. */
/* log2(group_size) interleave stages, each touching group_size vectors.  */
637 inside_cost = ncopies * exact_log2(group_size) * group_size
638 * TARG_VEC_STMT_COST;
640 if (vect_print_dump_info (REPORT_COST))
641 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
646 /* Costs of the stores. */
647 inside_cost += ncopies * TARG_VEC_STORE_COST;
649 if (vect_print_dump_info (REPORT_COST))
650 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
651 "outside_cost = %d .", inside_cost, outside_cost);
653 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
654 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
655 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
659 /* Function vect_model_load_cost
661 Models cost for loads. In the case of strided accesses, the last access
662 has the overhead of the strided access attributed to it. Since unaligned
663 accesses are supported for loads, we also account for the costs of the
664 access scheme chosen. */
/* Model the cost of a vector load: extract permutes for strided groups,
   plus per-copy load costs that depend on the alignment-support scheme
   chosen for the data reference (aligned, unaligned-supported, explicit
   realignment, or software-pipelined realignment).  Results go into
   STMT_INFO or SLP_NODE.  (View is gapped; some case labels/breaks are
   not visible here.)  */
667 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
671 int alignment_support_cheme;
673 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
674 int inside_cost = 0, outside_cost = 0;
676 /* The SLP costs were already calculated during SLP tree build. */
677 if (PURE_SLP_STMT (stmt_info))
680 /* Strided accesses? */
681 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
682 if (first_stmt && !slp_node)
684 group_size = vect_cost_strided_group_size (stmt_info);
/* Alignment is judged on the group's first data reference.  */
685 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
687 /* Not a strided access. */
694 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
696 /* Is this an access in a group of loads providing strided access?
697 If so, add in the cost of the permutes. */
700 /* Uses an even and odd extract operations for each needed permute. */
701 inside_cost = ncopies * exact_log2(group_size) * group_size
702 * TARG_VEC_STMT_COST;
704 if (vect_print_dump_info (REPORT_COST))
705 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
710 /* The loads themselves. */
711 switch (alignment_support_cheme)
715 inside_cost += ncopies * TARG_VEC_LOAD_COST;
717 if (vect_print_dump_info (REPORT_COST))
718 fprintf (vect_dump, "vect_model_load_cost: aligned.");
722 case dr_unaligned_supported:
724 /* Here, we assign an additional cost for the unaligned load. */
725 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
727 if (vect_print_dump_info (REPORT_COST))
728 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
733 case dr_explicit_realign:
/* Two loads plus a realign stmt per copy.  */
735 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
737 /* FIXME: If the misalignment remains fixed across the iterations of
738 the containing loop, the following cost should be added to the
740 if (targetm.vectorize.builtin_mask_for_load)
741 inside_cost += TARG_VEC_STMT_COST;
745 case dr_explicit_realign_optimized:
747 if (vect_print_dump_info (REPORT_COST))
748 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
751 /* Unaligned software pipeline has a load of an address, an initial
752 load, and possibly a mask operation to "prime" the loop. However,
753 if this is an access in a group of loads, which provide strided
754 access, then the above cost should only be considered for one
755 access in the group. Inside the loop, there is a load op
756 and a realignment op. */
758 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
760 outside_cost = 2*TARG_VEC_STMT_COST;
761 if (targetm.vectorize.builtin_mask_for_load)
762 outside_cost += TARG_VEC_STMT_COST;
765 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
774 if (vect_print_dump_info (REPORT_COST))
775 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
776 "outside_cost = %d .", inside_cost, outside_cost);
778 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
779 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
780 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
784 /* Function vect_init_vector.
786 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
787 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
788 is not NULL. Otherwise, place the initialization at the loop preheader.
789 Return the DEF of INIT_STMT.
790 It will be used in the vectorization of STMT. */
/* Build an INIT_STMT assigning VECTOR_VAR to a fresh SSA name of
   VECTOR_TYPE and insert it: at GSI when non-NULL, otherwise on the
   (outermost, for nested loops) loop preheader edge.  Returns the new
   SSA def.  (View is gapped; local declarations and some braces are not
   visible here.)  */
793 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
794 gimple_stmt_iterator *gsi)
796 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
804 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
805 add_referenced_var (new_var);
806 init_stmt = gimple_build_assign (new_var, vector_var);
807 new_temp = make_ssa_name (new_var, init_stmt);
808 gimple_assign_set_lhs (init_stmt, new_temp);
/* Insert at GSI when provided ...  */
811 vect_finish_stmt_generation (stmt, init_stmt, gsi);
/* ... otherwise insert on the preheader edge of the relevant loop.  */
814 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
815 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
817 if (nested_in_vect_loop_p (loop, stmt))
819 pe = loop_preheader_edge (loop);
820 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
/* Edge insertion must not have required splitting a new block.  */
821 gcc_assert (!new_bb);
824 if (vect_print_dump_info (REPORT_DETAILS))
826 fprintf (vect_dump, "created new init_stmt: ");
827 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
830 vec_oprnd = gimple_assign_lhs (init_stmt);
834 /* Function vect_get_vec_def_for_operand.
836 OP is an operand in STMT. This function returns a (vector) def that will be
837 used in the vectorized stmt for STMT.
839 In the case that OP is an SSA_NAME which is defined in the loop, then
840 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
842 In case OP is an invariant or constant, a new stmt that creates a vector def
843 needs to be introduced. */
/* Return the vector def to use for operand OP of STMT, dispatching on
   OP's def-type: build a vector constant/invariant, fetch the def from
   an already-vectorized stmt, or handle reduction/induction phis.
   (View is gapped; the switch scaffolding and some declarations are not
   visible here.)  */
846 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
851 stmt_vec_info def_stmt_info = NULL;
852 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
853 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
854 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
855 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
861 enum vect_def_type dt;
865 if (vect_print_dump_info (REPORT_DETAILS))
867 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
868 print_generic_expr (vect_dump, op, TDF_SLIM);
/* OP was validated as a simple use during analysis, so this must hold.  */
871 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
872 gcc_assert (is_simple_use);
873 if (vect_print_dump_info (REPORT_DETAILS))
877 fprintf (vect_dump, "def = ");
878 print_generic_expr (vect_dump, def, TDF_SLIM);
882 fprintf (vect_dump, " def_stmt = ");
883 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
889 /* Case 1: operand is a constant. */
890 case vect_constant_def:
895 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
896 if (vect_print_dump_info (REPORT_DETAILS))
897 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
/* Build the element list back-to-front (tree_cons prepends).  */
899 for (i = nunits - 1; i >= 0; --i)
901 t = tree_cons (NULL_TREE, op, t);
903 vec_cst = build_vector (vectype, t);
904 return vect_init_vector (stmt, vec_cst, vectype, NULL);
907 /* Case 2: operand is defined outside the loop - loop invariant. */
908 case vect_invariant_def:
/* The invariant's own scalar type determines the vector type here.  */
910 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
911 gcc_assert (vector_type);
912 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
917 /* Create 'vec_inv = {inv,inv,..,inv}' */
918 if (vect_print_dump_info (REPORT_DETAILS))
919 fprintf (vect_dump, "Create vector_inv.");
921 for (i = nunits - 1; i >= 0; --i)
923 t = tree_cons (NULL_TREE, def, t);
926 /* FIXME: use build_constructor directly. */
927 vec_inv = build_constructor_from_list (vector_type, t);
928 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
931 /* Case 3: operand is defined inside the loop. */
935 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
937 /* Get the def from the vectorized stmt. */
938 def_stmt_info = vinfo_for_stmt (def_stmt);
939 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
940 gcc_assert (vec_stmt);
/* The lhs accessor depends on the vectorized stmt's kind.  */
941 if (gimple_code (vec_stmt) == GIMPLE_PHI)
942 vec_oprnd = PHI_RESULT (vec_stmt);
943 else if (is_gimple_call (vec_stmt))
944 vec_oprnd = gimple_call_lhs (vec_stmt);
946 vec_oprnd = gimple_assign_lhs (vec_stmt);
950 /* Case 4: operand is defined by a loop header phi - reduction */
951 case vect_reduction_def:
955 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
956 loop = (gimple_bb (def_stmt))->loop_father;
958 /* Get the def before the loop */
959 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
960 return get_initial_def_for_reduction (stmt, op, scalar_def);
963 /* Case 5: operand is defined by loop-header phi - induction. */
964 case vect_induction_def:
966 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
968 /* Get the def from the vectorized stmt. */
969 def_stmt_info = vinfo_for_stmt (def_stmt);
970 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
971 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
972 vec_oprnd = PHI_RESULT (vec_stmt);
982 /* Function vect_get_vec_def_for_stmt_copy
984 Return a vector-def for an operand. This function is used when the
985 vectorized stmt to be created (by the caller to this function) is a "copy"
986 created in case the vectorized result cannot fit in one vector, and several
987 copies of the vector-stmt are required. In this case the vector-def is
988 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
989 of the stmt that defines VEC_OPRND.
990 DT is the type of the vector def VEC_OPRND.
993 In case the vectorization factor (VF) is bigger than the number
994 of elements that can fit in a vectype (nunits), we have to generate
995 more than one vector stmt to vectorize the scalar stmt. This situation
996 arises when there are multiple data-types operated upon in the loop; the
997 smallest data-type determines the VF, and as a result, when vectorizing
998 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
999 vector stmt (each computing a vector of 'nunits' results, and together
1000 computing 'VF' results in each iteration). This function is called when
1001 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1002 which VF=16 and nunits=4, so the number of copies required is 4):
1004 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1006 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1007 VS1.1: vx.1 = memref1 VS1.2
1008 VS1.2: vx.2 = memref2 VS1.3
1009 VS1.3: vx.3 = memref3
1011 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1012 VSnew.1: vz1 = vx.1 + ... VSnew.2
1013 VSnew.2: vz2 = vx.2 + ... VSnew.3
1014 VSnew.3: vz3 = vx.3 + ...
1016 The vectorization of S1 is explained in vectorizable_load.
1017 The vectorization of S2:
1018 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1019 the function 'vect_get_vec_def_for_operand' is called to
1020 get the relevant vector-def for each operand of S2. For operand x it
1021 returns the vector-def 'vx.0'.
1023 To create the remaining copies of the vector-stmt (VSnew.j), this
1024 function is called to get the relevant vector-def for each operand. It is
1025 obtained from the respective VS1.j stmt, which is recorded in the
1026 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1028 For example, to obtain the vector-def 'vx.1' in order to create the
1029 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1030 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1031 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1032 and return its def ('vx.1').
1033 Overall, to create the above sequence this function will be called 3 times:
1034 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1035 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1036 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* Given VEC_OPRND (the vector def used by the previous copy of a stmt),
   return the def for the next copy by following the defining stmt's
   STMT_VINFO_RELATED_STMT chain.  Constants/invariants are reusable
   as-is.  (View is gapped; the early return is not visible here.)  */
1039 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1041 gimple vec_stmt_for_operand;
1042 stmt_vec_info def_stmt_info;
1044 /* Do nothing; can reuse same def. */
1045 if (dt == vect_invariant_def || dt == vect_constant_def )
1048 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1049 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1050 gcc_assert (def_stmt_info);
/* Step to the next copy in the related-stmt chain.  */
1051 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1052 gcc_assert (vec_stmt_for_operand);
1053 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1054 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1055 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1057 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1062 /* Get vectorized definitions for the operands to create a copy of an original
1063 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
/* Advance the vector defs in *VEC_OPRNDS0 (and optionally *VEC_OPRNDS1)
   to the next stmt copy, in place: pop the previous def, map it through
   vect_get_vec_def_for_stmt_copy, and push the result back.  */
1066 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1067 VEC(tree,heap) **vec_oprnds0,
1068 VEC(tree,heap) **vec_oprnds1)
1070 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1072 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1073 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
/* Second operand vector is optional.  */
1075 if (vec_oprnds1 && *vec_oprnds1)
1077 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1078 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1079 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1084 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
/* Fill *VEC_OPRNDS0/*VEC_OPRNDS1 with the vector defs for OP0/OP1:
   delegate to vect_get_slp_defs for an SLP node, otherwise fetch one
   def per operand via vect_get_vec_def_for_operand.  (View is gapped;
   the branch scaffolding is not visible here.)  */
1087 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1088 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1092 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1097 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1098 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1099 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
/* OP1 is optional; presumably guarded by a visible-elsewhere check.  */
1103 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1104 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1105 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1111 /* Function vect_finish_stmt_generation.
1113 Insert a new stmt. */
/* Insert the newly generated VEC_STMT before GSI, create its
   stmt_vec_info, and copy the source location from the stmt at GSI.  */
1116 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1117 gimple_stmt_iterator *gsi)
1119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1120 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1122 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1124 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
/* Give the new stmt its own vectorizer bookkeeping record.  */
1126 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo));
1128 if (vect_print_dump_info (REPORT_DETAILS))
1130 fprintf (vect_dump, "add new stmt: ");
1131 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1134 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1137 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1138    a function declaration if the target has a vectorized version
1139    of the function, or NULL_TREE if the function cannot be vectorized.  */
1142 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1144 tree fndecl = gimple_call_fndecl (call);
1145 enum built_in_function code;
1147 /* We only handle functions that do not read or clobber memory -- i.e.
1148    const or novops ones.  */
1149 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
/* Only known built-in FUNCTION_DECLs can be mapped to a target vector
   version (leading condition elided in this listing).  */
1153 || TREE_CODE (fndecl) != FUNCTION_DECL
1154 || !DECL_BUILT_IN (fndecl))
/* Ask the target hook for a vectorized counterpart of this builtin.  */
1157 code = DECL_FUNCTION_CODE (fndecl);
1158 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
1162 /* Function vectorizable_call.
1164    Check if STMT performs a function call that can be vectorized.
1165    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1166    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1167    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1170 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1175 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1176 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1177 tree vectype_out, vectype_in;
1180 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1181 tree fndecl, new_temp, def, rhs_type, lhs_type;
1183 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1186 VEC(tree, heap) *vargs = NULL;
/* MODIFIER records how output width relates to input width (see below).  */
1187 enum { NARROW, NONE, WIDEN } modifier;
/* Early bail-outs: stmt must be relevant, a plain loop def, not SLP,
   and a call whose lhs is an SSA name.  */
1190 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1193 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
1196 /* FORNOW: SLP not supported.  */
1197 if (STMT_SLP_TYPE (stmt_info))
1200 /* Is STMT a vectorizable call?  */
1201 if (!is_gimple_call (stmt))
1204 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1207 /* Process function arguments.  */
1208 rhs_type = NULL_TREE;
1209 nargs = gimple_call_num_args (stmt);
1211 /* Bail out if the function has more than two arguments, we
1212    do not have interesting builtin functions to vectorize with
1213    more than two arguments. No arguments is also not good.  */
1214 if (nargs == 0 || nargs > 2)
1217 for (i = 0; i < nargs; i++)
1219 op = gimple_call_arg (stmt, i);
1221 /* We can only handle calls with arguments of the same type.  */
1223 && rhs_type != TREE_TYPE (op))
1225 if (vect_print_dump_info (REPORT_DETAILS))
1226 fprintf (vect_dump, "argument types differ.");
1229 rhs_type = TREE_TYPE (op);
/* Each argument must be a simple use (loop def/invariant/constant).  */
1231 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[i]))
1233 if (vect_print_dump_info (REPORT_DETAILS))
1234 fprintf (vect_dump, "use not simple.");
/* Derive the input and output vector types from scalar arg/lhs types.  */
1239 vectype_in = get_vectype_for_scalar_type (rhs_type);
1242 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1244 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
1245 vectype_out = get_vectype_for_scalar_type (lhs_type);
1248 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Classify: out twice as wide (WIDEN), same (NONE), or half (NARROW).  */
1251 if (nunits_in == nunits_out / 2)
1253 else if (nunits_out == nunits_in)
1255 else if (nunits_out == nunits_in / 2)
1260 /* For now, we only vectorize functions if a target specific builtin
1261    is available. TODO -- in some cases, it might be profitable to
1262    insert the calls for pieces of the vector, in order to be able
1263    to vectorize other operations in the loop.  */
1264 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1265 if (fndecl == NULL_TREE)
1267 if (vect_print_dump_info (REPORT_DETAILS))
1268 fprintf (vect_dump, "function is not vectorizable.");
1273 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
/* Number of vector copies needed to cover the vectorization factor.  */
1275 if (modifier == NARROW)
1276 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1278 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1280 /* Sanity check: make sure that at least one copy of the vectorized stmt
1281    needs to be generated.  */
1282 gcc_assert (ncopies >= 1);
1284 if (!vec_stmt) /* transformation not required.  */
1286 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1287 if (vect_print_dump_info (REPORT_DETAILS))
1288 fprintf (vect_dump, "=== vectorizable_call ===");
1289 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transformation phase starts here.  */
1295 if (vect_print_dump_info (REPORT_DETAILS))
1296 fprintf (vect_dump, "transform operation.");
1299 scalar_dest = gimple_call_lhs (stmt);
1300 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1302 prev_stmt_info = NULL;
/* MODIFIER == NONE: one vector call per copy, one vector def per arg.  */
1306 for (j = 0; j < ncopies; ++j)
1308 /* Build argument list for the vectorized call.  */
1310 vargs = VEC_alloc (tree, heap, nargs);
1312 VEC_truncate (tree, vargs, 0);
1314 for (i = 0; i < nargs; i++)
1316 op = gimple_call_arg (stmt, i);
1319 = vect_get_vec_def_for_operand (op, stmt, NULL);
/* NOTE(review): DT is indexed by NARGS here rather than by the argument
   index I; dt[] has only two entries, one per argument -- looks like it
   should be dt[i].  Confirm against the full source / later fixes.  */
1322 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
1324 VEC_quick_push (tree, vargs, vec_oprnd0);
1327 new_stmt = gimple_build_call_vec (fndecl, vargs);
1328 new_temp = make_ssa_name (vec_dest, new_stmt);
1329 gimple_call_set_lhs (new_stmt, new_temp);
1331 vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Chain the copies via STMT_VINFO_RELATED_STMT; first copy goes into
   STMT_VINFO_VEC_STMT / *VEC_STMT.  */
1334 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1336 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1338 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* MODIFIER == NARROW: each vector call consumes two input vector defs
   per argument, hence the doubled vargs allocation.  */
1344 for (j = 0; j < ncopies; ++j)
1346 /* Build argument list for the vectorized call.  */
1348 vargs = VEC_alloc (tree, heap, nargs * 2);
1350 VEC_truncate (tree, vargs, 0);
1352 for (i = 0; i < nargs; i++)
1354 op = gimple_call_arg (stmt, i);
1358 = vect_get_vec_def_for_operand (op, stmt, NULL);
/* NOTE(review): same dt[nargs] indexing concern as above.  */
1360 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
1365 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
1367 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
1370 VEC_quick_push (tree, vargs, vec_oprnd0);
1371 VEC_quick_push (tree, vargs, vec_oprnd1);
1374 new_stmt = gimple_build_call_vec (fndecl, vargs);
1375 new_temp = make_ssa_name (vec_dest, new_stmt);
1376 gimple_call_set_lhs (new_stmt, new_temp);
1378 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1381 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1383 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1385 prev_stmt_info = vinfo_for_stmt (new_stmt);
1388 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
/* MODIFIER == WIDEN falls through here.  */
1393 /* No current target implements this case.  */
1397 VEC_free (tree, heap, vargs);
1399 /* Update the exception handling table with the vector stmt if necessary.  */
1400 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1401 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1403 /* The call in STMT might prevent it from being removed in dce.
1404    We however cannot remove it here, due to the way the ssa name
1405    it defines is mapped to the new definition. So just replace
1406    rhs of the statement with something harmless.  */
1408 type = TREE_TYPE (scalar_dest);
1409 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1410 fold_convert (type, integer_zero_node))
1411 set_vinfo_for_stmt (new_stmt, stmt_info);
1412 set_vinfo_for_stmt (stmt, NULL);
1413 STMT_VINFO_STMT (stmt_info) = new_stmt;
1414 gsi_replace (gsi, new_stmt, false);
1415 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1421 /* Function vect_gen_widened_results_half
1423    Create a vector stmt whose code, type, number of arguments, and result
1424    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1425    VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1426    In the case that CODE is a CALL_EXPR, this means that a call to DECL
1427    needs to be created (DECL is a function-decl of a target-builtin).
1428    STMT is the original scalar stmt that we are vectorizing.  */
1431 vect_gen_widened_results_half (enum tree_code code,
1433 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1434 tree vec_dest, gimple_stmt_iterator *gsi,
1442 /* Generate half of the widened result:  */
1443 if (code == CALL_EXPR)
1445 /* Target specific support: emit a call to the target builtin DECL with
     one or two operands depending on OP_TYPE.  */
1446 if (op_type == binary_op)
1447 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1449 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1450 new_temp = make_ssa_name (vec_dest, new_stmt);
1451 gimple_call_set_lhs (new_stmt, new_temp);
1455 /* Generic support: emit an assignment with tree-code CODE.  */
1456 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1457 if (op_type != binary_op)
1459 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1461 new_temp = make_ssa_name (vec_dest, new_stmt);
1462 gimple_assign_set_lhs (new_stmt, new_temp);
1464 vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Calls may introduce virtual operands; queue their symbols for SSA
   renaming.  */
1466 if (code == CALL_EXPR)
1468 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
1470 if (TREE_CODE (sym) == SSA_NAME)
1471 sym = SSA_NAME_VAR (sym);
1472 mark_sym_for_renaming (sym);
1480 /* Check if STMT performs a conversion operation, that can be vectorized.
1481    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1482    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1483    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1486 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1487 gimple *vec_stmt, slp_tree slp_node)
1492 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1493 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1494 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1495 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1496 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1500 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1501 gimple new_stmt = NULL;
1502 stmt_vec_info prev_stmt_info;
1505 tree vectype_out, vectype_in;
1508 tree rhs_type, lhs_type;
/* Same width classification as in vectorizable_call.  */
1510 enum { NARROW, NONE, WIDEN } modifier;
1512 VEC(tree,heap) *vec_oprnds0 = NULL;
1515 VEC(tree,heap) *dummy = NULL;
1518 /* Is STMT a vectorizable conversion?  */
1520 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1523 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
1526 if (!is_gimple_assign (stmt))
1529 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
/* Only int<->float conversions are handled here.  */
1532 code = gimple_assign_rhs_code (stmt);
1533 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1536 /* Check types of lhs and rhs.  */
1537 op0 = gimple_assign_rhs1 (stmt);
1538 rhs_type = TREE_TYPE (op0);
1539 vectype_in = get_vectype_for_scalar_type (rhs_type);
1542 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1544 scalar_dest = gimple_assign_lhs (stmt);
1545 lhs_type = TREE_TYPE (scalar_dest);
1546 vectype_out = get_vectype_for_scalar_type (lhs_type);
1549 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1552 if (nunits_in == nunits_out / 2)
1554 else if (nunits_out == nunits_in)
1556 else if (nunits_out == nunits_in / 2)
1561 if (modifier == NONE)
1562 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
1564 /* Bail out if the types are both integral or non-integral.  */
1565 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1566 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
/* The integral side's vector type is what the target hook wants.  */
1569 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1571 if (modifier == NARROW)
1572 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1574 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1576 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1577    this, so we can safely override NCOPIES with 1 here.  */
1581 /* Sanity check: make sure that at least one copy of the vectorized stmt
1582    needs to be generated.  */
1583 gcc_assert (ncopies >= 1);
1585 /* Check the operands of the operation.  */
1586 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1588 if (vect_print_dump_info (REPORT_DETAILS))
1589 fprintf (vect_dump, "use not simple.");
1593 /* Supportable by target?  */
1594 if ((modifier == NONE
1595 && !targetm.vectorize.builtin_conversion (code, integral_type))
1596 || (modifier == WIDEN
1597 && !supportable_widening_operation (code, stmt, vectype_in,
1600 &dummy_int, &dummy))
1601 || (modifier == NARROW
1602 && !supportable_narrowing_operation (code, stmt, vectype_in,
1603 &code1, &dummy_int, &dummy)))
1605 if (vect_print_dump_info (REPORT_DETAILS))
1606 fprintf (vect_dump, "conversion not supported by target.");
1610 if (modifier != NONE)
1612 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1613 /* FORNOW: SLP not supported.  */
1614 if (STMT_SLP_TYPE (stmt_info))
1618 if (!vec_stmt) /* transformation not required.  */
1620 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
/* Transformation phase.  */
1625 if (vect_print_dump_info (REPORT_DETAILS))
1626 fprintf (vect_dump, "transform conversion.");
1629 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1631 if (modifier == NONE && !slp_node)
1632 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1634 prev_stmt_info = NULL;
/* MODIFIER == NONE: one call to the target conversion builtin per def.  */
1638 for (j = 0; j < ncopies; j++)
1644 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1646 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1649 targetm.vectorize.builtin_conversion (code, integral_type);
1650 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1652 /* Arguments are ready. create the new vector stmt.  */
1653 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1654 new_temp = make_ssa_name (vec_dest, new_stmt);
1655 gimple_call_set_lhs (new_stmt, new_temp);
1656 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1657 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
1658 SSA_OP_ALL_VIRTUALS)
1660 if (TREE_CODE (sym) == SSA_NAME)
1661 sym = SSA_NAME_VAR (sym);
1662 mark_sym_for_renaming (sym);
1665 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1669 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1671 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1672 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* MODIFIER == WIDEN: each input vector yields two widened result
   halves, generated with code1/code2 (or decl1/decl2 builtins).  */
1677 /* In case the vectorization factor (VF) is bigger than the number
1678    of elements that we can fit in a vectype (nunits), we have to
1679    generate more than one vector stmt - i.e - we need to "unroll"
1680    the vector stmt by a factor VF/nunits.  */
1681 for (j = 0; j < ncopies; j++)
1684 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1686 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1688 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1690 /* Generate first half of the widened result:  */
1692 = vect_gen_widened_results_half (code1, decl1,
1693 vec_oprnd0, vec_oprnd1,
1694 unary_op, vec_dest, gsi, stmt);
1696 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1698 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1699 prev_stmt_info = vinfo_for_stmt (new_stmt);
1701 /* Generate second half of the widened result:  */
1703 = vect_gen_widened_results_half (code2, decl2,
1704 vec_oprnd0, vec_oprnd1,
1705 unary_op, vec_dest, gsi, stmt);
1706 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1707 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* MODIFIER == NARROW: each result vector consumes two input defs.  */
1712 /* In case the vectorization factor (VF) is bigger than the number
1713    of elements that we can fit in a vectype (nunits), we have to
1714    generate more than one vector stmt - i.e - we need to "unroll"
1715    the vector stmt by a factor VF/nunits.  */
1716 for (j = 0; j < ncopies; j++)
1721 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1722 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1726 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1727 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1730 /* Arguments are ready. Create the new vector stmt.  */
1731 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
1732 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1734 new_temp = make_ssa_name (vec_dest, new_stmt);
1735 gimple_assign_set_lhs (new_stmt, new_temp);
1736 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1739 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1741 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1743 prev_stmt_info = vinfo_for_stmt (new_stmt);
1746 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1750 VEC_free (tree, heap, vec_oprnds0);
1754 /* Function vectorizable_assignment.
1756    Check if STMT performs an assignment (copy) that can be vectorized.
1757    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1758    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1759    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1762 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1763 gimple *vec_stmt, slp_tree slp_node)
1768 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1769 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1770 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1774 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1775 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1778 VEC(tree,heap) *vec_oprnds = NULL;
1781 /* Multiple types in SLP are handled by creating the appropriate number of
1782    vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1787 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1789 gcc_assert (ncopies >= 1);
/* FORNOW: multiple copies are not handled by this function.  */
1791 return false; /* FORNOW */
1793 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1796 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
1799 /* Is vectorizable assignment?  */
1800 if (!is_gimple_assign (stmt))
1803 scalar_dest = gimple_assign_lhs (stmt);
1804 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* Accept plain copies and PAREN_EXPR wrappers.  */
1807 if (gimple_assign_single_p (stmt)
1808 || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
1809 op = gimple_assign_rhs1 (stmt);
1813 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
1815 if (vect_print_dump_info (REPORT_DETAILS))
1816 fprintf (vect_dump, "use not simple.");
1820 if (!vec_stmt) /* transformation not required.  */
1822 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1823 if (vect_print_dump_info (REPORT_DETAILS))
1824 fprintf (vect_dump, "=== vectorizable_assignment ===");
1825 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transformation phase: emit one vector copy per vector def.  */
1830 if (vect_print_dump_info (REPORT_DETAILS))
1831 fprintf (vect_dump, "transform assignment.");
1834 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1837 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1839 /* Arguments are ready. create the new vector stmt.  */
1840 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1842 *vec_stmt = gimple_build_assign (vec_dest, vop);
1843 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1844 gimple_assign_set_lhs (*vec_stmt, new_temp);
1845 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
1846 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
1849 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
1852 VEC_free (tree, heap, vec_oprnds);
1856 /* Function vectorizable_operation.
1858    Check if STMT performs a binary or unary operation that can be vectorized.
1859    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1860    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1861    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1864 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1865 gimple *vec_stmt, slp_tree slp_node)
1869 tree op0, op1 = NULL;
1870 tree vec_oprnd1 = NULL_TREE;
1871 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1872 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1873 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1874 enum tree_code code;
1875 enum machine_mode vec_mode;
1880 enum machine_mode optab_op2_mode;
1883 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1884 gimple new_stmt = NULL;
1885 stmt_vec_info prev_stmt_info;
1886 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1891 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1894 bool shift_p = false;
1895 bool scalar_shift_arg = false;
1897 /* Multiple types in SLP are handled by creating the appropriate number of
1898    vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1903 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1905 gcc_assert (ncopies >= 1);
1907 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1910 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
1913 /* Is STMT a vectorizable binary/unary operation?  */
1914 if (!is_gimple_assign (stmt))
1917 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
/* Input and output vectors must have the same number of elements.  */
1920 scalar_dest = gimple_assign_lhs (stmt);
1921 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1924 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1925 if (nunits_out != nunits_in)
1928 code = gimple_assign_rhs_code (stmt);
1930 /* For pointer addition, we should use the normal plus for
1931    the vector addition.  */
1932 if (code == POINTER_PLUS_EXPR)
1935 /* Support only unary or binary operations.  */
1936 op_type = TREE_CODE_LENGTH (code);
1937 if (op_type != unary_op && op_type != binary_op)
1939 if (vect_print_dump_info (REPORT_DETAILS))
1940 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1944 op0 = gimple_assign_rhs1 (stmt);
1945 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1947 if (vect_print_dump_info (REPORT_DETAILS))
1948 fprintf (vect_dump, "use not simple.");
1952 if (op_type == binary_op)
1954 op1 = gimple_assign_rhs2 (stmt);
1955 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
1957 if (vect_print_dump_info (REPORT_DETAILS))
1958 fprintf (vect_dump, "use not simple.");
1963 /* If this is a shift/rotate, determine whether the shift amount is a vector,
1964    or scalar. If the shift/rotate amount is a vector, use the vector/vector
1966 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
1967 || code == RROTATE_EXPR)
1971 /* vector shifted by vector */
1972 if (dt[1] == vect_loop_def)
1974 optab = optab_for_tree_code (code, vectype, optab_vector);
1975 if (vect_print_dump_info (REPORT_DETAILS))
1976 fprintf (vect_dump, "vector/vector shift/rotate found.");
1979 /* See if the machine has a vector shifted by scalar insn and if not
1980    then see if it has a vector shifted by vector insn */
1981 else if (dt[1] == vect_constant_def || dt[1] == vect_invariant_def)
1983 optab = optab_for_tree_code (code, vectype, optab_scalar);
1985 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1986 != CODE_FOR_nothing))
1988 scalar_shift_arg = true;
1989 if (vect_print_dump_info (REPORT_DETAILS))
1990 fprintf (vect_dump, "vector/scalar shift/rotate found.");
/* Fall back to the vector/vector form of the shift.  */
1994 optab = optab_for_tree_code (code, vectype, optab_vector);
1995 if (vect_print_dump_info (REPORT_DETAILS)
1997 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1998 != CODE_FOR_nothing))
1999 fprintf (vect_dump, "vector/vector shift/rotate found.");
2005 if (vect_print_dump_info (REPORT_DETAILS))
2006 fprintf (vect_dump, "operand mode requires invariant argument.");
/* Non-shift operation: use the default optab for CODE.  */
2011 optab = optab_for_tree_code (code, vectype, optab_default);
2013 /* Supportable by target?  */
2016 if (vect_print_dump_info (REPORT_DETAILS))
2017 fprintf (vect_dump, "no optab.");
2020 vec_mode = TYPE_MODE (vectype);
2021 icode = (int) optab_handler (optab, vec_mode)->insn_code;
2022 if (icode == CODE_FOR_nothing)
2024 if (vect_print_dump_info (REPORT_DETAILS))
2025 fprintf (vect_dump, "op not supported by target.");
2026 /* Check only during analysis.  */
2027 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2028 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2029 < vect_min_worthwhile_factor (code)
2032 if (vect_print_dump_info (REPORT_DETAILS))
2033 fprintf (vect_dump, "proceeding using word mode.");
2036 /* Worthwhile without SIMD support? Check only during analysis.  */
2037 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2038 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2039 < vect_min_worthwhile_factor (code)
2042 if (vect_print_dump_info (REPORT_DETAILS))
2043 fprintf (vect_dump, "not worthwhile without SIMD support.");
2047 if (!vec_stmt) /* transformation not required.  */
2049 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2050 if (vect_print_dump_info (REPORT_DETAILS))
2051 fprintf (vect_dump, "=== vectorizable_operation ===");
2052 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transformation phase.  */
2058 if (vect_print_dump_info (REPORT_DETAILS))
2059 fprintf (vect_dump, "transform binary/unary operation.");
2062 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2064 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2065    created in the previous stages of the recursion, so no allocation is
2066    needed, except for the case of shift with scalar shift argument. In that
2067    case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2068    be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2069    In case of loop-based vectorization we allocate VECs of size 1. We
2070    allocate VEC_OPRNDS1 only in case of binary operation.  */
2073 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2074 if (op_type == binary_op)
2075 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2077 else if (scalar_shift_arg)
2078 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2080 /* In case the vectorization factor (VF) is bigger than the number
2081    of elements that we can fit in a vectype (nunits), we have to generate
2082    more than one vector stmt - i.e - we need to "unroll" the
2083    vector stmt by a factor VF/nunits. In doing so, we record a pointer
2084    from one copy of the vector stmt to the next, in the field
2085    STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2086    stages to find the correct vector defs to be used when vectorizing
2087    stmts that use the defs of the current stmt. The example below illustrates
2088    the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2089    4 vectorized stmts):
2091    before vectorization:
2092    RELATED_STMT VEC_STMT
2096    step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2098    RELATED_STMT VEC_STMT
2099    VS1_0: vx0 = memref0 VS1_1 -
2100    VS1_1: vx1 = memref1 VS1_2 -
2101    VS1_2: vx2 = memref2 VS1_3 -
2102    VS1_3: vx3 = memref3 - -
2103    S1: x = load - VS1_0
2106    step2: vectorize stmt S2 (done here):
2107    To vectorize stmt S2 we first need to find the relevant vector
2108    def for the first operand 'x'. This is, as usual, obtained from
2109    the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2110    that defines 'x' (S1). This way we find the stmt VS1_0, and the
2111    relevant vector def 'vx0'. Having found 'vx0' we can generate
2112    the vector stmt VS2_0, and as usual, record it in the
2113    STMT_VINFO_VEC_STMT of stmt S2.
2114    When creating the second copy (VS2_1), we obtain the relevant vector
2115    def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2116    stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2117    vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2118    pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2119    Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2120    chain of stmts and pointers:
2121    RELATED_STMT VEC_STMT
2122    VS1_0: vx0 = memref0 VS1_1 -
2123    VS1_1: vx1 = memref1 VS1_2 -
2124    VS1_2: vx2 = memref2 VS1_3 -
2125    VS1_3: vx3 = memref3 - -
2126    S1: x = load - VS1_0
2127    VS2_0: vz0 = vx0 + v1 VS2_1 -
2128    VS2_1: vz1 = vx1 + v1 VS2_2 -
2129    VS2_2: vz2 = vx2 + v1 VS2_3 -
2130    VS2_3: vz3 = vx3 + v1 - -
2131    S2: z = x + 1 - VS2_0 */
2133 prev_stmt_info = NULL;
2134 for (j = 0; j < ncopies; j++)
2139 if (op_type == binary_op && scalar_shift_arg)
2141 /* Vector shl and shr insn patterns can be defined with scalar
2142    operand 2 (shift operand). In this case, use constant or loop
2143    invariant op1 directly, without extending it to vector mode
2145 optab_op2_mode = insn_data[icode].operand[2].mode;
2146 if (!VECTOR_MODE_P (optab_op2_mode))
2148 if (vect_print_dump_info (REPORT_DETAILS))
2149 fprintf (vect_dump, "operand 1 using scalar mode.");
2151 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2154 /* Store vec_oprnd1 for every vector stmt to be created
2155    for SLP_NODE. We check during the analysis that all the
2156    shift arguments are the same.
2157    TODO: Allow different constants for different vector
2158    stmts generated for an SLP instance.  */
2159 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2160 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2165 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2166    (a special case for certain kind of vector shifts); otherwise,
2167    operand 1 should be of a vector type (the usual case).  */
2168 if (op_type == binary_op && !vec_oprnd1)
2169 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2172 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
/* Subsequent copies (j > 0): advance the vector defs.  */
2176 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2178 /* Arguments are ready. Create the new vector stmt.  */
2179 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2181 vop1 = ((op_type == binary_op)
2182 ? VEC_index (tree, vec_oprnds1, i) : NULL);
2183 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2184 new_temp = make_ssa_name (vec_dest, new_stmt);
2185 gimple_assign_set_lhs (new_stmt, new_temp);
2186 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2188 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Record first copy in VEC_STMT, chain the rest via RELATED_STMT.  */
2195 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2197 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2198 prev_stmt_info = vinfo_for_stmt (new_stmt);
2201 VEC_free (tree, heap, vec_oprnds0);
2203 VEC_free (tree, heap, vec_oprnds1);
2209 /* Get vectorized definitions for loop-based vectorization. For the first
2210    operand we call vect_get_vec_def_for_operand() (with OPRND containing
2211    scalar operand), and for the rest we get a copy with
2212    vect_get_vec_def_for_stmt_copy() using the previous vector definition
2213    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2214    The vectors are collected into VEC_OPRNDS.  */
2217 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2218 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2222 /* Get first vector operand.  */
2223 /* All the vector operands except the very first one (that is scalar oprnd)
2225 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2226 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2228 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2230 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2232 /* Get second vector operand.  */
2233 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2234 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2238 /* For conversion in multiple steps, continue to get operands
     recursively, decrementing MULTI_STEP_CVT until it reaches zero.  */
2241 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2245 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2246    For multi-step conversions store the resulting vectors and call the function
     recursively with MULTI_STEP_CVT decremented.  Each demotion stmt combines
     a pair of input vectors into one narrower output vector.  */
2250 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2251 int multi_step_cvt, gimple stmt,
2252 VEC (tree, heap) *vec_dsts,
2253 gimple_stmt_iterator *gsi,
2254 slp_tree slp_node, enum tree_code code,
2255 stmt_vec_info *prev_stmt_info)
2258 tree vop0, vop1, new_tmp, vec_dest;
2260 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* VEC_DSTS is a stack: pop the destination var for this demotion level.  */
2262 vec_dest = VEC_pop (tree, vec_dsts);
2264 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2266 /* Create demotion operation.  */
2267 vop0 = VEC_index (tree, *vec_oprnds, i);
2268 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2269 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2270 new_tmp = make_ssa_name (vec_dest, new_stmt);
2271 gimple_assign_set_lhs (new_stmt, new_tmp);
2272 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2275 /* Store the resulting vector for next recursive call.  */
2276 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2279 /* This is the last step of the conversion sequence. Store the
2280    vectors in SLP_NODE or in vector info of the scalar statement
2281    (or in STMT_VINFO_RELATED_STMT chain).  */
2283 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2286 if (!*prev_stmt_info)
2287 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2289 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2291 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2296 /* For multi-step demotion operations we first generate demotion operations
2297    from the source type to the intermediate types, and then combine the
2298    results (stored in VEC_OPRNDS) in demotion operation to the destination
2302 /* At each level of recursion we have have of the operands we had at the
2304 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2305 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2306 stmt, vec_dsts, gsi, slp_node,
2307 code, prev_stmt_info);
2312 /* Function vectorizable_type_demotion
2314 Check if STMT performs a binary or unary operation that involves
2315 type demotion, and if it can be vectorized.
2316 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2317 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2318 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2321 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2322 gimple *vec_stmt, slp_tree slp_node)
/* NOTE(review): several declarations, braces and early "return false" bodies
   are elided in this listing; comments describe the visible statements
   only.  */
2327 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2328 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2329 enum tree_code code, code1 = ERROR_MARK;
2332 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2333 stmt_vec_info prev_stmt_info;
2340 int multi_step_cvt = 0;
2341 VEC (tree, heap) *vec_oprnds0 = NULL;
2342 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2343 tree last_oprnd, intermediate_type;
/* Only relevant, loop-variant stmts are handled here.  */
2345 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2348 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2351 /* Is STMT a vectorizable type-demotion operation? */
2352 if (!is_gimple_assign (stmt))
2355 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2358 code = gimple_assign_rhs_code (stmt);
2359 if (!CONVERT_EXPR_CODE_P (code))
2362 op0 = gimple_assign_rhs1 (stmt);
2363 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2366 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2368 scalar_dest = gimple_assign_lhs (stmt);
2369 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2372 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* For a demotion the output vector must hold more (narrower) elements than
   the input vector.  */
2373 if (nunits_in >= nunits_out)
2376 /* Multiple types in SLP are handled by creating the appropriate number of
2377 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     the SLP case.  */
2382 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2384 gcc_assert (ncopies >= 1);
/* Support only integer-to-integer demotion and float-to-float conversion.  */
2386 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2387 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2388 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2389 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2390 && CONVERT_EXPR_CODE_P (code))))
2393 /* Check the operands of the operation. */
2394 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2396 if (vect_print_dump_info (REPORT_DETAILS))
2397 fprintf (vect_dump, "use not simple.");
2401 /* Supportable by target? */
2402 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2403 &multi_step_cvt, &interm_types))
2406 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2408 if (!vec_stmt) /* transformation not required. */
2410 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2411 if (vect_print_dump_info (REPORT_DETAILS))
2412 fprintf (vect_dump, "=== vectorizable_demotion ===");
2413 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transformation phase.  */
2418 if (vect_print_dump_info (REPORT_DETAILS))
2419 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2422 /* In case of multi-step demotion, we first generate demotion operations to
2423 the intermediate types, and then from those types to the final one.
2424 We create vector destinations for the intermediate type (TYPES) received
2425 from supportable_narrowing_operation, and store them in the correct order
2426 for future use in vect_create_vectorized_demotion_stmts(). */
2428 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2430 vec_dsts = VEC_alloc (tree, heap, 1);
2432 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2433 VEC_quick_push (tree, vec_dsts, vec_dest);
/* Walk the intermediate types innermost-last so VEC_DSTS pops in the order
   the recursive demotion expects.  */
2437 for (i = VEC_length (tree, interm_types) - 1;
2438 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2440 vec_dest = vect_create_destination_var (scalar_dest,
2442 VEC_quick_push (tree, vec_dsts, vec_dest);
2446 /* In case the vectorization factor (VF) is bigger than the number
2447 of elements that we can fit in a vectype (nunits), we have to generate
2448 more than one vector stmt - i.e - we need to "unroll" the
2449 vector stmt by a factor VF/nunits. */
2451 prev_stmt_info = NULL;
2452 for (j = 0; j < ncopies; j++)
/* Handle uses: SLP defs come from the SLP tree; otherwise collect
   2^MULTI_STEP_CVT loop-based defs per copy.  */
2456 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
2459 VEC_free (tree, heap, vec_oprnds0);
2460 vec_oprnds0 = VEC_alloc (tree, heap,
2461 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2462 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2463 vect_pow2 (multi_step_cvt) - 1);
2466 /* Arguments are ready. Create the new vector stmts.  Copy VEC_DSTS
     because the callee pops it destructively.  */
2467 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2468 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2469 multi_step_cvt, stmt, tmp_vec_dsts,
2470 gsi, slp_node, code1,
2474 VEC_free (tree, heap, vec_oprnds0);
2475 VEC_free (tree, heap, vec_dsts);
2476 VEC_free (tree, heap, tmp_vec_dsts);
2477 VEC_free (tree, heap, interm_types);
2479 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2484 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2485 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2486 the resulting vectors and call the function recursively. */
2489 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2490 VEC (tree, heap) **vec_oprnds1,
2491 int multi_step_cvt, gimple stmt,
2492 VEC (tree, heap) *vec_dsts,
2493 gimple_stmt_iterator *gsi,
2494 slp_tree slp_node, enum tree_code code1,
2495 enum tree_code code2, tree decl1,
2496 tree decl2, int op_type,
2497 stmt_vec_info *prev_stmt_info)
/* NOTE(review): the declaration of I and several braces/guards are elided
   from this listing.  */
2500 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2501 gimple new_stmt1, new_stmt2;
2502 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2503 VEC (tree, heap) *vec_tmp;
/* Destination for this promotion step; every input vector produces two
   widened result vectors, hence the *2 allocation.  */
2505 vec_dest = VEC_pop (tree, vec_dsts);
2506 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2508 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2510 if (op_type == binary_op)
2511 vop1 = VEC_index (tree, *vec_oprnds1, i);
2515 /* Generate the two halves of promotion operation. */
2516 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2517 op_type, vec_dest, gsi, stmt);
2518 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2519 op_type, vec_dest, gsi, stmt);
/* The generated halves may be either target-builtin calls or plain
   assignments; fetch the lhs accordingly.  */
2520 if (is_gimple_call (new_stmt1))
2522 new_tmp1 = gimple_call_lhs (new_stmt1);
2523 new_tmp2 = gimple_call_lhs (new_stmt2);
2527 new_tmp1 = gimple_assign_lhs (new_stmt1);
2528 new_tmp2 = gimple_assign_lhs (new_stmt2);
2533 /* Store the results for the recursive call. */
2534 VEC_quick_push (tree, vec_tmp, new_tmp1);
2535 VEC_quick_push (tree, vec_tmp, new_tmp2);
2539 /* Last step of promotion sequence - store the results. */
2542 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2543 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
/* Record both halves in the STMT_VINFO_VEC_STMT/RELATED_STMT chain.  */
2547 if (!*prev_stmt_info)
2548 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2550 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2552 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2553 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2554 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2561 /* For a multi-step promotion operation we call the
2562 function recursively for every stage. We start from the input type,
2563 create promotion operations to the intermediate types, and then
2564 create promotions to the output type. */
2565 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2566 VEC_free (tree, heap, vec_tmp);
/* NOTE(review): DECL2 is passed for both the DECL1 and DECL2 arguments in
   the recursive call below — looks like a typo for "decl1, decl2".
   Presumably harmless if the decls are unused (NULL) for multi-step
   conversions, but verify against supportable_widening_operation.  */
2567 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2568 multi_step_cvt - 1, stmt,
2569 vec_dsts, gsi, slp_node, code1,
2570 code2, decl2, decl2, op_type,
2576 /* Function vectorizable_type_promotion
2578 Check if STMT performs a binary or unary operation that involves
2579 type promotion, and if it can be vectorized.
2580 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2581 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2582 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2585 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2586 gimple *vec_stmt, slp_tree slp_node)
/* NOTE(review): several declarations, braces and early "return false" bodies
   are elided in this listing; comments describe the visible statements
   only.  */
2590 tree op0, op1 = NULL;
2591 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2592 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2593 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2594 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2595 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2599 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2600 stmt_vec_info prev_stmt_info;
2607 tree intermediate_type = NULL_TREE;
2608 int multi_step_cvt = 0;
2609 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2610 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
/* Only relevant, loop-variant stmts are handled here.  */
2612 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2615 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2618 /* Is STMT a vectorizable type-promotion operation? */
2619 if (!is_gimple_assign (stmt))
2622 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2625 code = gimple_assign_rhs_code (stmt);
2626 if (!CONVERT_EXPR_CODE_P (code)
2627 && code != WIDEN_MULT_EXPR)
2630 op0 = gimple_assign_rhs1 (stmt);
2631 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2634 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2636 scalar_dest = gimple_assign_lhs (stmt);
2637 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2640 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* For a promotion the output vector must hold fewer (wider) elements than
   the input vector.  */
2641 if (nunits_in <= nunits_out)
2644 /* Multiple types in SLP are handled by creating the appropriate number of
2645 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     the SLP case.  */
2650 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2652 gcc_assert (ncopies >= 1);
/* Support only integer-to-integer promotion and float-to-float
   conversion.  */
2654 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2655 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2656 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2657 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2658 && CONVERT_EXPR_CODE_P (code))))
2661 /* Check the operands of the operation. */
2662 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2664 if (vect_print_dump_info (REPORT_DETAILS))
2665 fprintf (vect_dump, "use not simple.");
/* For a binary widening op (e.g. WIDEN_MULT_EXPR) also validate the
   second operand.  */
2669 op_type = TREE_CODE_LENGTH (code);
2670 if (op_type == binary_op)
2672 op1 = gimple_assign_rhs2 (stmt);
2673 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
2675 if (vect_print_dump_info (REPORT_DETAILS))
2676 fprintf (vect_dump, "use not simple.");
2681 /* Supportable by target? */
2682 if (!supportable_widening_operation (code, stmt, vectype_in,
2683 &decl1, &decl2, &code1, &code2,
2684 &multi_step_cvt, &interm_types))
2687 /* Binary widening operation can only be supported directly by the
     target (no multi-step form).  */
2689 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2691 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2693 if (!vec_stmt) /* transformation not required. */
2695 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2696 if (vect_print_dump_info (REPORT_DETAILS))
2697 fprintf (vect_dump, "=== vectorizable_promotion ===");
/* Each copy produces two halves, hence the 2*ncopies cost.  */
2698 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
/* Transformation phase.  */
2704 if (vect_print_dump_info (REPORT_DETAILS))
2705 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2709 /* In case of multi-step promotion, we first generate promotion operations
2710 to the intermediate types, and then from those types to the final one.
2711 We store vector destination in VEC_DSTS in the correct order for
2712 recursive creation of promotion operations in
2713 vect_create_vectorized_promotion_stmts(). Vector destinations are created
2714 according to TYPES received from supportable_widening_operation(). */
2716 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2718 vec_dsts = VEC_alloc (tree, heap, 1);
2720 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2721 VEC_quick_push (tree, vec_dsts, vec_dest);
/* Walk intermediate types innermost-last so VEC_DSTS pops in the order the
   recursive promotion expects.  */
2725 for (i = VEC_length (tree, interm_types) - 1;
2726 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2728 vec_dest = vect_create_destination_var (scalar_dest,
2730 VEC_quick_push (tree, vec_dsts, vec_dest);
2736 vec_oprnds0 = VEC_alloc (tree, heap,
2737 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2738 if (op_type == binary_op)
2739 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2742 /* In case the vectorization factor (VF) is bigger than the number
2743 of elements that we can fit in a vectype (nunits), we have to generate
2744 more than one vector stmt - i.e - we need to "unroll" the
2745 vector stmt by a factor VF/nunits. */
2747 prev_stmt_info = NULL;
2748 for (j = 0; j < ncopies; j++)
/* Handle uses: on the first copy, get defs for the operands; on later
   copies, derive them from the previous copy's defs.  */
2754 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
2757 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2758 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2759 if (op_type == binary_op)
2761 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2762 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2768 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2769 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2770 if (op_type == binary_op)
2772 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2773 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2777 /* Arguments are ready. Create the new vector stmts.  Copy VEC_DSTS
     because the callee pops it destructively.  */
2778 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2779 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2780 multi_step_cvt, stmt,
2782 gsi, slp_node, code1, code2,
2783 decl1, decl2, op_type,
2787 VEC_free (tree, heap, vec_dsts);
2788 VEC_free (tree, heap, tmp_vec_dsts);
2789 VEC_free (tree, heap, interm_types);
2790 VEC_free (tree, heap, vec_oprnds0);
2791 VEC_free (tree, heap, vec_oprnds1);
2793 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2798 /* Function vectorizable_store.
2800 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
     can be vectorized.
2802 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2803 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2804 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2807 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
/* NOTE(review): this listing elides many lines (declarations, braces, early
   returns, and some guards); comments describe only the visible
   statements.  */
2813 tree vec_oprnd = NULL_TREE;
2814 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2815 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2816 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2817 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2818 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2819 enum machine_mode vec_mode;
2821 enum dr_alignment_support alignment_support_scheme;
2824 enum vect_def_type dt;
2825 stmt_vec_info prev_stmt_info = NULL;
2826 tree dataref_ptr = NULL_TREE;
2827 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2830 gimple next_stmt, first_stmt = NULL;
2831 bool strided_store = false;
2832 unsigned int group_size, i;
2833 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2835 VEC(tree,heap) *vec_oprnds = NULL;
2836 bool slp = (slp_node != NULL);
2837 stmt_vec_info first_stmt_vinfo;
2838 unsigned int vec_num;
2840 /* Multiple types in SLP are handled by creating the appropriate number of
2841 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     the SLP case.  */
2846 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2848 gcc_assert (ncopies >= 1);
2850 /* FORNOW. This restriction should be relaxed. */
2851 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2853 if (vect_print_dump_info (REPORT_DETAILS))
2854 fprintf (vect_dump, "multiple types in nested loop.");
2858 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2861 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2864 /* Is vectorizable store? */
2866 if (!is_gimple_assign (stmt))
2869 scalar_dest = gimple_assign_lhs (stmt);
2870 if (TREE_CODE (scalar_dest) != ARRAY_REF
2871 && TREE_CODE (scalar_dest) != INDIRECT_REF
2872 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
2875 gcc_assert (gimple_assign_single_p (stmt));
2876 op = gimple_assign_rhs1 (stmt);
2877 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2879 if (vect_print_dump_info (REPORT_DETAILS))
2880 fprintf (vect_dump, "use not simple.");
2884 /* The scalar rhs type needs to be trivially convertible to the vector
2885 component type. This should always be the case. */
2886 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
2888 if (vect_print_dump_info (REPORT_DETAILS))
2889 fprintf (vect_dump, "??? operands of different types");
2893 vec_mode = TYPE_MODE (vectype);
2894 /* FORNOW. In some cases can vectorize even if data-type not supported
2895 (e.g. - array initialization with 0). */
2896 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
2899 if (!STMT_VINFO_DATA_REF (stmt_info))
2902 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
2904 strided_store = true;
2905 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2906 if (!vect_strided_store_supported (vectype)
2907 && !PURE_SLP_STMT (stmt_info) && !slp)
2910 if (first_stmt == stmt)
2912 /* STMT is the leader of the group. Check the operands of all the
2913 stmts of the group. */
2914 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
2917 gcc_assert (gimple_assign_single_p (next_stmt));
2918 op = gimple_assign_rhs1 (next_stmt);
2919 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2921 if (vect_print_dump_info (REPORT_DETAILS))
2922 fprintf (vect_dump, "use not simple.");
2925 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2930 if (!vec_stmt) /* transformation not required. */
2932 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2933 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
/* Transformation phase.  */
2941 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2942 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2944 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2947 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
2949 /* We vectorize all the stmts of the interleaving group when we
2950 reach the last stmt in the group. */
2951 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2952 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2960 strided_store = false;
2962 /* VEC_NUM is the number of vect stmts to be created for this group. */
2964 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2966 vec_num = group_size;
/* Non-strided case: a single store per copy.  */
2972 group_size = vec_num = 1;
2973 first_stmt_vinfo = stmt_info;
2976 if (vect_print_dump_info (REPORT_DETAILS))
2977 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
2979 dr_chain = VEC_alloc (tree, heap, group_size);
2980 oprnds = VEC_alloc (tree, heap, group_size);
2982 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
2983 gcc_assert (alignment_support_scheme);
2984 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
2986 /* In case the vectorization factor (VF) is bigger than the number
2987 of elements that we can fit in a vectype (nunits), we have to generate
2988 more than one vector stmt - i.e - we need to "unroll" the
2989 vector stmt by a factor VF/nunits. For more details see documentation in
2990 vect_get_vec_def_for_copy_stmt. */
2992 /* In case of interleaving (non-unit strided access):
2999 We create vectorized stores starting from base address (the access of the
3000 first stmt in the chain (S2 in the above example), when the last store stmt
3001 of the chain (S4) is reached:
3004 VS2: &base + vec_size*1 = vx0
3005 VS3: &base + vec_size*2 = vx1
3006 VS4: &base + vec_size*3 = vx3
3008 Then permutation statements are generated:
3010 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3011 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3014 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3015 (the order of the data-refs in the output of vect_permute_store_chain
3016 corresponds to the order of scalar stmts in the interleaving chain - see
3017 the documentation of vect_permute_store_chain()).
3019 In case of both multiple types and interleaving, above vector stores and
3020 permutation stmts are created for every copy. The result vector stmts are
3021 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3022 STMT_VINFO_RELATED_STMT for the next copies.
3025 prev_stmt_info = NULL;
3026 for (j = 0; j < ncopies; j++)
3035 /* Get vectorized arguments for SLP_NODE. */
3036 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
3038 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3042 /* For interleaved stores we collect vectorized defs for all the
3043 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3044 used as an input to vect_permute_store_chain(), and OPRNDS as
3045 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3047 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3048 OPRNDS are of size 1. */
3049 next_stmt = first_stmt;
3050 for (i = 0; i < group_size; i++)
3052 /* Since gaps are not supported for interleaved stores,
3053 GROUP_SIZE is the exact number of stmts in the chain.
3054 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3055 there is no interleaving, GROUP_SIZE is 1, and only one
3056 iteration of the loop will be executed. */
3057 gcc_assert (next_stmt
3058 && gimple_assign_single_p (next_stmt));
3059 op = gimple_assign_rhs1 (next_stmt);
3061 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3063 VEC_quick_push(tree, dr_chain, vec_oprnd);
3064 VEC_quick_push(tree, oprnds, vec_oprnd);
3065 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3069 /* We should have caught mismatched types earlier. */
3070 gcc_assert (useless_type_conversion_p (vectype,
3071 TREE_TYPE (vec_oprnd)));
3072 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3073 &dummy, &ptr_incr, false,
3075 gcc_assert (!inv_p);
3079 /* For interleaved stores we created vectorized defs for all the
3080 defs stored in OPRNDS in the previous iteration (previous copy).
3081 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3082 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3084 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3085 OPRNDS are of size 1. */
3086 for (i = 0; i < group_size; i++)
3088 op = VEC_index (tree, oprnds, i);
3089 vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
3090 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3091 VEC_replace(tree, dr_chain, i, vec_oprnd);
3092 VEC_replace(tree, oprnds, i, vec_oprnd);
3095 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
/* Interleave the group's vectors before storing.  */
3100 result_chain = VEC_alloc (tree, heap, group_size);
3102 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3107 next_stmt = first_stmt;
3108 for (i = 0; i < vec_num; i++)
3111 /* Bump the vector pointer. */
3112 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3116 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3117 else if (strided_store)
3118 /* For strided stores vectorized defs are interleaved in
3119 vect_permute_store_chain(). */
3120 vec_oprnd = VEC_index (tree, result_chain, i);
3122 data_ref = build_fold_indirect_ref (dataref_ptr);
3124 /* Arguments are ready. Create the new vector stmt. */
3125 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3126 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3127 mark_symbols_for_renaming (new_stmt);
/* Chain the copies via STMT_VINFO_VEC_STMT/RELATED_STMT.  */
3133 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3135 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3137 prev_stmt_info = vinfo_for_stmt (new_stmt);
3138 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3144 VEC_free (tree, heap, dr_chain);
3145 VEC_free (tree, heap, oprnds);
3147 VEC_free (tree, heap, result_chain);
3152 /* vectorizable_load.
3154 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3156 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3157 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3158 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3161 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3162 slp_tree slp_node, slp_instance slp_node_instance)
3165 tree vec_dest = NULL;
3166 tree data_ref = NULL;
3167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3168 stmt_vec_info prev_stmt_info;
3169 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3170 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3171 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3172 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3173 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3174 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3177 gimple new_stmt = NULL;
3179 enum dr_alignment_support alignment_support_scheme;
3180 tree dataref_ptr = NULL_TREE;
3182 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3184 int i, j, group_size;
3185 tree msq = NULL_TREE, lsq;
3186 tree offset = NULL_TREE;
3187 tree realignment_token = NULL_TREE;
3189 VEC(tree,heap) *dr_chain = NULL;
3190 bool strided_load = false;
3194 bool compute_in_loop = false;
3195 struct loop *at_loop;
3197 bool slp = (slp_node != NULL);
3198 bool slp_perm = false;
3199 enum tree_code code;
3201 /* Multiple types in SLP are handled by creating the appropriate number of
3202 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3207 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3209 gcc_assert (ncopies >= 1);
3211 /* FORNOW. This restriction should be relaxed. */
3212 if (nested_in_vect_loop && ncopies > 1)
3214 if (vect_print_dump_info (REPORT_DETAILS))
3215 fprintf (vect_dump, "multiple types in nested loop.");
3219 if (slp && SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3222 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3225 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3228 /* Is vectorizable load? */
3229 if (!is_gimple_assign (stmt))
3232 scalar_dest = gimple_assign_lhs (stmt);
3233 if (TREE_CODE (scalar_dest) != SSA_NAME)
3236 code = gimple_assign_rhs_code (stmt);
3237 if (code != ARRAY_REF
3238 && code != INDIRECT_REF
3239 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
3242 if (!STMT_VINFO_DATA_REF (stmt_info))
3245 scalar_type = TREE_TYPE (DR_REF (dr));
3246 mode = (int) TYPE_MODE (vectype);
3248 /* FORNOW. In some cases can vectorize even if data-type not supported
3249 (e.g. - data copies). */
3250 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3252 if (vect_print_dump_info (REPORT_DETAILS))
3253 fprintf (vect_dump, "Aligned load, but unsupported type.");
3257 /* The vector component type needs to be trivially convertible to the
3258 scalar lhs. This should always be the case. */
3259 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3261 if (vect_print_dump_info (REPORT_DETAILS))
3262 fprintf (vect_dump, "??? operands of different types");
3266 /* Check if the load is a part of an interleaving chain. */
3267 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3269 strided_load = true;
3271 gcc_assert (! nested_in_vect_loop);
3273 /* Check if interleaving is supported. */
3274 if (!vect_strided_load_supported (vectype)
3275 && !PURE_SLP_STMT (stmt_info) && !slp)
3279 if (!vec_stmt) /* transformation not required. */
3281 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3282 vect_model_load_cost (stmt_info, ncopies, NULL);
3286 if (vect_print_dump_info (REPORT_DETAILS))
3287 fprintf (vect_dump, "transform load.");
3293 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3294 /* Check if the chain of loads is already vectorized. */
3295 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3297 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3300 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3301 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3303 /* VEC_NUM is the number of vect stmts to be created for this group. */
3306 strided_load = false;
3307 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3310 vec_num = group_size;
3312 dr_chain = VEC_alloc (tree, heap, vec_num);
3318 group_size = vec_num = 1;
3321 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3322 gcc_assert (alignment_support_scheme);
3324 /* In case the vectorization factor (VF) is bigger than the number
3325 of elements that we can fit in a vectype (nunits), we have to generate
3326 more than one vector stmt - i.e - we need to "unroll" the
3327 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3328 from one copy of the vector stmt to the next, in the field
3329 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3330 stages to find the correct vector defs to be used when vectorizing
3331 stmts that use the defs of the current stmt. The example below illustrates
3332 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3333 4 vectorized stmts):
3335 before vectorization:
3336 RELATED_STMT VEC_STMT
3340 step 1: vectorize stmt S1:
3341 We first create the vector stmt VS1_0, and, as usual, record a
3342 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3343 Next, we create the vector stmt VS1_1, and record a pointer to
3344 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3345 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3347 RELATED_STMT VEC_STMT
3348 VS1_0: vx0 = memref0 VS1_1 -
3349 VS1_1: vx1 = memref1 VS1_2 -
3350 VS1_2: vx2 = memref2 VS1_3 -
3351 VS1_3: vx3 = memref3 - -
3352 S1: x = load - VS1_0
3355 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3356 information we recorded in RELATED_STMT field is used to vectorize
3359 /* In case of interleaving (non-unit strided access):
3366 Vectorized loads are created in the order of memory accesses
3367 starting from the access of the first stmt of the chain:
3370 VS2: vx1 = &base + vec_size*1
3371 VS3: vx3 = &base + vec_size*2
3372 VS4: vx4 = &base + vec_size*3
3374 Then permutation statements are generated:
3376 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3377 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3380 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3381 (the order of the data-refs in the output of vect_permute_load_chain
3382 corresponds to the order of scalar stmts in the interleaving chain - see
3383 the documentation of vect_permute_load_chain()).
3384 The generation of permutation stmts and recording them in
3385 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3387 In case of both multiple types and interleaving, the vector loads and
3388 permutation stmts above are created for every copy. The result vector stmts
3389 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3390 STMT_VINFO_RELATED_STMT for the next copies. */
3392 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3393 on a target that supports unaligned accesses (dr_unaligned_supported)
3394 we generate the following code:
3398 p = p + indx * vectype_size;
3403 Otherwise, the data reference is potentially unaligned on a target that
3404 does not support unaligned accesses (dr_explicit_realign_optimized) -
3405 then generate the following code, in which the data in each iteration is
3406 obtained by two vector loads, one from the previous iteration, and one
3407 from the current iteration:
3409 msq_init = *(floor(p1))
3410 p2 = initial_addr + VS - 1;
3411 realignment_token = call target_builtin;
3414 p2 = p2 + indx * vectype_size
3416 vec_dest = realign_load (msq, lsq, realignment_token)
3421 /* If the misalignment remains the same throughout the execution of the
3422 loop, we can create the init_addr and permutation mask at the loop
3423 preheader. Otherwise, it needs to be created inside the loop.
3424 This can only occur when vectorizing memory accesses in the inner-loop
3425 nested within an outer-loop that is being vectorized. */
3427 if (nested_in_vect_loop_p (loop, stmt)
3428 && (TREE_INT_CST_LOW (DR_STEP (dr))
3429 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3431 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3432 compute_in_loop = true;
3435 if ((alignment_support_scheme == dr_explicit_realign_optimized
3436 || alignment_support_scheme == dr_explicit_realign)
3437 && !compute_in_loop)
3439 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3440 alignment_support_scheme, NULL_TREE,
3442 if (alignment_support_scheme == dr_explicit_realign_optimized)
3444 phi = SSA_NAME_DEF_STMT (msq);
3445 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3451 prev_stmt_info = NULL;
3452 for (j = 0; j < ncopies; j++)
3454 /* 1. Create the vector pointer update chain. */
3456 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3458 &dummy, &ptr_incr, false,
3462 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3464 for (i = 0; i < vec_num; i++)
3467 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3470 /* 2. Create the vector-load in the loop. */
3471 switch (alignment_support_scheme)
3474 gcc_assert (aligned_access_p (first_dr));
3475 data_ref = build_fold_indirect_ref (dataref_ptr);
3477 case dr_unaligned_supported:
3479 int mis = DR_MISALIGNMENT (first_dr);
3480 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3482 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
3484 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3487 case dr_explicit_realign:
3490 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3492 if (compute_in_loop)
3493 msq = vect_setup_realignment (first_stmt, gsi,
3495 dr_explicit_realign,
3498 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3499 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3500 new_stmt = gimple_build_assign (vec_dest, data_ref);
3501 new_temp = make_ssa_name (vec_dest, new_stmt);
3502 gimple_assign_set_lhs (new_stmt, new_temp);
3503 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3504 copy_virtual_operands (new_stmt, stmt);
3505 mark_symbols_for_renaming (new_stmt);
3508 bump = size_binop (MULT_EXPR, vs_minus_1,
3509 TYPE_SIZE_UNIT (scalar_type));
3510 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3511 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3514 case dr_explicit_realign_optimized:
3515 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3520 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3521 new_stmt = gimple_build_assign (vec_dest, data_ref);
3522 new_temp = make_ssa_name (vec_dest, new_stmt);
3523 gimple_assign_set_lhs (new_stmt, new_temp);
3524 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3525 mark_symbols_for_renaming (new_stmt);
3527 /* 3. Handle explicit realignment if necessary/supported. Create in
3528 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3529 if (alignment_support_scheme == dr_explicit_realign_optimized
3530 || alignment_support_scheme == dr_explicit_realign)
3534 lsq = gimple_assign_lhs (new_stmt);
3535 if (!realignment_token)
3536 realignment_token = dataref_ptr;
3537 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3538 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3540 new_stmt = gimple_build_assign (vec_dest, tmp);
3541 new_temp = make_ssa_name (vec_dest, new_stmt);
3542 gimple_assign_set_lhs (new_stmt, new_temp);
3543 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3545 if (alignment_support_scheme == dr_explicit_realign_optimized)
3548 if (i == vec_num - 1 && j == ncopies - 1)
3549 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
3554 /* 4. Handle invariant-load. */
3557 gcc_assert (!strided_load);
3558 gcc_assert (nested_in_vect_loop_p (loop, stmt));
3563 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3565 /* CHECKME: bitpos depends on endianess? */
3566 bitpos = bitsize_zero_node;
3567 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3570 vect_create_destination_var (scalar_dest, NULL_TREE);
3571 new_stmt = gimple_build_assign (vec_dest, vec_inv);
3572 new_temp = make_ssa_name (vec_dest, new_stmt);
3573 gimple_assign_set_lhs (new_stmt, new_temp);
3574 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3576 for (k = nunits - 1; k >= 0; --k)
3577 t = tree_cons (NULL_TREE, new_temp, t);
3578 /* FIXME: use build_constructor directly. */
3579 vec_inv = build_constructor_from_list (vectype, t);
3580 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3581 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3584 gcc_unreachable (); /* FORNOW. */
3587 /* Collect vector loads and later create their permutation in
3588 vect_transform_strided_load (). */
3589 if (strided_load || slp_perm)
3590 VEC_quick_push (tree, dr_chain, new_temp);
3592 /* Store vector loads in the corresponding SLP_NODE. */
3593 if (slp && !slp_perm)
3594 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3597 if (slp && !slp_perm)
3602 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi,
3603 LOOP_VINFO_VECT_FACTOR (loop_vinfo),
3604 slp_node_instance, false))
3606 VEC_free (tree, heap, dr_chain);
3614 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3617 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3618 VEC_free (tree, heap, dr_chain);
3619 dr_chain = VEC_alloc (tree, heap, group_size);
3624 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3626 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3627 prev_stmt_info = vinfo_for_stmt (new_stmt);
3633 VEC_free (tree, heap, dr_chain);
3638 /* Function vect_is_simple_cond.
3641 LOOP - the loop that is being vectorized.
3642 COND - Condition that is checked for simple use.
3644 Returns whether a COND can be vectorized. Checks whether
3645 condition operands are supportable using vec_is_simple_use. */
/* NOTE(review): this extract is incomplete -- the embedded original line
   numbers are non-contiguous, so braces/returns are elided below.  */
3648 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3652 enum vect_def_type dt;
/* Only trees of the comparison class (e.g. LT_EXPR, EQ_EXPR) qualify.  */
3654 if (!COMPARISON_CLASS_P (cond))
3657 lhs = TREE_OPERAND (cond, 0);
3658 rhs = TREE_OPERAND (cond, 1);
/* Each comparison operand must be either an SSA name whose definition
   is a "simple use" for the vectorizer, or a scalar constant
   (integer, real, or fixed-point).  */
3660 if (TREE_CODE (lhs) == SSA_NAME)
3662 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3663 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
3666 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3667 && TREE_CODE (lhs) != FIXED_CST)
/* Same checks for the right-hand operand.  */
3670 if (TREE_CODE (rhs) == SSA_NAME)
3672 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3673 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
3676 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
3677 && TREE_CODE (rhs) != FIXED_CST)
3683 /* vectorizable_condition.
3685 Check if STMT is conditional modify expression that can be vectorized.
3686 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3687 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3690 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extract is decimated (embedded line numbers skip ranges);
   the parameter list and several early-exit returns are elided.  */
3693 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3696 tree scalar_dest = NULL_TREE;
3697 tree vec_dest = NULL_TREE;
3698 tree op = NULL_TREE;
3699 tree cond_expr, then_clause, else_clause;
3700 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3701 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3702 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3703 tree vec_compare, vec_cond_expr;
3705 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3706 enum machine_mode vec_mode;
3708 enum vect_def_type dt;
/* ncopies: how many vector stmts are needed to cover one scalar stmt
   at the loop's vectorization factor.  */
3709 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3710 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3711 enum tree_code code;
3713 gcc_assert (ncopies >= 1);
3715 return false; /* FORNOW */
3717 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3720 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3723 /* FORNOW: SLP not supported. */
3724 if (STMT_SLP_TYPE (stmt_info))
3727 /* FORNOW: not yet supported. */
3728 if (STMT_VINFO_LIVE_P (stmt_info))
3730 if (vect_print_dump_info (REPORT_DETAILS))
3731 fprintf (vect_dump, "value used after loop.")
3735 /* Is vectorizable conditional operation? */
3736 if (!is_gimple_assign (stmt))
3739 code = gimple_assign_rhs_code (stmt);
3741 if (code != COND_EXPR)
/* Decompose "lhs = cond ? then : else" into its three operands.  */
3744 gcc_assert (gimple_assign_single_p (stmt));
3745 op = gimple_assign_rhs1 (stmt);
3746 cond_expr = TREE_OPERAND (op, 0);
3747 then_clause = TREE_OPERAND (op, 1);
3748 else_clause = TREE_OPERAND (op, 2);
3750 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3753 /* We do not handle two different vector types for the condition
3755 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
/* THEN/ELSE clauses must each be a simple-use SSA name or a constant.  */
3758 if (TREE_CODE (then_clause) == SSA_NAME)
3760 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3761 if (!vect_is_simple_use (then_clause, loop_vinfo,
3762 &then_def_stmt, &def, &dt))
3765 else if (TREE_CODE (then_clause) != INTEGER_CST
3766 && TREE_CODE (then_clause) != REAL_CST
3767 && TREE_CODE (then_clause) != FIXED_CST)
3770 if (TREE_CODE (else_clause) == SSA_NAME)
3772 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3773 if (!vect_is_simple_use (else_clause, loop_vinfo,
3774 &else_def_stmt, &def, &dt))
3777 else if (TREE_CODE (else_clause) != INTEGER_CST
3778 && TREE_CODE (else_clause) != REAL_CST
3779 && TREE_CODE (else_clause) != FIXED_CST)
/* Analysis phase: record the stmt type and ask the target whether it
   can expand a VEC_COND_EXPR in this mode.  */
3783 vec_mode = TYPE_MODE (vectype);
3787 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3788 return expand_vec_cond_expr_p (op, vec_mode);
/* Transformation phase: build the vectorized VEC_COND_EXPR.  */
3794 scalar_dest = gimple_assign_lhs (stmt);
3795 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3797 /* Handle cond expr. */
3799 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3801 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3802 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3803 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3805 /* Arguments are ready. Create the new vector stmt. */
3806 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3807 vec_cond_lhs, vec_cond_rhs);
3808 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3809 vec_compare, vec_then_clause, vec_else_clause);
3811 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
3812 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3813 gimple_assign_set_lhs (*vec_stmt, new_temp);
3814 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3820 /* Function vect_analyze_operations.
3822 Scan the loop stmts and make sure they are all vectorizable. */
/* NOTE(review): decimated extract -- embedded original line numbers skip
   ranges, so braces, returns, and some dump lines are elided.  */
3825 vect_analyze_operations (loop_vec_info loop_vinfo)
3827 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3828 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3829 int nbbs = loop->num_nodes;
3830 gimple_stmt_iterator si;
3831 unsigned int vectorization_factor = 0;
3835 stmt_vec_info stmt_info;
3836 bool need_to_vectorize = false;
3837 int min_profitable_iters;
3838 int min_scalar_loop_bound;
3840 bool only_slp_in_loop = true;
3842 if (vect_print_dump_info (REPORT_DETAILS))
3843 fprintf (vect_dump, "=== vect_analyze_operations ===");
3845 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
3846 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Pass 1: walk every basic block of the loop; first the PHIs, then the
   ordinary stmts, checking each for vectorizability.  */
3848 for (i = 0; i < nbbs; i++)
3850 basic_block bb = bbs[i];
3852 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
3854 phi = gsi_stmt (si);
3857 stmt_info = vinfo_for_stmt (phi);
3858 if (vect_print_dump_info (REPORT_DETAILS))
3860 fprintf (vect_dump, "examining phi: ");
3861 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3864 if (! is_loop_header_bb_p (bb))
3866 /* inner-loop loop-closed exit phi in outer-loop vectorization
3867 (i.e. a phi in the tail of the outer-loop).
3868 FORNOW: we currently don't support the case that these phis
3869 are not used in the outerloop, cause this case requires
3870 to actually do something here. */
3871 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3872 || STMT_VINFO_LIVE_P (stmt_info))
3874 if (vect_print_dump_info (REPORT_DETAILS))
3876 "Unsupported loop-closed phi in outer-loop.");
3882 gcc_assert (stmt_info);
3884 if (STMT_VINFO_LIVE_P (stmt_info))
3886 /* FORNOW: not yet supported. */
3887 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3888 fprintf (vect_dump, "not vectorized: value used after loop.");
/* A relevant in-loop phi that is not an induction implies a scalar
   cycle the vectorizer cannot handle.  */
3892 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
3893 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
3895 /* A scalar-dependence cycle that we don't support. */
3896 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3897 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
3901 if (STMT_VINFO_RELEVANT_P (stmt_info))
3903 need_to_vectorize = true;
3904 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
3905 ok = vectorizable_induction (phi, NULL, NULL);
3910 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3913 "not vectorized: relevant phi not supported: ");
3914 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
/* Now the non-phi stmts of this block.  */
3920 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3922 gimple stmt = gsi_stmt (si);
3923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3924 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
3926 if (vect_print_dump_info (REPORT_DETAILS))
3928 fprintf (vect_dump, "==> examining statement: ");
3929 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3932 gcc_assert (stmt_info);
3934 /* skip stmts which do not need to be vectorized.
3935 this is expected to include:
3936 - the COND_EXPR which is the loop exit condition
3937 - any LABEL_EXPRs in the loop
3938 - computations that are used only for array indexing or loop
3941 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3942 && !STMT_VINFO_LIVE_P (stmt_info))
3944 if (vect_print_dump_info (REPORT_DETAILS))
3945 fprintf (vect_dump, "irrelevant.");
/* Sanity-check the def-type against the recorded relevance.  */
3949 switch (STMT_VINFO_DEF_TYPE (stmt_info))
3954 case vect_reduction_def:
3955 gcc_assert (relevance == vect_used_in_outer
3956 || relevance == vect_used_in_outer_by_reduction
3957 || relevance == vect_unused_in_loop);
3960 case vect_induction_def:
3961 case vect_constant_def:
3962 case vect_invariant_def:
3963 case vect_unknown_def_type:
3968 if (STMT_VINFO_RELEVANT_P (stmt_info))
3970 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
3971 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
3972 need_to_vectorize = true;
/* Try each vectorizable_* analysis routine in turn; any one
   succeeding means the stmt is supportable.  */
3976 if (STMT_VINFO_RELEVANT_P (stmt_info)
3977 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
3978 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
3979 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
3980 || vectorizable_conversion (stmt, NULL, NULL, NULL)
3981 || vectorizable_operation (stmt, NULL, NULL, NULL)
3982 || vectorizable_assignment (stmt, NULL, NULL, NULL)
3983 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
3984 || vectorizable_call (stmt, NULL, NULL)
3985 || vectorizable_store (stmt, NULL, NULL, NULL)
3986 || vectorizable_condition (stmt, NULL, NULL)
3987 || vectorizable_reduction (stmt, NULL, NULL));
3991 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3993 fprintf (vect_dump, "not vectorized: relevant stmt not ");
3994 fprintf (vect_dump, "supported: ");
3995 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4000 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
4001 need extra handling, except for vectorizable reductions. */
4002 if (STMT_VINFO_LIVE_P (stmt_info)
4003 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4004 ok = vectorizable_live_operation (stmt, NULL, NULL);
4008 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4010 fprintf (vect_dump, "not vectorized: live stmt not ");
4011 fprintf (vect_dump, "supported: ");
4012 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4017 if (!PURE_SLP_STMT (stmt_info))
4019 /* STMT needs loop-based vectorization. */
4020 only_slp_in_loop = false;
4022 /* Groups of strided accesses whose size is not a power of 2 are
4023 not vectorizable yet using loop-vectorization. Therefore, if
4024 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
4025 both SLPed and loop-based vectorized), the loop cannot be
4027 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4028 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4029 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4031 if (vect_print_dump_info (REPORT_DETAILS))
4033 fprintf (vect_dump, "not vectorized: the size of group "
4034 "of strided accesses is not a power of 2");
4035 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4043 /* All operations in the loop are either irrelevant (deal with loop
4044 control, or dead), or only used outside the loop and can be moved
4045 out of the loop (e.g. invariants, inductions). The loop can be
4046 optimized away by scalar optimizations. We're better off not
4047 touching this loop. */
4048 if (!need_to_vectorize)
4050 if (vect_print_dump_info (REPORT_DETAILS))
4052 "All the computation can be taken out of the loop.");
4053 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4055 "not vectorized: redundant loop. no profit to vectorize.");
4059 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
4060 vectorization factor of the loop is the unrolling factor required by the
4061 SLP instances. If that unrolling factor is 1, we say, that we perform
4062 pure SLP on loop - cross iteration parallelism is not exploited. */
4063 if (only_slp_in_loop)
4064 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
4066 vectorization_factor = least_common_multiple (vectorization_factor,
4067 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
4069 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
4071 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4072 && vect_print_dump_info (REPORT_DETAILS))
4074 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
4075 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* Too few iterations to fill even one vector: give up.  */
4077 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4078 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
4080 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4081 fprintf (vect_dump, "not vectorized: iteration count too small.");
4082 if (vect_print_dump_info (REPORT_DETAILS))
4083 fprintf (vect_dump,"not vectorized: iteration count smaller than "
4084 "vectorization factor.");
4088 /* Analyze cost. Decide if worth while to vectorize. */
4090 /* Once VF is set, SLP costs should be updated since the number of created
4091 vector stmts depends on VF. */
4092 vect_update_slp_costs_according_to_vf (loop_vinfo);
4094 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
4095 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
/* A negative estimate means the vector version never wins.  */
4097 if (min_profitable_iters < 0)
4099 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4100 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
4101 if (vect_print_dump_info (REPORT_DETAILS))
4102 fprintf (vect_dump, "not vectorized: vector version will never be "
4107 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
4108 * vectorization_factor) - 1);
4110 /* Use the cost model only if it is more conservative than user specified
4113 th = (unsigned) min_scalar_loop_bound;
4114 if (min_profitable_iters
4115 && (!min_scalar_loop_bound
4116 || min_profitable_iters > min_scalar_loop_bound))
4117 th = (unsigned) min_profitable_iters;
4119 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4120 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
4122 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4123 fprintf (vect_dump, "not vectorized: vectorization not "
4125 if (vect_print_dump_info (REPORT_DETAILS))
4126 fprintf (vect_dump, "not vectorized: iteration count smaller than "
4127 "user specified loop bound parameter or minimum "
4128 "profitable iterations (whichever is more conservative).");
/* An epilog loop is needed if niters is unknown, not a multiple of VF,
   or if peeling for alignment is planned; verify we can create one.  */
4132 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4133 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
4134 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
4136 if (vect_print_dump_info (REPORT_DETAILS))
4137 fprintf (vect_dump, "epilog loop required.");
4138 if (!vect_can_advance_ivs_p (loop_vinfo))
4140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4142 "not vectorized: can't create epilog loop 1.");
4145 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
4147 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4149 "not vectorized: can't create epilog loop 2.");
4158 /* Function vect_transform_stmt.
4160 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* NOTE(review): decimated extract -- braces, `break;` lines, and some
   conditions are elided (embedded line numbers are non-contiguous).  */
4163 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4164 bool *strided_store, slp_tree slp_node,
4165 slp_instance slp_node_instance)
4167 bool is_store = false;
4168 gimple vec_stmt = NULL;
4169 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4170 gimple orig_stmt_in_pattern;
4172 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4173 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Dispatch on the stmt type recorded during analysis; each case calls
   the matching vectorizable_* transform routine.  */
4175 switch (STMT_VINFO_TYPE (stmt_info))
4177 case type_demotion_vec_info_type:
4178 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4182 case type_promotion_vec_info_type:
4183 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4187 case type_conversion_vec_info_type:
4188 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4192 case induc_vec_info_type:
4193 gcc_assert (!slp_node);
4194 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4198 case op_vec_info_type:
4199 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4203 case assignment_vec_info_type:
4204 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4208 case load_vec_info_type:
4209 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4214 case store_vec_info_type:
4215 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4217 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4219 /* In case of interleaving, the whole chain is vectorized when the
4220 last store in the chain is reached. Store stmts before the last
4221 one are skipped, and there vec_stmt_info shouldn't be freed
4223 *strided_store = true;
4224 if (STMT_VINFO_VEC_STMT (stmt_info))
4231 case condition_vec_info_type:
4232 gcc_assert (!slp_node);
4233 done = vectorizable_condition (stmt, gsi, &vec_stmt);
4237 case call_vec_info_type:
4238 gcc_assert (!slp_node);
4239 done = vectorizable_call (stmt, gsi, &vec_stmt);
4242 case reduc_vec_info_type:
4243 gcc_assert (!slp_node);
4244 done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4249 if (!STMT_VINFO_LIVE_P (stmt_info))
4251 if (vect_print_dump_info (REPORT_DETAILS))
4252 fprintf (vect_dump, "stmt not supported.");
4257 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4258 is being vectorized, but outside the immediately enclosing loop. */
4260 && nested_in_vect_loop_p (loop, stmt)
4261 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4262 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4263 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer_by_reduction))
4265 struct loop *innerloop = loop->inner;
4266 imm_use_iterator imm_iter;
4267 use_operand_p use_p;
4271 if (vect_print_dump_info (REPORT_DETAILS))
4272 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4274 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
4275 (to be used when vectorizing outer-loop stmts that use the DEF of
4277 if (gimple_code (stmt) == GIMPLE_PHI)
4278 scalar_dest = PHI_RESULT (stmt);
4280 scalar_dest = gimple_assign_lhs (stmt);
/* A use outside the inner loop is the loop-closed exit phi.  */
4282 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4284 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4286 exit_phi = USE_STMT (use_p);
4287 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4292 /* Handle stmts whose DEF is used outside the loop-nest that is
4293 being vectorized. */
4294 if (STMT_VINFO_LIVE_P (stmt_info)
4295 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4297 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4303 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4304 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4305 if (orig_stmt_in_pattern)
4307 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4308 /* STMT was inserted by the vectorizer to replace a computation idiom.
4309 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4310 computed this idiom. We need to record a pointer to VEC_STMT in
4311 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4312 documentation of vect_pattern_recog. */
4313 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4315 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4316 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4325 /* Remove a group of stores (for SLP or interleaving), free their
/* NOTE(review): the loop construct walking the DR group chain is elided
   in this extract (embedded line numbers skip ranges).  */
4329 vect_remove_stores (gimple first_stmt)
4331 gimple next = first_stmt;
4333 gimple_stmt_iterator next_si;
4337 /* Free the attached stmt_vec_info and remove the stmt. */
4338 next_si = gsi_for_stmt (next);
4339 gsi_remove (&next_si, true);
/* Fetch the next store of the group before freeing NEXT's info.  */
4340 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4341 free_stmt_vec_info (next);
4347 /* Function new_stmt_vec_info.
4349 Create and initialize a new stmt_vec_info struct for STMT. */
4352 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo)
/* Zero-allocate, then set every field explicitly for clarity.  */
4355 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
4357 STMT_VINFO_TYPE (res) = undef_vec_info_type;
4358 STMT_VINFO_STMT (res) = stmt;
4359 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
4360 STMT_VINFO_RELEVANT (res) = 0;
4361 STMT_VINFO_LIVE_P (res) = false;
4362 STMT_VINFO_VECTYPE (res) = NULL;
4363 STMT_VINFO_VEC_STMT (res) = NULL;
4364 STMT_VINFO_IN_PATTERN_P (res) = false;
4365 STMT_VINFO_RELATED_STMT (res) = NULL;
4366 STMT_VINFO_DATA_REF (res) = NULL;
/* Data-reference description fields (filled in by DR analysis).  */
4368 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
4369 STMT_VINFO_DR_OFFSET (res) = NULL;
4370 STMT_VINFO_DR_INIT (res) = NULL;
4371 STMT_VINFO_DR_STEP (res) = NULL;
4372 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
/* Loop-header phis get an unknown def-type until classified;
   everything else starts as an ordinary loop def.  */
4374 if (gimple_code (stmt) == GIMPLE_PHI
4375 && is_loop_header_bb_p (gimple_bb (stmt)))
4376 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
4378 STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
4379 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
4380 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
4381 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
4382 STMT_SLP_TYPE (res) = 0;
4383 DR_GROUP_FIRST_DR (res) = NULL;
4384 DR_GROUP_NEXT_DR (res) = NULL;
4385 DR_GROUP_SIZE (res) = 0;
4386 DR_GROUP_STORE_COUNT (res) = 0;
4387 DR_GROUP_GAP (res) = 0;
4388 DR_GROUP_SAME_DR_STMT (res) = NULL;
4389 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
4395 /* Create a hash table for stmt_vec_info. */
4398 init_stmt_vec_info_vec (void)
/* Must not be called twice without an intervening free.  */
4400 gcc_assert (!stmt_vec_info_vec);
4401 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
4405 /* Free hash table for stmt_vec_info. */
4408 free_stmt_vec_info_vec (void)
/* Must have been initialized by init_stmt_vec_info_vec.  */
4410 gcc_assert (stmt_vec_info_vec);
4411 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
4415 /* Free stmt vectorization related info. */
4418 free_stmt_vec_info (gimple stmt)
4420 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Release the same-align-refs vector and detach the info from STMT.  */
4425 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
4426 set_vinfo_for_stmt (stmt, NULL);
4431 /* Function get_vectype_for_scalar_type.
4433 Returns the vector type corresponding to SCALAR_TYPE as supported
/* NOTE(review): decimated extract -- failure-path returns are elided.  */
4437 get_vectype_for_scalar_type (tree scalar_type)
4439 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
4440 int nbytes = GET_MODE_SIZE (inner_mode);
/* A zero-size mode, or one at least as wide as a SIMD word, cannot
   be a vector element.  */
4444 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
4447 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
4449 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
4451 vectype = build_vector_type (scalar_type, nunits);
4452 if (vect_print_dump_info (REPORT_DETAILS))
4454 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4455 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4461 if (vect_print_dump_info (REPORT_DETAILS))
4463 fprintf (vect_dump, "vectype: ");
4464 print_generic_expr (vect_dump, vectype, TDF_SLIM);
/* Reject types the target cannot hold in a vector (or integer) mode.  */
4467 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4468 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
4470 if (vect_print_dump_info (REPORT_DETAILS))
4471 fprintf (vect_dump, "mode not supported by target.");
4478 /* Function vect_is_simple_use.
4481 LOOP - the loop that is being vectorized.
4482 OPERAND - operand of a stmt in LOOP.
4483 DEF - the defining stmt in case OPERAND is an SSA_NAME.
4485 Returns whether a stmt with OPERAND can be vectorized.
4486 Supportable operands are constants, loop invariants, and operands that are
4487 defined by the current iteration of the loop. Unsupportable operands are
4488 those that are defined by a previous iteration of the loop (as is the case
4489 in reduction/induction computations). */
/* NOTE(review): decimated extract -- braces, returns, and the switch
   case labels are partially elided (embedded numbers skip ranges).  */
4492 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
4493 tree *def, enum vect_def_type *dt)
4496 stmt_vec_info stmt_vinfo;
4497 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4502 if (vect_print_dump_info (REPORT_DETAILS))
4504 fprintf (vect_dump, "vect_is_simple_use: operand ");
4505 print_generic_expr (vect_dump, operand, TDF_SLIM);
/* Constants and loop invariants are always simple uses.  */
4508 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
4510 *dt = vect_constant_def;
4513 if (is_gimple_min_invariant (operand))
4516 *dt = vect_invariant_def;
/* Look through PAREN_EXPR (a non-associatable copy wrapper).  */
4520 if (TREE_CODE (operand) == PAREN_EXPR)
4522 if (vect_print_dump_info (REPORT_DETAILS))
4523 fprintf (vect_dump, "non-associatable copy.");
4524 operand = TREE_OPERAND (operand, 0);
4526 if (TREE_CODE (operand) != SSA_NAME)
4528 if (vect_print_dump_info (REPORT_DETAILS))
4529 fprintf (vect_dump, "not ssa-name.");
4533 *def_stmt = SSA_NAME_DEF_STMT (operand);
4534 if (*def_stmt == NULL)
4536 if (vect_print_dump_info (REPORT_DETAILS))
4537 fprintf (vect_dump, "no def_stmt.");
4541 if (vect_print_dump_info (REPORT_DETAILS))
4543 fprintf (vect_dump, "def_stmt: ");
4544 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
4547 /* empty stmt is expected only in case of a function argument.
4548 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
4549 if (gimple_nop_p (*def_stmt))
4552 *dt = vect_invariant_def;
/* A def outside the loop makes the operand loop-invariant; a def
   inside the loop takes its classified type from the def's stmt_info.  */
4556 bb = gimple_bb (*def_stmt);
4557 if (!flow_bb_inside_loop_p (loop, bb))
4558 *dt = vect_invariant_def;
4561 stmt_vinfo = vinfo_for_stmt (*def_stmt);
4562 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
4565 if (*dt == vect_unknown_def_type)
4567 if (vect_print_dump_info (REPORT_DETAILS))
4568 fprintf (vect_dump, "Unsupported pattern.");
4572 if (vect_print_dump_info (REPORT_DETAILS))
4573 fprintf (vect_dump, "type of def: %d.",*dt);
/* Extract the defined value from whichever stmt kind defines it.  */
4575 switch (gimple_code (*def_stmt))
4578 *def = gimple_phi_result (*def_stmt);
4582 *def = gimple_assign_lhs (*def_stmt);
4586 *def = gimple_call_lhs (*def_stmt);
4591 if (vect_print_dump_info (REPORT_DETAILS))
4592 fprintf (vect_dump, "unsupported defining stmt: ");
4600 /* Function supportable_widening_operation
4602 Check whether an operation represented by the code CODE is a
4603 widening operation that is supported by the target platform in
4604 vector form (i.e., when operating on arguments of type VECTYPE).
4606 Widening operations we currently support are NOP (CONVERT), FLOAT
4607 and WIDEN_MULT. This function checks if these operations are supported
4608 by the target platform either directly (via vector tree-codes), or via
4612 - CODE1 and CODE2 are codes of vector operations to be used when
4613 vectorizing the operation, if available.
4614 - DECL1 and DECL2 are decls of target builtin functions to be used
4615 when vectorizing the operation, if available. In this case,
4616 CODE1 and CODE2 are CALL_EXPR.
4617 - MULTI_STEP_CVT determines the number of required intermediate steps in
4618 case of multi-step conversion (like char->short->int - in that case
4619 MULTI_STEP_CVT will be 1).
4620 - INTERM_TYPES contains the intermediate type required to perform the
4621 widening operation (short in the above example). */
4624 supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
4625 tree *decl1, tree *decl2,
4626 enum tree_code *code1, enum tree_code *code2,
4627 int *multi_step_cvt,
4628 VEC (tree, heap) **interm_types)
4630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4631 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4632 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4634 enum machine_mode vec_mode;
4635 enum insn_code icode1 = 0, icode2 = 0;
4636 optab optab1, optab2;
4637 tree type = gimple_expr_type (stmt);
4638 tree wide_vectype = get_vectype_for_scalar_type (type);
4639 enum tree_code c1, c2;
4641 /* The result of a vectorized widening operation usually requires two vectors
4642 (because the widened results do not fit in one vector). The generated
4643 vector results would normally be expected to be generated in the same
4644 order as in the original scalar computation, i.e. if 8 results are
4645 generated in each vector iteration, they are to be organized as follows:
4646 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
4648 However, in the special case that the result of the widening operation is
4649 used in a reduction computation only, the order doesn't matter (because
4650 when vectorizing a reduction we change the order of the computation).
4651 Some targets can take advantage of this and generate more efficient code.
4652 For example, targets like Altivec, that support widen_mult using a sequence
4653 of {mult_even,mult_odd} generate the following vectors:
4654 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4656 When vectorizing outer-loops, we execute the inner-loop sequentially
4657 (each vectorized inner-loop iteration contributes to VF outer-loop
4658 iterations in parallel). We therefore don't allow to change the order
4659 of the computation in the inner-loop during outer-loop vectorization. */
4661 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4662 && !nested_in_vect_loop_p (vect_loop, stmt))
4668 && code == WIDEN_MULT_EXPR
4669 && targetm.vectorize.builtin_mul_widen_even
4670 && targetm.vectorize.builtin_mul_widen_even (vectype)
4671 && targetm.vectorize.builtin_mul_widen_odd
4672 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4674 if (vect_print_dump_info (REPORT_DETAILS))
4675 fprintf (vect_dump, "Unordered widening operation detected.");
4677 *code1 = *code2 = CALL_EXPR;
4678 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4679 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
4685 case WIDEN_MULT_EXPR:
4686 if (BYTES_BIG_ENDIAN)
4688 c1 = VEC_WIDEN_MULT_HI_EXPR;
4689 c2 = VEC_WIDEN_MULT_LO_EXPR;
4693 c2 = VEC_WIDEN_MULT_HI_EXPR;
4694 c1 = VEC_WIDEN_MULT_LO_EXPR;
4699 if (BYTES_BIG_ENDIAN)
4701 c1 = VEC_UNPACK_HI_EXPR;
4702 c2 = VEC_UNPACK_LO_EXPR;
4706 c2 = VEC_UNPACK_HI_EXPR;
4707 c1 = VEC_UNPACK_LO_EXPR;
4712 if (BYTES_BIG_ENDIAN)
4714 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
4715 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
4719 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
4720 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
4724 case FIX_TRUNC_EXPR:
4725 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
4726 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
4727 computing the operation. */
4734 if (code == FIX_TRUNC_EXPR)
4736 /* The signedness is determined from output operand. */
4737 optab1 = optab_for_tree_code (c1, type, optab_default);
4738 optab2 = optab_for_tree_code (c2, type, optab_default);
4742 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4743 optab2 = optab_for_tree_code (c2, vectype, optab_default);
4746 if (!optab1 || !optab2)
4749 vec_mode = TYPE_MODE (vectype);
4750 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
4751 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
4752 == CODE_FOR_nothing)
4755 /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
4757 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4758 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
4761 tree prev_type = vectype, intermediate_type;
4762 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4763 optab optab3, optab4;
4765 if (!CONVERT_EXPR_CODE_P (code))
4771 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4772 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
4773 to get to NARROW_VECTYPE, and fail if we do not. */
4774 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4775 for (i = 0; i < 3; i++)
4777 intermediate_mode = insn_data[icode1].operand[0].mode;
4778 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4779 TYPE_UNSIGNED (prev_type));
4780 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4781 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4783 if (!optab3 || !optab4
4784 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4786 || insn_data[icode1].operand[0].mode != intermediate_mode
4787 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
4789 || insn_data[icode2].operand[0].mode != intermediate_mode
4790 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
4792 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
4793 == CODE_FOR_nothing)
4796 VEC_quick_push (tree, *interm_types, intermediate_type);
4797 (*multi_step_cvt)++;
4799 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
4800 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
4803 prev_type = intermediate_type;
4804 prev_mode = intermediate_mode;
4816 /* Function supportable_narrowing_operation
4818 Check whether an operation represented by the code CODE is a
4819 narrowing operation that is supported by the target platform in
4820 vector form (i.e., when operating on arguments of type VECTYPE).
4822 Narrowing operations we currently support are NOP (CONVERT) and
4823 FIX_TRUNC. This function checks if these operations are supported by
4824 the target platform directly via vector tree-codes.

     Output:
4827 - CODE1 is the code of a vector operation to be used when
4828 vectorizing the operation, if available.
4829 - MULTI_STEP_CVT determines the number of required intermediate steps in
4830 case of multi-step conversion (like int->short->char - in that case
4831 MULTI_STEP_CVT will be 1).
4832 - INTERM_TYPES contains the intermediate type required to perform the
4833 narrowing operation (short in the above example). */
4836 supportable_narrowing_operation (enum tree_code code,
4837 const_gimple stmt, tree vectype,
4838 enum tree_code *code1, int *multi_step_cvt,
4839 VEC (tree, heap) **interm_types)
4841 enum machine_mode vec_mode;
4842 enum insn_code icode1;
4843 optab optab1, interm_optab;
4844 tree type = gimple_expr_type (stmt);
4845 tree narrow_vectype = get_vectype_for_scalar_type (type);
4847 tree intermediate_type, prev_type;
4853 c1 = VEC_PACK_TRUNC_EXPR;
4856 case FIX_TRUNC_EXPR:
4857 c1 = VEC_PACK_FIX_TRUNC_EXPR;
4861 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4862 tree code and optabs used for computing the operation. */
4869 if (code == FIX_TRUNC_EXPR)
4870 /* The signedness is determined from output operand. */
4871 optab1 = optab_for_tree_code (c1, type, optab_default);
4873 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4878 vec_mode = TYPE_MODE (vectype);
4879 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
4880 == CODE_FOR_nothing)
4883 /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
4885 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4887 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4890 prev_type = vectype;
4891 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4892 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
4893 to get to NARROW_VECTYPE, and fail if we do not. */
4894 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4895 for (i = 0; i < 3; i++)
4897 intermediate_mode = insn_data[icode1].operand[0].mode;
4898 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4899 TYPE_UNSIGNED (prev_type));
4900 interm_optab = optab_for_tree_code (c1, intermediate_type,
4903 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4905 || insn_data[icode1].operand[0].mode != intermediate_mode
4907 = interm_optab->handlers[(int) intermediate_mode].insn_code)
4908 == CODE_FOR_nothing)
4911 VEC_quick_push (tree, *interm_types, intermediate_type);
4912 (*multi_step_cvt)++;
4914 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
4917 prev_type = intermediate_type;
4918 prev_mode = intermediate_mode;