/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};
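/* For example, in "a[i] = b[i] + 1" the store to a[i] is VLS_STORE,
   whereas in "a[i] = 42" it is VLS_STORE_INVARIANT because the stored
   value does not vary across iterations; loads are always VLS_LOAD.  */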
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
                                stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                                misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
                          count, kind, stmt_info, misalign, where);
}
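/* Note: when BODY_COST_VEC is non-null the cost is only queued for later
   processing, so the value computed from builtin_vectorization_cost is
   just a preliminary estimate; the definitive cost is produced when the
   queued entries are finally fed to the target via add_stmt_cost.  */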
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
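/* Example: in

     for (i = 0; i < n; i++)
       last = a[i];
     ... = last;

   the statement computing LAST is "live" because its value is used after
   the loop, even though no statement inside the loop needs its result.  */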
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
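/* Note: for IFN_MASK_STORE the stored value is argument 3 and the mask
   is argument 2; for IFN_MASK_LOAD the mask is argument 2.  These are
   real uses, unlike the address arguments, which only feed the data
   reference and are therefore treated as indexing-only above.  */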
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }
  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }
  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */
      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");
              return false;
            }
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");
              return false;
            }
          break;

        default:
          break;
        }
      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
            gcc_unreachable ();
          if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Cost the "broadcast" of a scalar operand into a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
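/* Note: constant and external (loop-invariant) operands are costed in
   the prologue because their scalar-to-vector broadcast can be hoisted
   out of the loop, while the vector operation itself is repeated
   NCOPIES times in the loop body.  */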
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
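/* Example: with PWR == 1 (a two-step promotion such as char -> int via
   an intermediate short step) the loop above charges
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 vec_promote_demote statements,
   whereas the corresponding two-step demotion charges
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2.  */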
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       vect_memory_access_type memory_access_type,
                       enum vect_def_type dt, slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    /* N scalar stores plus extracting the elements.  */
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     scalar_store, stmt_info, 0, vect_body);
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
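/* Note: the NSTMTS computation above models the classic interleaving
   scheme: ceil_log2 (group_size) rounds of high/low interleaves, each
   round permuting all GROUP_SIZE vectors, repeated for each of the
   NCOPIES vector statements.  */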
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      vect_memory_access_type memory_access_type,
                      slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
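/* Note on the two explicit-realignment schemes costed above:
   dr_explicit_realign performs two vector loads and a permute for every
   access, while dr_explicit_realign_optimized hoists the address
   computation and the optional mask "priming" into the loop prologue,
   leaving one load and one realignment permute per access in the body.  */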
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                init_stmt = gimple_build_assign (new_temp,
                                                 fold_build1 (VIEW_CONVERT_EXPR,
                                                              TREE_TYPE (type),
                                                              val));
              else
                init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
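/* For example, vectorizing "x + 3" with a V4SI vector type reaches this
   function with VAL = 3 and TYPE = V4SI; it emits something like
   "cst_1 = { 3, 3, 3, 3 };" in the loop preheader and returns cst_1.  */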
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}


static tree permute_vec_elements (tree, tree, tree, gimple *,
                                  gimple_stmt_iterator *);
/* STMT is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree step;
  if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
    step = STMT_VINFO_DR_STEP (stmt_info);
  else
    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
  return tree_int_cst_compare (step, size_zero_node);
}
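/* For example, for int accesses "... = a[i]" has DR_STEP 4 (returns 1),
   "... = a[n - i]" has DR_STEP -4 (returns -1), and a load of an
   invariant "... = *p" has step 0 (returns 0).  */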
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, sel);
}
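/* E.g. for V4SI this builds the selector { 3, 2, 1, 0 }, the permute
   that turns { a, b, c, d } into { d, c, b, a }.  */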
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
                           vec_load_store_type vls_type,
                           vect_memory_access_type *memory_access_type)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
  bool single_element_p = (stmt == first_stmt
                           && !GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (stmt_info))
        {
          /* Try to use consecutive accesses of GROUP_SIZE elements,
             separated by the stride, until we have a complete vector.
             Fall back to scalar accesses if that isn't possible.  */
          if (nunits % group_size == 0)
            *memory_access_type = VMAT_STRIDED_SLP;
          else
            *memory_access_type = VMAT_ELEMENTWISE;
        }
      else
        {
          overrun_p = loop_vinfo && gap != 0;
          if (overrun_p && vls_type != VLS_LOAD)
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Grouped store with gaps requires"
                               " non-consecutive accesses\n");
              return false;
            }
          /* If the access is aligned an overrun is fine.  */
          if (overrun_p
              && aligned_access_p
                   (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
            overrun_p = false;
          if (overrun_p && !can_overrun_p)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Peeling for outer loop is not supported\n");
              return false;
            }
          *memory_access_type = VMAT_CONTIGUOUS;
        }
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
         but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
         would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* If the access is aligned an overrun is fine, but only if the
         overrun is not inside an unused vector (if the gap is as large
         or larger than a vector).  */
      if (would_overrun_p
          && gap < nunits
          && aligned_access_p
               (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
        would_overrun_p = false;
      if (!STMT_VINFO_STRIDED_P (stmt_info)
          && (can_overrun_p || !would_overrun_p)
          && compare_step_with_zero (stmt) > 0)
        {
          /* First try using LOAD/STORE_LANES.  */
          if (vls_type == VLS_LOAD
              ? vect_load_lanes_supported (vectype, group_size)
              : vect_store_lanes_supported (vectype, group_size))
            {
              *memory_access_type = VMAT_LOAD_STORE_LANES;
              overrun_p = would_overrun_p;
            }

          /* If that fails, try using permuting loads.  */
          if (*memory_access_type == VMAT_ELEMENTWISE
              && (vls_type == VLS_LOAD
                  ? vect_grouped_load_supported (vectype, single_element_p,
                                                 group_size)
                  : vect_grouped_store_supported (vectype, group_size)))
            {
              *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
              overrun_p = would_overrun_p;
            }
        }
    }

  if (vls_type != VLS_LOAD && first_stmt == stmt)
    {
      /* STMT is the leader of the group.  Check the operands of all the
         stmts of the group.  */
      gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt)
        {
          gcc_assert (gimple_assign_single_p (next_stmt));
          tree op = gimple_assign_rhs1 (next_stmt);
          gimple *def_stmt;
          enum vect_def_type dt;
          if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "use not simple.\n");
              return false;
            }
          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
        }
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Data access with gaps requires scalar "
                         "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (gimple *stmt, tree vectype,
                              vec_load_store_type vls_type,
                              unsigned int ncopies)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "negative step with invariant source;"
                         " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
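/* Example: a contiguous access with a negative step such as

     for (i = n - 1; i >= 0; i--)
       ... = a[i];

   is classified as VMAT_CONTIGUOUS_REVERSE when the target supports the
   reversing permute: each iteration loads a full vector at a decreasing
   address and then reverses its elements.  */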
/* Analyze load or store statement STMT of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (gimple *stmt, tree vectype, bool slp,
                     vec_load_store_type vls_type, unsigned int ncopies,
                     vect_memory_access_type *memory_access_type,
                     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      gimple *def_stmt;
      if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
        gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
                                    &gs_info->offset_dt,
                                    &gs_info->offset_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "%s index use not simple.\n",
                             vls_type == VLS_LOAD ? "gather" : "scatter");
          return false;
        }
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
                                      memory_access_type))
        return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp);
      *memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      int cmp = compare_step_with_zero (stmt);
      if (cmp < 0)
        *memory_access_type = get_negative_load_store_type
          (stmt, vectype, vls_type, ncopies);
      else if (cmp == 0)
        {
          gcc_assert (vls_type == VLS_LOAD);
          *memory_access_type = VMAT_INVARIANT;
        }
      else
        *memory_access_type = VMAT_CONTIGUOUS;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not falling back to elementwise accesses\n");
      return false;
    }

  return true;
}
2005 /* Function vectorizable_mask_load_store.
2007 Check if STMT performs a conditional load or store that can be vectorized.
2008 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2009 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2010 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
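/* A minimal sketch of the kind of statement handled here: after
   if-conversion,

     for (i = 0; i < n; i++)
       if (c[i])
	 a[i] = b[i];

   contains a MASK_LOAD of B[i] and a MASK_STORE to A[i] guarded by the
   comparison result; both are vectorized below into IFN_MASK_LOAD /
   IFN_MASK_STORE internal calls (or masked gathers/scatters) when the
   target supports them. */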
2013 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2014 gimple **vec_stmt, slp_tree slp_node)
2016 tree vec_dest = NULL;
2017 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2018 stmt_vec_info prev_stmt_info;
2019 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2020 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2021 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2022 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2023 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2024 tree rhs_vectype = NULL_TREE;
2029 tree dataref_ptr = NULL_TREE;
2031 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2035 gather_scatter_info gs_info;
2036 vec_load_store_type vls_type;
2039 enum vect_def_type dt;
2041 if (slp_node != NULL)
2044 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2045 gcc_assert (ncopies >= 1);
2047 mask = gimple_call_arg (stmt, 2);
2049 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2052 /* FORNOW. This restriction should be relaxed. */
2053 if (nested_in_vect_loop && ncopies > 1)
2055 if (dump_enabled_p ())
2056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2057 "multiple types in nested loop.");
2061 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2064 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2068 if (!STMT_VINFO_DATA_REF (stmt_info))
2071 elem_type = TREE_TYPE (vectype);
2073 if (TREE_CODE (mask) != SSA_NAME)
2076 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2080 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2082 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2083 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2086 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2088 tree rhs = gimple_call_arg (stmt, 3);
2089 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2091 if (dt == vect_constant_def || dt == vect_external_def)
2092 vls_type = VLS_STORE_INVARIANT;
2094 vls_type = VLS_STORE;
2097 vls_type = VLS_LOAD;
2099 vect_memory_access_type memory_access_type;
2100 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2101 &memory_access_type, &gs_info))
2104 if (memory_access_type == VMAT_GATHER_SCATTER)
2106 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2108 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2109 if (TREE_CODE (masktype) == INTEGER_TYPE)
2111 if (dump_enabled_p ())
2112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2113 "masked gather with integer mask not supported.");
2117 else if (memory_access_type != VMAT_CONTIGUOUS)
2119 if (dump_enabled_p ())
2120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2121 "unsupported access type for masked %s.\n",
2122 vls_type == VLS_LOAD ? "load" : "store");
2125 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2126 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2127 TYPE_MODE (mask_vectype),
2128 vls_type == VLS_LOAD)
2130 && !useless_type_conversion_p (vectype, rhs_vectype)))
2133 if (!vec_stmt) /* transformation not required. */
2135 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2136 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2137 if (vls_type == VLS_LOAD)
2138 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2141 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2142 dt, NULL, NULL, NULL);
2145 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2149 if (memory_access_type == VMAT_GATHER_SCATTER)
2151 tree vec_oprnd0 = NULL_TREE, op;
2152 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2153 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2154 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2155 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2156 tree mask_perm_mask = NULL_TREE;
2157 edge pe = loop_preheader_edge (loop);
2160 enum { NARROW, NONE, WIDEN } modifier;
2161 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2163 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2164 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2165 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2166 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2167 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2168 scaletype = TREE_VALUE (arglist);
2169 gcc_checking_assert (types_compatible_p (srctype, rettype)
2170 && types_compatible_p (srctype, masktype));
2172 if (nunits == gather_off_nunits)
2174 else if (nunits == gather_off_nunits / 2)
2176 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2179 for (i = 0; i < gather_off_nunits; ++i)
2180 sel[i] = i | nunits;
2182 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2184 else if (nunits == gather_off_nunits * 2)
2186 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2189 for (i = 0; i < nunits; ++i)
2190 sel[i] = i < gather_off_nunits
2191 ? i : i + nunits - gather_off_nunits;
2193 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2195 for (i = 0; i < nunits; ++i)
2196 sel[i] = i | gather_off_nunits;
2197 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2202 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2204 ptr = fold_convert (ptrtype, gs_info.base);
2205 if (!is_gimple_min_invariant (ptr))
2207 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2208 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2209 gcc_assert (!new_bb);
2212 scale = build_int_cst (scaletype, gs_info.scale);
2214 prev_stmt_info = NULL;
2215 for (j = 0; j < ncopies; ++j)
2217 if (modifier == WIDEN && (j & 1))
2218 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2219 perm_mask, stmt, gsi);
2222 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2225 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2227 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2229 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2230 == TYPE_VECTOR_SUBPARTS (idxtype));
2231 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2232 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2234 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2235 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2239 if (mask_perm_mask && (j & 1))
2240 mask_op = permute_vec_elements (mask_op, mask_op,
2241 mask_perm_mask, stmt, gsi);
2245 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2248 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2249 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2253 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2255 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2256 == TYPE_VECTOR_SUBPARTS (masktype));
2257 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2258 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2260 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2261 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2267 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2270 if (!useless_type_conversion_p (vectype, rettype))
2272 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2273 == TYPE_VECTOR_SUBPARTS (rettype));
2274 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2275 gimple_call_set_lhs (new_stmt, op);
2276 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2277 var = make_ssa_name (vec_dest);
2278 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2279 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2283 var = make_ssa_name (vec_dest, new_stmt);
2284 gimple_call_set_lhs (new_stmt, var);
2287 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2289 if (modifier == NARROW)
2296 var = permute_vec_elements (prev_res, var,
2297 perm_mask, stmt, gsi);
2298 new_stmt = SSA_NAME_DEF_STMT (var);
2301 if (prev_stmt_info == NULL)
2302 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2304 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2305 prev_stmt_info = vinfo_for_stmt (new_stmt);
2308 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2309 from the IL. */
2310 if (STMT_VINFO_RELATED_STMT (stmt_info))
2312 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2313 stmt_info = vinfo_for_stmt (stmt);
2315 tree lhs = gimple_call_lhs (stmt);
2316 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2317 set_vinfo_for_stmt (new_stmt, stmt_info);
2318 set_vinfo_for_stmt (stmt, NULL);
2319 STMT_VINFO_STMT (stmt_info) = new_stmt;
2320 gsi_replace (gsi, new_stmt, true);
2323 else if (vls_type != VLS_LOAD)
2325 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2326 prev_stmt_info = NULL;
2327 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2328 for (i = 0; i < ncopies; i++)
2330 unsigned align, misalign;
2334 tree rhs = gimple_call_arg (stmt, 3);
2335 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2336 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2337 /* We should have caught mismatched types earlier. */
2338 gcc_assert (useless_type_conversion_p (vectype,
2339 TREE_TYPE (vec_rhs)));
2340 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2341 NULL_TREE, &dummy, gsi,
2342 &ptr_incr, false, &inv_p);
2343 gcc_assert (!inv_p);
2347 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2348 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2349 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2350 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2351 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2352 TYPE_SIZE_UNIT (vectype));
2355 align = TYPE_ALIGN_UNIT (vectype);
2356 if (aligned_access_p (dr))
2358 else if (DR_MISALIGNMENT (dr) == -1)
2360 align = TYPE_ALIGN_UNIT (elem_type);
2364 misalign = DR_MISALIGNMENT (dr);
2365 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2367 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2368 misalign ? least_bit_hwi (misalign) : align);
2370 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2371 ptr, vec_mask, vec_rhs);
2372 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2374 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2376 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2377 prev_stmt_info = vinfo_for_stmt (new_stmt);
2382 tree vec_mask = NULL_TREE;
2383 prev_stmt_info = NULL;
2384 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2385 for (i = 0; i < ncopies; i++)
2387 unsigned align, misalign;
2391 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2392 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2393 NULL_TREE, &dummy, gsi,
2394 &ptr_incr, false, &inv_p);
2395 gcc_assert (!inv_p);
2399 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2400 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2401 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2402 TYPE_SIZE_UNIT (vectype));
2405 align = TYPE_ALIGN_UNIT (vectype);
2406 if (aligned_access_p (dr))
2408 else if (DR_MISALIGNMENT (dr) == -1)
2410 align = TYPE_ALIGN_UNIT (elem_type);
2414 misalign = DR_MISALIGNMENT (dr);
2415 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2417 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2418 misalign ? least_bit_hwi (misalign) : align);
2420 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2422 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2423 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2425 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2427 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2428 prev_stmt_info = vinfo_for_stmt (new_stmt);
2432 if (vls_type == VLS_LOAD)
2434 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2435 from the IL. */
2436 if (STMT_VINFO_RELATED_STMT (stmt_info))
2438 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2439 stmt_info = vinfo_for_stmt (stmt);
2441 tree lhs = gimple_call_lhs (stmt);
2442 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2443 set_vinfo_for_stmt (new_stmt, stmt_info);
2444 set_vinfo_for_stmt (stmt, NULL);
2445 STMT_VINFO_STMT (stmt_info) = new_stmt;
2446 gsi_replace (gsi, new_stmt, true);
2452 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
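/* The strategy (sketched for __builtin_bswap32 on a V4SI vector): view the
   input as V16QI, apply a single VEC_PERM_EXPR with the byte-reversing
   selector built below, and view the result back as V4SI. */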
2455 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2456 gimple **vec_stmt, slp_tree slp_node,
2457 tree vectype_in, enum vect_def_type *dt)
2460 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2461 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2462 unsigned ncopies, nunits;
2464 op = gimple_call_arg (stmt, 0);
2465 vectype = STMT_VINFO_VECTYPE (stmt_info);
2466 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2468 /* Multiple types in SLP are handled by creating the appropriate number of
2469 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2470 case of SLP. */
2474 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2476 gcc_assert (ncopies >= 1);
2478 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2483 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2484 unsigned char *elt = elts;
2485 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2486 for (unsigned i = 0; i < nunits; ++i)
2487 for (unsigned j = 0; j < word_bytes; ++j)
2488 *elt++ = (i + 1) * word_bytes - j - 1;
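/* E.g. for bswap32 with nunits == 4 and word_bytes == 4, the selector
   built above is { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }. */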
2490 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2495 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2499 if (! PURE_SLP_STMT (stmt_info))
2501 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2502 1, vector_stmt, stmt_info, 0, vect_prologue);
2503 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2504 ncopies, vec_perm, stmt_info, 0, vect_body);
2509 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2510 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2511 telts[i] = build_int_cst (char_type_node, elts[i]);
2512 tree bswap_vconst = build_vector (char_vectype, telts);
2515 vec<tree> vec_oprnds = vNULL;
2516 gimple *new_stmt = NULL;
2517 stmt_vec_info prev_stmt_info = NULL;
2518 for (unsigned j = 0; j < ncopies; j++)
2522 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2524 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2526 /* Arguments are ready. Create the new vector stmt. */
2529 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2531 tree tem = make_ssa_name (char_vectype);
2532 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2533 char_vectype, vop));
2534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2535 tree tem2 = make_ssa_name (char_vectype);
2536 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2537 tem, tem, bswap_vconst);
2538 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2539 tem = make_ssa_name (vectype);
2540 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2544 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2551 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2553 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2555 prev_stmt_info = vinfo_for_stmt (new_stmt);
2558 vec_oprnds.release ();
2562 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2563 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2564 in a single step. On success, store the binary pack code in
2565 *CONVERT_CODE. */
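/* E.g. two V4SI inputs can be narrowed to one V8HI result by a single
   VEC_PACK_TRUNC_EXPR (assuming the target supports that pack), in which
   case this returns true with *CONVERT_CODE set to VEC_PACK_TRUNC_EXPR. */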
2568 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2569 tree_code *convert_code)
2571 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2572 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2576 int multi_step_cvt = 0;
2577 auto_vec <tree, 8> interm_types;
2578 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2579 &code, &multi_step_cvt,
2584 *convert_code = code;
2588 /* Function vectorizable_call.
2590 Check if GS performs a function call that can be vectorized.
2591 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2592 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2593 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
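/* A typical example (a sketch): for

     y = sqrtf (x);

   in the loop body, the scalar call is replaced either by the internal
   function IFN_SQRT applied to the vector of X values or, failing that, by
   a target builtin returned by
   targetm.vectorize.builtin_vectorized_function. */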
2596 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2603 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2604 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2605 tree vectype_out, vectype_in;
2608 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2609 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2610 vec_info *vinfo = stmt_info->vinfo;
2611 tree fndecl, new_temp, rhs_type;
2613 enum vect_def_type dt[3]
2614 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2616 gimple *new_stmt = NULL;
2618 vec<tree> vargs = vNULL;
2619 enum { NARROW, NONE, WIDEN } modifier;
2623 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2626 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2630 /* Is GS a vectorizable call? */
2631 stmt = dyn_cast <gcall *> (gs);
2635 if (gimple_call_internal_p (stmt)
2636 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2637 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2638 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2641 if (gimple_call_lhs (stmt) == NULL_TREE
2642 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2645 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2647 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2649 /* Process function arguments. */
2650 rhs_type = NULL_TREE;
2651 vectype_in = NULL_TREE;
2652 nargs = gimple_call_num_args (stmt);
2654 /* Bail out if the function has more than three arguments; we do not have
2655 interesting builtin functions to vectorize with more than two arguments,
2656 except for fma. Calls with no arguments are not supported either. */
2657 if (nargs == 0 || nargs > 3)
2660 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2661 if (gimple_call_internal_p (stmt)
2662 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2665 rhs_type = unsigned_type_node;
2668 for (i = 0; i < nargs; i++)
2672 op = gimple_call_arg (stmt, i);
2674 /* We can only handle calls with arguments of the same type. */
2676 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2678 if (dump_enabled_p ())
2679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2680 "argument types differ.\n");
2684 rhs_type = TREE_TYPE (op);
2686 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2688 if (dump_enabled_p ())
2689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2690 "use not simple.\n");
2695 vectype_in = opvectype;
2697 && opvectype != vectype_in)
2699 if (dump_enabled_p ())
2700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2701 "argument vector types differ.\n");
2705 /* If all arguments are external or constant defs, use a vector type with
2706 the same size as the output vector type. */
2708 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2710 gcc_assert (vectype_in);
2713 if (dump_enabled_p ())
2715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2716 "no vectype for scalar type ");
2717 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2718 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2725 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2726 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2727 if (nunits_in == nunits_out / 2)
2729 else if (nunits_out == nunits_in)
2731 else if (nunits_out == nunits_in / 2)
2736 /* We only handle functions that do not read or clobber memory. */
2737 if (gimple_vuse (stmt))
2739 if (dump_enabled_p ())
2740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2741 "function reads from or writes to memory.\n");
2745 /* For now, we only vectorize functions if a target specific builtin
2746 is available. TODO -- in some cases, it might be profitable to
2747 insert the calls for pieces of the vector, in order to be able
2748 to vectorize other operations in the loop. */
2750 internal_fn ifn = IFN_LAST;
2751 combined_fn cfn = gimple_call_combined_fn (stmt);
2752 tree callee = gimple_call_fndecl (stmt);
2754 /* First try using an internal function. */
2755 tree_code convert_code = ERROR_MARK;
2757 && (modifier == NONE
2758 || (modifier == NARROW
2759 && simple_integer_narrowing (vectype_out, vectype_in,
2761 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2764 /* If that fails, try asking for a target-specific built-in function. */
2765 if (ifn == IFN_LAST)
2767 if (cfn != CFN_LAST)
2768 fndecl = targetm.vectorize.builtin_vectorized_function
2769 (cfn, vectype_out, vectype_in);
2771 fndecl = targetm.vectorize.builtin_md_vectorized_function
2772 (callee, vectype_out, vectype_in);
2775 if (ifn == IFN_LAST && !fndecl)
2777 if (cfn == CFN_GOMP_SIMD_LANE
2780 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2781 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2782 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2783 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2785 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2786 { 0, 1, 2, ... vf - 1 } vector. */
2787 gcc_assert (nargs == 0);
2789 else if (modifier == NONE
2790 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2791 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2792 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2793 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2797 if (dump_enabled_p ())
2798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2799 "function is not vectorizable.\n");
2806 else if (modifier == NARROW && ifn == IFN_LAST)
2807 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2809 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2811 /* Sanity check: make sure that at least one copy of the vectorized stmt
2812 needs to be generated. */
2813 gcc_assert (ncopies >= 1);
2815 if (!vec_stmt) /* transformation not required. */
2817 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2818 if (dump_enabled_p ())
2819 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2821 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2822 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2823 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2824 vec_promote_demote, stmt_info, 0, vect_body);
2831 if (dump_enabled_p ())
2832 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2835 scalar_dest = gimple_call_lhs (stmt);
2836 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2838 prev_stmt_info = NULL;
2839 if (modifier == NONE || ifn != IFN_LAST)
2841 tree prev_res = NULL_TREE;
2842 for (j = 0; j < ncopies; ++j)
2844 /* Build argument list for the vectorized call. */
2846 vargs.create (nargs);
2852 auto_vec<vec<tree> > vec_defs (nargs);
2853 vec<tree> vec_oprnds0;
2855 for (i = 0; i < nargs; i++)
2856 vargs.quick_push (gimple_call_arg (stmt, i));
2857 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2858 vec_oprnds0 = vec_defs[0];
2860 /* Arguments are ready. Create the new vector stmt. */
2861 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2864 for (k = 0; k < nargs; k++)
2866 vec<tree> vec_oprndsk = vec_defs[k];
2867 vargs[k] = vec_oprndsk[i];
2869 if (modifier == NARROW)
2871 tree half_res = make_ssa_name (vectype_in);
2872 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2873 gimple_call_set_lhs (new_stmt, half_res);
2874 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2877 prev_res = half_res;
2880 new_temp = make_ssa_name (vec_dest);
2881 new_stmt = gimple_build_assign (new_temp, convert_code,
2882 prev_res, half_res);
2886 if (ifn != IFN_LAST)
2887 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2889 new_stmt = gimple_build_call_vec (fndecl, vargs);
2890 new_temp = make_ssa_name (vec_dest, new_stmt);
2891 gimple_call_set_lhs (new_stmt, new_temp);
2893 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2894 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2897 for (i = 0; i < nargs; i++)
2899 vec<tree> vec_oprndsi = vec_defs[i];
2900 vec_oprndsi.release ();
2905 for (i = 0; i < nargs; i++)
2907 op = gimple_call_arg (stmt, i);
2910 = vect_get_vec_def_for_operand (op, stmt);
2913 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2915 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2918 vargs.quick_push (vec_oprnd0);
2921 if (gimple_call_internal_p (stmt)
2922 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2924 tree *v = XALLOCAVEC (tree, nunits_out);
2926 for (k = 0; k < nunits_out; ++k)
2927 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2928 tree cst = build_vector (vectype_out, v);
2930 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2931 gimple *init_stmt = gimple_build_assign (new_var, cst);
2932 vect_init_vector_1 (stmt, init_stmt, NULL);
2933 new_temp = make_ssa_name (vec_dest);
2934 new_stmt = gimple_build_assign (new_temp, new_var);
2936 else if (modifier == NARROW)
2938 tree half_res = make_ssa_name (vectype_in);
2939 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2940 gimple_call_set_lhs (new_stmt, half_res);
2941 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2944 prev_res = half_res;
2947 new_temp = make_ssa_name (vec_dest);
2948 new_stmt = gimple_build_assign (new_temp, convert_code,
2949 prev_res, half_res);
2953 if (ifn != IFN_LAST)
2954 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2956 new_stmt = gimple_build_call_vec (fndecl, vargs);
2957 new_temp = make_ssa_name (vec_dest, new_stmt);
2958 gimple_call_set_lhs (new_stmt, new_temp);
2960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2962 if (j == (modifier == NARROW ? 1 : 0))
2963 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2965 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2967 prev_stmt_info = vinfo_for_stmt (new_stmt);
2970 else if (modifier == NARROW)
2972 for (j = 0; j < ncopies; ++j)
2974 /* Build argument list for the vectorized call. */
2976 vargs.create (nargs * 2);
2982 auto_vec<vec<tree> > vec_defs (nargs);
2983 vec<tree> vec_oprnds0;
2985 for (i = 0; i < nargs; i++)
2986 vargs.quick_push (gimple_call_arg (stmt, i));
2987 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2988 vec_oprnds0 = vec_defs[0];
2990 /* Arguments are ready. Create the new vector stmt. */
2991 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2995 for (k = 0; k < nargs; k++)
2997 vec<tree> vec_oprndsk = vec_defs[k];
2998 vargs.quick_push (vec_oprndsk[i]);
2999 vargs.quick_push (vec_oprndsk[i + 1]);
3001 if (ifn != IFN_LAST)
3002 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
3004 new_stmt = gimple_build_call_vec (fndecl, vargs);
3005 new_temp = make_ssa_name (vec_dest, new_stmt);
3006 gimple_call_set_lhs (new_stmt, new_temp);
3007 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3008 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3011 for (i = 0; i < nargs; i++)
3013 vec<tree> vec_oprndsi = vec_defs[i];
3014 vec_oprndsi.release ();
3019 for (i = 0; i < nargs; i++)
3021 op = gimple_call_arg (stmt, i);
3025 = vect_get_vec_def_for_operand (op, stmt);
3027 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3031 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3033 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3035 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3038 vargs.quick_push (vec_oprnd0);
3039 vargs.quick_push (vec_oprnd1);
3042 new_stmt = gimple_build_call_vec (fndecl, vargs);
3043 new_temp = make_ssa_name (vec_dest, new_stmt);
3044 gimple_call_set_lhs (new_stmt, new_temp);
3045 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3048 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3050 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3052 prev_stmt_info = vinfo_for_stmt (new_stmt);
3055 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3058 /* No current target implements this case. */
3063 /* The call in STMT might prevent it from being removed in DCE.
3064 We however cannot remove it here, due to the way the SSA name
3065 it defines is mapped to the new definition. So just replace the
3066 rhs of the statement with something harmless. */
3071 type = TREE_TYPE (scalar_dest);
3072 if (is_pattern_stmt_p (stmt_info))
3073 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3075 lhs = gimple_call_lhs (stmt);
3077 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3078 set_vinfo_for_stmt (new_stmt, stmt_info);
3079 set_vinfo_for_stmt (stmt, NULL);
3080 STMT_VINFO_STMT (stmt_info) = new_stmt;
3081 gsi_replace (gsi, new_stmt, false);
3087 struct simd_call_arg_info
3091 HOST_WIDE_INT linear_step;
3092 enum vect_def_type dt;
3094 bool simd_lane_linear;
3097 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3098 is linear within simd lane (but not within whole loop), note it in
3099 ARGINFO. */
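/* Roughly, the pattern recognized below is

     op = base + (... + GOMP_SIMD_LANE (simduid) * step ...);

   with invariant additions and integral conversions in between, in which
   case OP advances by LINEAR_STEP from one simd lane to the next. */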
3102 vect_simd_lane_linear (tree op, struct loop *loop,
3103 struct simd_call_arg_info *arginfo)
3105 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3107 if (!is_gimple_assign (def_stmt)
3108 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3109 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3112 tree base = gimple_assign_rhs1 (def_stmt);
3113 HOST_WIDE_INT linear_step = 0;
3114 tree v = gimple_assign_rhs2 (def_stmt);
3115 while (TREE_CODE (v) == SSA_NAME)
3118 def_stmt = SSA_NAME_DEF_STMT (v);
3119 if (is_gimple_assign (def_stmt))
3120 switch (gimple_assign_rhs_code (def_stmt))
3123 t = gimple_assign_rhs2 (def_stmt);
3124 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3126 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3127 v = gimple_assign_rhs1 (def_stmt);
3130 t = gimple_assign_rhs2 (def_stmt);
3131 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3133 linear_step = tree_to_shwi (t);
3134 v = gimple_assign_rhs1 (def_stmt);
3137 t = gimple_assign_rhs1 (def_stmt);
3138 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3139 || (TYPE_PRECISION (TREE_TYPE (v))
3140 < TYPE_PRECISION (TREE_TYPE (t))))
3149 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3151 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3152 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3157 arginfo->linear_step = linear_step;
3159 arginfo->simd_lane_linear = true;
3165 /* Function vectorizable_simd_clone_call.
3167 Check if STMT performs a function call that can be vectorized
3168 by calling a simd clone of the function.
3169 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3170 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3171 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
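/* A sketch of the situation handled here:

     #pragma omp declare simd uniform (s) linear (p)
     int foo (int *p, int s);

   makes the compiler emit simd clones of FOO, and a call such as
   foo (q + i, n) inside a vectorizable loop can then be replaced by a call
   to the best-matching clone, passing vectors for vector arguments and
   suitably adjusted scalars for uniform/linear ones. */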
3174 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3175 gimple **vec_stmt, slp_tree slp_node)
3180 tree vec_oprnd0 = NULL_TREE;
3181 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3183 unsigned int nunits;
3184 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3185 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3186 vec_info *vinfo = stmt_info->vinfo;
3187 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3188 tree fndecl, new_temp;
3190 gimple *new_stmt = NULL;
3192 auto_vec<simd_call_arg_info> arginfo;
3193 vec<tree> vargs = vNULL;
3195 tree lhs, rtype, ratype;
3196 vec<constructor_elt, va_gc> *ret_ctor_elts;
3198 /* Is STMT a vectorizable call? */
3199 if (!is_gimple_call (stmt))
3202 fndecl = gimple_call_fndecl (stmt);
3203 if (fndecl == NULL_TREE)
3206 struct cgraph_node *node = cgraph_node::get (fndecl);
3207 if (node == NULL || node->simd_clones == NULL)
3210 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3213 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3217 if (gimple_call_lhs (stmt)
3218 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3221 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3223 vectype = STMT_VINFO_VECTYPE (stmt_info);
3225 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3232 /* Process function arguments. */
3233 nargs = gimple_call_num_args (stmt);
3235 /* Bail out if the function has zero arguments. */
3239 arginfo.reserve (nargs, true);
3241 for (i = 0; i < nargs; i++)
3243 simd_call_arg_info thisarginfo;
3246 thisarginfo.linear_step = 0;
3247 thisarginfo.align = 0;
3248 thisarginfo.op = NULL_TREE;
3249 thisarginfo.simd_lane_linear = false;
3251 op = gimple_call_arg (stmt, i);
3252 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3253 &thisarginfo.vectype)
3254 || thisarginfo.dt == vect_uninitialized_def)
3256 if (dump_enabled_p ())
3257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3258 "use not simple.\n");
3262 if (thisarginfo.dt == vect_constant_def
3263 || thisarginfo.dt == vect_external_def)
3264 gcc_assert (thisarginfo.vectype == NULL_TREE);
3266 gcc_assert (thisarginfo.vectype != NULL_TREE);
3268 /* For linear arguments, the analyze phase should have saved
3269 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3270 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3271 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3273 gcc_assert (vec_stmt);
3274 thisarginfo.linear_step
3275 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3277 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3278 thisarginfo.simd_lane_linear
3279 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3280 == boolean_true_node);
3281 /* If the loop has been peeled for alignment, we need to adjust it. */
3282 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3283 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3284 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3286 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3287 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3288 tree opt = TREE_TYPE (thisarginfo.op);
3289 bias = fold_convert (TREE_TYPE (step), bias);
3290 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3292 = fold_build2 (POINTER_TYPE_P (opt)
3293 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3294 thisarginfo.op, bias);
3298 && thisarginfo.dt != vect_constant_def
3299 && thisarginfo.dt != vect_external_def
3301 && TREE_CODE (op) == SSA_NAME
3302 && simple_iv (loop, loop_containing_stmt (stmt), op,
3304 && tree_fits_shwi_p (iv.step))
3306 thisarginfo.linear_step = tree_to_shwi (iv.step);
3307 thisarginfo.op = iv.base;
3309 else if ((thisarginfo.dt == vect_constant_def
3310 || thisarginfo.dt == vect_external_def)
3311 && POINTER_TYPE_P (TREE_TYPE (op)))
3312 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3313 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3314 linear too. */
3315 if (POINTER_TYPE_P (TREE_TYPE (op))
3316 && !thisarginfo.linear_step
3318 && thisarginfo.dt != vect_constant_def
3319 && thisarginfo.dt != vect_external_def
3322 && TREE_CODE (op) == SSA_NAME)
3323 vect_simd_lane_linear (op, loop, &thisarginfo);
3325 arginfo.quick_push (thisarginfo);
3328 unsigned int badness = 0;
3329 struct cgraph_node *bestn = NULL;
3330 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3331 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3333 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3334 n = n->simdclone->next_clone)
3336 unsigned int this_badness = 0;
3337 if (n->simdclone->simdlen
3338 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3339 || n->simdclone->nargs != nargs)
3341 if (n->simdclone->simdlen
3342 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3343 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3344 - exact_log2 (n->simdclone->simdlen)) * 1024;
3345 if (n->simdclone->inbranch)
3346 this_badness += 2048;
3347 int target_badness = targetm.simd_clone.usable (n);
3348 if (target_badness < 0)
3350 this_badness += target_badness * 512;
3351 /* FORNOW: Have to add code to add the mask argument. */
3352 if (n->simdclone->inbranch)
3354 for (i = 0; i < nargs; i++)
3356 switch (n->simdclone->args[i].arg_type)
3358 case SIMD_CLONE_ARG_TYPE_VECTOR:
3359 if (!useless_type_conversion_p
3360 (n->simdclone->args[i].orig_type,
3361 TREE_TYPE (gimple_call_arg (stmt, i))))
3363 else if (arginfo[i].dt == vect_constant_def
3364 || arginfo[i].dt == vect_external_def
3365 || arginfo[i].linear_step)
3368 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3369 if (arginfo[i].dt != vect_constant_def
3370 && arginfo[i].dt != vect_external_def)
3373 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3374 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3375 if (arginfo[i].dt == vect_constant_def
3376 || arginfo[i].dt == vect_external_def
3377 || (arginfo[i].linear_step
3378 != n->simdclone->args[i].linear_step))
3381 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3382 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3383 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3384 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3385 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3386 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3390 case SIMD_CLONE_ARG_TYPE_MASK:
3393 if (i == (size_t) -1)
3395 if (n->simdclone->args[i].alignment > arginfo[i].align)
3400 if (arginfo[i].align)
3401 this_badness += (exact_log2 (arginfo[i].align)
3402 - exact_log2 (n->simdclone->args[i].alignment));
3404 if (i == (size_t) -1)
3406 if (bestn == NULL || this_badness < badness)
3409 badness = this_badness;
3416 for (i = 0; i < nargs; i++)
3417 if ((arginfo[i].dt == vect_constant_def
3418 || arginfo[i].dt == vect_external_def)
3419 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3422 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3424 if (arginfo[i].vectype == NULL
3425 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3426 > bestn->simdclone->simdlen))
3430 fndecl = bestn->decl;
3431 nunits = bestn->simdclone->simdlen;
3432 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3434 /* If the function isn't const, only allow it in simd loops where the user
3435 has asserted that at least nunits consecutive iterations can be
3436 performed using SIMD instructions. */
3437 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3438 && gimple_vuse (stmt))
3441 /* Sanity check: make sure that at least one copy of the vectorized stmt
3442 needs to be generated. */
3443 gcc_assert (ncopies >= 1);
3445 if (!vec_stmt) /* transformation not required. */
3447 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3448 for (i = 0; i < nargs; i++)
3449 if ((bestn->simdclone->args[i].arg_type
3450 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3451 || (bestn->simdclone->args[i].arg_type
3452 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3454 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3456 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3457 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3458 ? size_type_node : TREE_TYPE (arginfo[i].op);
3459 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3460 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3461 tree sll = arginfo[i].simd_lane_linear
3462 ? boolean_true_node : boolean_false_node;
3463 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3465 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3466 if (dump_enabled_p ())
3467 dump_printf_loc (MSG_NOTE, vect_location,
3468 "=== vectorizable_simd_clone_call ===\n");
3469 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3475 if (dump_enabled_p ())
3476 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3479 scalar_dest = gimple_call_lhs (stmt);
3480 vec_dest = NULL_TREE;
3485 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3486 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3487 if (TREE_CODE (rtype) == ARRAY_TYPE)
3490 rtype = TREE_TYPE (ratype);
3494 prev_stmt_info = NULL;
3495 for (j = 0; j < ncopies; ++j)
3497 /* Build argument list for the vectorized call. */
3499 vargs.create (nargs);
3503 for (i = 0; i < nargs; i++)
3505 unsigned int k, l, m, o;
3507 op = gimple_call_arg (stmt, i);
3508 switch (bestn->simdclone->args[i].arg_type)
3510 case SIMD_CLONE_ARG_TYPE_VECTOR:
3511 atype = bestn->simdclone->args[i].vector_type;
3512 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3513 for (m = j * o; m < (j + 1) * o; m++)
3515 if (TYPE_VECTOR_SUBPARTS (atype)
3516 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3518 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3519 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3520 / TYPE_VECTOR_SUBPARTS (atype));
3521 gcc_assert ((k & (k - 1)) == 0);
3524 = vect_get_vec_def_for_operand (op, stmt);
3527 vec_oprnd0 = arginfo[i].op;
3528 if ((m & (k - 1)) == 0)
3530 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3533 arginfo[i].op = vec_oprnd0;
3535 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3537 bitsize_int ((m & (k - 1)) * prec));
3539 = gimple_build_assign (make_ssa_name (atype),
3541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3542 vargs.safe_push (gimple_assign_lhs (new_stmt));
3546 k = (TYPE_VECTOR_SUBPARTS (atype)
3547 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3548 gcc_assert ((k & (k - 1)) == 0);
3549 vec<constructor_elt, va_gc> *ctor_elts;
3551 vec_alloc (ctor_elts, k);
3554 for (l = 0; l < k; l++)
3556 if (m == 0 && l == 0)
3558 = vect_get_vec_def_for_operand (op, stmt);
3561 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3563 arginfo[i].op = vec_oprnd0;
3566 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3570 vargs.safe_push (vec_oprnd0);
3573 vec_oprnd0 = build_constructor (atype, ctor_elts);
3575 = gimple_build_assign (make_ssa_name (atype),
3577 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3578 vargs.safe_push (gimple_assign_lhs (new_stmt));
3583 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3584 vargs.safe_push (op);
3586 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3587 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3592 = force_gimple_operand (arginfo[i].op, &stmts, true,
3597 edge pe = loop_preheader_edge (loop);
3598 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3599 gcc_assert (!new_bb);
3601 if (arginfo[i].simd_lane_linear)
3603 vargs.safe_push (arginfo[i].op);
3606 tree phi_res = copy_ssa_name (op);
3607 gphi *new_phi = create_phi_node (phi_res, loop->header);
3608 set_vinfo_for_stmt (new_phi,
3609 new_stmt_vec_info (new_phi, loop_vinfo));
3610 add_phi_arg (new_phi, arginfo[i].op,
3611 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3613 = POINTER_TYPE_P (TREE_TYPE (op))
3614 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3615 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3616 ? sizetype : TREE_TYPE (op);
3618 = wi::mul (bestn->simdclone->args[i].linear_step,
3620 tree tcst = wide_int_to_tree (type, cst);
3621 tree phi_arg = copy_ssa_name (op);
3623 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3624 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3625 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3626 set_vinfo_for_stmt (new_stmt,
3627 new_stmt_vec_info (new_stmt, loop_vinfo));
3628 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3630 arginfo[i].op = phi_res;
3631 vargs.safe_push (phi_res);
3636 = POINTER_TYPE_P (TREE_TYPE (op))
3637 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3638 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3639 ? sizetype : TREE_TYPE (op);
3641 = wi::mul (bestn->simdclone->args[i].linear_step,
3643 tree tcst = wide_int_to_tree (type, cst);
3644 new_temp = make_ssa_name (TREE_TYPE (op));
3645 new_stmt = gimple_build_assign (new_temp, code,
3646 arginfo[i].op, tcst);
3647 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3648 vargs.safe_push (new_temp);
3651 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3652 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3653 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3654 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3655 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3656 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3662 new_stmt = gimple_build_call_vec (fndecl, vargs);
3665 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3667 new_temp = create_tmp_var (ratype);
3668 else if (TYPE_VECTOR_SUBPARTS (vectype)
3669 == TYPE_VECTOR_SUBPARTS (rtype))
3670 new_temp = make_ssa_name (vec_dest, new_stmt);
3672 new_temp = make_ssa_name (rtype, new_stmt);
3673 gimple_call_set_lhs (new_stmt, new_temp);
3675 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3679 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3682 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3683 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3684 gcc_assert ((k & (k - 1)) == 0);
3685 for (l = 0; l < k; l++)
3690 t = build_fold_addr_expr (new_temp);
3691 t = build2 (MEM_REF, vectype, t,
3692 build_int_cst (TREE_TYPE (t),
3693 l * prec / BITS_PER_UNIT));
3696 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3697 size_int (prec), bitsize_int (l * prec));
3699 = gimple_build_assign (make_ssa_name (vectype), t);
3700 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3701 if (j == 0 && l == 0)
3702 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3704 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3706 prev_stmt_info = vinfo_for_stmt (new_stmt);
3711 tree clobber = build_constructor (ratype, NULL);
3712 TREE_THIS_VOLATILE (clobber) = 1;
3713 new_stmt = gimple_build_assign (new_temp, clobber);
3714 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3718 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3720 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3721 / TYPE_VECTOR_SUBPARTS (rtype));
3722 gcc_assert ((k & (k - 1)) == 0);
3723 if ((j & (k - 1)) == 0)
3724 vec_alloc (ret_ctor_elts, k);
3727 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3728 for (m = 0; m < o; m++)
3730 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3731 size_int (m), NULL_TREE, NULL_TREE);
3733 = gimple_build_assign (make_ssa_name (rtype), tem);
3734 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3735 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3736 gimple_assign_lhs (new_stmt));
3738 tree clobber = build_constructor (ratype, NULL);
3739 TREE_THIS_VOLATILE (clobber) = 1;
3740 new_stmt = gimple_build_assign (new_temp, clobber);
3741 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3744 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3745 if ((j & (k - 1)) != k - 1)
3747 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3749 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3750 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3752 if ((unsigned) j == k - 1)
3753 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3755 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3757 prev_stmt_info = vinfo_for_stmt (new_stmt);
3762 tree t = build_fold_addr_expr (new_temp);
3763 t = build2 (MEM_REF, vectype, t,
3764 build_int_cst (TREE_TYPE (t), 0));
3766 = gimple_build_assign (make_ssa_name (vec_dest), t);
3767 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3768 tree clobber = build_constructor (ratype, NULL);
3769 TREE_THIS_VOLATILE (clobber) = 1;
3770 vect_finish_stmt_generation (stmt,
3771 gimple_build_assign (new_temp,
3777 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3779 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3781 prev_stmt_info = vinfo_for_stmt (new_stmt);
3786 /* The call in STMT might prevent it from being removed in DCE.
3787 We however cannot remove it here, due to the way the SSA name
3788 it defines is mapped to the new definition. So just replace the
3789 rhs of the statement with something harmless. */
3796 type = TREE_TYPE (scalar_dest);
3797 if (is_pattern_stmt_p (stmt_info))
3798 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3800 lhs = gimple_call_lhs (stmt);
3801 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3804 new_stmt = gimple_build_nop ();
3805 set_vinfo_for_stmt (new_stmt, stmt_info);
3806 set_vinfo_for_stmt (stmt, NULL);
3807 STMT_VINFO_STMT (stmt_info) = new_stmt;
3808 gsi_replace (gsi, new_stmt, true);
3809 unlink_stmt_vdef (stmt);
3815 /* Function vect_gen_widened_results_half
3817 Create a vector stmt whose code is CODE, whose number of arguments is
3818 OP_TYPE, and whose result variable is VEC_DEST, and whose arguments are
3819 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3820 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3821 needs to be created (DECL is a function-decl of a target-builtin).
3822 STMT is the original scalar stmt that we are vectorizing. */
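/* E.g. widening a V8HI operand to V4SI produces two result vectors, one
   built with the LO variant of the operation and one with the HI variant
   (or two different target builtins); this helper emits one of the two
   halves. */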
3825 vect_gen_widened_results_half (enum tree_code code,
3827 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3828 tree vec_dest, gimple_stmt_iterator *gsi,
3834 /* Generate half of the widened result: */
3835 if (code == CALL_EXPR)
3837 /* Target specific support */
3838 if (op_type == binary_op)
3839 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3841 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3842 new_temp = make_ssa_name (vec_dest, new_stmt);
3843 gimple_call_set_lhs (new_stmt, new_temp);
3847 /* Generic support */
3848 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3849 if (op_type != binary_op)
3851 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3852 new_temp = make_ssa_name (vec_dest, new_stmt);
3853 gimple_assign_set_lhs (new_stmt, new_temp);
3855 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3861 /* Get vectorized definitions for loop-based vectorization. For the first
3862 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3863 scalar operand), and for the rest we get a copy with
3864 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3865 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3866 The vectors are collected into VEC_OPRNDS. */
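/* E.g. a call with MULTI_STEP_CVT == 1 pushes four defs into VEC_OPRNDS:
   the def for OPRND, one copy of it, and, from the recursive call, two
   further copies. */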
3869 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3870 vec<tree> *vec_oprnds, int multi_step_cvt)
3874 /* Get first vector operand. */
3875 /* All the vector operands except the very first one (that is, the scalar
3876 oprnd) are stmt copies. */
3877 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3878 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3880 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3882 vec_oprnds->quick_push (vec_oprnd);
3884 /* Get second vector operand. */
3885 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3886 vec_oprnds->quick_push (vec_oprnd);
3890 /* For conversion in multiple steps, continue to get operands
3891 recursively. */
3893 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3897 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3898 For multi-step conversions store the resulting vectors and call the function
3899 recursively. */
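/* E.g. a two-step demotion from V4SI to V16QI first packs pairs of V4SI
   operands into V8HI vectors with VEC_PACK_TRUNC_EXPR and then, in the
   recursive call, packs pairs of those V8HI results into V16QI. */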
3902 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3903 int multi_step_cvt, gimple *stmt,
3905 gimple_stmt_iterator *gsi,
3906 slp_tree slp_node, enum tree_code code,
3907 stmt_vec_info *prev_stmt_info)
3910 tree vop0, vop1, new_tmp, vec_dest;
3912 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3914 vec_dest = vec_dsts.pop ();
3916 for (i = 0; i < vec_oprnds->length (); i += 2)
3918 /* Create demotion operation. */
3919 vop0 = (*vec_oprnds)[i];
3920 vop1 = (*vec_oprnds)[i + 1];
3921 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3922 new_tmp = make_ssa_name (vec_dest, new_stmt);
3923 gimple_assign_set_lhs (new_stmt, new_tmp);
3924 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3927 /* Store the resulting vector for the next recursive call. */
3928 (*vec_oprnds)[i/2] = new_tmp;
3931 /* This is the last step of the conversion sequence. Store the
3932 vectors in SLP_NODE or in vector info of the scalar statement
3933 (or in STMT_VINFO_RELATED_STMT chain). */
3935 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3938 if (!*prev_stmt_info)
3939 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3941 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3943 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3948 /* For multi-step demotion operations we first generate demotion operations
3949 from the source type to the intermediate types, and then combine the
3950 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3951 type. */
3954 /* At each level of recursion we have half of the operands we had at the
3955 previous level. */
3956 vec_oprnds->truncate ((i+1)/2);
3957 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3958 stmt, vec_dsts, gsi, slp_node,
3959 VEC_PACK_TRUNC_EXPR,
3963 vec_dsts.quick_push (vec_dest);
3967 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3968 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3969 the resulting vectors and call the function recursively. */
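/* E.g. promoting V8HI operands to V4SI results turns each input vector into
   two output vectors via the LO/HI pair of codes (or target builtins),
   doubling the number of vectors at each step. */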
3972 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3973 vec<tree> *vec_oprnds1,
3974 gimple *stmt, tree vec_dest,
3975 gimple_stmt_iterator *gsi,
3976 enum tree_code code1,
3977 enum tree_code code2, tree decl1,
3978 tree decl2, int op_type)
3981 tree vop0, vop1, new_tmp1, new_tmp2;
3982 gimple *new_stmt1, *new_stmt2;
3983 vec<tree> vec_tmp = vNULL;
3985 vec_tmp.create (vec_oprnds0->length () * 2);
3986 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3988 if (op_type == binary_op)
3989 vop1 = (*vec_oprnds1)[i];
3993 /* Generate the two halves of the promotion operation. */
3994 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3995 op_type, vec_dest, gsi, stmt);
3996 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3997 op_type, vec_dest, gsi, stmt);
3998 if (is_gimple_call (new_stmt1))
4000 new_tmp1 = gimple_call_lhs (new_stmt1);
4001 new_tmp2 = gimple_call_lhs (new_stmt2);
4005 new_tmp1 = gimple_assign_lhs (new_stmt1);
4006 new_tmp2 = gimple_assign_lhs (new_stmt2);
4009 /* Store the results for the next step. */
4010 vec_tmp.quick_push (new_tmp1);
4011 vec_tmp.quick_push (new_tmp2);
4014 vec_oprnds0->release ();
4015 *vec_oprnds0 = vec_tmp;
4019 /* Check if STMT performs a conversion operation that can be vectorized.
4020 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4021 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4022 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
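/* Examples of conversions handled here (illustrative only):

     float f = (float) n;          -- FLOAT_EXPR
     short s = (short) n;          -- NOP_EXPR, narrowing
     long  w = (long) a * (long) b; -- WIDEN_MULT_EXPR (from pattern recog)

   Depending on the element sizes, the vector form may be a single
   statement, a widening LO/HI pair, or a multi-step chain through
   intermediate types. */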
4025 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4026 gimple **vec_stmt, slp_tree slp_node)
4030 tree op0, op1 = NULL_TREE;
4031 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4032 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4033 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4034 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4035 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4036 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4039 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4041 gimple *new_stmt = NULL;
4042 stmt_vec_info prev_stmt_info;
4045 tree vectype_out, vectype_in;
4047 tree lhs_type, rhs_type;
4048 enum { NARROW, NONE, WIDEN } modifier;
4049 vec<tree> vec_oprnds0 = vNULL;
4050 vec<tree> vec_oprnds1 = vNULL;
4052 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4053 vec_info *vinfo = stmt_info->vinfo;
4054 int multi_step_cvt = 0;
4055 vec<tree> interm_types = vNULL;
4056 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4058 machine_mode rhs_mode;
4059 unsigned short fltsz;
4061 /* Is STMT a vectorizable conversion? */
4063 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4066 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4070 if (!is_gimple_assign (stmt))
4073 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4076 code = gimple_assign_rhs_code (stmt);
4077 if (!CONVERT_EXPR_CODE_P (code)
4078 && code != FIX_TRUNC_EXPR
4079 && code != FLOAT_EXPR
4080 && code != WIDEN_MULT_EXPR
4081 && code != WIDEN_LSHIFT_EXPR)
4084 op_type = TREE_CODE_LENGTH (code);
4086 /* Check types of lhs and rhs. */
4087 scalar_dest = gimple_assign_lhs (stmt);
4088 lhs_type = TREE_TYPE (scalar_dest);
4089 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4091 op0 = gimple_assign_rhs1 (stmt);
4092 rhs_type = TREE_TYPE (op0);
4094 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4095 && !((INTEGRAL_TYPE_P (lhs_type)
4096 && INTEGRAL_TYPE_P (rhs_type))
4097 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4098 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4101 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4102 && ((INTEGRAL_TYPE_P (lhs_type)
4103 && (TYPE_PRECISION (lhs_type)
4104 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4105 || (INTEGRAL_TYPE_P (rhs_type)
4106 && (TYPE_PRECISION (rhs_type)
4107 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4109 if (dump_enabled_p ())
4110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4111 "type conversion to/from bit-precision unsupported."
4116 /* Check the operands of the operation. */
4117 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4119 if (dump_enabled_p ())
4120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4121 "use not simple.\n");
4124 if (op_type == binary_op)
4128 op1 = gimple_assign_rhs2 (stmt);
4129 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
/* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of OP1.  */
4132 if (CONSTANT_CLASS_P (op0))
4133 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4135 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4139 if (dump_enabled_p ())
4140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4141 "use not simple.\n");
/* If op0 is an external or constant def, use a vector type of
the same size as the output vector type.  */
4149 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4151 gcc_assert (vectype_in);
4154 if (dump_enabled_p ())
4156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4157 "no vectype for scalar type ");
4158 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4159 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4165 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4166 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4168 if (dump_enabled_p ())
4170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4171 "can't convert between boolean and non "
4173 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4174 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4180 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4181 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4182 if (nunits_in < nunits_out)
4184 else if (nunits_out == nunits_in)
4189 /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
case of SLP.  */
4194 else if (modifier == NARROW)
4195 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4197 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4199 /* Sanity check: make sure that at least one copy of the vectorized stmt
4200 needs to be generated. */
4201 gcc_assert (ncopies >= 1);
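/* For example (a sketch assuming VF = 8 and V4SI vectors): a NONE-modifier
   conversion such as int -> float needs ncopies = 8 / 4 = 2 copies, while
   a WIDEN conversion from a V8HI input needs ncopies = 8 / 8 = 1.  */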
4203 /* Supportable by target? */
4207 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4209 if (supportable_convert_operation (code, vectype_out, vectype_in,
4214 if (dump_enabled_p ())
4215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4216 "conversion not supported by target.\n");
4220 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4221 &code1, &code2, &multi_step_cvt,
/* Binary widening operations can only be supported directly by the
architecture.  */
4226 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4230 if (code != FLOAT_EXPR
4231 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4232 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4235 rhs_mode = TYPE_MODE (rhs_type);
4236 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4237 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4238 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4239 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4242 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4243 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4244 if (cvt_type == NULL_TREE)
4247 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4249 if (!supportable_convert_operation (code, vectype_out,
4250 cvt_type, &decl1, &codecvt1))
4253 else if (!supportable_widening_operation (code, stmt, vectype_out,
4254 cvt_type, &codecvt1,
4255 &codecvt2, &multi_step_cvt,
4259 gcc_assert (multi_step_cvt == 0);
4261 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4262 vectype_in, &code1, &code2,
4263 &multi_step_cvt, &interm_types))
4267 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4270 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4271 codecvt2 = ERROR_MARK;
4275 interm_types.safe_push (cvt_type);
4276 cvt_type = NULL_TREE;
4281 gcc_assert (op_type == unary_op);
4282 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4283 &code1, &multi_step_cvt,
4287 if (code != FIX_TRUNC_EXPR
4288 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4289 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4292 rhs_mode = TYPE_MODE (rhs_type);
4294 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4295 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4296 if (cvt_type == NULL_TREE)
4298 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4301 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4302 &code1, &multi_step_cvt,
4311 if (!vec_stmt) /* transformation not required. */
4313 if (dump_enabled_p ())
4314 dump_printf_loc (MSG_NOTE, vect_location,
4315 "=== vectorizable_conversion ===\n");
4316 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4318 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4319 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4321 else if (modifier == NARROW)
4323 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4324 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4328 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4329 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4331 interm_types.release ();
4336 if (dump_enabled_p ())
4337 dump_printf_loc (MSG_NOTE, vect_location,
4338 "transform conversion. ncopies = %d.\n", ncopies);
4340 if (op_type == binary_op)
4342 if (CONSTANT_CLASS_P (op0))
4343 op0 = fold_convert (TREE_TYPE (op1), op0);
4344 else if (CONSTANT_CLASS_P (op1))
4345 op1 = fold_convert (TREE_TYPE (op0), op1);
4348 /* In case of multi-step conversion, we first generate conversion operations
to the intermediate types, and then from those types to the final one.
4350 We create vector destinations for the intermediate type (TYPES) received
4351 from supportable_*_operation, and store them in the correct order
4352 for future use in vect_create_vectorized_*_stmts (). */
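/* As a sketch, assume a two-step widening char -> int through an
   intermediate short type: VEC_DSTS is filled back to front,

       vec_dsts[0] = int-vector destination
       vec_dsts[1] = short-vector destination

   so index I in the generation loops below selects the destination of
   conversion step I, with I == 0 being the final type.  */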
4353 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4354 vec_dest = vect_create_destination_var (scalar_dest,
4355 (cvt_type && modifier == WIDEN)
4356 ? cvt_type : vectype_out);
4357 vec_dsts.quick_push (vec_dest);
4361 for (i = interm_types.length () - 1;
4362 interm_types.iterate (i, &intermediate_type); i--)
4364 vec_dest = vect_create_destination_var (scalar_dest,
4366 vec_dsts.quick_push (vec_dest);
4371 vec_dest = vect_create_destination_var (scalar_dest,
4373 ? vectype_out : cvt_type);
4377 if (modifier == WIDEN)
4379 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4380 if (op_type == binary_op)
4381 vec_oprnds1.create (1);
4383 else if (modifier == NARROW)
4384 vec_oprnds0.create (
4385 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4387 else if (code == WIDEN_LSHIFT_EXPR)
4388 vec_oprnds1.create (slp_node->vec_stmts_size);
4391 prev_stmt_info = NULL;
4395 for (j = 0; j < ncopies; j++)
4398 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4400 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4402 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
/* Arguments are ready.  Create the new vector stmt.  */
4405 if (code1 == CALL_EXPR)
4407 new_stmt = gimple_build_call (decl1, 1, vop0);
4408 new_temp = make_ssa_name (vec_dest, new_stmt);
4409 gimple_call_set_lhs (new_stmt, new_temp);
4413 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4414 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4415 new_temp = make_ssa_name (vec_dest, new_stmt);
4416 gimple_assign_set_lhs (new_stmt, new_temp);
4419 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4421 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4424 if (!prev_stmt_info)
4425 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4427 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4428 prev_stmt_info = vinfo_for_stmt (new_stmt);
4435 /* In case the vectorization factor (VF) is bigger than the number
4436 of elements that we can fit in a vectype (nunits), we have to
generate more than one vector stmt, i.e., we need to "unroll"
4438 the vector stmt by a factor VF/nunits. */
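/* Sketch of what the WIDEN loop below emits (illustrative names): for a
   short -> int conversion, each input vector vx yields a pair

       vs1 = VEC_UNPACK_LO_EXPR <vx>
       vs2 = VEC_UNPACK_HI_EXPR <vx>

   and for a multi-step conversion the pairs are fed back into the next
   level by vect_create_vectorized_promotion_stmts.  */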
4439 for (j = 0; j < ncopies; j++)
4446 if (code == WIDEN_LSHIFT_EXPR)
4451 /* Store vec_oprnd1 for every vector stmt to be created
4452 for SLP_NODE. We check during the analysis that all
4453 the shift arguments are the same. */
4454 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4455 vec_oprnds1.quick_push (vec_oprnd1);
4457 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4461 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4462 &vec_oprnds1, slp_node);
4466 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4467 vec_oprnds0.quick_push (vec_oprnd0);
4468 if (op_type == binary_op)
4470 if (code == WIDEN_LSHIFT_EXPR)
4473 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4474 vec_oprnds1.quick_push (vec_oprnd1);
4480 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4481 vec_oprnds0.truncate (0);
4482 vec_oprnds0.quick_push (vec_oprnd0);
4483 if (op_type == binary_op)
4485 if (code == WIDEN_LSHIFT_EXPR)
4488 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4490 vec_oprnds1.truncate (0);
4491 vec_oprnds1.quick_push (vec_oprnd1);
4495 /* Arguments are ready. Create the new vector stmts. */
4496 for (i = multi_step_cvt; i >= 0; i--)
4498 tree this_dest = vec_dsts[i];
4499 enum tree_code c1 = code1, c2 = code2;
4500 if (i == 0 && codecvt2 != ERROR_MARK)
4505 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4507 stmt, this_dest, gsi,
4508 c1, c2, decl1, decl2,
4512 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4516 if (codecvt1 == CALL_EXPR)
4518 new_stmt = gimple_build_call (decl1, 1, vop0);
4519 new_temp = make_ssa_name (vec_dest, new_stmt);
4520 gimple_call_set_lhs (new_stmt, new_temp);
4524 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4525 new_temp = make_ssa_name (vec_dest);
4526 new_stmt = gimple_build_assign (new_temp, codecvt1,
4530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4533 new_stmt = SSA_NAME_DEF_STMT (vop0);
4536 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4539 if (!prev_stmt_info)
4540 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4542 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4543 prev_stmt_info = vinfo_for_stmt (new_stmt);
4548 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4552 /* In case the vectorization factor (VF) is bigger than the number
4553 of elements that we can fit in a vectype (nunits), we have to
generate more than one vector stmt, i.e., we need to "unroll"
4555 the vector stmt by a factor VF/nunits. */
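/* Sketch of the NARROW case (illustrative names): each demotion step
   packs two input vectors into one narrower result, e.g. for int -> short

       vz = VEC_PACK_TRUNC_EXPR <vx0, vx1>

   so a multi-step narrowing consumes a power-of-two group of input
   vectors per final result.  */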
4556 for (j = 0; j < ncopies; j++)
4560 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4564 vec_oprnds0.truncate (0);
4565 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4566 vect_pow2 (multi_step_cvt) - 1);
4569 /* Arguments are ready. Create the new vector stmts. */
4571 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4573 if (codecvt1 == CALL_EXPR)
4575 new_stmt = gimple_build_call (decl1, 1, vop0);
4576 new_temp = make_ssa_name (vec_dest, new_stmt);
4577 gimple_call_set_lhs (new_stmt, new_temp);
4581 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4582 new_temp = make_ssa_name (vec_dest);
4583 new_stmt = gimple_build_assign (new_temp, codecvt1,
4587 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4588 vec_oprnds0[i] = new_temp;
4591 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4592 stmt, vec_dsts, gsi,
4597 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4601 vec_oprnds0.release ();
4602 vec_oprnds1.release ();
4603 interm_types.release ();
4609 /* Function vectorizable_assignment.
4611 Check if STMT performs an assignment (copy) that can be vectorized.
4612 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4613 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4614 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4617 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4618 gimple **vec_stmt, slp_tree slp_node)
4623 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4624 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4627 enum vect_def_type dt[1] = {vect_unknown_def_type};
4631 vec<tree> vec_oprnds = vNULL;
4633 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4634 vec_info *vinfo = stmt_info->vinfo;
4635 gimple *new_stmt = NULL;
4636 stmt_vec_info prev_stmt_info = NULL;
4637 enum tree_code code;
4640 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4643 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4647 /* Is vectorizable assignment? */
4648 if (!is_gimple_assign (stmt))
4651 scalar_dest = gimple_assign_lhs (stmt);
4652 if (TREE_CODE (scalar_dest) != SSA_NAME)
4655 code = gimple_assign_rhs_code (stmt);
4656 if (gimple_assign_single_p (stmt)
4657 || code == PAREN_EXPR
4658 || CONVERT_EXPR_CODE_P (code))
4659 op = gimple_assign_rhs1 (stmt);
4663 if (code == VIEW_CONVERT_EXPR)
4664 op = TREE_OPERAND (op, 0);
4666 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4667 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4669 /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
case of SLP.  */
4675 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4677 gcc_assert (ncopies >= 1);
4679 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4681 if (dump_enabled_p ())
4682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4683 "use not simple.\n");
4687 /* We can handle NOP_EXPR conversions that do not change the number
4688 of elements or the vector size. */
4689 if ((CONVERT_EXPR_CODE_P (code)
4690 || code == VIEW_CONVERT_EXPR)
4692 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4693 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4694 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
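/* For instance (a sketch): an unsigned int -> int copy keeps both the
   element count and the vector size (V4SI on both sides), so it is
   accepted here, whereas int -> short changes the vector size and is
   left to vectorizable_conversion instead.  */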
4697 /* We do not handle bit-precision changes. */
4698 if ((CONVERT_EXPR_CODE_P (code)
4699 || code == VIEW_CONVERT_EXPR)
4700 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4701 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4702 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4703 || ((TYPE_PRECISION (TREE_TYPE (op))
4704 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4705 /* But a conversion that does not change the bit-pattern is ok. */
4706 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4707 > TYPE_PRECISION (TREE_TYPE (op)))
4708 && TYPE_UNSIGNED (TREE_TYPE (op)))
/* Conversion between boolean types of different sizes is
a simple assignment in case their vectypes are the same
boolean vectors.  */
4712 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4713 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4715 if (dump_enabled_p ())
4716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4717 "type conversion to/from bit-precision "
4722 if (!vec_stmt) /* transformation not required. */
4724 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4725 if (dump_enabled_p ())
4726 dump_printf_loc (MSG_NOTE, vect_location,
4727 "=== vectorizable_assignment ===\n");
4728 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4733 if (dump_enabled_p ())
4734 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4737 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4740 for (j = 0; j < ncopies; j++)
4744 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4746 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
/* Arguments are ready.  Create the new vector stmt.  */
4749 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4751 if (CONVERT_EXPR_CODE_P (code)
4752 || code == VIEW_CONVERT_EXPR)
4753 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4754 new_stmt = gimple_build_assign (vec_dest, vop);
4755 new_temp = make_ssa_name (vec_dest, new_stmt);
4756 gimple_assign_set_lhs (new_stmt, new_temp);
4757 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4759 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4766 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4768 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4770 prev_stmt_info = vinfo_for_stmt (new_stmt);
4773 vec_oprnds.release ();
4778 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4779 either as shift by a scalar or by a vector. */
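/* Usage sketch (illustrative, not from the original sources): callers
   such as the pattern recognizer can ask up front

       if (vect_supportable_shift (RSHIFT_EXPR, itype))
         ... build the shifted pattern stmt ...

   where ITYPE is a hypothetical scalar type; the predicate only says
   that some optab exists, and the scalar-vs-vector shift-amount decision
   is made later in vectorizable_shift.  */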
4782 vect_supportable_shift (enum tree_code code, tree scalar_type)
4785 machine_mode vec_mode;
4790 vectype = get_vectype_for_scalar_type (scalar_type);
4794 optab = optab_for_tree_code (code, vectype, optab_scalar);
4796 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4798 optab = optab_for_tree_code (code, vectype, optab_vector);
4800 || (optab_handler (optab, TYPE_MODE (vectype))
4801 == CODE_FOR_nothing))
4805 vec_mode = TYPE_MODE (vectype);
4806 icode = (int) optab_handler (optab, vec_mode);
4807 if (icode == CODE_FOR_nothing)
4814 /* Function vectorizable_shift.
4816 Check if STMT performs a shift operation that can be vectorized.
4817 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4818 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4819 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4822 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4823 gimple **vec_stmt, slp_tree slp_node)
4827 tree op0, op1 = NULL;
4828 tree vec_oprnd1 = NULL_TREE;
4829 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4831 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4832 enum tree_code code;
4833 machine_mode vec_mode;
4837 machine_mode optab_op2_mode;
4839 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4841 gimple *new_stmt = NULL;
4842 stmt_vec_info prev_stmt_info;
4849 vec<tree> vec_oprnds0 = vNULL;
4850 vec<tree> vec_oprnds1 = vNULL;
4853 bool scalar_shift_arg = true;
4854 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4855 vec_info *vinfo = stmt_info->vinfo;
4858 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4861 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4865 /* Is STMT a vectorizable binary/unary operation? */
4866 if (!is_gimple_assign (stmt))
4869 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4872 code = gimple_assign_rhs_code (stmt);
4874 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4875 || code == RROTATE_EXPR))
4878 scalar_dest = gimple_assign_lhs (stmt);
4879 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4880 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4881 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4885 "bit-precision shifts not supported.\n");
4889 op0 = gimple_assign_rhs1 (stmt);
4890 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4892 if (dump_enabled_p ())
4893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4894 "use not simple.\n");
/* If op0 is an external or constant def, use a vector type with
4898 the same size as the output vector type. */
4900 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4902 gcc_assert (vectype);
4905 if (dump_enabled_p ())
4906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4907 "no vectype for scalar type\n");
4911 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4912 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4913 if (nunits_out != nunits_in)
4916 op1 = gimple_assign_rhs2 (stmt);
4917 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4919 if (dump_enabled_p ())
4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4921 "use not simple.\n");
4926 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4930 /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
case of SLP.  */
4936 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4938 gcc_assert (ncopies >= 1);
/* Determine whether the shift amount is a vector or a scalar.  If the
4941 shift/rotate amount is a vector, use the vector/vector shift optabs. */
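/* E.g. (illustrative): in

       a[i] = b[i] << c[i];
       a[i] = b[i] << s;

   the first form (per-element shift amount) requires the vector/vector
   optab, while the second (loop-invariant amount) can use the usually
   cheaper vector/scalar form.  */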
4943 if ((dt[1] == vect_internal_def
4944 || dt[1] == vect_induction_def)
4946 scalar_shift_arg = false;
4947 else if (dt[1] == vect_constant_def
4948 || dt[1] == vect_external_def
4949 || dt[1] == vect_internal_def)
/* In SLP, we need to check whether the shift count is the same;
in loops, if it is a constant or invariant, it is always
a scalar shift.  */
4956 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4959 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4960 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4961 scalar_shift_arg = false;
4964 /* If the shift amount is computed by a pattern stmt we cannot
use the scalar amount directly, so give up and use a vector
shift.  */
4967 if (dt[1] == vect_internal_def)
4969 gimple *def = SSA_NAME_DEF_STMT (op1);
4970 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4971 scalar_shift_arg = false;
4976 if (dump_enabled_p ())
4977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4978 "operand mode requires invariant argument.\n");
4982 /* Vector shifted by vector. */
4983 if (!scalar_shift_arg)
4985 optab = optab_for_tree_code (code, vectype, optab_vector);
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_NOTE, vect_location,
4988 "vector/vector shift/rotate found.\n");
4991 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4992 if (op1_vectype == NULL_TREE
4993 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4995 if (dump_enabled_p ())
4996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4997 "unusable type for last operand in"
4998 " vector/vector shift/rotate.\n");
5002 /* See if the machine has a vector shifted by scalar insn and if not
5003 then see if it has a vector shifted by vector insn. */
5006 optab = optab_for_tree_code (code, vectype, optab_scalar);
5008 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5010 if (dump_enabled_p ())
5011 dump_printf_loc (MSG_NOTE, vect_location,
5012 "vector/scalar shift/rotate found.\n");
5016 optab = optab_for_tree_code (code, vectype, optab_vector);
5018 && (optab_handler (optab, TYPE_MODE (vectype))
5019 != CODE_FOR_nothing))
5021 scalar_shift_arg = false;
5023 if (dump_enabled_p ())
5024 dump_printf_loc (MSG_NOTE, vect_location,
5025 "vector/vector shift/rotate found.\n");
5027 /* Unlike the other binary operators, shifts/rotates have
5028 the rhs being int, instead of the same type as the lhs,
5029 so make sure the scalar is the right type if we are
5030 dealing with vectors of long long/long/short/char. */
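/* A sketch: with V2DI lanes in

       long long x = ...;
       ... x << 2 ...

   the constant 2 has type int, so it is first converted to long long;
   otherwise building the V2DI shift-amount vector would mix modes.  */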
5031 if (dt[1] == vect_constant_def)
5032 op1 = fold_convert (TREE_TYPE (vectype), op1);
5033 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5037 && TYPE_MODE (TREE_TYPE (vectype))
5038 != TYPE_MODE (TREE_TYPE (op1)))
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5042 "unusable type for last operand in"
5043 " vector/vector shift/rotate.\n");
5046 if (vec_stmt && !slp_node)
5048 op1 = fold_convert (TREE_TYPE (vectype), op1);
5049 op1 = vect_init_vector (stmt, op1,
5050 TREE_TYPE (vectype), NULL);
5057 /* Supportable by target? */
5060 if (dump_enabled_p ())
5061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5065 vec_mode = TYPE_MODE (vectype);
5066 icode = (int) optab_handler (optab, vec_mode);
5067 if (icode == CODE_FOR_nothing)
5069 if (dump_enabled_p ())
5070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5071 "op not supported by target.\n");
5072 /* Check only during analysis. */
5073 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5074 || (vf < vect_min_worthwhile_factor (code)
5077 if (dump_enabled_p ())
5078 dump_printf_loc (MSG_NOTE, vect_location,
5079 "proceeding using word mode.\n");
5082 /* Worthwhile without SIMD support? Check only during analysis. */
5083 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5084 && vf < vect_min_worthwhile_factor (code)
5087 if (dump_enabled_p ())
5088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5089 "not worthwhile without SIMD support.\n");
5093 if (!vec_stmt) /* transformation not required. */
5095 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5096 if (dump_enabled_p ())
5097 dump_printf_loc (MSG_NOTE, vect_location,
5098 "=== vectorizable_shift ===\n");
5099 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5105 if (dump_enabled_p ())
5106 dump_printf_loc (MSG_NOTE, vect_location,
5107 "transform binary/unary operation.\n");
5110 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5112 prev_stmt_info = NULL;
5113 for (j = 0; j < ncopies; j++)
5118 if (scalar_shift_arg)
5120 /* Vector shl and shr insn patterns can be defined with scalar
5121 operand 2 (shift operand). In this case, use constant or loop
invariant op1 directly, without extending it to vector mode
first.  */
5124 optab_op2_mode = insn_data[icode].operand[2].mode;
5125 if (!VECTOR_MODE_P (optab_op2_mode))
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_NOTE, vect_location,
5129 "operand 1 using scalar mode.\n");
5131 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5132 vec_oprnds1.quick_push (vec_oprnd1);
5135 /* Store vec_oprnd1 for every vector stmt to be created
5136 for SLP_NODE. We check during the analysis that all
5137 the shift arguments are the same.
5138 TODO: Allow different constants for different vector
5139 stmts generated for an SLP instance. */
5140 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5141 vec_oprnds1.quick_push (vec_oprnd1);
5146 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
(a special case for certain kinds of vector shifts); otherwise,
5148 operand 1 should be of a vector type (the usual case). */
5150 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5153 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5157 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5159 /* Arguments are ready. Create the new vector stmt. */
5160 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5162 vop1 = vec_oprnds1[i];
5163 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5164 new_temp = make_ssa_name (vec_dest, new_stmt);
5165 gimple_assign_set_lhs (new_stmt, new_temp);
5166 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5168 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5175 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5177 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5178 prev_stmt_info = vinfo_for_stmt (new_stmt);
5181 vec_oprnds0.release ();
5182 vec_oprnds1.release ();
5188 /* Function vectorizable_operation.
Check if STMT performs a binary, unary or ternary operation that can
be vectorized.
5192 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5193 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5197 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5198 gimple **vec_stmt, slp_tree slp_node)
5202 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5203 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5206 enum tree_code code;
5207 machine_mode vec_mode;
5211 bool target_support_p;
5213 enum vect_def_type dt[3]
5214 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5216 gimple *new_stmt = NULL;
5217 stmt_vec_info prev_stmt_info;
5223 vec<tree> vec_oprnds0 = vNULL;
5224 vec<tree> vec_oprnds1 = vNULL;
5225 vec<tree> vec_oprnds2 = vNULL;
5226 tree vop0, vop1, vop2;
5227 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5228 vec_info *vinfo = stmt_info->vinfo;
5231 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5234 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5238 /* Is STMT a vectorizable binary/unary operation? */
5239 if (!is_gimple_assign (stmt))
5242 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5245 code = gimple_assign_rhs_code (stmt);
5247 /* For pointer addition, we should use the normal plus for
5248 the vector addition. */
5249 if (code == POINTER_PLUS_EXPR)
/* Support only unary, binary and ternary operations.  */
5253 op_type = TREE_CODE_LENGTH (code);
5254 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5256 if (dump_enabled_p ())
5257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5258 "num. args = %d (not unary/binary/ternary op).\n",
5263 scalar_dest = gimple_assign_lhs (stmt);
5264 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
/* Most operations cannot handle bit-precision types without extra
truncations.  */
5268 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5269 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5270 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
/* Exceptions are bitwise binary operations.  */
5272 && code != BIT_IOR_EXPR
5273 && code != BIT_XOR_EXPR
5274 && code != BIT_AND_EXPR)
5276 if (dump_enabled_p ())
5277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5278 "bit-precision arithmetic not supported.\n");
5282 op0 = gimple_assign_rhs1 (stmt);
5283 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5285 if (dump_enabled_p ())
5286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5287 "use not simple.\n");
/* If op0 is an external or constant def, use a vector type with
5291 the same size as the output vector type. */
5294 /* For boolean type we cannot determine vectype by
5295 invariant value (don't know whether it is a vector
5296 of booleans or vector of integers). We use output
vectype because operations on booleans don't change
type.  */
5299 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5301 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5303 if (dump_enabled_p ())
5304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5305 "not supported operation on bool value.\n");
5308 vectype = vectype_out;
5311 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5314 gcc_assert (vectype);
5317 if (dump_enabled_p ())
5319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5320 "no vectype for scalar type ");
5321 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5323 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5329 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5330 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5331 if (nunits_out != nunits_in)
5334 if (op_type == binary_op || op_type == ternary_op)
5336 op1 = gimple_assign_rhs2 (stmt);
5337 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5339 if (dump_enabled_p ())
5340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5341 "use not simple.\n");
5345 if (op_type == ternary_op)
5347 op2 = gimple_assign_rhs3 (stmt);
5348 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5350 if (dump_enabled_p ())
5351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5352 "use not simple.\n");
5358 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5362 /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
case of SLP.  */
5368 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5370 gcc_assert (ncopies >= 1);
5372 /* Shifts are handled in vectorizable_shift (). */
5373 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5374 || code == RROTATE_EXPR)
5377 /* Supportable by target? */
5379 vec_mode = TYPE_MODE (vectype);
5380 if (code == MULT_HIGHPART_EXPR)
5381 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5384 optab = optab_for_tree_code (code, vectype, optab_default);
5387 if (dump_enabled_p ())
5388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5392 target_support_p = (optab_handler (optab, vec_mode)
5393 != CODE_FOR_nothing);
5396 if (!target_support_p)
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5400 "op not supported by target.\n");
5401 /* Check only during analysis. */
5402 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5403 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5405 if (dump_enabled_p ())
5406 dump_printf_loc (MSG_NOTE, vect_location,
5407 "proceeding using word mode.\n");
5410 /* Worthwhile without SIMD support? Check only during analysis. */
5411 if (!VECTOR_MODE_P (vec_mode)
5413 && vf < vect_min_worthwhile_factor (code))
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5417 "not worthwhile without SIMD support.\n");
5421 if (!vec_stmt) /* transformation not required. */
5423 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5424 if (dump_enabled_p ())
5425 dump_printf_loc (MSG_NOTE, vect_location,
5426 "=== vectorizable_operation ===\n");
5427 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5433 if (dump_enabled_p ())
5434 dump_printf_loc (MSG_NOTE, vect_location,
5435 "transform binary/unary operation.\n");
5438 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5440 /* In case the vectorization factor (VF) is bigger than the number
5441 of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt, i.e., we need to "unroll" the
5443 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5444 from one copy of the vector stmt to the next, in the field
5445 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5446 stages to find the correct vector defs to be used when vectorizing
5447 stmts that use the defs of the current stmt. The example below
5448 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5449 we need to create 4 vectorized stmts):
before vectorization:
                             RELATED_STMT    VEC_STMT
     S1:  x = load           -               -
     S2:  z = x + 1          -               -

step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
        there):
                             RELATED_STMT    VEC_STMT
5459 VS1_0: vx0 = memref0 VS1_1 -
5460 VS1_1: vx1 = memref1 VS1_2 -
5461 VS1_2: vx2 = memref2 VS1_3 -
5462 VS1_3: vx3 = memref3 - -
5463 S1: x = load - VS1_0
step 2: vectorize stmt S2 (done here):
5467 To vectorize stmt S2 we first need to find the relevant vector
5468 def for the first operand 'x'. This is, as usual, obtained from
5469 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5470 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5471 relevant vector def 'vx0'. Having found 'vx0' we can generate
5472 the vector stmt VS2_0, and as usual, record it in the
5473 STMT_VINFO_VEC_STMT of stmt S2.
5474 When creating the second copy (VS2_1), we obtain the relevant vector
5475 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5476 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5477 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5478 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5479 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5480 chain of stmts and pointers:
5481 RELATED_STMT VEC_STMT
5482 VS1_0: vx0 = memref0 VS1_1 -
5483 VS1_1: vx1 = memref1 VS1_2 -
5484 VS1_2: vx2 = memref2 VS1_3 -
5485 VS1_3: vx3 = memref3 - -
5486 S1: x = load - VS1_0
5487 VS2_0: vz0 = vx0 + v1 VS2_1 -
5488 VS2_1: vz1 = vx1 + v1 VS2_2 -
5489 VS2_2: vz2 = vx2 + v1 VS2_3 -
5490 VS2_3: vz3 = vx3 + v1 - -
5491 S2: z = x + 1 - VS2_0 */
5493 prev_stmt_info = NULL;
5494 for (j = 0; j < ncopies; j++)
5499 if (op_type == binary_op || op_type == ternary_op)
5500 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5503 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5505 if (op_type == ternary_op)
5506 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5511 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5512 if (op_type == ternary_op)
5514 tree vec_oprnd = vec_oprnds2.pop ();
5515 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5520 /* Arguments are ready. Create the new vector stmt. */
5521 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5523 vop1 = ((op_type == binary_op || op_type == ternary_op)
5524 ? vec_oprnds1[i] : NULL_TREE);
5525 vop2 = ((op_type == ternary_op)
5526 ? vec_oprnds2[i] : NULL_TREE);
5527 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5528 new_temp = make_ssa_name (vec_dest, new_stmt);
5529 gimple_assign_set_lhs (new_stmt, new_temp);
5530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5532 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5539 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5541 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5542 prev_stmt_info = vinfo_for_stmt (new_stmt);
5545 vec_oprnds0.release ();
5546 vec_oprnds1.release ();
5547 vec_oprnds2.release ();
/* A helper function to ensure data reference DR's base alignment
for STMT_INFO.  */
5556 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5561 if (DR_VECT_AUX (dr)->base_misaligned)
5563 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5564 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5566 if (decl_in_symtab_p (base_decl))
5567 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5570 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5571 DECL_USER_ALIGN (base_decl) = 1;
5573 DR_VECT_AUX (dr)->base_misaligned = false;
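/* For example (a sketch): if DR's base is a file-scope

       char buf[1024];

   whose default alignment is below TYPE_ALIGN of the vector type, the
   code above raises the decl's (or its symtab node's) alignment so that
   the misaligned-base assumption can be dropped.  */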
5578 /* Function get_group_alias_ptr_type.
5580 Return the alias type for the group starting at FIRST_STMT. */
5583 get_group_alias_ptr_type (gimple *first_stmt)
5585 struct data_reference *first_dr, *next_dr;
5588 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5589 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5592 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5593 if (get_alias_set (DR_REF (first_dr))
5594 != get_alias_set (DR_REF (next_dr)))
5596 if (dump_enabled_p ())
5597 dump_printf_loc (MSG_NOTE, vect_location,
5598 "conflicting alias set types.\n");
5599 return ptr_type_node;
5601 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5603 return reference_alias_ptr_type (DR_REF (first_dr));
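/* Illustrative case: if the group's stores alternate between, say, int
   and float members of a union, the alias sets of the references differ,
   and the function falls back to ptr_type_node (which aliases anything)
   instead of the first reference's alias pointer type.  */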
5607 /* Function vectorizable_store.
Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
can be vectorized.
5611 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5612 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5613 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5616 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5622 tree vec_oprnd = NULL_TREE;
5623 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5624 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5626 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5627 struct loop *loop = NULL;
5628 machine_mode vec_mode;
5630 enum dr_alignment_support alignment_support_scheme;
5632 enum vect_def_type dt;
5633 stmt_vec_info prev_stmt_info = NULL;
5634 tree dataref_ptr = NULL_TREE;
5635 tree dataref_offset = NULL_TREE;
5636 gimple *ptr_incr = NULL;
5639 gimple *next_stmt, *first_stmt;
5641 unsigned int group_size, i;
5642 vec<tree> oprnds = vNULL;
5643 vec<tree> result_chain = vNULL;
5645 tree offset = NULL_TREE;
5646 vec<tree> vec_oprnds = vNULL;
5647 bool slp = (slp_node != NULL);
5648 unsigned int vec_num;
5649 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5650 vec_info *vinfo = stmt_info->vinfo;
5652 gather_scatter_info gs_info;
5653 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5656 vec_load_store_type vls_type;
5659 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5662 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5666 /* Is vectorizable store? */
5668 if (!is_gimple_assign (stmt))
5671 scalar_dest = gimple_assign_lhs (stmt);
5672 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5673 && is_pattern_stmt_p (stmt_info))
5674 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5675 if (TREE_CODE (scalar_dest) != ARRAY_REF
5676 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5677 && TREE_CODE (scalar_dest) != INDIRECT_REF
5678 && TREE_CODE (scalar_dest) != COMPONENT_REF
5679 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5680 && TREE_CODE (scalar_dest) != REALPART_EXPR
5681 && TREE_CODE (scalar_dest) != MEM_REF)
5684 /* Cannot have hybrid store SLP -- that would mean storing to the
5685 same location twice. */
5686 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5688 gcc_assert (gimple_assign_single_p (stmt));
5690 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5691 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5695 loop = LOOP_VINFO_LOOP (loop_vinfo);
5696 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5701 /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
case of SLP.  */
5707 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5709 gcc_assert (ncopies >= 1);
5711 /* FORNOW. This restriction should be relaxed. */
5712 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5714 if (dump_enabled_p ())
5715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5716 "multiple types in nested loop.\n");
5720 op = gimple_assign_rhs1 (stmt);
5722 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5724 if (dump_enabled_p ())
5725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5726 "use not simple.\n");
5730 if (dt == vect_constant_def || dt == vect_external_def)
5731 vls_type = VLS_STORE_INVARIANT;
5733 vls_type = VLS_STORE;
5735 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5738 elem_type = TREE_TYPE (vectype);
5739 vec_mode = TYPE_MODE (vectype);
/* FORNOW.  In some cases we can vectorize even if the data type is not
supported (e.g. array initialization with 0).  */
5743 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5746 if (!STMT_VINFO_DATA_REF (stmt_info))
5749 vect_memory_access_type memory_access_type;
5750 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5751 &memory_access_type, &gs_info))
5754 if (!vec_stmt) /* transformation not required. */
5756 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5757 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5758 /* The SLP costs are calculated during SLP analysis. */
5759 if (!PURE_SLP_STMT (stmt_info))
5760 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5764 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5768 ensure_base_align (stmt_info, dr);
5770 if (memory_access_type == VMAT_GATHER_SCATTER)
5772 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5773 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5774 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5775 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5776 edge pe = loop_preheader_edge (loop);
5779 enum { NARROW, NONE, WIDEN } modifier;
5780 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5782 if (nunits == (unsigned int) scatter_off_nunits)
5784 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5786 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5789 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5790 sel[i] = i | nunits;
5792 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5793 gcc_assert (perm_mask != NULL_TREE);
5795 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5797 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5800 for (i = 0; i < (unsigned int) nunits; ++i)
5801 sel[i] = i | scatter_off_nunits;
5803 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5804 gcc_assert (perm_mask != NULL_TREE);
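/* A sketch of this narrowing case: with V8SI data (nunits == 8) and a
   V4DI offset vector (scatter_off_nunits == 4), SEL becomes
   { 4, 5, 6, 7, 4, 5, 6, 7 }, a mask that moves the upper half of the
   source data into place for the odd-numbered copies, each of which
   scatters only scatter_off_nunits elements.  */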
5810 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5811 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5812 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5813 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5814 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5815 scaletype = TREE_VALUE (arglist);
5817 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5818 && TREE_CODE (rettype) == VOID_TYPE);
5820 ptr = fold_convert (ptrtype, gs_info.base);
5821 if (!is_gimple_min_invariant (ptr))
5823 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5824 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5825 gcc_assert (!new_bb);
5828 /* Currently we support only unconditional scatter stores,
5829 so mask should be all ones. */
5830 mask = build_int_cst (masktype, -1);
5831 mask = vect_init_vector (stmt, mask, masktype, NULL);
5833 scale = build_int_cst (scaletype, gs_info.scale);
5835 prev_stmt_info = NULL;
5836 for (j = 0; j < ncopies; ++j)
5841 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5843 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5845 else if (modifier != NONE && (j & 1))
5847 if (modifier == WIDEN)
5850 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5851 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5854 else if (modifier == NARROW)
5856 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5859 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5868 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5870 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5874 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5876 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5877 == TYPE_VECTOR_SUBPARTS (srctype));
5878 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5879 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5880 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5881 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5885 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5887 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5888 == TYPE_VECTOR_SUBPARTS (idxtype));
5889 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5890 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5891 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5892 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5897 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5901 if (prev_stmt_info == NULL)
5902 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5904 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5905 prev_stmt_info = vinfo_for_stmt (new_stmt);
5910 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5913 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5914 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5915 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5917 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5920 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5922 /* We vectorize all the stmts of the interleaving group when we
5923 reach the last stmt in the group. */
5924 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5925 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5934 grouped_store = false;
/* VEC_NUM is the number of vect stmts to be created for this
group.  */
5937 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5938 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5939 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5940 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5941 op = gimple_assign_rhs1 (first_stmt);
/* VEC_NUM is the number of vect stmts to be created for this
group.  */
5946 vec_num = group_size;
5948 ref_type = get_group_alias_ptr_type (first_stmt);
5954 group_size = vec_num = 1;
5955 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5958 if (dump_enabled_p ())
5959 dump_printf_loc (MSG_NOTE, vect_location,
5960 "transform store. ncopies = %d\n", ncopies);
5962 if (memory_access_type == VMAT_ELEMENTWISE
5963 || memory_access_type == VMAT_STRIDED_SLP)
5965 gimple_stmt_iterator incr_gsi;
5971 gimple_seq stmts = NULL;
5972 tree stride_base, stride_step, alias_off;
5976 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5979 = fold_build_pointer_plus
5980 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5981 size_binop (PLUS_EXPR,
5982 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5983 convert_to_ptrofftype (DR_INIT (first_dr))));
5984 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
/* For a store with loop-invariant (but other than power-of-2)
stride (i.e. not a grouped access) like so:

     for (i = 0; i < n; i += stride)
       array[i] = ...;

we generate a new induction variable and new stores from
the components of the (vectorized) rhs:

     for (j = 0; ; j += VF*stride)
       tmp1 = ...;
       tmp2 = ...;
       ...
       array[j] = tmp1;
       array[j + stride] = tmp2;
       ...
*/
6004 unsigned nstores = nunits;
6006 tree ltype = elem_type;
6009 if (group_size < nunits
6010 && nunits % group_size == 0)
6012 nstores = nunits / group_size;
6014 ltype = build_vector_type (elem_type, group_size);
6016 else if (group_size >= nunits
6017 && group_size % nunits == 0)
6023 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6024 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6027 ivstep = stride_step;
6028 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6029 build_int_cst (TREE_TYPE (ivstep), vf));
6031 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6033 create_iv (stride_base, ivstep, NULL,
6034 loop, &incr_gsi, insert_after,
6036 incr = gsi_stmt (incr_gsi);
6037 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6039 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6041 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6043 prev_stmt_info = NULL;
6044 alias_off = build_int_cst (ref_type, 0);
6045 next_stmt = first_stmt;
6046 for (g = 0; g < group_size; g++)
6048 running_off = offvar;
6051 tree size = TYPE_SIZE_UNIT (ltype);
6052 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6054 tree newoff = copy_ssa_name (running_off, NULL);
6055 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6057 vect_finish_stmt_generation (stmt, incr, gsi);
6058 running_off = newoff;
6060 unsigned int group_el = 0;
6061 unsigned HOST_WIDE_INT
6062 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6063 for (j = 0; j < ncopies; j++)
6065 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6066 and first_stmt == stmt. */
6071 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6073 vec_oprnd = vec_oprnds[0];
6077 gcc_assert (gimple_assign_single_p (next_stmt));
6078 op = gimple_assign_rhs1 (next_stmt);
6079 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6085 vec_oprnd = vec_oprnds[j];
6088 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6089 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6093 for (i = 0; i < nstores; i++)
6095 tree newref, newoff;
6096 gimple *incr, *assign;
6097 tree size = TYPE_SIZE (ltype);
6098 /* Extract the i'th component. */
6099 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6100 bitsize_int (i), size);
6101 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6104 elem = force_gimple_operand_gsi (gsi, elem, true,
6108 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6110 newref = build2 (MEM_REF, ltype,
6111 running_off, this_off);
6113 /* And store it to *running_off. */
6114 assign = gimple_build_assign (newref, elem);
6115 vect_finish_stmt_generation (stmt, assign, gsi);
6119 || group_el == group_size)
6121 newoff = copy_ssa_name (running_off, NULL);
6122 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6123 running_off, stride_step);
6124 vect_finish_stmt_generation (stmt, incr, gsi);
6126 running_off = newoff;
6129 if (g == group_size - 1
6132 if (j == 0 && i == 0)
6133 STMT_VINFO_VEC_STMT (stmt_info)
6134 = *vec_stmt = assign;
6136 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6137 prev_stmt_info = vinfo_for_stmt (assign);
6141 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6146 vec_oprnds.release ();
6150 auto_vec<tree> dr_chain (group_size);
6151 oprnds.create (group_size);
6153 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6154 gcc_assert (alignment_support_scheme);
/* Targets with store-lane instructions must not require explicit
realignment.  */
6157 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6158 || alignment_support_scheme == dr_aligned
6159 || alignment_support_scheme == dr_unaligned_supported);
6161 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6162 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6163 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6165 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6166 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6168 aggr_type = vectype;
6170 /* In case the vectorization factor (VF) is bigger than the number
6171 of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt, i.e., we need to "unroll" the
6173 vector stmt by a factor VF/nunits. For more details see documentation in
6174 vect_get_vec_def_for_copy_stmt. */
/* In case of interleaving (non-unit grouped access):

     S1:  &base + 2 = x2
     S2:  &base = x0
     S3:  &base + 1 = x1
     S4:  &base + 3 = x3

We create vectorized stores starting from base address (the access of the
first stmt in the chain (S2 in the above example), when the last store stmt
of the chain (S4) is reached:

     VS1: &base = vx2
     VS2: &base + vec_size*1 = vx0
     VS3: &base + vec_size*2 = vx1
     VS4: &base + vec_size*3 = vx3

Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
     ...
6198 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6199 (the order of the data-refs in the output of vect_permute_store_chain
6200 corresponds to the order of scalar stmts in the interleaving chain - see
6201 the documentation of vect_permute_store_chain()).
In case of both multiple types and interleaving, the above vector stores and
6204 permutation stmts are created for every copy. The result vector stmts are
6205 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6206 STMT_VINFO_RELATED_STMT for the next copies.
6209 prev_stmt_info = NULL;
6210 for (j = 0; j < ncopies; j++)
6217 /* Get vectorized arguments for SLP_NODE. */
6218 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6221 vec_oprnd = vec_oprnds[0];
6225 /* For interleaved stores we collect vectorized defs for all the
6226 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6227 used as an input to vect_permute_store_chain(), and OPRNDS as
6228 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6230 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6231 OPRNDS are of size 1. */
6232 next_stmt = first_stmt;
6233 for (i = 0; i < group_size; i++)
6235 /* Since gaps are not supported for interleaved stores,
6236 GROUP_SIZE is the exact number of stmts in the chain.
Therefore, NEXT_STMT can't be NULL_TREE.  In case
6238 there is no interleaving, GROUP_SIZE is 1, and only one
6239 iteration of the loop will be executed. */
6240 gcc_assert (next_stmt
6241 && gimple_assign_single_p (next_stmt));
6242 op = gimple_assign_rhs1 (next_stmt);
6244 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6245 dr_chain.quick_push (vec_oprnd);
6246 oprnds.quick_push (vec_oprnd);
6247 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
/* We should have caught mismatched types earlier.  */
6252 gcc_assert (useless_type_conversion_p (vectype,
6253 TREE_TYPE (vec_oprnd)));
6254 bool simd_lane_access_p
6255 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6256 if (simd_lane_access_p
6257 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6258 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6259 && integer_zerop (DR_OFFSET (first_dr))
6260 && integer_zerop (DR_INIT (first_dr))
6261 && alias_sets_conflict_p (get_alias_set (aggr_type),
6262 get_alias_set (TREE_TYPE (ref_type))))
6264 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6265 dataref_offset = build_int_cst (ref_type, 0);
6270 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6271 simd_lane_access_p ? loop : NULL,
6272 offset, &dummy, gsi, &ptr_incr,
6273 simd_lane_access_p, &inv_p);
6274 gcc_assert (bb_vinfo || !inv_p);
6278 /* For interleaved stores we created vectorized defs for all the
6279 defs stored in OPRNDS in the previous iteration (previous copy).
6280 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6281 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6282 next copy.
6283 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6284 OPRNDS are of size 1. */
6285 for (i = 0; i < group_size; i++)
6288 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6289 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6290 dr_chain[i] = vec_oprnd;
6291 oprnds[i] = vec_oprnd;
6295 = int_const_binop (PLUS_EXPR, dataref_offset,
6296 TYPE_SIZE_UNIT (aggr_type));
6298 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6299 TYPE_SIZE_UNIT (aggr_type));
6302 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6306 /* Combine all the vectors into an array. */
6307 vec_array = create_vector_array (vectype, vec_num);
6308 for (i = 0; i < vec_num; i++)
6310 vec_oprnd = dr_chain[i];
6311 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6314 /* Emit:
6315 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6316 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6317 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6318 gimple_call_set_lhs (new_stmt, data_ref);
6319 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6327 result_chain.create (group_size);
6329 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6333 next_stmt = first_stmt;
6334 for (i = 0; i < vec_num; i++)
6336 unsigned align, misalign;
6339 /* Bump the vector pointer. */
6340 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6344 vec_oprnd = vec_oprnds[i];
6345 else if (grouped_store)
6346 /* For grouped stores vectorized defs are interleaved in
6347 vect_permute_store_chain(). */
6348 vec_oprnd = result_chain[i];
6350 data_ref = fold_build2 (MEM_REF, vectype,
6354 : build_int_cst (ref_type, 0));
6355 align = TYPE_ALIGN_UNIT (vectype);
6356 if (aligned_access_p (first_dr))
6358 else if (DR_MISALIGNMENT (first_dr) == -1)
6360 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6361 align = TYPE_ALIGN_UNIT (elem_type);
6363 align = get_object_alignment (DR_REF (first_dr))
6366 TREE_TYPE (data_ref)
6367 = build_aligned_type (TREE_TYPE (data_ref),
6368 align * BITS_PER_UNIT);
6372 TREE_TYPE (data_ref)
6373 = build_aligned_type (TREE_TYPE (data_ref),
6374 TYPE_ALIGN (elem_type));
6375 misalign = DR_MISALIGNMENT (first_dr);
6377 if (dataref_offset == NULL_TREE
6378 && TREE_CODE (dataref_ptr) == SSA_NAME)
6379 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6382 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6384 tree perm_mask = perm_mask_for_reverse (vectype);
6386 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6388 tree new_temp = make_ssa_name (perm_dest);
6390 /* Generate the permute statement. */
6392 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6393 vec_oprnd, perm_mask);
6394 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6396 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6397 vec_oprnd = new_temp;
6400 /* Arguments are ready. Create the new vector stmt. */
6401 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6402 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6407 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6415 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6417 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6418 prev_stmt_info = vinfo_for_stmt (new_stmt);
6423 result_chain.release ();
6424 vec_oprnds.release ();
6429 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6430 VECTOR_CST mask. No checks are made that the target platform supports the
6431 mask, so callers may wish to test can_vec_perm_p separately, or use
6432 vect_gen_perm_mask_checked. */
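/* As an illustration, for a four-element VECTYPE the selector
   SEL = {3, 2, 1, 0} yields the VECTOR_CST mask that reverses a vector;
   this is the shape of mask that perm_mask_for_reverse requests.  */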
6435 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6437 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6440 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6442 mask_elt_type = lang_hooks.types.type_for_mode
6443 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6444 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6446 mask_elts = XALLOCAVEC (tree, nunits);
6447 for (i = nunits - 1; i >= 0; i--)
6448 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6449 mask_vec = build_vector (mask_type, mask_elts);
6454 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6455 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6458 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6460 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6461 return vect_gen_perm_mask_any (vectype, sel);
6464 /* Given vector variables X and Y that were generated for the scalar
6465 STMT, generate instructions to permute the vector elements of X and Y
6466 using permutation mask MASK_VEC, insert them at *GSI and return the
6467 permuted vector variable. */
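/* For example (values for illustration only), with four-element inputs and
   MASK_VEC = {0, 4, 1, 5} the generated VEC_PERM_EXPR interleaves the low
   halves of X and Y, producing {x0, y0, x1, y1}.  */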
6470 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6471 gimple_stmt_iterator *gsi)
6473 tree vectype = TREE_TYPE (x);
6474 tree perm_dest, data_ref;
6477 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6478 data_ref = make_ssa_name (perm_dest);
6480 /* Generate the permute statement. */
6481 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6482 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6487 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6488 inserting them on the loop's preheader edge. Returns true if we
6489 were successful in doing so (and thus STMT can then be moved),
6490 otherwise returns false. */
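/* A hypothetical example: if STMT is "x_3 = MEM[a_7]" and a_7 is defined
   inside LOOP by "a_7 = base_9 + off_5" with base_9 and off_5 both defined
   outside the loop, the definition of a_7 is moved to the preheader edge,
   after which STMT itself can be hoisted as an invariant load.  */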
6493 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6499 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6501 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6502 if (!gimple_nop_p (def_stmt)
6503 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6505 /* Make sure we don't need to recurse. While we could do
6506 so in simple cases, when there are more complex use webs
6507 we don't have an easy way to preserve stmt order to fulfil
6508 dependencies within them. */
6511 if (gimple_code (def_stmt) == GIMPLE_PHI)
6513 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6515 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6516 if (!gimple_nop_p (def_stmt2)
6517 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6527 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6529 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6530 if (!gimple_nop_p (def_stmt)
6531 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6533 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6534 gsi_remove (&gsi, false);
6535 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6542 /* vectorizable_load.
6544 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6545 can be vectorized.
6546 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6547 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6548 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6551 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6552 slp_tree slp_node, slp_instance slp_node_instance)
6555 tree vec_dest = NULL;
6556 tree data_ref = NULL;
6557 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6558 stmt_vec_info prev_stmt_info;
6559 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6560 struct loop *loop = NULL;
6561 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6562 bool nested_in_vect_loop = false;
6563 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6567 gimple *new_stmt = NULL;
6569 enum dr_alignment_support alignment_support_scheme;
6570 tree dataref_ptr = NULL_TREE;
6571 tree dataref_offset = NULL_TREE;
6572 gimple *ptr_incr = NULL;
6574 int i, j, group_size, group_gap_adj;
6575 tree msq = NULL_TREE, lsq;
6576 tree offset = NULL_TREE;
6577 tree byte_offset = NULL_TREE;
6578 tree realignment_token = NULL_TREE;
6580 vec<tree> dr_chain = vNULL;
6581 bool grouped_load = false;
6583 gimple *first_stmt_for_drptr = NULL;
6585 bool compute_in_loop = false;
6586 struct loop *at_loop;
6588 bool slp = (slp_node != NULL);
6589 bool slp_perm = false;
6590 enum tree_code code;
6591 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6594 gather_scatter_info gs_info;
6595 vec_info *vinfo = stmt_info->vinfo;
6598 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6601 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6605 /* Is this a vectorizable load? */
6606 if (!is_gimple_assign (stmt))
6609 scalar_dest = gimple_assign_lhs (stmt);
6610 if (TREE_CODE (scalar_dest) != SSA_NAME)
6613 code = gimple_assign_rhs_code (stmt);
6614 if (code != ARRAY_REF
6615 && code != BIT_FIELD_REF
6616 && code != INDIRECT_REF
6617 && code != COMPONENT_REF
6618 && code != IMAGPART_EXPR
6619 && code != REALPART_EXPR
6621 && TREE_CODE_CLASS (code) != tcc_declaration)
6624 if (!STMT_VINFO_DATA_REF (stmt_info))
6627 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6628 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6632 loop = LOOP_VINFO_LOOP (loop_vinfo);
6633 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6634 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6639 /* Multiple types in SLP are handled by creating the appropriate number of
6640 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6641 the case of SLP. */
6645 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6647 gcc_assert (ncopies >= 1);
6649 /* FORNOW. This restriction should be relaxed. */
6650 if (nested_in_vect_loop && ncopies > 1)
6652 if (dump_enabled_p ())
6653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6654 "multiple types in nested loop.\n");
6658 /* Invalidate assumptions made by dependence analysis when vectorization
6659 on the unrolled body effectively re-orders stmts. */
6661 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6662 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6663 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6665 if (dump_enabled_p ())
6666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6667 "cannot perform implicit CSE when unrolling "
6668 "with negative dependence distance\n");
6672 elem_type = TREE_TYPE (vectype);
6673 mode = TYPE_MODE (vectype);
6675 /* FORNOW. In some cases we can vectorize even if the data-type is not
6676 supported (e.g. data copies). */
6677 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6679 if (dump_enabled_p ())
6680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6681 "Aligned load, but unsupported type.\n");
6685 /* Check if the load is a part of an interleaving chain. */
6686 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6688 grouped_load = true;
6690 gcc_assert (!nested_in_vect_loop);
6691 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6693 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6694 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6696 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6699 /* Invalidate assumptions made by dependence analysis when vectorization
6700 on the unrolled body effectively re-orders stmts. */
6701 if (!PURE_SLP_STMT (stmt_info)
6702 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6703 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6704 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6706 if (dump_enabled_p ())
6707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6708 "cannot perform implicit CSE when performing "
6709 "group loads with negative dependence distance\n");
6713 /* Similarly, when the stmt is a load that is both part of an SLP
6714 instance and a loop-vectorized stmt via the same-dr mechanism,
6715 we have to give up. */
6716 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6717 && (STMT_SLP_TYPE (stmt_info)
6718 != STMT_SLP_TYPE (vinfo_for_stmt
6719 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6721 if (dump_enabled_p ())
6722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6723 "conflicting SLP types for CSEd load\n");
6728 vect_memory_access_type memory_access_type;
6729 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6730 &memory_access_type, &gs_info))
6733 if (!vec_stmt) /* transformation not required. */
6736 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6737 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6738 /* The SLP costs are calculated during SLP analysis. */
6739 if (!PURE_SLP_STMT (stmt_info))
6740 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6746 gcc_assert (memory_access_type
6747 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6749 if (dump_enabled_p ())
6750 dump_printf_loc (MSG_NOTE, vect_location,
6751 "transform load. ncopies = %d\n", ncopies);
6755 ensure_base_align (stmt_info, dr);
6757 if (memory_access_type == VMAT_GATHER_SCATTER)
6759 tree vec_oprnd0 = NULL_TREE, op;
6760 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6761 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6762 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6763 edge pe = loop_preheader_edge (loop);
6766 enum { NARROW, NONE, WIDEN } modifier;
6767 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6769 if (nunits == gather_off_nunits)
6771 else if (nunits == gather_off_nunits / 2)
6773 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6776 for (i = 0; i < gather_off_nunits; ++i)
6777 sel[i] = i | nunits;
6779 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6781 else if (nunits == gather_off_nunits * 2)
6783 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6786 for (i = 0; i < nunits; ++i)
6787 sel[i] = i < gather_off_nunits
6788 ? i : i + nunits - gather_off_nunits;
6790 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
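/* To illustrate the masks built above (element counts are examples only):
   with nunits == 4 and gather_off_nunits == 8, the WIDEN selector is
   {4, 5, 6, 7, 4, 5, 6, 7}, which moves offsets 4..7 into the lanes used
   by the odd vector copies; with nunits == 8 and gather_off_nunits == 4,
   the NARROW selector is {0, 1, 2, 3, 8, 9, 10, 11}, which concatenates
   the low halves of two successive gather results.  */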
6796 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6797 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6798 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6799 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6800 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6801 scaletype = TREE_VALUE (arglist);
6802 gcc_checking_assert (types_compatible_p (srctype, rettype));
6804 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6806 ptr = fold_convert (ptrtype, gs_info.base);
6807 if (!is_gimple_min_invariant (ptr))
6809 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6810 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6811 gcc_assert (!new_bb);
6814 /* Currently we support only unconditional gather loads,
6815 so mask should be all ones. */
6816 if (TREE_CODE (masktype) == INTEGER_TYPE)
6817 mask = build_int_cst (masktype, -1);
6818 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6820 mask = build_int_cst (TREE_TYPE (masktype), -1);
6821 mask = build_vector_from_val (masktype, mask);
6822 mask = vect_init_vector (stmt, mask, masktype, NULL);
6824 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6828 for (j = 0; j < 6; ++j)
6830 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6831 mask = build_real (TREE_TYPE (masktype), r);
6832 mask = build_vector_from_val (masktype, mask);
6833 mask = vect_init_vector (stmt, mask, masktype, NULL);
6838 scale = build_int_cst (scaletype, gs_info.scale);
6840 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6841 merge = build_int_cst (TREE_TYPE (rettype), 0);
6842 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6846 for (j = 0; j < 6; ++j)
6848 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6849 merge = build_real (TREE_TYPE (rettype), r);
6853 merge = build_vector_from_val (rettype, merge);
6854 merge = vect_init_vector (stmt, merge, rettype, NULL);
6856 prev_stmt_info = NULL;
6857 for (j = 0; j < ncopies; ++j)
6859 if (modifier == WIDEN && (j & 1))
6860 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6861 perm_mask, stmt, gsi);
6864 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6867 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6869 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6871 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6872 == TYPE_VECTOR_SUBPARTS (idxtype));
6873 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6874 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6876 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6877 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6882 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6884 if (!useless_type_conversion_p (vectype, rettype))
6886 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6887 == TYPE_VECTOR_SUBPARTS (rettype));
6888 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6889 gimple_call_set_lhs (new_stmt, op);
6890 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6891 var = make_ssa_name (vec_dest);
6892 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6894 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6898 var = make_ssa_name (vec_dest, new_stmt);
6899 gimple_call_set_lhs (new_stmt, var);
6902 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6904 if (modifier == NARROW)
6911 var = permute_vec_elements (prev_res, var,
6912 perm_mask, stmt, gsi);
6913 new_stmt = SSA_NAME_DEF_STMT (var);
6916 if (prev_stmt_info == NULL)
6917 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6919 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6920 prev_stmt_info = vinfo_for_stmt (new_stmt);
6925 if (memory_access_type == VMAT_ELEMENTWISE
6926 || memory_access_type == VMAT_STRIDED_SLP)
6928 gimple_stmt_iterator incr_gsi;
6934 vec<constructor_elt, va_gc> *v = NULL;
6935 gimple_seq stmts = NULL;
6936 tree stride_base, stride_step, alias_off;
6938 gcc_assert (!nested_in_vect_loop);
6940 if (slp && grouped_load)
6942 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6943 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6944 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6945 ref_type = get_group_alias_ptr_type (first_stmt);
6952 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6956 = fold_build_pointer_plus
6957 (DR_BASE_ADDRESS (first_dr),
6958 size_binop (PLUS_EXPR,
6959 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6960 convert_to_ptrofftype (DR_INIT (first_dr))));
6961 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6963 /* For a load with a loop-invariant (but non-power-of-2)
6964 stride (i.e. not a grouped access) like so:
6966 for (i = 0; i < n; i += stride)
6967 ... = array[i];
6969 we generate a new induction variable and new accesses to
6970 form a new vector (or vectors, depending on ncopies):
6972 for (j = 0; ; j += VF*stride)
6973 tmp1 = array[j];
6974 tmp2 = array[j + stride];
6975 ...
6976 vectemp = {tmp1, tmp2, ...} */
6979 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6980 build_int_cst (TREE_TYPE (stride_step), vf));
6982 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6984 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6985 loop, &incr_gsi, insert_after,
6987 incr = gsi_stmt (incr_gsi);
6988 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6990 stride_step = force_gimple_operand (unshare_expr (stride_step),
6991 &stmts, true, NULL_TREE);
6993 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6995 prev_stmt_info = NULL;
6996 running_off = offvar;
6997 alias_off = build_int_cst (ref_type, 0);
6998 int nloads = nunits;
7000 tree ltype = TREE_TYPE (vectype);
7001 tree lvectype = vectype;
7002 auto_vec<tree> dr_chain;
7003 if (memory_access_type == VMAT_STRIDED_SLP)
7005 if (group_size < nunits)
7007 /* Avoid emitting a constructor of vector elements by performing
7008 the loads using an integer type of the same size,
7009 constructing a vector of those and then re-interpreting it
7010 as the original vector type. This works around the fact
7011 that the vec_init optab was only designed for scalar
7012 element modes and thus expansion goes through memory.
7013 This avoids a huge runtime penalty due to the general
7014 inability to perform store forwarding from smaller stores
7015 to a larger load. */
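/* A concrete sketch (sizes chosen for illustration): for group_size == 2
   and a V8HI vectype, lsize == 2 * 16 == 32 bits, so we emit
   nunits / group_size == 4 SImode loads, build a four-element integer
   vector from them and VIEW_CONVERT the result back to V8HI.  */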
7017 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7018 enum machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
7019 enum machine_mode vmode = mode_for_vector (elmode,
7020 nunits / group_size);
7021 /* If we can't construct such a vector fall back to
7022 element loads of the original vector type. */
7023 if (VECTOR_MODE_P (vmode)
7024 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
7026 nloads = nunits / group_size;
7028 ltype = build_nonstandard_integer_type (lsize, 1);
7029 lvectype = build_vector_type (ltype, nloads);
7038 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7042 /* For SLP permutation support we need to load the whole group,
7043 not only the number of vector stmts the permutation result
7044 fits in. */
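/* E.g. (illustrative values): group_size == 3, vf == 2 and nunits == 4
   give ncopies == (3*2 + 4 - 1) / 4 == 2, i.e. two vector loads to cover
   all six scalar elements of the unrolled group.  */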
7047 ncopies = (group_size * vf + nunits - 1) / nunits;
7048 dr_chain.create (ncopies);
7051 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7054 unsigned HOST_WIDE_INT
7055 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7056 for (j = 0; j < ncopies; j++)
7059 vec_alloc (v, nloads);
7060 for (i = 0; i < nloads; i++)
7062 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7064 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7065 build2 (MEM_REF, ltype,
7066 running_off, this_off));
7067 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7069 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7070 gimple_assign_lhs (new_stmt));
7074 || group_el == group_size)
7076 tree newoff = copy_ssa_name (running_off);
7077 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7078 running_off, stride_step);
7079 vect_finish_stmt_generation (stmt, incr, gsi);
7081 running_off = newoff;
7087 tree vec_inv = build_constructor (lvectype, v);
7088 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7089 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7090 if (lvectype != vectype)
7092 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7094 build1 (VIEW_CONVERT_EXPR,
7095 vectype, new_temp));
7096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7103 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7105 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7110 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7112 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7113 prev_stmt_info = vinfo_for_stmt (new_stmt);
7119 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7120 slp_node_instance, false, &n_perms);
7127 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7128 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7129 /* For SLP vectorization we directly vectorize a subchain
7130 without permutation. */
7131 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7132 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7133 /* For BB vectorization always use the first stmt to base
7134 the data ref pointer on. */
7136 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7138 /* Check if the chain of loads is already vectorized. */
7139 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7140 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7141 ??? But we can only do so if there is exactly one
7142 as we have no way to get at the rest. Leave the CSE
7144 ??? With the group load eventually participating
7145 in multiple different permutations (having multiple
7146 slp nodes which refer to the same group) the CSE
7147 is even wrong code. See PR56270. */
7150 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7153 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7156 /* VEC_NUM is the number of vect stmts to be created for this group. */
7159 grouped_load = false;
7160 /* For SLP permutation support we need to load the whole group,
7161 not only the number of vector stmts the permutation result
7162 fits in. */
7164 vec_num = (group_size * vf + nunits - 1) / nunits;
7166 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7167 group_gap_adj = vf * group_size - nunits * vec_num;
7170 vec_num = group_size;
7172 ref_type = get_group_alias_ptr_type (first_stmt);
7178 group_size = vec_num = 1;
7180 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7183 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7184 gcc_assert (alignment_support_scheme);
7185 /* Targets with load-lane instructions must not require explicit
7186 realignment. */
7187 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7188 || alignment_support_scheme == dr_aligned
7189 || alignment_support_scheme == dr_unaligned_supported);
7191 /* In case the vectorization factor (VF) is bigger than the number
7192 of elements that we can fit in a vectype (nunits), we have to generate
7193 more than one vector stmt - i.e., we need to "unroll" the
7194 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7195 from one copy of the vector stmt to the next, in the field
7196 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7197 stages to find the correct vector defs to be used when vectorizing
7198 stmts that use the defs of the current stmt. The example below
7199 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7200 need to create 4 vectorized stmts):
7202 before vectorization:
7203 RELATED_STMT VEC_STMT
7207 step 1: vectorize stmt S1:
7208 We first create the vector stmt VS1_0, and, as usual, record a
7209 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7210 Next, we create the vector stmt VS1_1, and record a pointer to
7211 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7212 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7214 RELATED_STMT VEC_STMT
7215 VS1_0: vx0 = memref0 VS1_1 -
7216 VS1_1: vx1 = memref1 VS1_2 -
7217 VS1_2: vx2 = memref2 VS1_3 -
7218 VS1_3: vx3 = memref3 - -
7219 S1: x = load - VS1_0
7222 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7223 information we recorded in the RELATED_STMT field is used to vectorize
7224 stmt S2. */
7226 /* In case of interleaving (non-unit grouped access):
7233 Vectorized loads are created in the order of memory accesses
7234 starting from the access of the first stmt of the chain:
7237 VS2: vx1 = &base + vec_size*1
7238 VS3: vx3 = &base + vec_size*2
7239 VS4: vx4 = &base + vec_size*3
7241 Then permutation statements are generated:
7243 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7244 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7247 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7248 (the order of the data-refs in the output of vect_permute_load_chain
7249 corresponds to the order of scalar stmts in the interleaving chain - see
7250 the documentation of vect_permute_load_chain()).
7251 The generation of permutation stmts and recording them in
7252 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7254 In case of both multiple types and interleaving, the vector loads and
7255 permutation stmts above are created for every copy. The result vector
7256 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7257 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7259 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7260 on a target that supports unaligned accesses (dr_unaligned_supported)
7261 we generate the following code:
7265 p = p + indx * vectype_size;
7270 Otherwise, the data reference is potentially unaligned on a target that
7271 does not support unaligned accesses (dr_explicit_realign_optimized) -
7272 then generate the following code, in which the data in each iteration is
7273 obtained by two vector loads, one from the previous iteration, and one
7274 from the current iteration:
7276 msq_init = *(floor(p1))
7277 p2 = initial_addr + VS - 1;
7278 realignment_token = call target_builtin;
7281 p2 = p2 + indx * vectype_size
7283 vec_dest = realign_load (msq, lsq, realignment_token)
7284 indx = indx + 1;
7285 msq = lsq;
7286 } */
7288 /* If the misalignment remains the same throughout the execution of the
7289 loop, we can create the init_addr and permutation mask at the loop
7290 preheader. Otherwise, it needs to be created inside the loop.
7291 This can only occur when vectorizing memory accesses in the inner-loop
7292 nested within an outer-loop that is being vectorized. */
7294 if (nested_in_vect_loop
7295 && (TREE_INT_CST_LOW (DR_STEP (dr))
7296 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7298 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7299 compute_in_loop = true;
7302 if ((alignment_support_scheme == dr_explicit_realign_optimized
7303 || alignment_support_scheme == dr_explicit_realign)
7304 && !compute_in_loop)
7306 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7307 alignment_support_scheme, NULL_TREE,
7309 if (alignment_support_scheme == dr_explicit_realign_optimized)
7311 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7312 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7319 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7320 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7322 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7323 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7324 else
7325 aggr_type = vectype;
7327 prev_stmt_info = NULL;
7328 for (j = 0; j < ncopies; j++)
7330 /* 1. Create the vector or array pointer update chain. */
7333 bool simd_lane_access_p
7334 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7335 if (simd_lane_access_p
7336 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7337 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7338 && integer_zerop (DR_OFFSET (first_dr))
7339 && integer_zerop (DR_INIT (first_dr))
7340 && alias_sets_conflict_p (get_alias_set (aggr_type),
7341 get_alias_set (TREE_TYPE (ref_type)))
7342 && (alignment_support_scheme == dr_aligned
7343 || alignment_support_scheme == dr_unaligned_supported))
7345 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7346 dataref_offset = build_int_cst (ref_type, 0);
7349 else if (first_stmt_for_drptr
7350 && first_stmt != first_stmt_for_drptr)
7353 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7354 at_loop, offset, &dummy, gsi,
7355 &ptr_incr, simd_lane_access_p,
7356 &inv_p, byte_offset);
7357 /* Adjust the pointer by the difference to first_stmt. */
7358 data_reference_p ptrdr
7359 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7360 tree diff = fold_convert (sizetype,
7361 size_binop (MINUS_EXPR,
7364 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7369 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7370 offset, &dummy, gsi, &ptr_incr,
7371 simd_lane_access_p, &inv_p,
7374 else if (dataref_offset)
7375 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7376 TYPE_SIZE_UNIT (aggr_type));
7378 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7379 TYPE_SIZE_UNIT (aggr_type));
7381 if (grouped_load || slp_perm)
7382 dr_chain.create (vec_num);
7384 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7388 vec_array = create_vector_array (vectype, vec_num);
7390 /* Emit:
7391 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7392 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7393 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7394 gimple_call_set_lhs (new_stmt, vec_array);
7395 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7397 /* Extract each vector into an SSA_NAME. */
7398 for (i = 0; i < vec_num; i++)
7400 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7402 dr_chain.quick_push (new_temp);
7405 /* Record the mapping between SSA_NAMEs and statements. */
7406 vect_record_grouped_load_vectors (stmt, dr_chain);
7410 for (i = 0; i < vec_num; i++)
7413 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7416 /* 2. Create the vector-load in the loop. */
7417 switch (alignment_support_scheme)
7420 case dr_unaligned_supported:
7422 unsigned int align, misalign;
7425 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7428 : build_int_cst (ref_type, 0));
7429 align = TYPE_ALIGN_UNIT (vectype);
7430 if (alignment_support_scheme == dr_aligned)
7432 gcc_assert (aligned_access_p (first_dr));
7435 else if (DR_MISALIGNMENT (first_dr) == -1)
7437 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7438 align = TYPE_ALIGN_UNIT (elem_type);
7440 align = (get_object_alignment (DR_REF (first_dr))
7443 TREE_TYPE (data_ref)
7444 = build_aligned_type (TREE_TYPE (data_ref),
7445 align * BITS_PER_UNIT);
7449 TREE_TYPE (data_ref)
7450 = build_aligned_type (TREE_TYPE (data_ref),
7451 TYPE_ALIGN (elem_type));
7452 misalign = DR_MISALIGNMENT (first_dr);
7454 if (dataref_offset == NULL_TREE
7455 && TREE_CODE (dataref_ptr) == SSA_NAME)
7456 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7460 case dr_explicit_realign:
7464 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7466 if (compute_in_loop)
7467 msq = vect_setup_realignment (first_stmt, gsi,
7469 dr_explicit_realign,
7472 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7473 ptr = copy_ssa_name (dataref_ptr);
7475 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7476 new_stmt = gimple_build_assign
7477 (ptr, BIT_AND_EXPR, dataref_ptr,
7479 (TREE_TYPE (dataref_ptr),
7480 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7481 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7483 = build2 (MEM_REF, vectype, ptr,
7484 build_int_cst (ref_type, 0));
7485 vec_dest = vect_create_destination_var (scalar_dest,
7487 new_stmt = gimple_build_assign (vec_dest, data_ref);
7488 new_temp = make_ssa_name (vec_dest, new_stmt);
7489 gimple_assign_set_lhs (new_stmt, new_temp);
7490 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7491 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7492 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7495 bump = size_binop (MULT_EXPR, vs,
7496 TYPE_SIZE_UNIT (elem_type));
7497 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7498 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7499 new_stmt = gimple_build_assign
7500 (NULL_TREE, BIT_AND_EXPR, ptr,
7503 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7504 ptr = copy_ssa_name (ptr, new_stmt);
7505 gimple_assign_set_lhs (new_stmt, ptr);
7506 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7508 = build2 (MEM_REF, vectype, ptr,
7509 build_int_cst (ref_type, 0));
7512 case dr_explicit_realign_optimized:
7513 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7514 new_temp = copy_ssa_name (dataref_ptr);
7516 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7517 new_stmt = gimple_build_assign
7518 (new_temp, BIT_AND_EXPR, dataref_ptr,
7520 (TREE_TYPE (dataref_ptr),
7521 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7522 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7524 = build2 (MEM_REF, vectype, new_temp,
7525 build_int_cst (ref_type, 0));
7530 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7531 new_stmt = gimple_build_assign (vec_dest, data_ref);
7532 new_temp = make_ssa_name (vec_dest, new_stmt);
7533 gimple_assign_set_lhs (new_stmt, new_temp);
7534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7536 /* 3. Handle explicit realignment if necessary/supported.
7538 vec_dest = realign_load (msq, lsq, realignment_token) */
7539 if (alignment_support_scheme == dr_explicit_realign_optimized
7540 || alignment_support_scheme == dr_explicit_realign)
7542 lsq = gimple_assign_lhs (new_stmt);
7543 if (!realignment_token)
7544 realignment_token = dataref_ptr;
7545 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7546 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7547 msq, lsq, realignment_token);
7548 new_temp = make_ssa_name (vec_dest, new_stmt);
7549 gimple_assign_set_lhs (new_stmt, new_temp);
7550 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7552 if (alignment_support_scheme == dr_explicit_realign_optimized)
7555 if (i == vec_num - 1 && j == ncopies - 1)
7556 add_phi_arg (phi, lsq,
7557 loop_latch_edge (containing_loop),
7563 /* 4. Handle invariant-load. */
7564 if (inv_p && !bb_vinfo)
7566 gcc_assert (!grouped_load);
7567 /* If we have versioned for aliasing or the loop doesn't
7568 have any data dependencies that would preclude this,
7569 then we are sure this is a loop invariant load and
7570 thus we can insert it on the preheader edge. */
7571 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7572 && !nested_in_vect_loop
7573 && hoist_defs_of_uses (stmt, loop))
7575 if (dump_enabled_p ())
7577 dump_printf_loc (MSG_NOTE, vect_location,
7578 "hoisting out of the vectorized "
7580 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7582 tree tem = copy_ssa_name (scalar_dest);
7583 gsi_insert_on_edge_immediate
7584 (loop_preheader_edge (loop),
7585 gimple_build_assign (tem,
7587 (gimple_assign_rhs1 (stmt))));
7588 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7589 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7590 set_vinfo_for_stmt (new_stmt,
7591 new_stmt_vec_info (new_stmt, vinfo));
7595 gimple_stmt_iterator gsi2 = *gsi;
7597 new_temp = vect_init_vector (stmt, scalar_dest,
7599 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7603 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7605 tree perm_mask = perm_mask_for_reverse (vectype);
7606 new_temp = permute_vec_elements (new_temp, new_temp,
7607 perm_mask, stmt, gsi);
7608 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7611 /* Collect vector loads and later create their permutation in
7612 vect_transform_grouped_load (). */
7613 if (grouped_load || slp_perm)
7614 dr_chain.quick_push (new_temp);
7616 /* Store vector loads in the corresponding SLP_NODE. */
7617 if (slp && !slp_perm)
7618 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7620 /* Bump the vector pointer to account for a gap or for excess
7621 elements loaded for a permuted SLP load. */
7622 if (group_gap_adj != 0)
7626 = wide_int_to_tree (sizetype,
7627 wi::smul (TYPE_SIZE_UNIT (elem_type),
7628 group_gap_adj, &ovf));
7629 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7634 if (slp && !slp_perm)
7640 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7641 slp_node_instance, false,
7644 dr_chain.release ();
7652 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7653 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7654 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7659 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7661 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7662 prev_stmt_info = vinfo_for_stmt (new_stmt);
7665 dr_chain.release ();
7671 /* Function vect_is_simple_cond.
7674 LOOP - the loop that is being vectorized.
7675 COND - Condition that is checked for simple use.
7678 *COMP_VECTYPE - the vector type for the comparison.
7679 *DTS - The def types for the arguments of the comparison
7681 Returns whether a COND can be vectorized. Checks whether
7682 condition operands are supportable using vect_is_simple_use. */
7685 vect_is_simple_cond (tree cond, vec_info *vinfo,
7686 tree *comp_vectype, enum vect_def_type *dts)
7689 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7692 if (TREE_CODE (cond) == SSA_NAME
7693 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7695 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7696 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7697 &dts[0], comp_vectype)
7699 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7704 if (!COMPARISON_CLASS_P (cond))
7707 lhs = TREE_OPERAND (cond, 0);
7708 rhs = TREE_OPERAND (cond, 1);
7710 if (TREE_CODE (lhs) == SSA_NAME)
7712 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7713 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7716 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7717 || TREE_CODE (lhs) == FIXED_CST)
7718 dts[0] = vect_constant_def;
7722 if (TREE_CODE (rhs) == SSA_NAME)
7724 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7725 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7728 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7729 || TREE_CODE (rhs) == FIXED_CST)
7730 dts[1] = vect_constant_def;
7734 if (vectype1 && vectype2
7735 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7738 *comp_vectype = vectype1 ? vectype1 : vectype2;
7742 /* vectorizable_condition.
7744 Check if STMT is a conditional modify expression that can be vectorized.
7745 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7746 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7747 at GSI.
7749 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7750 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7751 the else clause if it is 2).
7753 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
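/* Sketch of the transformation in GIMPLE terms (illustrative only):
   a scalar statement
     x = a < b ? c : d;
   becomes, for each vector copy,
     vcmp = va < vb;
     vx = VEC_COND_EXPR <vcmp, vc, vd>;
   as generated near the end of this function.  */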
7756 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7757 gimple **vec_stmt, tree reduc_def, int reduc_index,
7760 tree scalar_dest = NULL_TREE;
7761 tree vec_dest = NULL_TREE;
7762 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7763 tree then_clause, else_clause;
7764 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7765 tree comp_vectype = NULL_TREE;
7766 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7767 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7770 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7771 enum vect_def_type dts[4]
7772 = {vect_unknown_def_type, vect_unknown_def_type,
7773 vect_unknown_def_type, vect_unknown_def_type};
7776 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7777 stmt_vec_info prev_stmt_info = NULL;
7779 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7780 vec<tree> vec_oprnds0 = vNULL;
7781 vec<tree> vec_oprnds1 = vNULL;
7782 vec<tree> vec_oprnds2 = vNULL;
7783 vec<tree> vec_oprnds3 = vNULL;
7785 bool masked = false;
7787 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7790 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7792 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7795 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7796 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7800 /* FORNOW: not yet supported. */
7801 if (STMT_VINFO_LIVE_P (stmt_info))
7803 if (dump_enabled_p ())
7804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7805 "value used after loop.\n");
7810 /* Is this a vectorizable conditional operation? */
7811 if (!is_gimple_assign (stmt))
7814 code = gimple_assign_rhs_code (stmt);
7816 if (code != COND_EXPR)
7819 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7820 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7821 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7826 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7828 gcc_assert (ncopies >= 1);
7829 if (reduc_index && ncopies > 1)
7830 return false; /* FORNOW */
7832 cond_expr = gimple_assign_rhs1 (stmt);
7833 then_clause = gimple_assign_rhs2 (stmt);
7834 else_clause = gimple_assign_rhs3 (stmt);
7836 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7837 &comp_vectype, &dts[0])
7842 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7845 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7849 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7852 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7855 masked = !COMPARISON_CLASS_P (cond_expr);
7856 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7858 if (vec_cmp_type == NULL_TREE)
7861 cond_code = TREE_CODE (cond_expr);
7864 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7865 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7868 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7870 /* Boolean values may have another representation in vectors
7871 and therefore we prefer bit operations over comparison for
7872 them (which also works for scalar masks). We store opcodes
7873 to use in bitop1 and bitop2. Statement is vectorized as
7874 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7875 depending on bitop1 and bitop2 arity. */
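/* For instance, with boolean operands x < y is handled by swapping the
   operands and computing y & ~x (bitop1 == BIT_NOT_EXPR applied to the
   second operand, bitop2 == BIT_AND_EXPR), which matches the truth table
   of the scalar comparison.  */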
7879 bitop1 = BIT_NOT_EXPR;
7880 bitop2 = BIT_AND_EXPR;
7883 bitop1 = BIT_NOT_EXPR;
7884 bitop2 = BIT_IOR_EXPR;
7887 bitop1 = BIT_NOT_EXPR;
7888 bitop2 = BIT_AND_EXPR;
7889 std::swap (cond_expr0, cond_expr1);
7892 bitop1 = BIT_NOT_EXPR;
7893 bitop2 = BIT_IOR_EXPR;
7894 std::swap (cond_expr0, cond_expr1);
7897 bitop1 = BIT_XOR_EXPR;
7900 bitop1 = BIT_XOR_EXPR;
7901 bitop2 = BIT_NOT_EXPR;
7906 cond_code = SSA_NAME;
7911 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7912 if (bitop1 != NOP_EXPR)
7914 machine_mode mode = TYPE_MODE (comp_vectype);
7917 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7918 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7921 if (bitop2 != NOP_EXPR)
7923 optab = optab_for_tree_code (bitop2, comp_vectype,
7925 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7929 if (expand_vec_cond_expr_p (vectype, comp_vectype,
7932 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
7942 vec_oprnds0.create (1);
7943 vec_oprnds1.create (1);
7944 vec_oprnds2.create (1);
7945 vec_oprnds3.create (1);
7949 scalar_dest = gimple_assign_lhs (stmt);
7950 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7952 /* Handle cond expr. */
7953 for (j = 0; j < ncopies; j++)
7955 gassign *new_stmt = NULL;
7960 auto_vec<tree, 4> ops;
7961 auto_vec<vec<tree>, 4> vec_defs;
7964 ops.safe_push (cond_expr);
7967 ops.safe_push (cond_expr0);
7968 ops.safe_push (cond_expr1);
7970 ops.safe_push (then_clause);
7971 ops.safe_push (else_clause);
7972 vect_get_slp_defs (ops, slp_node, &vec_defs);
7973 vec_oprnds3 = vec_defs.pop ();
7974 vec_oprnds2 = vec_defs.pop ();
7976 vec_oprnds1 = vec_defs.pop ();
7977 vec_oprnds0 = vec_defs.pop ();
7985 = vect_get_vec_def_for_operand (cond_expr, stmt,
7987 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7993 = vect_get_vec_def_for_operand (cond_expr0,
7994 stmt, comp_vectype);
7995 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
7998 = vect_get_vec_def_for_operand (cond_expr1,
7999 stmt, comp_vectype);
8000 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8002 if (reduc_index == 1)
8003 vec_then_clause = reduc_def;
8006 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8008 vect_is_simple_use (then_clause, loop_vinfo,
8011 if (reduc_index == 2)
8012 vec_else_clause = reduc_def;
8015 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8017 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8024 = vect_get_vec_def_for_stmt_copy (dts[0],
8025 vec_oprnds0.pop ());
8028 = vect_get_vec_def_for_stmt_copy (dts[1],
8029 vec_oprnds1.pop ());
8031 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8032 vec_oprnds2.pop ());
8033 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8034 vec_oprnds3.pop ());
8039 vec_oprnds0.quick_push (vec_cond_lhs);
8041 vec_oprnds1.quick_push (vec_cond_rhs);
8042 vec_oprnds2.quick_push (vec_then_clause);
8043 vec_oprnds3.quick_push (vec_else_clause);
8046 /* Arguments are ready. Create the new vector stmt. */
8047 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8049 vec_then_clause = vec_oprnds2[i];
8050 vec_else_clause = vec_oprnds3[i];
8053 vec_compare = vec_cond_lhs;
8056 vec_cond_rhs = vec_oprnds1[i];
8057 if (bitop1 == NOP_EXPR)
8058 vec_compare = build2 (cond_code, vec_cmp_type,
8059 vec_cond_lhs, vec_cond_rhs);
8062 new_temp = make_ssa_name (vec_cmp_type);
8063 if (bitop1 == BIT_NOT_EXPR)
8064 new_stmt = gimple_build_assign (new_temp, bitop1,
8068 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8070 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8071 if (bitop2 == NOP_EXPR)
8072 vec_compare = new_temp;
8073 else if (bitop2 == BIT_NOT_EXPR)
8075 /* Instead of doing ~x ? y : z do x ? z : y. */
8076 vec_compare = new_temp;
8077 std::swap (vec_then_clause, vec_else_clause);
8081 vec_compare = make_ssa_name (vec_cmp_type);
8083 = gimple_build_assign (vec_compare, bitop2,
8084 vec_cond_lhs, new_temp);
8085 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8089 new_temp = make_ssa_name (vec_dest);
8090 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8091 vec_compare, vec_then_clause,
8093 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8095 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8102 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8104 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8106 prev_stmt_info = vinfo_for_stmt (new_stmt);
8109 vec_oprnds0.release ();
8110 vec_oprnds1.release ();
8111 vec_oprnds2.release ();
8112 vec_oprnds3.release ();
8117 /* vectorizable_comparison.
8119 Check if STMT is comparison expression that can be vectorized.
8120 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8121 comparison, put it in VEC_STMT, and insert it at GSI.
8123 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
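/* For example (illustrative GIMPLE), a scalar mask definition
     m = a < b;
   is turned, for each vector copy, into
     vm = va < vb;
   where vm has a vector boolean (mask) type.  */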
8126 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8127 gimple **vec_stmt, tree reduc_def,
8130 tree lhs, rhs1, rhs2;
8131 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8132 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8133 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8134 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8136 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8137 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8141 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8142 stmt_vec_info prev_stmt_info = NULL;
8144 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8145 vec<tree> vec_oprnds0 = vNULL;
8146 vec<tree> vec_oprnds1 = vNULL;
8151 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8154 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8157 mask_type = vectype;
8158 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8163 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8165 gcc_assert (ncopies >= 1);
8166 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8167 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8171 if (STMT_VINFO_LIVE_P (stmt_info))
8173 if (dump_enabled_p ())
8174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8175 "value used after loop.\n");
8179 if (!is_gimple_assign (stmt))
8182 code = gimple_assign_rhs_code (stmt);
8184 if (TREE_CODE_CLASS (code) != tcc_comparison)
8187 rhs1 = gimple_assign_rhs1 (stmt);
8188 rhs2 = gimple_assign_rhs2 (stmt);
8190 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8191 &dts[0], &vectype1))
8194 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8195 &dts[1], &vectype2))
8198 if (vectype1 && vectype2
8199 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8202 vectype = vectype1 ? vectype1 : vectype2;
8204 /* Invariant comparison. */
8207 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8208 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8211 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8214 /* Can't compare mask and non-mask types. */
8215 if (vectype1 && vectype2
8216 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8219 /* Boolean values may have another representation in vectors
8220 and therefore we prefer bit operations over comparison for
8221 them (which also works for scalar masks). We store opcodes
8222 to use in bitop1 and bitop2. Statement is vectorized as
8223 BITOP2 (rhs1 BITOP1 rhs2) or
8224 rhs1 BITOP2 (BITOP1 rhs2)
8225 depending on bitop1 and bitop2 arity. */
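/* For example, with boolean operands x > y is computed as x & ~y
   (bitop1 == BIT_NOT_EXPR, bitop2 == BIT_AND_EXPR) and x == y as
   ~(x ^ y) (bitop1 == BIT_XOR_EXPR, bitop2 == BIT_NOT_EXPR).  */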
8226 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8228 if (code == GT_EXPR)
8230 bitop1 = BIT_NOT_EXPR;
8231 bitop2 = BIT_AND_EXPR;
8233 else if (code == GE_EXPR)
8235 bitop1 = BIT_NOT_EXPR;
8236 bitop2 = BIT_IOR_EXPR;
8238 else if (code == LT_EXPR)
8240 bitop1 = BIT_NOT_EXPR;
8241 bitop2 = BIT_AND_EXPR;
8242 std::swap (rhs1, rhs2);
8243 std::swap (dts[0], dts[1]);
8245 else if (code == LE_EXPR)
8247 bitop1 = BIT_NOT_EXPR;
8248 bitop2 = BIT_IOR_EXPR;
8249 std::swap (rhs1, rhs2);
8250 std::swap (dts[0], dts[1]);
8254 bitop1 = BIT_XOR_EXPR;
8255 if (code == EQ_EXPR)
8256 bitop2 = BIT_NOT_EXPR;
8262 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8263 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8264 dts, ndts, NULL, NULL);
8265 if (bitop1 == NOP_EXPR)
8266 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8269 machine_mode mode = TYPE_MODE (vectype);
8272 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8273 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8276 if (bitop2 != NOP_EXPR)
8278 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8279 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8289 vec_oprnds0.create (1);
8290 vec_oprnds1.create (1);
8294 lhs = gimple_assign_lhs (stmt);
8295 mask = vect_create_destination_var (lhs, mask_type);
8297 /* Handle cmp expr. */
8298 for (j = 0; j < ncopies; j++)
8300 gassign *new_stmt = NULL;
8305 auto_vec<tree, 2> ops;
8306 auto_vec<vec<tree>, 2> vec_defs;
8308 ops.safe_push (rhs1);
8309 ops.safe_push (rhs2);
8310 vect_get_slp_defs (ops, slp_node, &vec_defs);
8311 vec_oprnds1 = vec_defs.pop ();
8312 vec_oprnds0 = vec_defs.pop ();
8316 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8317 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8322 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8323 vec_oprnds0.pop ());
8324 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8325 vec_oprnds1.pop ());
8330 vec_oprnds0.quick_push (vec_rhs1);
8331 vec_oprnds1.quick_push (vec_rhs2);
8334 /* Arguments are ready. Create the new vector stmt. */
8335 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8337 vec_rhs2 = vec_oprnds1[i];
8339 new_temp = make_ssa_name (mask);
8340 if (bitop1 == NOP_EXPR)
8342 new_stmt = gimple_build_assign (new_temp, code,
8343 vec_rhs1, vec_rhs2);
8344 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8348 if (bitop1 == BIT_NOT_EXPR)
8349 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8351 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8354 if (bitop2 != NOP_EXPR)
8356 tree res = make_ssa_name (mask);
8357 if (bitop2 == BIT_NOT_EXPR)
8358 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8360 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8362 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8366 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8373 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8375 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8377 prev_stmt_info = vinfo_for_stmt (new_stmt);
8380 vec_oprnds0.release ();
8381 vec_oprnds1.release ();
8386 /* Make sure the statement is vectorizable. */
8389 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8391 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8392 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8393 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8395 gimple *pattern_stmt;
8396 gimple_seq pattern_def_seq;
8398 if (dump_enabled_p ())
8400 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8401 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8404 if (gimple_has_volatile_ops (stmt))
8406 if (dump_enabled_p ())
8407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8408 "not vectorized: stmt has volatile operands\n");
8413 /* Skip stmts that do not need to be vectorized. In loops this is expected
8414 to include:
8415 - the COND_EXPR which is the loop exit condition
8416 - any LABEL_EXPRs in the loop
8417 - computations that are used only for array indexing or loop control.
8418 In basic blocks we only analyze statements that are a part of some SLP
8419 instance, therefore, all the statements are relevant.
8421 A pattern statement needs to be analyzed instead of the original statement
8422 if the original statement is not relevant. Otherwise, we analyze both
8423 statements. In basic blocks we are called from some SLP instance
8424 traversal; don't analyze pattern stmts here, since the pattern stmts
8425 will already be part of the SLP instance. */
8427 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8428 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8429 && !STMT_VINFO_LIVE_P (stmt_info))
8431 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8433 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8434 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8436 /* Analyze PATTERN_STMT instead of the original stmt. */
8437 stmt = pattern_stmt;
8438 stmt_info = vinfo_for_stmt (pattern_stmt);
8439 if (dump_enabled_p ())
8441 dump_printf_loc (MSG_NOTE, vect_location,
8442 "==> examining pattern statement: ");
8443 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8448 if (dump_enabled_p ())
8449 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8454 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8457 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8458 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8460 /* Analyze PATTERN_STMT too. */
8461 if (dump_enabled_p ())
8463 dump_printf_loc (MSG_NOTE, vect_location,
8464 "==> examining pattern statement: ");
8465 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8468 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8472 if (is_pattern_stmt_p (stmt_info)
8474 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8476 gimple_stmt_iterator si;
8478 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8480 gimple *pattern_def_stmt = gsi_stmt (si);
8481 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8482 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8484 /* Analyze def stmt of STMT if it's a pattern stmt. */
8485 if (dump_enabled_p ())
8487 dump_printf_loc (MSG_NOTE, vect_location,
8488 "==> examining pattern def statement: ");
8489 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8492 if (!vect_analyze_stmt (pattern_def_stmt,
8493 need_to_vectorize, node))
8499 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8501 case vect_internal_def:
8504 case vect_reduction_def:
8505 case vect_nested_cycle:
8506 gcc_assert (!bb_vinfo
8507 && (relevance == vect_used_in_outer
8508 || relevance == vect_used_in_outer_by_reduction
8509 || relevance == vect_used_by_reduction
8510 || relevance == vect_unused_in_scope
8511 || relevance == vect_used_only_live));
8514 case vect_induction_def:
8515 gcc_assert (!bb_vinfo);
8518 case vect_constant_def:
8519 case vect_external_def:
8520 case vect_unknown_def_type:
8525 if (STMT_VINFO_RELEVANT_P (stmt_info))
8527 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8528 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8529 || (is_gimple_call (stmt)
8530 && gimple_call_lhs (stmt) == NULL_TREE));
8531 *need_to_vectorize = true;
8534 if (PURE_SLP_STMT (stmt_info) && !node)
8536 dump_printf_loc (MSG_NOTE, vect_location,
8537 "handled only by SLP analysis\n");
8543 && (STMT_VINFO_RELEVANT_P (stmt_info)
8544 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8545 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8546 || vectorizable_conversion (stmt, NULL, NULL, node)
8547 || vectorizable_shift (stmt, NULL, NULL, node)
8548 || vectorizable_operation (stmt, NULL, NULL, node)
8549 || vectorizable_assignment (stmt, NULL, NULL, node)
8550 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8551 || vectorizable_call (stmt, NULL, NULL, node)
8552 || vectorizable_store (stmt, NULL, NULL, node)
8553 || vectorizable_reduction (stmt, NULL, NULL, node)
8554 || vectorizable_induction (stmt, NULL, NULL, node)
8555 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8556 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8560 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8561 || vectorizable_conversion (stmt, NULL, NULL, node)
8562 || vectorizable_shift (stmt, NULL, NULL, node)
8563 || vectorizable_operation (stmt, NULL, NULL, node)
8564 || vectorizable_assignment (stmt, NULL, NULL, node)
8565 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8566 || vectorizable_call (stmt, NULL, NULL, node)
8567 || vectorizable_store (stmt, NULL, NULL, node)
8568 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8569 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8574 if (dump_enabled_p ())
8576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8577 "not vectorized: relevant stmt not ");
8578 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8579 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8588 /* Stmts that are (also) "live" (i.e. used outside the loop)
8589 need extra handling, except for vectorizable reductions. */
8590 if (STMT_VINFO_LIVE_P (stmt_info)
8591 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8592 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8596 if (dump_enabled_p ())
8598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8599 "not vectorized: live stmt not ");
8600 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8601 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8611 /* Function vect_transform_stmt.
8613 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8616 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8617 bool *grouped_store, slp_tree slp_node,
8618 slp_instance slp_node_instance)
8620 bool is_store = false;
8621 gimple *vec_stmt = NULL;
8622 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8625 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8626 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8628 switch (STMT_VINFO_TYPE (stmt_info))
8630 case type_demotion_vec_info_type:
8631 case type_promotion_vec_info_type:
8632 case type_conversion_vec_info_type:
8633 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8637 case induc_vec_info_type:
8638 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8642 case shift_vec_info_type:
8643 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8647 case op_vec_info_type:
8648 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8652 case assignment_vec_info_type:
8653 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8657 case load_vec_info_type:
8658 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8663 case store_vec_info_type:
8664 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8666 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8668 /* In case of interleaving, the whole chain is vectorized when the
8669 last store in the chain is reached. Store stmts before the last
8670 one are skipped, and their vec_stmt_info shouldn't be freed meanwhile. */
8672 *grouped_store = true;
8673 if (STMT_VINFO_VEC_STMT (stmt_info))
8680 case condition_vec_info_type:
8681 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8685 case comparison_vec_info_type:
8686 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8690 case call_vec_info_type:
8691 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8692 stmt = gsi_stmt (*gsi);
8693 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8697 case call_simd_clone_vec_info_type:
8698 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8699 stmt = gsi_stmt (*gsi);
8702 case reduc_vec_info_type:
8703 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8708 if (!STMT_VINFO_LIVE_P (stmt_info))
8710 if (dump_enabled_p ())
8711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8712 "stmt not supported.\n");
8717 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8718 This would break hybrid SLP vectorization. */
8720 gcc_assert (!vec_stmt
8721 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8723 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8724 is being vectorized, but outside the immediately enclosing loop. */
8726 && STMT_VINFO_LOOP_VINFO (stmt_info)
8727 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8728 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8729 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8730 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8731 || STMT_VINFO_RELEVANT (stmt_info) ==
8732 vect_used_in_outer_by_reduction))
8734 struct loop *innerloop = LOOP_VINFO_LOOP (
8735 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8736 imm_use_iterator imm_iter;
8737 use_operand_p use_p;
8741 if (dump_enabled_p ())
8742 dump_printf_loc (MSG_NOTE, vect_location,
8743 "Record the vdef for outer-loop vectorization.\n");
8745 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8746 (to be used when vectorizing outer-loop stmts that use the DEF of STMT). */
8748 if (gimple_code (stmt) == GIMPLE_PHI)
8749 scalar_dest = PHI_RESULT (stmt);
8751 scalar_dest = gimple_assign_lhs (stmt);
8753 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8755 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8757 exit_phi = USE_STMT (use_p);
8758 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8763 /* Handle stmts whose DEF is used outside the loop-nest that is
8764 being vectorized. */
8769 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8770 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8772 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8773 if (STMT_VINFO_LIVE_P (slp_stmt_info))
8775 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8781 else if (STMT_VINFO_LIVE_P (stmt_info)
8782 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8784 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8789 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
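/* Caller sketch (an assumption for illustration; the real driver is the
   loop-transform code in tree-vect-loop.c): the transform phase walks
   each basic block and dispatches every stmt through this function,
   roughly:

     bool grouped_store = false;
     gimple_stmt_iterator si;
     for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
       vect_transform_stmt (gsi_stmt (si), &si, &grouped_store, NULL, NULL);
*/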
8795 /* Remove a group of stores (for SLP or interleaving), free their stmt_vec_info. */
8799 vect_remove_stores (gimple *first_stmt)
8801 gimple *next = first_stmt;
8803 gimple_stmt_iterator next_si;
8807 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8809 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8810 if (is_pattern_stmt_p (stmt_info))
8811 next = STMT_VINFO_RELATED_STMT (stmt_info);
8812 /* Free the attached stmt_vec_info and remove the stmt. */
8813 next_si = gsi_for_stmt (next);
8814 unlink_stmt_vdef (next);
8815 gsi_remove (&next_si, true);
8816 release_defs (next);
8817 free_stmt_vec_info (next);
8823 /* Function new_stmt_vec_info.
8825 Create and initialize a new stmt_vec_info struct for STMT. */
8828 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8831 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8833 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8834 STMT_VINFO_STMT (res) = stmt;
8836 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8837 STMT_VINFO_LIVE_P (res) = false;
8838 STMT_VINFO_VECTYPE (res) = NULL;
8839 STMT_VINFO_VEC_STMT (res) = NULL;
8840 STMT_VINFO_VECTORIZABLE (res) = true;
8841 STMT_VINFO_IN_PATTERN_P (res) = false;
8842 STMT_VINFO_RELATED_STMT (res) = NULL;
8843 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8844 STMT_VINFO_DATA_REF (res) = NULL;
8845 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8846 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8848 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8849 STMT_VINFO_DR_OFFSET (res) = NULL;
8850 STMT_VINFO_DR_INIT (res) = NULL;
8851 STMT_VINFO_DR_STEP (res) = NULL;
8852 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8854 if (gimple_code (stmt) == GIMPLE_PHI
8855 && is_loop_header_bb_p (gimple_bb (stmt)))
8856 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8858 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8860 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8861 STMT_SLP_TYPE (res) = loop_vect;
8862 STMT_VINFO_NUM_SLP_USES (res) = 0;
8864 GROUP_FIRST_ELEMENT (res) = NULL;
8865 GROUP_NEXT_ELEMENT (res) = NULL;
8866 GROUP_SIZE (res) = 0;
8867 GROUP_STORE_COUNT (res) = 0;
8868 GROUP_GAP (res) = 0;
8869 GROUP_SAME_DR_STMT (res) = NULL;
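/* Typical use (illustrative sketch): allocate the info and attach it to
   the statement via the stmt -> stmt_vec_info mapping, e.g.

     set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, loop_vinfo));

   set_vinfo_for_stmt / vinfo_for_stmt are the accessors assumed from
   tree-vectorizer.h.  */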
8875 /* Create the vector that holds stmt_vec_info structs. */
8878 init_stmt_vec_info_vec (void)
8880 gcc_assert (!stmt_vec_info_vec.exists ());
8881 stmt_vec_info_vec.create (50);
8885 /* Free the vector that holds stmt_vec_info structs. */
8888 free_stmt_vec_info_vec (void)
8892 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8894 free_stmt_vec_info (STMT_VINFO_STMT (info));
8895 gcc_assert (stmt_vec_info_vec.exists ());
8896 stmt_vec_info_vec.release ();
8900 /* Free stmt vectorization related info. */
8903 free_stmt_vec_info (gimple *stmt)
8905 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8910 /* Check if this statement has a related "pattern stmt"
8911 (introduced by the vectorizer during the pattern recognition
8912 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info too. */
8914 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8916 stmt_vec_info patt_info
8917 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8920 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8921 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8922 gimple_set_bb (patt_stmt, NULL);
8923 tree lhs = gimple_get_lhs (patt_stmt);
8924 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8925 release_ssa_name (lhs);
8928 gimple_stmt_iterator si;
8929 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8931 gimple *seq_stmt = gsi_stmt (si);
8932 gimple_set_bb (seq_stmt, NULL);
8933 lhs = gimple_get_lhs (seq_stmt);
8934 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8935 release_ssa_name (lhs);
8936 free_stmt_vec_info (seq_stmt);
8939 free_stmt_vec_info (patt_stmt);
8943 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8944 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8945 set_vinfo_for_stmt (stmt, NULL);
8950 /* Function get_vectype_for_scalar_type_and_size.
8952 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported by the target. */
8956 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8958 tree orig_scalar_type = scalar_type;
8959 machine_mode inner_mode = TYPE_MODE (scalar_type);
8960 machine_mode simd_mode;
8961 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8968 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8969 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8972 /* For vector types of elements whose mode precision doesn't
8973 match their type's precision we use an element type of mode
8974 precision. The vectorization routines will have to make sure
8975 they support the proper result truncation/extension.
8976 We also make sure to build vector types with INTEGER_TYPE
8977 component type only. */
8978 if (INTEGRAL_TYPE_P (scalar_type)
8979 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8980 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8981 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8982 TYPE_UNSIGNED (scalar_type));
8984 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8985 When the component mode passes the above test, simply use a type
8986 corresponding to that mode. The theory is that any use that
8987 would cause problems with this will disable vectorization anyway. */
8988 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8989 && !INTEGRAL_TYPE_P (scalar_type))
8990 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8992 /* We can't build a vector type of elements with alignment bigger than their size. */
8994 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8995 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8996 TYPE_UNSIGNED (scalar_type));
8998 /* If we fell back to using the mode, fail if there was
8999 no scalar type for it. */
9000 if (scalar_type == NULL_TREE)
9003 /* If no size was supplied, use the mode the target prefers. Otherwise
9004 look up a vector mode of the specified size. */
9006 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9008 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9009 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9013 vectype = build_vector_type (scalar_type, nunits);
9015 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9016 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9019 /* Re-attach the address-space qualifier if we canonicalized the scalar type. */
9021 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9022 return build_qualified_type
9023 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
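/* Worked example (illustrative): with SIZE == 16 and 4-byte 'int'
   elements, nbytes is 4, mode_for_vector yields a 16-byte vector mode,
   and nunits = 16 / 4 = 4, i.e. a V4SI-like type:

     tree v4si = get_vectype_for_scalar_type_and_size (integer_type_node, 16);

   With SIZE == 0, the target's preferred_simd_mode hook supplies the
   preferred vector width instead.  */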
9028 unsigned int current_vector_size;
9030 /* Function get_vectype_for_scalar_type.
9032 Returns the vector type corresponding to SCALAR_TYPE as supported by the target. */
9036 get_vectype_for_scalar_type (tree scalar_type)
9039 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9040 current_vector_size);
9042 && current_vector_size == 0)
9043 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9047 /* Function get_mask_type_for_scalar_type.
9049 Returns the mask type corresponding to the result of a comparison of
9050 vectors of the specified SCALAR_TYPE, as supported by the target. */
9053 get_mask_type_for_scalar_type (tree scalar_type)
9055 tree vectype = get_vectype_for_scalar_type (scalar_type);
9060 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9061 current_vector_size);
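/* Example (illustrative): with current_vector_size == 16, comparing two
   V4SI vectors of 'int' produces a mask with four elements, so

     tree mask_type = get_mask_type_for_scalar_type (integer_type_node);

   returns a 4-element boolean vector type (or NULL_TREE when no vector
   type exists for the scalar type).  */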
9064 /* Function get_same_sized_vectype.
9066 Returns a vector type corresponding to SCALAR_TYPE with the same
9067 size as VECTOR_TYPE, if supported by the target. */
9070 get_same_sized_vectype (tree scalar_type, tree vector_type)
9072 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9073 return build_same_sized_truth_vector_type (vector_type);
9075 return get_vectype_for_scalar_type_and_size
9076 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9079 /* Function vect_is_simple_use.
9082 VINFO - the vect info of the loop or basic block that is being vectorized.
9083 OPERAND - operand in the loop or bb.
9085 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9086 DT - the type of definition.
9088 Returns whether a stmt with OPERAND can be vectorized.
9089 For loops, supportable operands are constants, loop invariants, and operands
9090 that are defined by the current iteration of the loop. Unsupportable
9091 operands are those that are defined by a previous iteration of the loop (as
9092 is the case in reduction/induction computations).
9093 For basic blocks, supportable operands are constants and bb invariants.
9094 For now, operands defined outside the basic block are not supported. */
9097 vect_is_simple_use (tree operand, vec_info *vinfo,
9098 gimple **def_stmt, enum vect_def_type *dt)
9101 *dt = vect_unknown_def_type;
9103 if (dump_enabled_p ())
9105 dump_printf_loc (MSG_NOTE, vect_location,
9106 "vect_is_simple_use: operand ");
9107 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9108 dump_printf (MSG_NOTE, "\n");
9111 if (CONSTANT_CLASS_P (operand))
9113 *dt = vect_constant_def;
9117 if (is_gimple_min_invariant (operand))
9119 *dt = vect_external_def;
9123 if (TREE_CODE (operand) != SSA_NAME)
9125 if (dump_enabled_p ())
9126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9131 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9133 *dt = vect_external_def;
9137 *def_stmt = SSA_NAME_DEF_STMT (operand);
9138 if (dump_enabled_p ())
9140 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9141 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9144 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9145 *dt = vect_external_def;
9148 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9149 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9152 if (dump_enabled_p ())
9154 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9157 case vect_uninitialized_def:
9158 dump_printf (MSG_NOTE, "uninitialized\n");
9160 case vect_constant_def:
9161 dump_printf (MSG_NOTE, "constant\n");
9163 case vect_external_def:
9164 dump_printf (MSG_NOTE, "external\n");
9166 case vect_internal_def:
9167 dump_printf (MSG_NOTE, "internal\n");
9169 case vect_induction_def:
9170 dump_printf (MSG_NOTE, "induction\n");
9172 case vect_reduction_def:
9173 dump_printf (MSG_NOTE, "reduction\n");
9175 case vect_double_reduction_def:
9176 dump_printf (MSG_NOTE, "double reduction\n");
9178 case vect_nested_cycle:
9179 dump_printf (MSG_NOTE, "nested cycle\n");
9181 case vect_unknown_def_type:
9182 dump_printf (MSG_NOTE, "unknown\n");
9187 if (*dt == vect_unknown_def_type)
9189 if (dump_enabled_p ())
9190 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9191 "Unsupported pattern.\n");
9195 switch (gimple_code (*def_stmt))
9202 if (dump_enabled_p ())
9203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9204 "unsupported defining stmt:\n");
9211 /* Function vect_is_simple_use.
9213 Same as vect_is_simple_use but also determines the vector operand
9214 type of OPERAND and stores it to *VECTYPE. If the definition of
9215 OPERAND is vect_uninitialized_def, vect_constant_def or
9216 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9217 is responsible for computing the best suited vector type for the scalar operand. */
9221 vect_is_simple_use (tree operand, vec_info *vinfo,
9222 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9224 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9227 /* Now get a vector type if the def is internal, otherwise supply
9228 NULL_TREE and leave it up to the caller to figure out a proper
9229 type for the use stmt. */
9230 if (*dt == vect_internal_def
9231 || *dt == vect_induction_def
9232 || *dt == vect_reduction_def
9233 || *dt == vect_double_reduction_def
9234 || *dt == vect_nested_cycle)
9236 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9238 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9239 && !STMT_VINFO_RELEVANT (stmt_info)
9240 && !STMT_VINFO_LIVE_P (stmt_info))
9241 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9243 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9244 gcc_assert (*vectype != NULL_TREE);
9246 else if (*dt == vect_uninitialized_def
9247 || *dt == vect_constant_def
9248 || *dt == vect_external_def)
9249 *vectype = NULL_TREE;
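/* Illustrative use of this overload (a sketch, not a prescribed API
   pattern): retrieve the operand's vector type, falling back to the
   caller's own choice for constants and external defs:

     gimple *def_stmt;
     enum vect_def_type dt;
     tree vectype;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
*/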
9257 /* Function supportable_widening_operation
9259 Check whether an operation represented by the code CODE is a
9260 widening operation that is supported by the target platform in
9261 vector form (i.e., when operating on arguments of type VECTYPE_IN
9262 producing a result of type VECTYPE_OUT).
9264 Widening operations we currently support are NOP (CONVERT), FLOAT
9265 and WIDEN_MULT. This function checks if these operations are supported
9266 by the target platform either directly (via vector tree-codes), or via target builtins.
9270 - CODE1 and CODE2 are codes of vector operations to be used when
9271 vectorizing the operation, if available.
9272 - MULTI_STEP_CVT determines the number of required intermediate steps in
9273 case of multi-step conversion (like char->short->int - in that case
9274 MULTI_STEP_CVT will be 1).
9275 - INTERM_TYPES contains the intermediate type required to perform the
9276 widening operation (short in the above example). */
9279 supportable_widening_operation (enum tree_code code, gimple *stmt,
9280 tree vectype_out, tree vectype_in,
9281 enum tree_code *code1, enum tree_code *code2,
9282 int *multi_step_cvt,
9283 vec<tree> *interm_types)
9285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9286 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9287 struct loop *vect_loop = NULL;
9288 machine_mode vec_mode;
9289 enum insn_code icode1, icode2;
9290 optab optab1, optab2;
9291 tree vectype = vectype_in;
9292 tree wide_vectype = vectype_out;
9293 enum tree_code c1, c2;
9295 tree prev_type, intermediate_type;
9296 machine_mode intermediate_mode, prev_mode;
9297 optab optab3, optab4;
9299 *multi_step_cvt = 0;
9301 vect_loop = LOOP_VINFO_LOOP (loop_info);
9305 case WIDEN_MULT_EXPR:
9306 /* The result of a vectorized widening operation usually requires
9307 two vectors (because the widened results do not fit into one vector).
9308 The generated vector results would normally be expected to be
9309 generated in the same order as in the original scalar computation,
9310 i.e. if 8 results are generated in each vector iteration, they are
9311 to be organized as follows:
9312 vect1: [res1,res2,res3,res4],
9313 vect2: [res5,res6,res7,res8].
9315 However, in the special case that the result of the widening
9316 operation is used in a reduction computation only, the order doesn't
9317 matter (because when vectorizing a reduction we change the order of
9318 the computation). Some targets can take advantage of this and
9319 generate more efficient code. For example, targets like Altivec,
9320 that support widen_mult using a sequence of {mult_even,mult_odd}
9321 generate the following vectors:
9322 vect1: [res1,res3,res5,res7],
9323 vect2: [res2,res4,res6,res8].
9325 When vectorizing outer-loops, we execute the inner-loop sequentially
9326 (each vectorized inner-loop iteration contributes to VF outer-loop
9327 iterations in parallel). We therefore don't allow changing the
9328 order of the computation in the inner-loop during outer-loop vectorization. */
9330 /* TODO: Another case in which order doesn't *really* matter is when we
9331 widen and then contract again, e.g. (short)((int)x * y >> 8).
9332 Normally, pack_trunc performs an even/odd permute, whereas the
9333 repack from an even/odd expansion would be an interleave, which
9334 would be significantly simpler for e.g. AVX2. */
9335 /* In any case, in order to avoid duplicating the code below, recurse
9336 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9337 are properly set up for the caller. If we fail, we'll continue with
9338 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9340 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9341 && !nested_in_vect_loop_p (vect_loop, stmt)
9342 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9343 stmt, vectype_out, vectype_in,
9344 code1, code2, multi_step_cvt,
9347 /* Elements in a vector with vect_used_by_reduction property cannot
9348 be reordered if the use chain with this property does not have the
9349 same operation. One such example is s += a * b, where elements
9350 in a and b cannot be reordered. Here we check if the vector defined
9351 by STMT is only directly used in the reduction statement. */
9352 tree lhs = gimple_assign_lhs (stmt);
9353 use_operand_p dummy;
9355 stmt_vec_info use_stmt_info = NULL;
9356 if (single_imm_use (lhs, &dummy, &use_stmt)
9357 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9358 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9361 c1 = VEC_WIDEN_MULT_LO_EXPR;
9362 c2 = VEC_WIDEN_MULT_HI_EXPR;
9375 case VEC_WIDEN_MULT_EVEN_EXPR:
9376 /* Support the recursion induced just above. */
9377 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9378 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9381 case WIDEN_LSHIFT_EXPR:
9382 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9383 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9387 c1 = VEC_UNPACK_LO_EXPR;
9388 c2 = VEC_UNPACK_HI_EXPR;
9392 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9393 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9396 case FIX_TRUNC_EXPR:
9397 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9398 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9399 computing the operation. */
9406 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9409 if (code == FIX_TRUNC_EXPR)
9411 /* The signedness is determined from the output operand. */
9412 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9413 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9417 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9418 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9421 if (!optab1 || !optab2)
9424 vec_mode = TYPE_MODE (vectype);
9425 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9426 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9432 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9433 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9434 /* For scalar masks we may have different boolean
9435 vector types having the same QImode. Thus we
9436 add an additional check on the number of elements. */
9437 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9438 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9439 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9441 /* Check if it's a multi-step conversion that can be done using intermediate types. */
9444 prev_type = vectype;
9445 prev_mode = vec_mode;
9447 if (!CONVERT_EXPR_CODE_P (code))
9450 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9451 intermediate steps in the promotion sequence. We try
9452 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do not. */
9454 interm_types->create (MAX_INTERM_CVT_STEPS);
9455 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9457 intermediate_mode = insn_data[icode1].operand[0].mode;
9458 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9461 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9462 current_vector_size);
9463 if (intermediate_mode != TYPE_MODE (intermediate_type))
9468 = lang_hooks.types.type_for_mode (intermediate_mode,
9469 TYPE_UNSIGNED (prev_type));
9471 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9472 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9474 if (!optab3 || !optab4
9475 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9476 || insn_data[icode1].operand[0].mode != intermediate_mode
9477 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9478 || insn_data[icode2].operand[0].mode != intermediate_mode
9479 || ((icode1 = optab_handler (optab3, intermediate_mode))
9480 == CODE_FOR_nothing)
9481 || ((icode2 = optab_handler (optab4, intermediate_mode))
9482 == CODE_FOR_nothing))
9485 interm_types->quick_push (intermediate_type);
9486 (*multi_step_cvt)++;
9488 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9489 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9490 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9491 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9492 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9494 prev_type = intermediate_type;
9495 prev_mode = intermediate_mode;
9498 interm_types->release ();
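/* Worked example (illustrative): a char -> int widening with V16QI-like
   input and V4SI-like output typically has no single-step optab, so the
   loop above inserts one intermediate step through a V8HI-like short
   vector: interm_types ends up holding that one type and *multi_step_cvt
   becomes 1, matching the char->short->int example in the function
   comment.  */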
9503 /* Function supportable_narrowing_operation
9505 Check whether an operation represented by the code CODE is a
9506 narrowing operation that is supported by the target platform in
9507 vector form (i.e., when operating on arguments of type VECTYPE_IN
9508 and producing a result of type VECTYPE_OUT).
9510 Narrowing operations we currently support are NOP (CONVERT) and
9511 FIX_TRUNC. This function checks if these operations are supported by
9512 the target platform directly via vector tree-codes.
9515 - CODE1 is the code of a vector operation to be used when
9516 vectorizing the operation, if available.
9517 - MULTI_STEP_CVT determines the number of required intermediate steps in
9518 case of multi-step conversion (like int->short->char - in that case
9519 MULTI_STEP_CVT will be 1).
9520 - INTERM_TYPES contains the intermediate type required to perform the
9521 narrowing operation (short in the above example). */
9524 supportable_narrowing_operation (enum tree_code code,
9525 tree vectype_out, tree vectype_in,
9526 enum tree_code *code1, int *multi_step_cvt,
9527 vec<tree> *interm_types)
9529 machine_mode vec_mode;
9530 enum insn_code icode1;
9531 optab optab1, interm_optab;
9532 tree vectype = vectype_in;
9533 tree narrow_vectype = vectype_out;
9535 tree intermediate_type, prev_type;
9536 machine_mode intermediate_mode, prev_mode;
9540 *multi_step_cvt = 0;
9544 c1 = VEC_PACK_TRUNC_EXPR;
9547 case FIX_TRUNC_EXPR:
9548 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9552 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9553 tree code and optabs used for computing the operation. */
9560 if (code == FIX_TRUNC_EXPR)
9561 /* The signedness is determined from the output operand. */
9562 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9564 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9569 vec_mode = TYPE_MODE (vectype);
9570 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9575 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9576 /* For scalar masks we may have different boolean
9577 vector types having the same QImode. Thus we
9578 add an additional check on the number of elements. */
9579 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9580 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9581 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9583 /* Check if it's a multi-step conversion that can be done using intermediate types. */
9585 prev_mode = vec_mode;
9586 prev_type = vectype;
9587 if (code == FIX_TRUNC_EXPR)
9588 uns = TYPE_UNSIGNED (vectype_out);
9590 uns = TYPE_UNSIGNED (vectype);
9592 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9593 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9594 costly than signed. */
9595 if (code == FIX_TRUNC_EXPR && uns)
9597 enum insn_code icode2;
9600 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9602 = optab_for_tree_code (c1, intermediate_type, optab_default);
9603 if (interm_optab != unknown_optab
9604 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9605 && insn_data[icode1].operand[0].mode
9606 == insn_data[icode2].operand[0].mode)
9609 optab1 = interm_optab;
9614 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9615 intermediate steps in the narrowing sequence. We try
9616 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9617 interm_types->create (MAX_INTERM_CVT_STEPS);
9618 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9620 intermediate_mode = insn_data[icode1].operand[0].mode;
9621 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9624 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9625 current_vector_size);
9626 if (intermediate_mode != TYPE_MODE (intermediate_type))
9631 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9633 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9636 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9637 || insn_data[icode1].operand[0].mode != intermediate_mode
9638 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9639 == CODE_FOR_nothing))
9642 interm_types->quick_push (intermediate_type);
9643 (*multi_step_cvt)++;
9645 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9646 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9647 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9648 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9650 prev_mode = intermediate_mode;
9651 prev_type = intermediate_type;
9652 optab1 = interm_optab;
9655 interm_types->release ();
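/* Usage sketch (illustrative), mirroring the widening case: narrowing
   int -> char goes through short, so a caller would see one
   intermediate step:

     enum tree_code code1;
     int steps;
     vec<tree> interm = vNULL;
     if (supportable_narrowing_operation (NOP_EXPR, v16qi_type, v4si_type,
                                          &code1, &steps, &interm))
       gcc_assert (code1 == VEC_PACK_TRUNC_EXPR && steps == 1);

   v16qi_type and v4si_type stand in for the vector types the caller
   computed; they are hypothetical names here.  */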