1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
63 /* Return the vectorized type for the given statement. */
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
104 body_cost_vec->safe_push (si);
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
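/* Illustrative usage sketch (not part of GCC itself): callers typically
   either collect costs in a vector for later processing,

     stmt_vector_for_cost cost_vec = vNULL;
     unsigned estimate = record_stmt_cost (&cost_vec, 1, vector_stmt,
                                           stmt_info, 0, vect_body);

   or pass a NULL vector so that the cost goes straight to the target's
   add_stmt_cost hook.  COST_VEC and ESTIMATE above are hypothetical
   local names used only for this example.  */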
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR (and its group).
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern; in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
257 if (!is_gimple_assign (stmt))
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
272 if (dt != vect_external_def && dt != vect_constant_def)
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - it is a control stmt in the loop (other than the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
296 imm_use_iterator imm_iter;
300 *relevant = vect_unused_in_scope;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
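/* For example (purely illustrative): in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + x;   <-- alters memory (has a vdef): relevant
         s = a[i] * 2;      <-- S is used after the loop: live
       }

   the loop exit condition itself would not be marked relevant here.  */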
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
375 /* STMT has a data_ref. FORNOW this means that it is of one of
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
395 operand = gimple_call_arg (stmt, 3);
400 operand = gimple_call_arg (stmt, 2);
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
424 Function process_use.
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if the exist_non_indexing_operands_for_use_p check shouldn't be performed.
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
475 if (!def_stmt || gimple_nop_p (def_stmt))
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
540 case vect_used_in_scope:
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
583 vect_mark_relevant (worklist, def_stmt, relevant, false);
588 /* Function vect_mark_stmts_to_be_vectorized.
590 Not all stmts in the loop need to be vectorized. For example:
599 Stmts 1 and 3 do not need to be vectorized, because loop control and
600 addressing of vectorized data-refs are handled differently.
602 This pass detects such stmts. */
605 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
607 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
608 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
609 unsigned int nbbs = loop->num_nodes;
610 gimple_stmt_iterator si;
613 stmt_vec_info stmt_vinfo;
617 enum vect_relevant relevant;
619 if (dump_enabled_p ())
620 dump_printf_loc (MSG_NOTE, vect_location,
621 "=== vect_mark_stmts_to_be_vectorized ===\n");
623 auto_vec<gimple *, 64> worklist;
625 /* 1. Init worklist. */
626 for (i = 0; i < nbbs; i++)
629 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
632 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
635 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
638 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
639 vect_mark_relevant (&worklist, phi, relevant, live_p);
641 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
643 stmt = gsi_stmt (si);
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
650 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
651 vect_mark_relevant (&worklist, stmt, relevant, live_p);
655 /* 2. Process_worklist */
656 while (worklist.length () > 0)
661 stmt = worklist.pop ();
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
668 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
669 (DEF_STMT) as relevant/irrelevant according to the relevance property of STMT. */
671 stmt_vinfo = vinfo_for_stmt (stmt);
672 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
674 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
675 propagated as is to the DEF_STMTs of its USEs.
677 One exception is when STMT has been identified as defining a reduction
678 variable; in this case we set the relevance to vect_used_by_reduction.
679 This is because we distinguish between two kinds of relevant stmts -
680 those that are used by a reduction computation, and those that are
681 (also) used by a regular computation. This allows us later on to
682 identify stmts that are used solely by a reduction, and therefore the
683 order of the results that they produce does not have to be kept. */
685 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
687 case vect_reduction_def:
688 gcc_assert (relevant != vect_unused_in_scope);
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_scope
691 && relevant != vect_used_by_reduction
692 && relevant != vect_used_only_live)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696 "unsupported use of reduction.\n");
701 case vect_nested_cycle:
702 if (relevant != vect_unused_in_scope
703 && relevant != vect_used_in_outer_by_reduction
704 && relevant != vect_used_in_outer)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
708 "unsupported use of nested cycle.\n");
714 case vect_double_reduction_def:
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_by_reduction
717 && relevant != vect_used_only_live)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of double reduction.\n");
731 if (is_pattern_stmt_p (stmt_vinfo))
733 /* Pattern statements are not inserted into the code, so
734 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
735 have to scan the RHS or function arguments instead. */
736 if (is_gimple_assign (stmt))
738 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
739 tree op = gimple_assign_rhs1 (stmt);
742 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
744 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
745 relevant, &worklist, false)
746 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
747 relevant, &worklist, false))
751 for (; i < gimple_num_ops (stmt); i++)
753 op = gimple_op (stmt, i);
754 if (TREE_CODE (op) == SSA_NAME
755 && !process_use (stmt, op, loop_vinfo, relevant,
760 else if (is_gimple_call (stmt))
762 for (i = 0; i < gimple_call_num_args (stmt); i++)
764 tree arg = gimple_call_arg (stmt, i);
765 if (!process_use (stmt, arg, loop_vinfo, relevant,
772 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774 tree op = USE_FROM_PTR (use_p);
775 if (!process_use (stmt, op, loop_vinfo, relevant,
780 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
782 gather_scatter_info gs_info;
783 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
785 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
789 } /* while worklist */
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
802 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
803 enum vect_def_type *dt,
804 stmt_vector_for_cost *prologue_cost_vec,
805 stmt_vector_for_cost *body_cost_vec)
808 int inside_cost = 0, prologue_cost = 0;
810 /* The SLP costs were already calculated during SLP tree build. */
811 if (PURE_SLP_STMT (stmt_info))
814 /* FORNOW: Assuming a maximum of 2 args per stmt. */
815 for (i = 0; i < 2; i++)
816 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
817 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
818 stmt_info, 0, vect_prologue);
820 /* Pass the inside-of-loop statements to the target-specific cost model. */
821 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
822 stmt_info, 0, vect_body);
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_NOTE, vect_location,
826 "vect_model_simple_cost: inside_cost = %d, "
827 "prologue_cost = %d .\n", inside_cost, prologue_cost);
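/* Worked example (illustrative only): for NCOPIES == 2 with one constant
   operand (dt[0] == vect_constant_def), the code above records one
   vector_stmt in the prologue and two vector_stmt copies in the loop
   body; the actual weights come from the target's cost hooks.  */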
831 /* Model cost for type demotion and promotion operations. PWR is normally
832 zero for single-step promotions and demotions. It will be one if
833 two-step promotion/demotion is required, and so on. Each additional
834 step doubles the number of instructions required. */
837 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
838 enum vect_def_type *dt, int pwr)
841 int inside_cost = 0, prologue_cost = 0;
842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
843 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
844 void *target_cost_data;
846 /* The SLP costs were already calculated during SLP tree build. */
847 if (PURE_SLP_STMT (stmt_info))
851 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
853 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
855 for (i = 0; i < pwr + 1; i++)
857 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
859 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
860 vec_promote_demote, stmt_info, 0,
864 /* FORNOW: Assuming a maximum of 2 args per stmt. */
865 for (i = 0; i < 2; i++)
866 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
867 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
868 stmt_info, 0, vect_prologue);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "vect_model_promotion_demotion_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost, prologue_cost);
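/* Illustrative sketch, not part of GCC: assuming vect_pow2 (n) behaves
   like 1 << n and that demotions use TMP == i (promotions one step more),
   the per-copy instruction count grows as below, e.g. 1 + 2 = 3
   vec_promote_demote operations for a two-step demotion (PWR == 1).  */
static int
hypothetical_demotion_step_count (int pwr)
{
  int total = 0;
  for (int i = 0; i < pwr + 1; i++)
    total += 1 << i;		/* mirrors vect_pow2 (tmp) above */
  return total;			/* pwr == 1  ->  3 */
}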
876 /* Function vect_model_store_cost
878 Models cost for stores. In the case of grouped accesses, one access
879 has the overhead of the grouped access attributed to it. */
882 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
883 vect_memory_access_type memory_access_type,
884 enum vect_def_type dt, slp_tree slp_node,
885 stmt_vector_for_cost *prologue_cost_vec,
886 stmt_vector_for_cost *body_cost_vec)
888 unsigned int inside_cost = 0, prologue_cost = 0;
889 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
890 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
891 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
893 if (dt == vect_constant_def || dt == vect_external_def)
894 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
895 stmt_info, 0, vect_prologue);
897 /* Grouped stores update all elements in the group at once,
898 so we want the DR for the first statement. */
899 if (!slp_node && grouped_access_p)
901 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
902 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
905 /* True if we should include any once-per-group costs as well as
906 the cost of the statement itself. For SLP we only get called
907 once per group anyhow. */
908 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
910 /* We assume that the cost of a single store-lanes instruction is
911 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
912 access is instead being provided by a permute-and-store operation,
913 include the cost of the permutes. */
915 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
917 /* Uses high and low interleave or shuffle operations for each needed permute. */
919 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
920 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
921 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
922 stmt_info, 0, vect_body);
924 if (dump_enabled_p ())
925 dump_printf_loc (MSG_NOTE, vect_location,
926 "vect_model_store_cost: strided group_size = %d .\n",
930 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
931 /* Costs of the stores. */
932 if (memory_access_type == VMAT_ELEMENTWISE)
933 /* N scalar stores plus extracting the elements. */
934 inside_cost += record_stmt_cost (body_cost_vec,
935 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
936 scalar_store, stmt_info, 0, vect_body);
938 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
940 if (memory_access_type == VMAT_ELEMENTWISE
941 || memory_access_type == VMAT_STRIDED_SLP)
942 inside_cost += record_stmt_cost (body_cost_vec,
943 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
944 vec_to_scalar, stmt_info, 0, vect_body);
946 if (dump_enabled_p ())
947 dump_printf_loc (MSG_NOTE, vect_location,
948 "vect_model_store_cost: inside_cost = %d, "
949 "prologue_cost = %d .\n", inside_cost, prologue_cost);
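/* Worked example (illustrative only): for an interleaved group of 4
   stores with NCOPIES == 2, the permute count computed above is
   2 * ceil_log2 (4) * 4 == 16 vec_perm statements.  */
static int
hypothetical_store_permute_count (int ncopies, int group_size)
{
  int steps = 0;			/* behaves like ceil_log2 (group_size) */
  while ((1 << steps) < group_size)
    steps++;
  return ncopies * steps * group_size;	/* (2, 4) -> 16 */
}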
953 /* Calculate cost of DR's memory access. */
955 vect_get_store_cost (struct data_reference *dr, int ncopies,
956 unsigned int *inside_cost,
957 stmt_vector_for_cost *body_cost_vec)
959 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
960 gimple *stmt = DR_STMT (dr);
961 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
963 switch (alignment_support_scheme)
967 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
968 vector_store, stmt_info, 0,
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE, vect_location,
973 "vect_model_store_cost: aligned.\n");
977 case dr_unaligned_supported:
979 /* Here, we assign an additional cost for the unaligned store. */
980 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
981 unaligned_store, stmt_info,
982 DR_MISALIGNMENT (dr), vect_body);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_model_store_cost: unaligned supported by "
990 case dr_unaligned_unsupported:
992 *inside_cost = VECT_MAX_COST;
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
996 "vect_model_store_cost: unsupported access.\n");
1006 /* Function vect_model_load_cost
1008 Models cost for loads. In the case of grouped accesses, one access has
1009 the overhead of the grouped access attributed to it. Since unaligned
1010 accesses are supported for loads, we also account for the costs of the
1011 access scheme chosen. */
1014 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1015 vect_memory_access_type memory_access_type,
1017 stmt_vector_for_cost *prologue_cost_vec,
1018 stmt_vector_for_cost *body_cost_vec)
1020 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1021 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1022 unsigned int inside_cost = 0, prologue_cost = 0;
1023 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1025 /* Grouped loads read all elements in the group at once,
1026 so we want the DR for the first statement. */
1027 if (!slp_node && grouped_access_p)
1029 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1030 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1033 /* True if we should include any once-per-group costs as well as
1034 the cost of the statement itself. For SLP we only get called
1035 once per group anyhow. */
1036 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1038 /* We assume that the cost of a single load-lanes instruction is
1039 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1040 access is instead being provided by a load-and-permute operation,
1041 include the cost of the permutes. */
1043 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1045 /* Uses even and odd extract operations or shuffle operations
1046 for each needed permute. */
1047 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1048 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1049 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1050 stmt_info, 0, vect_body);
1052 if (dump_enabled_p ())
1053 dump_printf_loc (MSG_NOTE, vect_location,
1054 "vect_model_load_cost: strided group_size = %d .\n",
1058 /* The loads themselves. */
1059 if (memory_access_type == VMAT_ELEMENTWISE)
1061 /* N scalar loads plus gathering them into a vector. */
1062 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1063 inside_cost += record_stmt_cost (body_cost_vec,
1064 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1065 scalar_load, stmt_info, 0, vect_body);
1068 vect_get_load_cost (dr, ncopies, first_stmt_p,
1069 &inside_cost, &prologue_cost,
1070 prologue_cost_vec, body_cost_vec, true);
1071 if (memory_access_type == VMAT_ELEMENTWISE
1072 || memory_access_type == VMAT_STRIDED_SLP)
1073 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1074 stmt_info, 0, vect_body);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE, vect_location,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1083 /* Calculate cost of DR's memory access. */
1085 vect_get_load_cost (struct data_reference *dr, int ncopies,
1086 bool add_realign_cost, unsigned int *inside_cost,
1087 unsigned int *prologue_cost,
1088 stmt_vector_for_cost *prologue_cost_vec,
1089 stmt_vector_for_cost *body_cost_vec,
1090 bool record_prologue_costs)
1092 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1093 gimple *stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 switch (alignment_support_scheme)
1100 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1101 stmt_info, 0, vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: aligned.\n");
1109 case dr_unaligned_supported:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1113 unaligned_load, stmt_info,
1114 DR_MISALIGNMENT (dr), vect_body);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE, vect_location,
1118 "vect_model_load_cost: unaligned supported by "
1123 case dr_explicit_realign:
1125 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1126 vector_load, stmt_info, 0, vect_body);
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1128 vec_perm, stmt_info, 0, vect_body);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the prologue costs. */
1133 if (targetm.vectorize.builtin_mask_for_load)
1134 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1135 stmt_info, 0, vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: explicit realign\n");
1143 case dr_explicit_realign_optimized:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned software "
1150 /* Unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost && record_prologue_costs)
1159 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1160 vector_stmt, stmt_info,
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1164 vector_stmt, stmt_info,
1168 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1169 stmt_info, 0, vect_body);
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1171 stmt_info, 0, vect_body);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: explicit realign optimized"
1181 case dr_unaligned_unsupported:
1183 *inside_cost = VECT_MAX_COST;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1187 "vect_model_load_cost: unsupported access.\n");
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
1200 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1206 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1215 if (nested_in_vect_loop_p (loop, stmt))
1218 pe = loop_preheader_edge (loop);
1219 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1220 gcc_assert (!new_bb);
1224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1226 gimple_stmt_iterator gsi_bb_start;
1228 gcc_assert (bb_vinfo);
1229 bb = BB_VINFO_BB (bb_vinfo);
1230 gsi_bb_start = gsi_after_labels (bb);
1231 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1247 a vector type, a vector with all elements equal to VAL is created first.
1248 Place the initialization at GSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
1254 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1259 /* We abuse this function to push something to an SSA name with initial 'val'. */
1260 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1262 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1263 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1265 /* A scalar boolean value should be transformed into
1266 an all-zeros or all-ones value before building a vector. */
1267 if (VECTOR_BOOLEAN_TYPE_P (type))
1269 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1270 tree false_val = build_zero_cst (TREE_TYPE (type));
1272 if (CONSTANT_CLASS_P (val))
1273 val = integer_zerop (val) ? false_val : true_val;
1276 new_temp = make_ssa_name (TREE_TYPE (type));
1277 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1278 val, true_val, false_val);
1279 vect_init_vector_1 (stmt, init_stmt, gsi);
1283 else if (CONSTANT_CLASS_P (val))
1284 val = fold_convert (TREE_TYPE (type), val);
1287 new_temp = make_ssa_name (TREE_TYPE (type));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1289 init_stmt = gimple_build_assign (new_temp,
1290 fold_build1 (VIEW_CONVERT_EXPR,
1294 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1295 vect_init_vector_1 (stmt, init_stmt, gsi);
1299 val = build_vector_from_val (type, val);
1302 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1303 init_stmt = gimple_build_assign (new_temp, val);
1304 vect_init_vector_1 (stmt, init_stmt, gsi);
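/* For example (illustrative): when vectorizing "a[i] = b[i] + 5" with a
   four-element integer vector type, calling vect_init_vector with VAL == 5
   emits an init_stmt such as

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader and returns cst_1; a scalar boolean VAL is first
   widened to an all-zeros/all-ones element as described above.  */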
1308 /* Function vect_get_vec_def_for_operand_1.
1310 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1311 DT that will be used in the vectorized stmt. */
1314 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1318 stmt_vec_info def_stmt_info = NULL;
1322 /* operand is a constant or a loop invariant. */
1323 case vect_constant_def:
1324 case vect_external_def:
1325 /* Code should use vect_get_vec_def_for_operand. */
1328 /* operand is defined inside the loop. */
1329 case vect_internal_def:
1331 /* Get the def from the vectorized stmt. */
1332 def_stmt_info = vinfo_for_stmt (def_stmt);
1334 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1335 /* Get vectorized pattern statement. */
1337 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1338 && !STMT_VINFO_RELEVANT (def_stmt_info))
1339 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1340 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1341 gcc_assert (vec_stmt);
1342 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1343 vec_oprnd = PHI_RESULT (vec_stmt);
1344 else if (is_gimple_call (vec_stmt))
1345 vec_oprnd = gimple_call_lhs (vec_stmt);
1347 vec_oprnd = gimple_assign_lhs (vec_stmt);
1351 /* operand is defined by a loop header phi - reduction */
1352 case vect_reduction_def:
1353 case vect_double_reduction_def:
1354 case vect_nested_cycle:
1355 /* Code should use get_initial_def_for_reduction. */
1358 /* operand is defined by loop-header phi - induction. */
1359 case vect_induction_def:
1361 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1363 /* Get the def from the vectorized stmt. */
1364 def_stmt_info = vinfo_for_stmt (def_stmt);
1365 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1366 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1367 vec_oprnd = PHI_RESULT (vec_stmt);
1369 vec_oprnd = gimple_get_lhs (vec_stmt);
1379 /* Function vect_get_vec_def_for_operand.
1381 OP is an operand in STMT. This function returns a (vector) def that will be
1382 used in the vectorized stmt for STMT.
1384 In the case that OP is an SSA_NAME which is defined in the loop, then
1385 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1387 In case OP is an invariant or constant, a new stmt that creates a vector def
1388 needs to be introduced. VECTYPE may be used to specify a required type for
1389 vector invariant. */
1392 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1395 enum vect_def_type dt;
1397 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1398 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1400 if (dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location,
1403 "vect_get_vec_def_for_operand: ");
1404 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1405 dump_printf (MSG_NOTE, "\n");
1408 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1409 gcc_assert (is_simple_use);
1410 if (def_stmt && dump_enabled_p ())
1412 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1416 if (dt == vect_constant_def || dt == vect_external_def)
1418 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1422 vector_type = vectype;
1423 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1424 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1425 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1427 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1429 gcc_assert (vector_type);
1430 return vect_init_vector (stmt, op, vector_type, NULL);
1433 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1437 /* Function vect_get_vec_def_for_stmt_copy
1439 Return a vector-def for an operand. This function is used when the
1440 vectorized stmt to be created (by the caller to this function) is a "copy"
1441 created in case the vectorized result cannot fit in one vector, and several
1442 copies of the vector-stmt are required. In this case the vector-def is
1443 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1444 of the stmt that defines VEC_OPRND.
1445 DT is the type of the vector def VEC_OPRND.
1448 In case the vectorization factor (VF) is bigger than the number
1449 of elements that can fit in a vectype (nunits), we have to generate
1450 more than one vector stmt to vectorize the scalar stmt. This situation
1451 arises when there are multiple data-types operated upon in the loop; the
1452 smallest data-type determines the VF, and as a result, when vectorizing
1453 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1454 vector stmt (each computing a vector of 'nunits' results, and together
1455 computing 'VF' results in each iteration). This function is called when
1456 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1457 which VF=16 and nunits=4, so the number of copies required is 4):
1459 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1461 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1462 VS1.1: vx.1 = memref1 VS1.2
1463 VS1.2: vx.2 = memref2 VS1.3
1464 VS1.3: vx.3 = memref3
1466 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1467 VSnew.1: vz1 = vx.1 + ... VSnew.2
1468 VSnew.2: vz2 = vx.2 + ... VSnew.3
1469 VSnew.3: vz3 = vx.3 + ...
1471 The vectorization of S1 is explained in vectorizable_load.
1472 The vectorization of S2:
1473 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1474 the function 'vect_get_vec_def_for_operand' is called to
1475 get the relevant vector-def for each operand of S2. For operand x it
1476 returns the vector-def 'vx.0'.
1478 To create the remaining copies of the vector-stmt (VSnew.j), this
1479 function is called to get the relevant vector-def for each operand. It is
1480 obtained from the respective VS1.j stmt, which is recorded in the
1481 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1483 For example, to obtain the vector-def 'vx.1' in order to create the
1484 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1485 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1486 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1487 and return its def ('vx.1').
1488 Overall, to create the above sequence this function will be called 3 times:
1489 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1490 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1491 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1494 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1496 gimple *vec_stmt_for_operand;
1497 stmt_vec_info def_stmt_info;
1499 /* Do nothing; can reuse same def. */
1500 if (dt == vect_external_def || dt == vect_constant_def )
1503 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1504 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1505 gcc_assert (def_stmt_info);
1506 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1507 gcc_assert (vec_stmt_for_operand);
1508 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1509 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1511 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1516 /* Get vectorized definitions for the operands to create a copy of an original
1517 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1520 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1521 vec<tree> *vec_oprnds0,
1522 vec<tree> *vec_oprnds1)
1524 tree vec_oprnd = vec_oprnds0->pop ();
1526 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1527 vec_oprnds0->quick_push (vec_oprnd);
1529 if (vec_oprnds1 && vec_oprnds1->length ())
1531 vec_oprnd = vec_oprnds1->pop ();
1532 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1533 vec_oprnds1->quick_push (vec_oprnd);
1538 /* Get vectorized definitions for OP0 and OP1.
1539 REDUC_INDEX is the index of the reduction operand in case of a reduction,
1540 and -1 otherwise. */
1543 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1544 vec<tree> *vec_oprnds0,
1545 vec<tree> *vec_oprnds1,
1546 slp_tree slp_node, int reduc_index)
1550 int nops = (op1 == NULL_TREE) ? 1 : 2;
1551 auto_vec<tree> ops (nops);
1552 auto_vec<vec<tree> > vec_defs (nops);
1554 ops.quick_push (op0);
1556 ops.quick_push (op1);
1558 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1560 *vec_oprnds0 = vec_defs[0];
1562 *vec_oprnds1 = vec_defs[1];
1568 vec_oprnds0->create (1);
1569 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1570 vec_oprnds0->quick_push (vec_oprnd);
1574 vec_oprnds1->create (1);
1575 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1576 vec_oprnds1->quick_push (vec_oprnd);
1582 /* Function vect_finish_stmt_generation.
1584 Insert a new stmt. */
1587 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1588 gimple_stmt_iterator *gsi)
1590 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1591 vec_info *vinfo = stmt_info->vinfo;
1593 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1595 if (!gsi_end_p (*gsi)
1596 && gimple_has_mem_ops (vec_stmt))
1598 gimple *at_stmt = gsi_stmt (*gsi);
1599 tree vuse = gimple_vuse (at_stmt);
1600 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1602 tree vdef = gimple_vdef (at_stmt);
1603 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1604 /* If we have an SSA vuse and insert a store, update virtual
1605 SSA form to avoid triggering the renamer. Do so only
1606 if we can easily see all uses - which is what almost always
1607 happens with the way vectorized stmts are inserted. */
1608 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1609 && ((is_gimple_assign (vec_stmt)
1610 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1611 || (is_gimple_call (vec_stmt)
1612 && !(gimple_call_flags (vec_stmt)
1613 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1615 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1616 gimple_set_vdef (vec_stmt, new_vdef);
1617 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1621 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1623 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1625 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1631 gimple_set_location (vec_stmt, gimple_location (stmt));
1633 /* While EH edges will generally prevent vectorization, stmt might
1634 e.g. be in a must-not-throw region. Ensure newly created stmts
1635 that could throw are part of the same region. */
1636 int lp_nr = lookup_stmt_eh_lp (stmt);
1637 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1638 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1641 /* We want to vectorize a call to combined function CFN with function
1642 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1643 as the types of all inputs. Check whether this is possible using
1644 an internal function, returning its code if so or IFN_LAST if not. */
1647 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1648 tree vectype_out, tree vectype_in)
1651 if (internal_fn_p (cfn))
1652 ifn = as_internal_fn (cfn);
1654 ifn = associated_internal_fn (fndecl);
1655 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1657 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1658 if (info.vectorizable)
1660 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1661 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1662 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1663 OPTIMIZE_FOR_SPEED))
1671 static tree permute_vec_elements (tree, tree, tree, gimple *,
1672 gimple_stmt_iterator *);
1674 /* STMT is a non-strided load or store, meaning that it accesses
1675 elements with a known constant step. Return -1 if that step
1676 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1679 compare_step_with_zero (gimple *stmt)
1681 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1682 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1684 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1685 step = STMT_VINFO_DR_STEP (stmt_info);
1687 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1688 return tree_int_cst_compare (step, size_zero_node);
1691 /* If the target supports a permute mask that reverses the elements in
1692 a vector of type VECTYPE, return that mask, otherwise return null. */
1695 perm_mask_for_reverse (tree vectype)
1700 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1701 sel = XALLOCAVEC (unsigned char, nunits);
1703 for (i = 0; i < nunits; ++i)
1704 sel[i] = nunits - 1 - i;
1706 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1708 return vect_gen_perm_mask_checked (vectype, sel);
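/* Illustrative example: for a four-element vector the selector built above
   is { 3, 2, 1, 0 }, i.e. the permute reverses the element order; NULL is
   returned when the target cannot perform that permutation.  */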
1711 /* A subroutine of get_load_store_type, with a subset of the same
1712 arguments. Handle the case where STMT is part of a grouped load or store.
1715 For stores, the statements in the group are all consecutive
1716 and there is no gap at the end. For loads, the statements in the
1717 group might not be consecutive; there can be gaps between statements
1718 as well as at the end. */
1721 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1722 vec_load_store_type vls_type,
1723 vect_memory_access_type *memory_access_type)
1725 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1726 vec_info *vinfo = stmt_info->vinfo;
1727 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1728 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1729 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1730 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1731 bool single_element_p = (stmt == first_stmt
1732 && !GROUP_NEXT_ELEMENT (stmt_info));
1733 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1734 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1736 /* True if the vectorized statements would access beyond the last
1737 statement in the group. */
1738 bool overrun_p = false;
1740 /* True if we can cope with such overrun by peeling for gaps, so that
1741 there is at least one final scalar iteration after the vector loop. */
1742 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1744 /* There can only be a gap at the end of the group if the stride is
1745 known at compile time. */
1746 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1748 /* Stores can't yet have gaps. */
1749 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1753 if (STMT_VINFO_STRIDED_P (stmt_info))
1755 /* Try to use consecutive accesses of GROUP_SIZE elements,
1756 separated by the stride, until we have a complete vector.
1757 Fall back to scalar accesses if that isn't possible. */
1758 if (nunits % group_size == 0)
1759 *memory_access_type = VMAT_STRIDED_SLP;
1761 *memory_access_type = VMAT_ELEMENTWISE;
1765 overrun_p = loop_vinfo && gap != 0;
1766 if (overrun_p && vls_type != VLS_LOAD)
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1769 "Grouped store with gaps requires"
1770 " non-consecutive accesses\n");
1773 /* If the access is aligned an overrun is fine. */
1776 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1778 if (overrun_p && !can_overrun_p)
1780 if (dump_enabled_p ())
1781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1782 "Peeling for outer loop is not supported\n");
1785 *memory_access_type = VMAT_CONTIGUOUS;
1790 /* We can always handle this case using elementwise accesses,
1791 but see if something more efficient is available. */
1792 *memory_access_type = VMAT_ELEMENTWISE;
1794 /* If there is a gap at the end of the group then these optimizations
1795 would access excess elements in the last iteration. */
1796 bool would_overrun_p = (gap != 0);
1797 /* If the access is aligned an overrun is fine. */
1799 && aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
1800 would_overrun_p = false;
1801 if (!STMT_VINFO_STRIDED_P (stmt_info)
1802 && (can_overrun_p || !would_overrun_p)
1803 && compare_step_with_zero (stmt) > 0)
1805 /* First try using LOAD/STORE_LANES. */
1806 if (vls_type == VLS_LOAD
1807 ? vect_load_lanes_supported (vectype, group_size)
1808 : vect_store_lanes_supported (vectype, group_size))
1810 *memory_access_type = VMAT_LOAD_STORE_LANES;
1811 overrun_p = would_overrun_p;
1814 /* If that fails, try using permuting loads. */
1815 if (*memory_access_type == VMAT_ELEMENTWISE
1816 && (vls_type == VLS_LOAD
1817 ? vect_grouped_load_supported (vectype, single_element_p,
1819 : vect_grouped_store_supported (vectype, group_size)))
1821 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1822 overrun_p = would_overrun_p;
1827 if (vls_type != VLS_LOAD && first_stmt == stmt)
1829 /* STMT is the leader of the group. Check the operands of all the
1830 stmts of the group. */
1831 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1834 gcc_assert (gimple_assign_single_p (next_stmt));
1835 tree op = gimple_assign_rhs1 (next_stmt);
1837 enum vect_def_type dt;
1838 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1842 "use not simple.\n");
1845 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1851 gcc_assert (can_overrun_p);
1852 if (dump_enabled_p ())
1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1854 "Data access with gaps requires scalar "
1856 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1862 /* A subroutine of get_load_store_type, with a subset of the same
1863 arguments. Handle the case where STMT is a load or store that
1864 accesses consecutive elements with a negative step. */
1866 static vect_memory_access_type
1867 get_negative_load_store_type (gimple *stmt, tree vectype,
1868 vec_load_store_type vls_type,
1869 unsigned int ncopies)
1871 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1872 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1873 dr_alignment_support alignment_support_scheme;
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1879 "multiple types with negative step.\n");
1880 return VMAT_ELEMENTWISE;
1883 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1884 if (alignment_support_scheme != dr_aligned
1885 && alignment_support_scheme != dr_unaligned_supported)
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1889 "negative step but alignment required.\n");
1890 return VMAT_ELEMENTWISE;
1893 if (vls_type == VLS_STORE_INVARIANT)
1895 if (dump_enabled_p ())
1896 dump_printf_loc (MSG_NOTE, vect_location,
1897 "negative step with invariant source;"
1898 " no permute needed.\n");
1899 return VMAT_CONTIGUOUS_DOWN;
1902 if (!perm_mask_for_reverse (vectype))
1904 if (dump_enabled_p ())
1905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1906 "negative step and reversing not supported.\n");
1907 return VMAT_ELEMENTWISE;
1910 return VMAT_CONTIGUOUS_REVERSE;
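/* For instance (illustrative): a loop such as

     for (i = n - 1; i >= 0; i--)
       a[i] = b[i] + 1;

   accesses consecutive elements with a negative constant step; with
   VMAT_CONTIGUOUS_REVERSE each vector is accessed contiguously and the
   elements are reordered using the mask from perm_mask_for_reverse.  */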
1913 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1914 if there is a memory access type that the vectorized form can use,
1915 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1916 or scatters, fill in GS_INFO accordingly.
1918 SLP says whether we're performing SLP rather than loop vectorization.
1919 VECTYPE is the vector type that the vectorized statements will use.
1920 NCOPIES is the number of vector statements that will be needed. */
1923 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1924 vec_load_store_type vls_type, unsigned int ncopies,
1925 vect_memory_access_type *memory_access_type,
1926 gather_scatter_info *gs_info)
1928 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1929 vec_info *vinfo = stmt_info->vinfo;
1930 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1931 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1933 *memory_access_type = VMAT_GATHER_SCATTER;
1935 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1937 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1938 &gs_info->offset_dt,
1939 &gs_info->offset_vectype))
1941 if (dump_enabled_p ())
1942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1943 "%s index use not simple.\n",
1944 vls_type == VLS_LOAD ? "gather" : "scatter");
1948 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1950 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1951 memory_access_type))
1954 else if (STMT_VINFO_STRIDED_P (stmt_info))
1957 *memory_access_type = VMAT_ELEMENTWISE;
1961 int cmp = compare_step_with_zero (stmt);
1963 *memory_access_type = get_negative_load_store_type
1964 (stmt, vectype, vls_type, ncopies);
1967 gcc_assert (vls_type == VLS_LOAD);
1968 *memory_access_type = VMAT_INVARIANT;
1971 *memory_access_type = VMAT_CONTIGUOUS;
1974 /* FIXME: At the moment the cost model seems to underestimate the
1975 cost of using elementwise accesses. This check preserves the
1976 traditional behavior until that can be fixed. */
1977 if (*memory_access_type == VMAT_ELEMENTWISE
1978 && !STMT_VINFO_STRIDED_P (stmt_info))
1980 if (dump_enabled_p ())
1981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1982 "not falling back to elementwise accesses\n");
1988 /* Function vectorizable_mask_load_store.
1990 Check if STMT performs a conditional load or store that can be vectorized.
1991 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1992 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1993 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
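/* Note: the statements handled here are IFN_MASK_LOAD / IFN_MASK_STORE
   internal calls, typically produced by if-conversion of a conditional load
   or store; as used below, the mask is call argument 2 and, for a masked
   store, the stored value is call argument 3.  */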
1996 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1997 gimple **vec_stmt, slp_tree slp_node)
1999 tree vec_dest = NULL;
2000 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2001 stmt_vec_info prev_stmt_info;
2002 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2003 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2004 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2005 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2006 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2007 tree rhs_vectype = NULL_TREE;
2012 tree dataref_ptr = NULL_TREE;
2014 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2018 gather_scatter_info gs_info;
2019 vec_load_store_type vls_type;
2022 enum vect_def_type dt;
2024 if (slp_node != NULL)
2027 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2028 gcc_assert (ncopies >= 1);
2030 mask = gimple_call_arg (stmt, 2);
2032 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
2035 /* FORNOW. This restriction should be relaxed. */
2036 if (nested_in_vect_loop && ncopies > 1)
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2040 "multiple types in nested loop.");
2044 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2047 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2051 if (!STMT_VINFO_DATA_REF (stmt_info))
2054 elem_type = TREE_TYPE (vectype);
2056 if (TREE_CODE (mask) != SSA_NAME)
2059 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2063 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2065 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2066 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2069 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2071 tree rhs = gimple_call_arg (stmt, 3);
2072 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2074 if (dt == vect_constant_def || dt == vect_external_def)
2075 vls_type = VLS_STORE_INVARIANT;
2077 vls_type = VLS_STORE;
2080 vls_type = VLS_LOAD;
2082 vect_memory_access_type memory_access_type;
2083 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2084 &memory_access_type, &gs_info))
2087 if (memory_access_type == VMAT_GATHER_SCATTER)
2089 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2091 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2092 if (TREE_CODE (masktype) == INTEGER_TYPE)
2094 if (dump_enabled_p ())
2095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2096 "masked gather with integer mask not supported.");
2100 else if (memory_access_type != VMAT_CONTIGUOUS)
2102 if (dump_enabled_p ())
2103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2104 "unsupported access type for masked %s.\n",
2105 vls_type == VLS_LOAD ? "load" : "store");
2108 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2109 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2110 TYPE_MODE (mask_vectype),
2111 vls_type == VLS_LOAD)
2113 && !useless_type_conversion_p (vectype, rhs_vectype)))
2116 if (!vec_stmt) /* transformation not required. */
2118 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2119 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2120 if (vls_type == VLS_LOAD)
2121 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2124 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2125 dt, NULL, NULL, NULL);
2128 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2132 if (memory_access_type == VMAT_GATHER_SCATTER)
2134 tree vec_oprnd0 = NULL_TREE, op;
2135 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2136 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2137 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2138 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2139 tree mask_perm_mask = NULL_TREE;
2140 edge pe = loop_preheader_edge (loop);
2143 enum { NARROW, NONE, WIDEN } modifier;
2144 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2146 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2147 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2148 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2149 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2150 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2151 scaletype = TREE_VALUE (arglist);
2152 gcc_checking_assert (types_compatible_p (srctype, rettype)
2153 && types_compatible_p (srctype, masktype));
2155 if (nunits == gather_off_nunits)
2157 else if (nunits == gather_off_nunits / 2)
2159 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2162 for (i = 0; i < gather_off_nunits; ++i)
2163 sel[i] = i | nunits;
2165 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2167 else if (nunits == gather_off_nunits * 2)
2169 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2172 for (i = 0; i < nunits; ++i)
2173 sel[i] = i < gather_off_nunits
2174 ? i : i + nunits - gather_off_nunits;
2176 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2178 for (i = 0; i < nunits; ++i)
2179 sel[i] = i | gather_off_nunits;
2180 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2185 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2187 ptr = fold_convert (ptrtype, gs_info.base);
2188 if (!is_gimple_min_invariant (ptr))
2190 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2191 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2192 gcc_assert (!new_bb);
2195 scale = build_int_cst (scaletype, gs_info.scale);
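/* Main generation loop for the gather case: for each of the NCOPIES copies,
   build (or permute) the offset and mask vectors, emit the masked gather
   builtin, and view-convert its result back to VECTYPE when the builtin's
   return type differs.  */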
2197 prev_stmt_info = NULL;
2198 for (j = 0; j < ncopies; ++j)
2200 if (modifier == WIDEN && (j & 1))
2201 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2202 perm_mask, stmt, gsi);
2205 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2208 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2210 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2212 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2213 == TYPE_VECTOR_SUBPARTS (idxtype));
2214 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2215 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2217 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2222 if (mask_perm_mask && (j & 1))
2223 mask_op = permute_vec_elements (mask_op, mask_op,
2224 mask_perm_mask, stmt, gsi);
2228 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2231 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2232 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2236 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2238 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2239 == TYPE_VECTOR_SUBPARTS (masktype));
2240 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2241 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2243 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2244 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2250 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2253 if (!useless_type_conversion_p (vectype, rettype))
2255 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2256 == TYPE_VECTOR_SUBPARTS (rettype));
2257 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2258 gimple_call_set_lhs (new_stmt, op);
2259 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2260 var = make_ssa_name (vec_dest);
2261 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2262 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2266 var = make_ssa_name (vec_dest, new_stmt);
2267 gimple_call_set_lhs (new_stmt, var);
2270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2272 if (modifier == NARROW)
2279 var = permute_vec_elements (prev_res, var,
2280 perm_mask, stmt, gsi);
2281 new_stmt = SSA_NAME_DEF_STMT (var);
2284 if (prev_stmt_info == NULL)
2285 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2287 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2288 prev_stmt_info = vinfo_for_stmt (new_stmt);
2291 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed from the IL.  */
2293 if (STMT_VINFO_RELATED_STMT (stmt_info))
2295 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2296 stmt_info = vinfo_for_stmt (stmt);
2298 tree lhs = gimple_call_lhs (stmt);
2299 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2300 set_vinfo_for_stmt (new_stmt, stmt_info);
2301 set_vinfo_for_stmt (stmt, NULL);
2302 STMT_VINFO_STMT (stmt_info) = new_stmt;
2303 gsi_replace (gsi, new_stmt, true);
2306 else if (vls_type != VLS_LOAD)
2308 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2309 prev_stmt_info = NULL;
2310 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2311 for (i = 0; i < ncopies; i++)
2313 unsigned align, misalign;
2317 tree rhs = gimple_call_arg (stmt, 3);
2318 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2319 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2320 /* We should have caught mismatched types earlier.  */
2321 gcc_assert (useless_type_conversion_p (vectype,
2322 TREE_TYPE (vec_rhs)));
2323 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2324 NULL_TREE, &dummy, gsi,
2325 &ptr_incr, false, &inv_p);
2326 gcc_assert (!inv_p);
2330 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2331 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2332 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2333 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2334 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2335 TYPE_SIZE_UNIT (vectype));
2338 align = TYPE_ALIGN_UNIT (vectype);
2339 if (aligned_access_p (dr))
2341 else if (DR_MISALIGNMENT (dr) == -1)
2343 align = TYPE_ALIGN_UNIT (elem_type);
2347 misalign = DR_MISALIGNMENT (dr);
2348 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2350 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2351 misalign ? least_bit_hwi (misalign) : align);
2353 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2354 ptr, vec_mask, vec_rhs);
2355 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2357 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2359 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2360 prev_stmt_info = vinfo_for_stmt (new_stmt);
2365 tree vec_mask = NULL_TREE;
2366 prev_stmt_info = NULL;
2367 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2368 for (i = 0; i < ncopies; i++)
2370 unsigned align, misalign;
2374 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2375 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2376 NULL_TREE, &dummy, gsi,
2377 &ptr_incr, false, &inv_p);
2378 gcc_assert (!inv_p);
2382 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2383 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2384 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2385 TYPE_SIZE_UNIT (vectype));
2388 align = TYPE_ALIGN_UNIT (vectype);
2389 if (aligned_access_p (dr))
2391 else if (DR_MISALIGNMENT (dr) == -1)
2393 align = TYPE_ALIGN_UNIT (elem_type);
2397 misalign = DR_MISALIGNMENT (dr);
2398 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2400 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2401 misalign ? least_bit_hwi (misalign) : align);
2403 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2405 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2406 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2408 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2410 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2411 prev_stmt_info = vinfo_for_stmt (new_stmt);
2415 if (vls_type == VLS_LOAD)
2417 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed from the IL.  */
2419 if (STMT_VINFO_RELATED_STMT (stmt_info))
2421 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2422 stmt_info = vinfo_for_stmt (stmt);
2424 tree lhs = gimple_call_lhs (stmt);
2425 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2426 set_vinfo_for_stmt (new_stmt, stmt_info);
2427 set_vinfo_for_stmt (stmt, NULL);
2428 STMT_VINFO_STMT (stmt_info) = new_stmt;
2429 gsi_replace (gsi, new_stmt, true);
2435 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2436 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2437 in a single step.  On success, store the binary pack code in *CONVERT_CODE.  */
2441 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2442 tree_code *convert_code)
2444 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2445 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2449 int multi_step_cvt = 0;
2450 auto_vec <tree, 8> interm_types;
2451 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2452 &code, &multi_step_cvt,
2457 *convert_code = code;
2461 /* Function vectorizable_call.
2463 Check if GS performs a function call that can be vectorized.
2464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2465 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2466 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
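/* Illustrative (hypothetical) example: with a vectorization factor of four, a
   call such as

     y = sqrtf (x);

   may be replaced by a single call operating on a four-element vector,
   provided either a vectorized internal function or a target builtin is
   available (see the lookup below).  */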
2469 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2476 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2477 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2478 tree vectype_out, vectype_in;
2481 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2482 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2483 vec_info *vinfo = stmt_info->vinfo;
2484 tree fndecl, new_temp, rhs_type;
2486 enum vect_def_type dt[3]
2487 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2488 gimple *new_stmt = NULL;
2490 vec<tree> vargs = vNULL;
2491 enum { NARROW, NONE, WIDEN } modifier;
2495 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2498 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2502 /* Is GS a vectorizable call? */
2503 stmt = dyn_cast <gcall *> (gs);
2507 if (gimple_call_internal_p (stmt)
2508 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2509 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2510 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2513 if (gimple_call_lhs (stmt) == NULL_TREE
2514 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2517 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2519 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2521 /* Process function arguments. */
2522 rhs_type = NULL_TREE;
2523 vectype_in = NULL_TREE;
2524 nargs = gimple_call_num_args (stmt);
2526 /* Bail out if the function has more than three arguments; we do not have
2527 interesting builtin functions to vectorize with more than two arguments
2528 except for fma.  Having no arguments is not supported either.  */
2529 if (nargs == 0 || nargs > 3)
2532 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2533 if (gimple_call_internal_p (stmt)
2534 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2537 rhs_type = unsigned_type_node;
2540 for (i = 0; i < nargs; i++)
2544 op = gimple_call_arg (stmt, i);
2546 /* We can only handle calls with arguments of the same type. */
2548 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2550 if (dump_enabled_p ())
2551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2552 "argument types differ.\n");
2556 rhs_type = TREE_TYPE (op);
2558 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2560 if (dump_enabled_p ())
2561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2562 "use not simple.\n");
2567 vectype_in = opvectype;
2569 && opvectype != vectype_in)
2571 if (dump_enabled_p ())
2572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2573 "argument vector types differ.\n");
2577 /* If all arguments are external or constant defs, use a vector type with
2578 the same size as the output vector type. */
2580 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2582 gcc_assert (vectype_in);
2585 if (dump_enabled_p ())
2587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2588 "no vectype for scalar type ");
2589 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2590 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2597 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2598 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2599 if (nunits_in == nunits_out / 2)
2601 else if (nunits_out == nunits_in)
2603 else if (nunits_out == nunits_in / 2)
2608 /* We only handle functions that do not read or clobber memory. */
2609 if (gimple_vuse (stmt))
2611 if (dump_enabled_p ())
2612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2613 "function reads from or writes to memory.\n");
2617 /* For now, we only vectorize functions if a target specific builtin
2618 is available. TODO -- in some cases, it might be profitable to
2619 insert the calls for pieces of the vector, in order to be able
2620 to vectorize other operations in the loop. */
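/* The lookup below tries, in this order: a vectorized internal function for
   the combined function, then the target hooks
   targetm.vectorize.builtin_vectorized_function and
   targetm.vectorize.builtin_md_vectorized_function, and finally the special
   case of IFN_GOMP_SIMD_LANE.  */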
2622 internal_fn ifn = IFN_LAST;
2623 combined_fn cfn = gimple_call_combined_fn (stmt);
2624 tree callee = gimple_call_fndecl (stmt);
2626 /* First try using an internal function. */
2627 tree_code convert_code = ERROR_MARK;
2629 && (modifier == NONE
2630 || (modifier == NARROW
2631 && simple_integer_narrowing (vectype_out, vectype_in,
2633 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2636 /* If that fails, try asking for a target-specific built-in function. */
2637 if (ifn == IFN_LAST)
2639 if (cfn != CFN_LAST)
2640 fndecl = targetm.vectorize.builtin_vectorized_function
2641 (cfn, vectype_out, vectype_in);
2643 fndecl = targetm.vectorize.builtin_md_vectorized_function
2644 (callee, vectype_out, vectype_in);
2647 if (ifn == IFN_LAST && !fndecl)
2649 if (cfn == CFN_GOMP_SIMD_LANE
2652 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2653 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2654 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2655 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2657 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2658 { 0, 1, 2, ... vf - 1 } vector. */
2659 gcc_assert (nargs == 0);
2663 if (dump_enabled_p ())
2664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2665 "function is not vectorizable.\n");
2672 else if (modifier == NARROW && ifn == IFN_LAST)
2673 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2675 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2677 /* Sanity check: make sure that at least one copy of the vectorized stmt
2678 needs to be generated. */
2679 gcc_assert (ncopies >= 1);
2681 if (!vec_stmt) /* transformation not required. */
2683 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2684 if (dump_enabled_p ())
2685 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2687 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2688 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2689 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2690 vec_promote_demote, stmt_info, 0, vect_body);
2697 if (dump_enabled_p ())
2698 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2701 scalar_dest = gimple_call_lhs (stmt);
2702 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2704 prev_stmt_info = NULL;
2705 if (modifier == NONE || ifn != IFN_LAST)
2707 tree prev_res = NULL_TREE;
2708 for (j = 0; j < ncopies; ++j)
2710 /* Build argument list for the vectorized call. */
2712 vargs.create (nargs);
2718 auto_vec<vec<tree> > vec_defs (nargs);
2719 vec<tree> vec_oprnds0;
2721 for (i = 0; i < nargs; i++)
2722 vargs.quick_push (gimple_call_arg (stmt, i));
2723 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2724 vec_oprnds0 = vec_defs[0];
2726 /* Arguments are ready. Create the new vector stmt. */
2727 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2730 for (k = 0; k < nargs; k++)
2732 vec<tree> vec_oprndsk = vec_defs[k];
2733 vargs[k] = vec_oprndsk[i];
2735 if (modifier == NARROW)
2737 tree half_res = make_ssa_name (vectype_in);
2738 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2739 gimple_call_set_lhs (new_stmt, half_res);
2740 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2743 prev_res = half_res;
2746 new_temp = make_ssa_name (vec_dest);
2747 new_stmt = gimple_build_assign (new_temp, convert_code,
2748 prev_res, half_res);
2752 if (ifn != IFN_LAST)
2753 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2755 new_stmt = gimple_build_call_vec (fndecl, vargs);
2756 new_temp = make_ssa_name (vec_dest, new_stmt);
2757 gimple_call_set_lhs (new_stmt, new_temp);
2759 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2760 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2763 for (i = 0; i < nargs; i++)
2765 vec<tree> vec_oprndsi = vec_defs[i];
2766 vec_oprndsi.release ();
2771 for (i = 0; i < nargs; i++)
2773 op = gimple_call_arg (stmt, i);
2776 = vect_get_vec_def_for_operand (op, stmt);
2779 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2781 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2784 vargs.quick_push (vec_oprnd0);
2787 if (gimple_call_internal_p (stmt)
2788 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2790 tree *v = XALLOCAVEC (tree, nunits_out);
2792 for (k = 0; k < nunits_out; ++k)
2793 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2794 tree cst = build_vector (vectype_out, v);
2796 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2797 gimple *init_stmt = gimple_build_assign (new_var, cst);
2798 vect_init_vector_1 (stmt, init_stmt, NULL);
2799 new_temp = make_ssa_name (vec_dest);
2800 new_stmt = gimple_build_assign (new_temp, new_var);
2802 else if (modifier == NARROW)
2804 tree half_res = make_ssa_name (vectype_in);
2805 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2806 gimple_call_set_lhs (new_stmt, half_res);
2807 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2810 prev_res = half_res;
2813 new_temp = make_ssa_name (vec_dest);
2814 new_stmt = gimple_build_assign (new_temp, convert_code,
2815 prev_res, half_res);
2819 if (ifn != IFN_LAST)
2820 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2822 new_stmt = gimple_build_call_vec (fndecl, vargs);
2823 new_temp = make_ssa_name (vec_dest, new_stmt);
2824 gimple_call_set_lhs (new_stmt, new_temp);
2826 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2828 if (j == (modifier == NARROW ? 1 : 0))
2829 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2831 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2833 prev_stmt_info = vinfo_for_stmt (new_stmt);
2836 else if (modifier == NARROW)
2838 for (j = 0; j < ncopies; ++j)
2840 /* Build argument list for the vectorized call. */
2842 vargs.create (nargs * 2);
2848 auto_vec<vec<tree> > vec_defs (nargs);
2849 vec<tree> vec_oprnds0;
2851 for (i = 0; i < nargs; i++)
2852 vargs.quick_push (gimple_call_arg (stmt, i));
2853 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2854 vec_oprnds0 = vec_defs[0];
2856 /* Arguments are ready. Create the new vector stmt. */
2857 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2861 for (k = 0; k < nargs; k++)
2863 vec<tree> vec_oprndsk = vec_defs[k];
2864 vargs.quick_push (vec_oprndsk[i]);
2865 vargs.quick_push (vec_oprndsk[i + 1]);
2867 if (ifn != IFN_LAST)
2868 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2870 new_stmt = gimple_build_call_vec (fndecl, vargs);
2871 new_temp = make_ssa_name (vec_dest, new_stmt);
2872 gimple_call_set_lhs (new_stmt, new_temp);
2873 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2874 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2877 for (i = 0; i < nargs; i++)
2879 vec<tree> vec_oprndsi = vec_defs[i];
2880 vec_oprndsi.release ();
2885 for (i = 0; i < nargs; i++)
2887 op = gimple_call_arg (stmt, i);
2891 = vect_get_vec_def_for_operand (op, stmt);
2893 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2897 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2899 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2901 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2904 vargs.quick_push (vec_oprnd0);
2905 vargs.quick_push (vec_oprnd1);
2908 new_stmt = gimple_build_call_vec (fndecl, vargs);
2909 new_temp = make_ssa_name (vec_dest, new_stmt);
2910 gimple_call_set_lhs (new_stmt, new_temp);
2911 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2914 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2916 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2918 prev_stmt_info = vinfo_for_stmt (new_stmt);
2921 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2924 /* No current target implements this case. */
2929 /* The call in STMT might prevent it from being removed in dce.
2930 We however cannot remove it here, due to the way the ssa name
2931 it defines is mapped to the new definition. So just replace
2932 rhs of the statement with something harmless. */
2937 type = TREE_TYPE (scalar_dest);
2938 if (is_pattern_stmt_p (stmt_info))
2939 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2941 lhs = gimple_call_lhs (stmt);
2943 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2944 set_vinfo_for_stmt (new_stmt, stmt_info);
2945 set_vinfo_for_stmt (stmt, NULL);
2946 STMT_VINFO_STMT (stmt_info) = new_stmt;
2947 gsi_replace (gsi, new_stmt, false);
2953 struct simd_call_arg_info
2957 enum vect_def_type dt;
2958 HOST_WIDE_INT linear_step;
2960 bool simd_lane_linear;
2963 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2964 is linear within the simd lane (but not within the whole loop), note it in ARGINFO.  */
2968 vect_simd_lane_linear (tree op, struct loop *loop,
2969 struct simd_call_arg_info *arginfo)
2971 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2973 if (!is_gimple_assign (def_stmt)
2974 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2975 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2978 tree base = gimple_assign_rhs1 (def_stmt);
2979 HOST_WIDE_INT linear_step = 0;
2980 tree v = gimple_assign_rhs2 (def_stmt);
2981 while (TREE_CODE (v) == SSA_NAME)
2984 def_stmt = SSA_NAME_DEF_STMT (v);
2985 if (is_gimple_assign (def_stmt))
2986 switch (gimple_assign_rhs_code (def_stmt))
2989 t = gimple_assign_rhs2 (def_stmt);
2990 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2992 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2993 v = gimple_assign_rhs1 (def_stmt);
2996 t = gimple_assign_rhs2 (def_stmt);
2997 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2999 linear_step = tree_to_shwi (t);
3000 v = gimple_assign_rhs1 (def_stmt);
3003 t = gimple_assign_rhs1 (def_stmt);
3004 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3005 || (TYPE_PRECISION (TREE_TYPE (v))
3006 < TYPE_PRECISION (TREE_TYPE (t))))
3015 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3017 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3018 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3023 arginfo->linear_step = linear_step;
3025 arginfo->simd_lane_linear = true;
3031 /* Function vectorizable_simd_clone_call.
3033 Check if STMT performs a function call that can be vectorized
3034 by calling a simd clone of the function.
3035 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3036 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3037 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
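/* Informal example: for a function declared with "#pragma omp declare simd",
   the compiler creates simd clones that take vector arguments; a call
   foo (x) inside a vectorized loop can then be replaced by a call to the
   clone whose simdlen best matches the vectorization factor, as selected
   below.  */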
3040 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3041 gimple **vec_stmt, slp_tree slp_node)
3046 tree vec_oprnd0 = NULL_TREE;
3047 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3049 unsigned int nunits;
3050 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3051 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3052 vec_info *vinfo = stmt_info->vinfo;
3053 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3054 tree fndecl, new_temp;
3056 gimple *new_stmt = NULL;
3058 auto_vec<simd_call_arg_info> arginfo;
3059 vec<tree> vargs = vNULL;
3061 tree lhs, rtype, ratype;
3062 vec<constructor_elt, va_gc> *ret_ctor_elts;
3064 /* Is STMT a vectorizable call? */
3065 if (!is_gimple_call (stmt))
3068 fndecl = gimple_call_fndecl (stmt);
3069 if (fndecl == NULL_TREE)
3072 struct cgraph_node *node = cgraph_node::get (fndecl);
3073 if (node == NULL || node->simd_clones == NULL)
3076 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3079 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3083 if (gimple_call_lhs (stmt)
3084 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3087 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3089 vectype = STMT_VINFO_VECTYPE (stmt_info);
3091 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3098 /* Process function arguments. */
3099 nargs = gimple_call_num_args (stmt);
3101 /* Bail out if the function has zero arguments. */
3105 arginfo.reserve (nargs, true);
3107 for (i = 0; i < nargs; i++)
3109 simd_call_arg_info thisarginfo;
3112 thisarginfo.linear_step = 0;
3113 thisarginfo.align = 0;
3114 thisarginfo.op = NULL_TREE;
3115 thisarginfo.simd_lane_linear = false;
3117 op = gimple_call_arg (stmt, i);
3118 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3119 &thisarginfo.vectype)
3120 || thisarginfo.dt == vect_uninitialized_def)
3122 if (dump_enabled_p ())
3123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3124 "use not simple.\n");
3128 if (thisarginfo.dt == vect_constant_def
3129 || thisarginfo.dt == vect_external_def)
3130 gcc_assert (thisarginfo.vectype == NULL_TREE);
3132 gcc_assert (thisarginfo.vectype != NULL_TREE);
3134 /* For linear arguments, the analyze phase should have saved
3135 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3136 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3137 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3139 gcc_assert (vec_stmt);
3140 thisarginfo.linear_step
3141 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3143 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3144 thisarginfo.simd_lane_linear
3145 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3146 == boolean_true_node);
3147 /* If loop has been peeled for alignment, we need to adjust it. */
3148 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3149 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3150 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3152 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3153 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3154 tree opt = TREE_TYPE (thisarginfo.op);
3155 bias = fold_convert (TREE_TYPE (step), bias);
3156 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3158 = fold_build2 (POINTER_TYPE_P (opt)
3159 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3160 thisarginfo.op, bias);
3164 && thisarginfo.dt != vect_constant_def
3165 && thisarginfo.dt != vect_external_def
3167 && TREE_CODE (op) == SSA_NAME
3168 && simple_iv (loop, loop_containing_stmt (stmt), op,
3170 && tree_fits_shwi_p (iv.step))
3172 thisarginfo.linear_step = tree_to_shwi (iv.step);
3173 thisarginfo.op = iv.base;
3175 else if ((thisarginfo.dt == vect_constant_def
3176 || thisarginfo.dt == vect_external_def)
3177 && POINTER_TYPE_P (TREE_TYPE (op)))
3178 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3179 /* Addresses of array elements indexed by GOMP_SIMD_LANE are linear within the simd lane too.  */
3181 if (POINTER_TYPE_P (TREE_TYPE (op))
3182 && !thisarginfo.linear_step
3184 && thisarginfo.dt != vect_constant_def
3185 && thisarginfo.dt != vect_external_def
3188 && TREE_CODE (op) == SSA_NAME)
3189 vect_simd_lane_linear (op, loop, &thisarginfo);
3191 arginfo.quick_push (thisarginfo);
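/* Pick the "best" simd clone below: clones whose simdlen exceeds the
   vectorization factor, whose argument kinds do not match, or whose
   alignment requirements cannot be met are skipped; the remaining candidates
   are ranked by a badness score, with penalties for a smaller simdlen, for
   inbranch clones, for target-reported badness and for coarser argument
   alignment.  */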
3194 unsigned int badness = 0;
3195 struct cgraph_node *bestn = NULL;
3196 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3197 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3199 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3200 n = n->simdclone->next_clone)
3202 unsigned int this_badness = 0;
3203 if (n->simdclone->simdlen
3204 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3205 || n->simdclone->nargs != nargs)
3207 if (n->simdclone->simdlen
3208 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3209 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3210 - exact_log2 (n->simdclone->simdlen)) * 1024;
3211 if (n->simdclone->inbranch)
3212 this_badness += 2048;
3213 int target_badness = targetm.simd_clone.usable (n);
3214 if (target_badness < 0)
3216 this_badness += target_badness * 512;
3217 /* FORNOW: Have to add code to add the mask argument. */
3218 if (n->simdclone->inbranch)
3220 for (i = 0; i < nargs; i++)
3222 switch (n->simdclone->args[i].arg_type)
3224 case SIMD_CLONE_ARG_TYPE_VECTOR:
3225 if (!useless_type_conversion_p
3226 (n->simdclone->args[i].orig_type,
3227 TREE_TYPE (gimple_call_arg (stmt, i))))
3229 else if (arginfo[i].dt == vect_constant_def
3230 || arginfo[i].dt == vect_external_def
3231 || arginfo[i].linear_step)
3234 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3235 if (arginfo[i].dt != vect_constant_def
3236 && arginfo[i].dt != vect_external_def)
3239 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3240 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3241 if (arginfo[i].dt == vect_constant_def
3242 || arginfo[i].dt == vect_external_def
3243 || (arginfo[i].linear_step
3244 != n->simdclone->args[i].linear_step))
3247 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3248 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3249 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3250 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3251 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3252 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3256 case SIMD_CLONE_ARG_TYPE_MASK:
3259 if (i == (size_t) -1)
3261 if (n->simdclone->args[i].alignment > arginfo[i].align)
3266 if (arginfo[i].align)
3267 this_badness += (exact_log2 (arginfo[i].align)
3268 - exact_log2 (n->simdclone->args[i].alignment));
3270 if (i == (size_t) -1)
3272 if (bestn == NULL || this_badness < badness)
3275 badness = this_badness;
3282 for (i = 0; i < nargs; i++)
3283 if ((arginfo[i].dt == vect_constant_def
3284 || arginfo[i].dt == vect_external_def)
3285 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3288 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3290 if (arginfo[i].vectype == NULL
3291 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3292 > bestn->simdclone->simdlen))
3296 fndecl = bestn->decl;
3297 nunits = bestn->simdclone->simdlen;
3298 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3300 /* If the function isn't const, only allow it in simd loops where the user
3301 has asserted that at least nunits consecutive iterations can be
3302 performed using SIMD instructions. */
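/* For instance (illustrative), a loop annotated with
   "#pragma omp simd safelen(8)" has loop->safelen == 8, which permits clones
   with simdlen up to 8 here even when the called function is not const.  */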
3303 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3304 && gimple_vuse (stmt))
3307 /* Sanity check: make sure that at least one copy of the vectorized stmt
3308 needs to be generated. */
3309 gcc_assert (ncopies >= 1);
3311 if (!vec_stmt) /* transformation not required. */
3313 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3314 for (i = 0; i < nargs; i++)
3315 if ((bestn->simdclone->args[i].arg_type
3316 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3317 || (bestn->simdclone->args[i].arg_type
3318 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3320 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3322 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3323 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3324 ? size_type_node : TREE_TYPE (arginfo[i].op);
3325 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3326 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3327 tree sll = arginfo[i].simd_lane_linear
3328 ? boolean_true_node : boolean_false_node;
3329 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3331 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_NOTE, vect_location,
3334 "=== vectorizable_simd_clone_call ===\n");
3335 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3341 if (dump_enabled_p ())
3342 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3345 scalar_dest = gimple_call_lhs (stmt);
3346 vec_dest = NULL_TREE;
3351 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3352 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3353 if (TREE_CODE (rtype) == ARRAY_TYPE)
3356 rtype = TREE_TYPE (ratype);
3360 prev_stmt_info = NULL;
3361 for (j = 0; j < ncopies; ++j)
3363 /* Build argument list for the vectorized call. */
3365 vargs.create (nargs);
3369 for (i = 0; i < nargs; i++)
3371 unsigned int k, l, m, o;
3373 op = gimple_call_arg (stmt, i);
3374 switch (bestn->simdclone->args[i].arg_type)
3376 case SIMD_CLONE_ARG_TYPE_VECTOR:
3377 atype = bestn->simdclone->args[i].vector_type;
3378 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3379 for (m = j * o; m < (j + 1) * o; m++)
3381 if (TYPE_VECTOR_SUBPARTS (atype)
3382 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3384 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3385 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3386 / TYPE_VECTOR_SUBPARTS (atype));
3387 gcc_assert ((k & (k - 1)) == 0);
3390 = vect_get_vec_def_for_operand (op, stmt);
3393 vec_oprnd0 = arginfo[i].op;
3394 if ((m & (k - 1)) == 0)
3396 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3399 arginfo[i].op = vec_oprnd0;
3401 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3403 bitsize_int ((m & (k - 1)) * prec));
3405 = gimple_build_assign (make_ssa_name (atype),
3407 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3408 vargs.safe_push (gimple_assign_lhs (new_stmt));
3412 k = (TYPE_VECTOR_SUBPARTS (atype)
3413 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3414 gcc_assert ((k & (k - 1)) == 0);
3415 vec<constructor_elt, va_gc> *ctor_elts;
3417 vec_alloc (ctor_elts, k);
3420 for (l = 0; l < k; l++)
3422 if (m == 0 && l == 0)
3424 = vect_get_vec_def_for_operand (op, stmt);
3427 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3429 arginfo[i].op = vec_oprnd0;
3432 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3436 vargs.safe_push (vec_oprnd0);
3439 vec_oprnd0 = build_constructor (atype, ctor_elts);
3441 = gimple_build_assign (make_ssa_name (atype),
3443 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3444 vargs.safe_push (gimple_assign_lhs (new_stmt));
3449 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3450 vargs.safe_push (op);
3452 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3453 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3458 = force_gimple_operand (arginfo[i].op, &stmts, true,
3463 edge pe = loop_preheader_edge (loop);
3464 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3465 gcc_assert (!new_bb);
3467 if (arginfo[i].simd_lane_linear)
3469 vargs.safe_push (arginfo[i].op);
3472 tree phi_res = copy_ssa_name (op);
3473 gphi *new_phi = create_phi_node (phi_res, loop->header);
3474 set_vinfo_for_stmt (new_phi,
3475 new_stmt_vec_info (new_phi, loop_vinfo));
3476 add_phi_arg (new_phi, arginfo[i].op,
3477 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3479 = POINTER_TYPE_P (TREE_TYPE (op))
3480 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3481 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3482 ? sizetype : TREE_TYPE (op);
3484 = wi::mul (bestn->simdclone->args[i].linear_step,
3486 tree tcst = wide_int_to_tree (type, cst);
3487 tree phi_arg = copy_ssa_name (op);
3489 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3490 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3491 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3492 set_vinfo_for_stmt (new_stmt,
3493 new_stmt_vec_info (new_stmt, loop_vinfo));
3494 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3496 arginfo[i].op = phi_res;
3497 vargs.safe_push (phi_res);
3502 = POINTER_TYPE_P (TREE_TYPE (op))
3503 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3504 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3505 ? sizetype : TREE_TYPE (op);
3507 = wi::mul (bestn->simdclone->args[i].linear_step,
3509 tree tcst = wide_int_to_tree (type, cst);
3510 new_temp = make_ssa_name (TREE_TYPE (op));
3511 new_stmt = gimple_build_assign (new_temp, code,
3512 arginfo[i].op, tcst);
3513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3514 vargs.safe_push (new_temp);
3517 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3518 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3519 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3520 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3521 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3522 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3528 new_stmt = gimple_build_call_vec (fndecl, vargs);
3531 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3533 new_temp = create_tmp_var (ratype);
3534 else if (TYPE_VECTOR_SUBPARTS (vectype)
3535 == TYPE_VECTOR_SUBPARTS (rtype))
3536 new_temp = make_ssa_name (vec_dest, new_stmt);
3538 new_temp = make_ssa_name (rtype, new_stmt);
3539 gimple_call_set_lhs (new_stmt, new_temp);
3541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3545 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3548 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3549 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3550 gcc_assert ((k & (k - 1)) == 0);
3551 for (l = 0; l < k; l++)
3556 t = build_fold_addr_expr (new_temp);
3557 t = build2 (MEM_REF, vectype, t,
3558 build_int_cst (TREE_TYPE (t),
3559 l * prec / BITS_PER_UNIT));
3562 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3563 size_int (prec), bitsize_int (l * prec));
3565 = gimple_build_assign (make_ssa_name (vectype), t);
3566 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3567 if (j == 0 && l == 0)
3568 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3570 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3572 prev_stmt_info = vinfo_for_stmt (new_stmt);
3577 tree clobber = build_constructor (ratype, NULL);
3578 TREE_THIS_VOLATILE (clobber) = 1;
3579 new_stmt = gimple_build_assign (new_temp, clobber);
3580 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3584 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3586 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3587 / TYPE_VECTOR_SUBPARTS (rtype));
3588 gcc_assert ((k & (k - 1)) == 0);
3589 if ((j & (k - 1)) == 0)
3590 vec_alloc (ret_ctor_elts, k);
3593 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3594 for (m = 0; m < o; m++)
3596 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3597 size_int (m), NULL_TREE, NULL_TREE);
3599 = gimple_build_assign (make_ssa_name (rtype), tem);
3600 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3601 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3602 gimple_assign_lhs (new_stmt));
3604 tree clobber = build_constructor (ratype, NULL);
3605 TREE_THIS_VOLATILE (clobber) = 1;
3606 new_stmt = gimple_build_assign (new_temp, clobber);
3607 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3610 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3611 if ((j & (k - 1)) != k - 1)
3613 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3615 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3616 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3618 if ((unsigned) j == k - 1)
3619 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3621 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3623 prev_stmt_info = vinfo_for_stmt (new_stmt);
3628 tree t = build_fold_addr_expr (new_temp);
3629 t = build2 (MEM_REF, vectype, t,
3630 build_int_cst (TREE_TYPE (t), 0));
3632 = gimple_build_assign (make_ssa_name (vec_dest), t);
3633 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3634 tree clobber = build_constructor (ratype, NULL);
3635 TREE_THIS_VOLATILE (clobber) = 1;
3636 vect_finish_stmt_generation (stmt,
3637 gimple_build_assign (new_temp,
3643 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3645 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3647 prev_stmt_info = vinfo_for_stmt (new_stmt);
3652 /* The call in STMT might prevent it from being removed in dce.
3653 We however cannot remove it here, due to the way the ssa name
3654 it defines is mapped to the new definition. So just replace
3655 rhs of the statement with something harmless. */
3662 type = TREE_TYPE (scalar_dest);
3663 if (is_pattern_stmt_p (stmt_info))
3664 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3666 lhs = gimple_call_lhs (stmt);
3667 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3670 new_stmt = gimple_build_nop ();
3671 set_vinfo_for_stmt (new_stmt, stmt_info);
3672 set_vinfo_for_stmt (stmt, NULL);
3673 STMT_VINFO_STMT (stmt_info) = new_stmt;
3674 gsi_replace (gsi, new_stmt, true);
3675 unlink_stmt_vdef (stmt);
3681 /* Function vect_gen_widened_results_half
3683 Create a vector stmt whose code, type, number of arguments, and result
3684 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3685 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3686 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3687 needs to be created (DECL is a function-decl of a target-builtin).
3688 STMT is the original scalar stmt that we are vectorizing. */
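/* Rough example: when widening a V8HI operand to V8SI, this helper is invoked
   twice by the caller, once with the "lo" and once with the "hi" variant of
   the widening code (or target builtin), each producing one V4SI half of the
   result.  */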
3691 vect_gen_widened_results_half (enum tree_code code,
3693 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3694 tree vec_dest, gimple_stmt_iterator *gsi,
3700 /* Generate half of the widened result: */
3701 if (code == CALL_EXPR)
3703 /* Target specific support */
3704 if (op_type == binary_op)
3705 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3707 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3708 new_temp = make_ssa_name (vec_dest, new_stmt);
3709 gimple_call_set_lhs (new_stmt, new_temp);
3713 /* Generic support */
3714 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3715 if (op_type != binary_op)
3717 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3718 new_temp = make_ssa_name (vec_dest, new_stmt);
3719 gimple_assign_set_lhs (new_stmt, new_temp);
3721 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3727 /* Get vectorized definitions for loop-based vectorization. For the first
3728 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3729 scalar operand), and for the rest we get a copy with
3730 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3731 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3732 The vectors are collected into VEC_OPRNDS. */
3735 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3736 vec<tree> *vec_oprnds, int multi_step_cvt)
3740 /* Get first vector operand. */
3741 /* All the vector operands except the very first one (that is scalar oprnd) are stmt copies.  */
3743 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3744 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3746 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3748 vec_oprnds->quick_push (vec_oprnd);
3750 /* Get second vector operand. */
3751 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3752 vec_oprnds->quick_push (vec_oprnd);
3756 /* For conversion in multiple steps, continue to get operands
3759 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3763 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3764 For multi-step conversions, store the resulting vectors and call the function recursively.  */
3768 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3769 int multi_step_cvt, gimple *stmt,
3771 gimple_stmt_iterator *gsi,
3772 slp_tree slp_node, enum tree_code code,
3773 stmt_vec_info *prev_stmt_info)
3776 tree vop0, vop1, new_tmp, vec_dest;
3778 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3780 vec_dest = vec_dsts.pop ();
3782 for (i = 0; i < vec_oprnds->length (); i += 2)
3784 /* Create demotion operation. */
3785 vop0 = (*vec_oprnds)[i];
3786 vop1 = (*vec_oprnds)[i + 1];
3787 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3788 new_tmp = make_ssa_name (vec_dest, new_stmt);
3789 gimple_assign_set_lhs (new_stmt, new_tmp);
3790 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3793 /* Store the resulting vector for next recursive call. */
3794 (*vec_oprnds)[i/2] = new_tmp;
3797 /* This is the last step of the conversion sequence. Store the
3798 vectors in SLP_NODE or in vector info of the scalar statement
3799 (or in STMT_VINFO_RELATED_STMT chain). */
3801 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3804 if (!*prev_stmt_info)
3805 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3807 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3809 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3814 /* For multi-step demotion operations we first generate demotion operations
3815 from the source type to the intermediate types, and then combine the
3816 results (stored in VEC_OPRNDS) in a demotion operation to the destination type.  */
3820 /* At each level of recursion we have half of the operands we had at the previous level.  */
3822 vec_oprnds->truncate ((i+1)/2);
3823 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3824 stmt, vec_dsts, gsi, slp_node,
3825 VEC_PACK_TRUNC_EXPR,
3829 vec_dsts.quick_push (vec_dest);
3833 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3834 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3835 the resulting vectors and call the function recursively. */
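/* E.g. (informal) promoting a pair of V8HI operands to V8SI yields two V4SI
   vectors per input vector, produced with the CODE1/CODE2 (lo/hi) variants;
   the new vectors replace the contents of VEC_OPRNDS0 for the next step.  */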
3838 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3839 vec<tree> *vec_oprnds1,
3840 gimple *stmt, tree vec_dest,
3841 gimple_stmt_iterator *gsi,
3842 enum tree_code code1,
3843 enum tree_code code2, tree decl1,
3844 tree decl2, int op_type)
3847 tree vop0, vop1, new_tmp1, new_tmp2;
3848 gimple *new_stmt1, *new_stmt2;
3849 vec<tree> vec_tmp = vNULL;
3851 vec_tmp.create (vec_oprnds0->length () * 2);
3852 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3854 if (op_type == binary_op)
3855 vop1 = (*vec_oprnds1)[i];
3859 /* Generate the two halves of the promotion operation. */
3860 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3861 op_type, vec_dest, gsi, stmt);
3862 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3863 op_type, vec_dest, gsi, stmt);
3864 if (is_gimple_call (new_stmt1))
3866 new_tmp1 = gimple_call_lhs (new_stmt1);
3867 new_tmp2 = gimple_call_lhs (new_stmt2);
3871 new_tmp1 = gimple_assign_lhs (new_stmt1);
3872 new_tmp2 = gimple_assign_lhs (new_stmt2);
3875 /* Store the results for the next step. */
3876 vec_tmp.quick_push (new_tmp1);
3877 vec_tmp.quick_push (new_tmp2);
3880 vec_oprnds0->release ();
3881 *vec_oprnds0 = vec_tmp;
3885 /* Check if STMT performs a conversion operation that can be vectorized.
3886 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3887 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3888 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
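/* Illustrative cases (not exhaustive): float -> double is a widening (WIDEN)
   conversion, double -> float a narrowing (NARROW) one, and e.g.
   short -> double may need a multi-step conversion through an intermediate
   integer type, as handled below for FLOAT_EXPR.  */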
3891 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3892 gimple **vec_stmt, slp_tree slp_node)
3896 tree op0, op1 = NULL_TREE;
3897 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3898 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3899 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3900 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3901 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3902 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3905 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3906 gimple *new_stmt = NULL;
3907 stmt_vec_info prev_stmt_info;
3910 tree vectype_out, vectype_in;
3912 tree lhs_type, rhs_type;
3913 enum { NARROW, NONE, WIDEN } modifier;
3914 vec<tree> vec_oprnds0 = vNULL;
3915 vec<tree> vec_oprnds1 = vNULL;
3917 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3918 vec_info *vinfo = stmt_info->vinfo;
3919 int multi_step_cvt = 0;
3920 vec<tree> interm_types = vNULL;
3921 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3923 machine_mode rhs_mode;
3924 unsigned short fltsz;
3926 /* Is STMT a vectorizable conversion? */
3928 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3931 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3935 if (!is_gimple_assign (stmt))
3938 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3941 code = gimple_assign_rhs_code (stmt);
3942 if (!CONVERT_EXPR_CODE_P (code)
3943 && code != FIX_TRUNC_EXPR
3944 && code != FLOAT_EXPR
3945 && code != WIDEN_MULT_EXPR
3946 && code != WIDEN_LSHIFT_EXPR)
3949 op_type = TREE_CODE_LENGTH (code);
3951 /* Check types of lhs and rhs. */
3952 scalar_dest = gimple_assign_lhs (stmt);
3953 lhs_type = TREE_TYPE (scalar_dest);
3954 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3956 op0 = gimple_assign_rhs1 (stmt);
3957 rhs_type = TREE_TYPE (op0);
3959 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3960 && !((INTEGRAL_TYPE_P (lhs_type)
3961 && INTEGRAL_TYPE_P (rhs_type))
3962 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3963 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3966 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3967 && ((INTEGRAL_TYPE_P (lhs_type)
3968 && (TYPE_PRECISION (lhs_type)
3969 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3970 || (INTEGRAL_TYPE_P (rhs_type)
3971 && (TYPE_PRECISION (rhs_type)
3972 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3974 if (dump_enabled_p ())
3975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3976 "type conversion to/from bit-precision unsupported."
3981 /* Check the operands of the operation. */
3982 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3984 if (dump_enabled_p ())
3985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3986 "use not simple.\n");
3989 if (op_type == binary_op)
3993 op1 = gimple_assign_rhs2 (stmt);
3994 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3995 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of OP1.  */
3997 if (CONSTANT_CLASS_P (op0))
3998 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4000 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4004 if (dump_enabled_p ())
4005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4006 "use not simple.\n");
4011 /* If op0 is an external or constant def, use a vector type of
4012 the same size as the output vector type. */
4014 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4016 gcc_assert (vectype_in);
4019 if (dump_enabled_p ())
4021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4022 "no vectype for scalar type ");
4023 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4024 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4030 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4031 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4033 if (dump_enabled_p ())
4035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4036 "can't convert between boolean and non "
4038 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4039 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4045 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4046 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4047 if (nunits_in < nunits_out)
4049 else if (nunits_out == nunits_in)
4054 /* Multiple types in SLP are handled by creating the appropriate number of
4055 vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in case of SLP.  */
4059 else if (modifier == NARROW)
4060 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4062 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4064 /* Sanity check: make sure that at least one copy of the vectorized stmt
4065 needs to be generated. */
4066 gcc_assert (ncopies >= 1);
4068 /* Supportable by target? */
4072 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4074 if (supportable_convert_operation (code, vectype_out, vectype_in,
4079 if (dump_enabled_p ())
4080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4081 "conversion not supported by target.\n");
4085 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4086 &code1, &code2, &multi_step_cvt,
4089 /* Binary widening operation can only be supported directly by the architecture.  */
4091 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4095 if (code != FLOAT_EXPR
4096 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4097 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4100 rhs_mode = TYPE_MODE (rhs_type);
4101 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4102 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4103 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4104 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4107 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4108 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4109 if (cvt_type == NULL_TREE)
4112 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4114 if (!supportable_convert_operation (code, vectype_out,
4115 cvt_type, &decl1, &codecvt1))
4118 else if (!supportable_widening_operation (code, stmt, vectype_out,
4119 cvt_type, &codecvt1,
4120 &codecvt2, &multi_step_cvt,
4124 gcc_assert (multi_step_cvt == 0);
4126 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4127 vectype_in, &code1, &code2,
4128 &multi_step_cvt, &interm_types))
4132 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4135 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4136 codecvt2 = ERROR_MARK;
4140 interm_types.safe_push (cvt_type);
4141 cvt_type = NULL_TREE;
4146 gcc_assert (op_type == unary_op);
4147 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4148 &code1, &multi_step_cvt,
4152 if (code != FIX_TRUNC_EXPR
4153 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4154 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4157 rhs_mode = TYPE_MODE (rhs_type);
4159 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4160 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4161 if (cvt_type == NULL_TREE)
4163 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4166 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4167 &code1, &multi_step_cvt,
4176 if (!vec_stmt) /* transformation not required. */
4178 if (dump_enabled_p ())
4179 dump_printf_loc (MSG_NOTE, vect_location,
4180 "=== vectorizable_conversion ===\n");
4181 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4183 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4184 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4186 else if (modifier == NARROW)
4188 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4189 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4193 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4194 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4196 interm_types.release ();
4201 if (dump_enabled_p ())
4202 dump_printf_loc (MSG_NOTE, vect_location,
4203 "transform conversion. ncopies = %d.\n", ncopies);
4205 if (op_type == binary_op)
4207 if (CONSTANT_CLASS_P (op0))
4208 op0 = fold_convert (TREE_TYPE (op1), op0);
4209 else if (CONSTANT_CLASS_P (op1))
4210 op1 = fold_convert (TREE_TYPE (op0), op1);
4213 /* In case of multi-step conversion, we first generate conversion operations
4214 to the intermediate types, and then from those types to the final one.
4215 We create vector destinations for the intermediate type (TYPES) received
4216 from supportable_*_operation, and store them in the correct order
4217 for future use in vect_create_vectorized_*_stmts (). */
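   /* For illustration only (a sketch, not specific to any target): a
      widening conversion from a vector of chars to a vector of ints
      usually cannot be done in one step, so it goes through an
      intermediate short type:

        step 1:  vshort = (vector short) vchar;
        step 2:  vint   = (vector int)   vshort;

      Here "vector short" is one of the intermediate TYPES mentioned
      above.  */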
4218 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4219 vec_dest = vect_create_destination_var (scalar_dest,
4220 (cvt_type && modifier == WIDEN)
4221 ? cvt_type : vectype_out);
4222 vec_dsts.quick_push (vec_dest);
4226 for (i = interm_types.length () - 1;
4227 interm_types.iterate (i, &intermediate_type); i--)
4229 vec_dest = vect_create_destination_var (scalar_dest,
4231 vec_dsts.quick_push (vec_dest);
4236 vec_dest = vect_create_destination_var (scalar_dest,
4238 ? vectype_out : cvt_type);
4242 if (modifier == WIDEN)
4244 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4245 if (op_type == binary_op)
4246 vec_oprnds1.create (1);
4248 else if (modifier == NARROW)
4249 vec_oprnds0.create (
4250 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4252 else if (code == WIDEN_LSHIFT_EXPR)
4253 vec_oprnds1.create (slp_node->vec_stmts_size);
4256 prev_stmt_info = NULL;
4260 for (j = 0; j < ncopies; j++)
4263 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4266 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4268 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4270 /* Arguments are ready. Create the new vector stmt. */
4271 if (code1 == CALL_EXPR)
4273 new_stmt = gimple_build_call (decl1, 1, vop0);
4274 new_temp = make_ssa_name (vec_dest, new_stmt);
4275 gimple_call_set_lhs (new_stmt, new_temp);
4279 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4280 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4281 new_temp = make_ssa_name (vec_dest, new_stmt);
4282 gimple_assign_set_lhs (new_stmt, new_temp);
4285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4287 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4290 if (!prev_stmt_info)
4291 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4293 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4294 prev_stmt_info = vinfo_for_stmt (new_stmt);
4301 /* In case the vectorization factor (VF) is bigger than the number
4302 of elements that we can fit in a vectype (nunits), we have to
4303 generate more than one vector stmt, i.e., we need to "unroll"
4304 the vector stmt by a factor VF/nunits. */
4305 for (j = 0; j < ncopies; j++)
4312 if (code == WIDEN_LSHIFT_EXPR)
4317 /* Store vec_oprnd1 for every vector stmt to be created
4318 for SLP_NODE. We check during the analysis that all
4319 the shift arguments are the same. */
4320 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4321 vec_oprnds1.quick_push (vec_oprnd1);
4323 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4327 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4328 &vec_oprnds1, slp_node, -1);
4332 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4333 vec_oprnds0.quick_push (vec_oprnd0);
4334 if (op_type == binary_op)
4336 if (code == WIDEN_LSHIFT_EXPR)
4339 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4340 vec_oprnds1.quick_push (vec_oprnd1);
4346 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4347 vec_oprnds0.truncate (0);
4348 vec_oprnds0.quick_push (vec_oprnd0);
4349 if (op_type == binary_op)
4351 if (code == WIDEN_LSHIFT_EXPR)
4354 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4356 vec_oprnds1.truncate (0);
4357 vec_oprnds1.quick_push (vec_oprnd1);
4361 /* Arguments are ready. Create the new vector stmts. */
4362 for (i = multi_step_cvt; i >= 0; i--)
4364 tree this_dest = vec_dsts[i];
4365 enum tree_code c1 = code1, c2 = code2;
4366 if (i == 0 && codecvt2 != ERROR_MARK)
4371 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4373 stmt, this_dest, gsi,
4374 c1, c2, decl1, decl2,
4378 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4382 if (codecvt1 == CALL_EXPR)
4384 new_stmt = gimple_build_call (decl1, 1, vop0);
4385 new_temp = make_ssa_name (vec_dest, new_stmt);
4386 gimple_call_set_lhs (new_stmt, new_temp);
4390 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4391 new_temp = make_ssa_name (vec_dest);
4392 new_stmt = gimple_build_assign (new_temp, codecvt1,
4396 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4399 new_stmt = SSA_NAME_DEF_STMT (vop0);
4402 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4405 if (!prev_stmt_info)
4406 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4408 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4409 prev_stmt_info = vinfo_for_stmt (new_stmt);
4414 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4418 /* In case the vectorization factor (VF) is bigger than the number
4419 of elements that we can fit in a vectype (nunits), we have to
4420 generate more than one vector stmt, i.e., we need to "unroll"
4421 the vector stmt by a factor VF/nunits. */
4422 for (j = 0; j < ncopies; j++)
4426 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4430 vec_oprnds0.truncate (0);
4431 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4432 vect_pow2 (multi_step_cvt) - 1);
4435 /* Arguments are ready. Create the new vector stmts. */
4437 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4439 if (codecvt1 == CALL_EXPR)
4441 new_stmt = gimple_build_call (decl1, 1, vop0);
4442 new_temp = make_ssa_name (vec_dest, new_stmt);
4443 gimple_call_set_lhs (new_stmt, new_temp);
4447 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4448 new_temp = make_ssa_name (vec_dest);
4449 new_stmt = gimple_build_assign (new_temp, codecvt1,
4453 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4454 vec_oprnds0[i] = new_temp;
4457 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4458 stmt, vec_dsts, gsi,
4463 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4467 vec_oprnds0.release ();
4468 vec_oprnds1.release ();
4469 interm_types.release ();
4475 /* Function vectorizable_assignment.
4477 Check if STMT performs an assignment (copy) that can be vectorized.
4478 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4479 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4480 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
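/* For example (illustrative only), a plain SSA copy or a conversion that
   does not change the bit pattern, such as

     x_1 = y_2;
     u_3 = (unsigned int) s_4;

   is vectorized as a single vector copy per required copy, possibly
   wrapped in a VIEW_CONVERT_EXPR to the destination vector type.  */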
4483 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4484 gimple **vec_stmt, slp_tree slp_node)
4489 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4490 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4493 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4496 vec<tree> vec_oprnds = vNULL;
4498 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4499 vec_info *vinfo = stmt_info->vinfo;
4500 gimple *new_stmt = NULL;
4501 stmt_vec_info prev_stmt_info = NULL;
4502 enum tree_code code;
4505 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4508 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4512 /* Is vectorizable assignment? */
4513 if (!is_gimple_assign (stmt))
4516 scalar_dest = gimple_assign_lhs (stmt);
4517 if (TREE_CODE (scalar_dest) != SSA_NAME)
4520 code = gimple_assign_rhs_code (stmt);
4521 if (gimple_assign_single_p (stmt)
4522 || code == PAREN_EXPR
4523 || CONVERT_EXPR_CODE_P (code))
4524 op = gimple_assign_rhs1 (stmt);
4528 if (code == VIEW_CONVERT_EXPR)
4529 op = TREE_OPERAND (op, 0);
4531 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4532 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4534 /* Multiple types in SLP are handled by creating the appropriate number of
4535 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4540 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4542 gcc_assert (ncopies >= 1);
4544 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4546 if (dump_enabled_p ())
4547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4548 "use not simple.\n");
4552 /* We can handle NOP_EXPR conversions that do not change the number
4553 of elements or the vector size. */
4554 if ((CONVERT_EXPR_CODE_P (code)
4555 || code == VIEW_CONVERT_EXPR)
4557 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4558 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4559 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4562 /* We do not handle bit-precision changes. */
4563 if ((CONVERT_EXPR_CODE_P (code)
4564 || code == VIEW_CONVERT_EXPR)
4565 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4566 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4567 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4568 || ((TYPE_PRECISION (TREE_TYPE (op))
4569 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4570 /* But a conversion that does not change the bit-pattern is ok. */
4571 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4572 > TYPE_PRECISION (TREE_TYPE (op)))
4573 && TYPE_UNSIGNED (TREE_TYPE (op)))
4574 /* Conversion between boolean types of different sizes is
4575 a simple assignment in case their vectypes are same
4577 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4578 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4580 if (dump_enabled_p ())
4581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4582 "type conversion to/from bit-precision "
4587 if (!vec_stmt) /* transformation not required. */
4589 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4590 if (dump_enabled_p ())
4591 dump_printf_loc (MSG_NOTE, vect_location,
4592 "=== vectorizable_assignment ===\n");
4593 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4598 if (dump_enabled_p ())
4599 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4602 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4605 for (j = 0; j < ncopies; j++)
4609 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4611 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4613 /* Arguments are ready. Create the new vector stmt. */
4614 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4616 if (CONVERT_EXPR_CODE_P (code)
4617 || code == VIEW_CONVERT_EXPR)
4618 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4619 new_stmt = gimple_build_assign (vec_dest, vop);
4620 new_temp = make_ssa_name (vec_dest, new_stmt);
4621 gimple_assign_set_lhs (new_stmt, new_temp);
4622 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4624 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4631 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4633 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4635 prev_stmt_info = vinfo_for_stmt (new_stmt);
4638 vec_oprnds.release ();
4643 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4644 either as shift by a scalar or by a vector. */
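/* E.g. (illustrative), for  x << amount  on int elements this returns
   true if the target provides either a vector-shift-by-scalar or a
   vector-shift-by-vector instruction for the vector mode chosen for
   SCALAR_TYPE.  */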
4647 vect_supportable_shift (enum tree_code code, tree scalar_type)
4650 machine_mode vec_mode;
4655 vectype = get_vectype_for_scalar_type (scalar_type);
4659 optab = optab_for_tree_code (code, vectype, optab_scalar);
4661 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4663 optab = optab_for_tree_code (code, vectype, optab_vector);
4665 || (optab_handler (optab, TYPE_MODE (vectype))
4666 == CODE_FOR_nothing))
4670 vec_mode = TYPE_MODE (vectype);
4671 icode = (int) optab_handler (optab, vec_mode);
4672 if (icode == CODE_FOR_nothing)
4679 /* Function vectorizable_shift.
4681 Check if STMT performs a shift operation that can be vectorized.
4682 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4683 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4684 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
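/* Two illustrative cases (sketches only):

     a[i] = b[i] << 3;        the shift amount is invariant, so the
                              vector-by-scalar shift form can be used
     a[i] = b[i] << c[i];     the shift amount varies per element, so a
                              vector-by-vector shift is required  */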
4687 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4688 gimple **vec_stmt, slp_tree slp_node)
4692 tree op0, op1 = NULL;
4693 tree vec_oprnd1 = NULL_TREE;
4694 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4696 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4697 enum tree_code code;
4698 machine_mode vec_mode;
4702 machine_mode optab_op2_mode;
4704 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4705 gimple *new_stmt = NULL;
4706 stmt_vec_info prev_stmt_info;
4713 vec<tree> vec_oprnds0 = vNULL;
4714 vec<tree> vec_oprnds1 = vNULL;
4717 bool scalar_shift_arg = true;
4718 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4719 vec_info *vinfo = stmt_info->vinfo;
4722 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4725 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4729 /* Is STMT a vectorizable binary/unary operation? */
4730 if (!is_gimple_assign (stmt))
4733 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4736 code = gimple_assign_rhs_code (stmt);
4738 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4739 || code == RROTATE_EXPR))
4742 scalar_dest = gimple_assign_lhs (stmt);
4743 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4744 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4745 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4747 if (dump_enabled_p ())
4748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4749 "bit-precision shifts not supported.\n");
4753 op0 = gimple_assign_rhs1 (stmt);
4754 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4756 if (dump_enabled_p ())
4757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4758 "use not simple.\n");
4761 /* If op0 is an external or constant def, use a vector type with
4762 the same size as the output vector type. */
4764 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4766 gcc_assert (vectype);
4769 if (dump_enabled_p ())
4770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4771 "no vectype for scalar type\n");
4775 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4776 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4777 if (nunits_out != nunits_in)
4780 op1 = gimple_assign_rhs2 (stmt);
4781 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4783 if (dump_enabled_p ())
4784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4785 "use not simple.\n");
4790 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4794 /* Multiple types in SLP are handled by creating the appropriate number of
4795 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4800 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4802 gcc_assert (ncopies >= 1);
4804 /* Determine whether the shift amount is a vector or a scalar. If the
4805 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4807 if ((dt[1] == vect_internal_def
4808 || dt[1] == vect_induction_def)
4810 scalar_shift_arg = false;
4811 else if (dt[1] == vect_constant_def
4812 || dt[1] == vect_external_def
4813 || dt[1] == vect_internal_def)
4815 /* In SLP, we need to check whether the shift count is the same;
4816 in loops, if it is a constant or invariant, it is always
4820 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4823 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4824 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4825 scalar_shift_arg = false;
4828 /* If the shift amount is computed by a pattern stmt, we cannot
4829 use the scalar amount directly; thus give up and use a vector
4831 if (dt[1] == vect_internal_def)
4833 gimple *def = SSA_NAME_DEF_STMT (op1);
4834 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4835 scalar_shift_arg = false;
4840 if (dump_enabled_p ())
4841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4842 "operand mode requires invariant argument.\n");
4846 /* Vector shifted by vector. */
4847 if (!scalar_shift_arg)
4849 optab = optab_for_tree_code (code, vectype, optab_vector);
4850 if (dump_enabled_p ())
4851 dump_printf_loc (MSG_NOTE, vect_location,
4852 "vector/vector shift/rotate found.\n");
4855 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4856 if (op1_vectype == NULL_TREE
4857 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4859 if (dump_enabled_p ())
4860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4861 "unusable type for last operand in"
4862 " vector/vector shift/rotate.\n");
4866 /* See if the machine has a vector shifted by scalar insn and, if not,
4867 see if it has a vector shifted by vector insn. */
4870 optab = optab_for_tree_code (code, vectype, optab_scalar);
4872 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4874 if (dump_enabled_p ())
4875 dump_printf_loc (MSG_NOTE, vect_location,
4876 "vector/scalar shift/rotate found.\n");
4880 optab = optab_for_tree_code (code, vectype, optab_vector);
4882 && (optab_handler (optab, TYPE_MODE (vectype))
4883 != CODE_FOR_nothing))
4885 scalar_shift_arg = false;
4887 if (dump_enabled_p ())
4888 dump_printf_loc (MSG_NOTE, vect_location,
4889 "vector/vector shift/rotate found.\n");
4891 /* Unlike the other binary operators, shifts/rotates have
4892 the rhs being int, instead of the same type as the lhs,
4893 so make sure the scalar is the right type if we are
4894 dealing with vectors of long long/long/short/char. */
4895 if (dt[1] == vect_constant_def)
4896 op1 = fold_convert (TREE_TYPE (vectype), op1);
4897 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4901 && TYPE_MODE (TREE_TYPE (vectype))
4902 != TYPE_MODE (TREE_TYPE (op1)))
4904 if (dump_enabled_p ())
4905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4906 "unusable type for last operand in"
4907 " vector/vector shift/rotate.\n");
4910 if (vec_stmt && !slp_node)
4912 op1 = fold_convert (TREE_TYPE (vectype), op1);
4913 op1 = vect_init_vector (stmt, op1,
4914 TREE_TYPE (vectype), NULL);
4921 /* Supportable by target? */
4924 if (dump_enabled_p ())
4925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4929 vec_mode = TYPE_MODE (vectype);
4930 icode = (int) optab_handler (optab, vec_mode);
4931 if (icode == CODE_FOR_nothing)
4933 if (dump_enabled_p ())
4934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4935 "op not supported by target.\n");
4936 /* Check only during analysis. */
4937 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4938 || (vf < vect_min_worthwhile_factor (code)
4941 if (dump_enabled_p ())
4942 dump_printf_loc (MSG_NOTE, vect_location,
4943 "proceeding using word mode.\n");
4946 /* Worthwhile without SIMD support? Check only during analysis. */
4947 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4948 && vf < vect_min_worthwhile_factor (code)
4951 if (dump_enabled_p ())
4952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4953 "not worthwhile without SIMD support.\n");
4957 if (!vec_stmt) /* transformation not required. */
4959 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4960 if (dump_enabled_p ())
4961 dump_printf_loc (MSG_NOTE, vect_location,
4962 "=== vectorizable_shift ===\n");
4963 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4969 if (dump_enabled_p ())
4970 dump_printf_loc (MSG_NOTE, vect_location,
4971 "transform binary/unary operation.\n");
4974 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4976 prev_stmt_info = NULL;
4977 for (j = 0; j < ncopies; j++)
4982 if (scalar_shift_arg)
4984 /* Vector shl and shr insn patterns can be defined with scalar
4985 operand 2 (shift operand). In this case, use constant or loop
4986 invariant op1 directly, without extending it to vector mode
4988 optab_op2_mode = insn_data[icode].operand[2].mode;
4989 if (!VECTOR_MODE_P (optab_op2_mode))
4991 if (dump_enabled_p ())
4992 dump_printf_loc (MSG_NOTE, vect_location,
4993 "operand 1 using scalar mode.\n");
4995 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4996 vec_oprnds1.quick_push (vec_oprnd1);
4999 /* Store vec_oprnd1 for every vector stmt to be created
5000 for SLP_NODE. We check during the analysis that all
5001 the shift arguments are the same.
5002 TODO: Allow different constants for different vector
5003 stmts generated for an SLP instance. */
5004 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5005 vec_oprnds1.quick_push (vec_oprnd1);
5010 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5011 (a special case for certain kinds of vector shifts); otherwise,
5012 operand 1 should be of a vector type (the usual case). */
5014 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5017 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5021 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5023 /* Arguments are ready. Create the new vector stmt. */
5024 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5026 vop1 = vec_oprnds1[i];
5027 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5028 new_temp = make_ssa_name (vec_dest, new_stmt);
5029 gimple_assign_set_lhs (new_stmt, new_temp);
5030 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5032 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5039 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5041 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5042 prev_stmt_info = vinfo_for_stmt (new_stmt);
5045 vec_oprnds0.release ();
5046 vec_oprnds1.release ();
5052 /* Function vectorizable_operation.
5054 Check if STMT performs a binary, unary or ternary operation that can
5056 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5057 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5058 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
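/* E.g. (illustrative), with four elements per vector the scalar loop

     for (i = 0; i < n; i++)
       c[i] = a[i] + b[i];

   has its addition replaced, per copy, by one vector addition

     vc_1 = va_2 + vb_3;

   ternary codes are handled the same way, with a third vectorized
   operand.  */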
5061 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5062 gimple **vec_stmt, slp_tree slp_node)
5066 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5067 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5069 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5070 enum tree_code code;
5071 machine_mode vec_mode;
5075 bool target_support_p;
5077 enum vect_def_type dt[3]
5078 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5079 gimple *new_stmt = NULL;
5080 stmt_vec_info prev_stmt_info;
5086 vec<tree> vec_oprnds0 = vNULL;
5087 vec<tree> vec_oprnds1 = vNULL;
5088 vec<tree> vec_oprnds2 = vNULL;
5089 tree vop0, vop1, vop2;
5090 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5091 vec_info *vinfo = stmt_info->vinfo;
5094 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5097 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5101 /* Is STMT a vectorizable binary/unary operation? */
5102 if (!is_gimple_assign (stmt))
5105 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5108 code = gimple_assign_rhs_code (stmt);
5110 /* For pointer addition, we should use the normal plus for
5111 the vector addition. */
5112 if (code == POINTER_PLUS_EXPR)
5115 /* Support only unary, binary or ternary operations. */
5116 op_type = TREE_CODE_LENGTH (code);
5117 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5119 if (dump_enabled_p ())
5120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5121 "num. args = %d (not unary/binary/ternary op).\n",
5126 scalar_dest = gimple_assign_lhs (stmt);
5127 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5129 /* Most operations cannot handle bit-precision types without extra
5131 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5132 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5133 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5134 /* Exceptions are bitwise binary operations. */
5135 && code != BIT_IOR_EXPR
5136 && code != BIT_XOR_EXPR
5137 && code != BIT_AND_EXPR)
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5141 "bit-precision arithmetic not supported.\n");
5145 op0 = gimple_assign_rhs1 (stmt);
5146 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5148 if (dump_enabled_p ())
5149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5150 "use not simple.\n");
5153 /* If op0 is an external or constant def, use a vector type with
5154 the same size as the output vector type. */
5157 /* For boolean type we cannot determine vectype by
5158 invariant value (don't know whether it is a vector
5159 of booleans or vector of integers). We use output
5160 vectype because operations on boolean don't change
5162 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
5164 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
5166 if (dump_enabled_p ())
5167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5168 "not supported operation on bool value.\n");
5171 vectype = vectype_out;
5174 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5177 gcc_assert (vectype);
5180 if (dump_enabled_p ())
5182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5183 "no vectype for scalar type ");
5184 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5186 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5192 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5193 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5194 if (nunits_out != nunits_in)
5197 if (op_type == binary_op || op_type == ternary_op)
5199 op1 = gimple_assign_rhs2 (stmt);
5200 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5202 if (dump_enabled_p ())
5203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5204 "use not simple.\n");
5208 if (op_type == ternary_op)
5210 op2 = gimple_assign_rhs3 (stmt);
5211 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5213 if (dump_enabled_p ())
5214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5215 "use not simple.\n");
5221 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5225 /* Multiple types in SLP are handled by creating the appropriate number of
5226 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5231 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5233 gcc_assert (ncopies >= 1);
5235 /* Shifts are handled in vectorizable_shift (). */
5236 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5237 || code == RROTATE_EXPR)
5240 /* Supportable by target? */
5242 vec_mode = TYPE_MODE (vectype);
5243 if (code == MULT_HIGHPART_EXPR)
5244 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5247 optab = optab_for_tree_code (code, vectype, optab_default);
5250 if (dump_enabled_p ())
5251 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5255 target_support_p = (optab_handler (optab, vec_mode)
5256 != CODE_FOR_nothing);
5259 if (!target_support_p)
5261 if (dump_enabled_p ())
5262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5263 "op not supported by target.\n");
5264 /* Check only during analysis. */
5265 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5266 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5268 if (dump_enabled_p ())
5269 dump_printf_loc (MSG_NOTE, vect_location,
5270 "proceeding using word mode.\n");
5273 /* Worthwhile without SIMD support? Check only during analysis. */
5274 if (!VECTOR_MODE_P (vec_mode)
5276 && vf < vect_min_worthwhile_factor (code))
5278 if (dump_enabled_p ())
5279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5280 "not worthwhile without SIMD support.\n");
5284 if (!vec_stmt) /* transformation not required. */
5286 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5287 if (dump_enabled_p ())
5288 dump_printf_loc (MSG_NOTE, vect_location,
5289 "=== vectorizable_operation ===\n");
5290 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5296 if (dump_enabled_p ())
5297 dump_printf_loc (MSG_NOTE, vect_location,
5298 "transform binary/unary operation.\n");
5301 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5303 /* In case the vectorization factor (VF) is bigger than the number
5304 of elements that we can fit in a vectype (nunits), we have to generate
5305 more than one vector stmt, i.e., we need to "unroll" the
5306 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5307 from one copy of the vector stmt to the next, in the field
5308 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5309 stages to find the correct vector defs to be used when vectorizing
5310 stmts that use the defs of the current stmt. The example below
5311 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5312 we need to create 4 vectorized stmts):
5314 before vectorization:
5315 RELATED_STMT VEC_STMT
5319 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5321 RELATED_STMT VEC_STMT
5322 VS1_0: vx0 = memref0 VS1_1 -
5323 VS1_1: vx1 = memref1 VS1_2 -
5324 VS1_2: vx2 = memref2 VS1_3 -
5325 VS1_3: vx3 = memref3 - -
5326 S1: x = load - VS1_0
5329 step 2: vectorize stmt S2 (done here):
5330 To vectorize stmt S2 we first need to find the relevant vector
5331 def for the first operand 'x'. This is, as usual, obtained from
5332 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5333 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5334 relevant vector def 'vx0'. Having found 'vx0' we can generate
5335 the vector stmt VS2_0, and as usual, record it in the
5336 STMT_VINFO_VEC_STMT of stmt S2.
5337 When creating the second copy (VS2_1), we obtain the relevant vector
5338 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5339 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5340 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5341 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5342 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5343 chain of stmts and pointers:
5344 RELATED_STMT VEC_STMT
5345 VS1_0: vx0 = memref0 VS1_1 -
5346 VS1_1: vx1 = memref1 VS1_2 -
5347 VS1_2: vx2 = memref2 VS1_3 -
5348 VS1_3: vx3 = memref3 - -
5349 S1: x = load - VS1_0
5350 VS2_0: vz0 = vx0 + v1 VS2_1 -
5351 VS2_1: vz1 = vx1 + v1 VS2_2 -
5352 VS2_2: vz2 = vx2 + v1 VS2_3 -
5353 VS2_3: vz3 = vx3 + v1 - -
5354 S2: z = x + 1 - VS2_0 */
5356 prev_stmt_info = NULL;
5357 for (j = 0; j < ncopies; j++)
5362 if (op_type == binary_op || op_type == ternary_op)
5363 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5366 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5368 if (op_type == ternary_op)
5369 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5374 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5375 if (op_type == ternary_op)
5377 tree vec_oprnd = vec_oprnds2.pop ();
5378 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5383 /* Arguments are ready. Create the new vector stmt. */
5384 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5386 vop1 = ((op_type == binary_op || op_type == ternary_op)
5387 ? vec_oprnds1[i] : NULL_TREE);
5388 vop2 = ((op_type == ternary_op)
5389 ? vec_oprnds2[i] : NULL_TREE);
5390 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5391 new_temp = make_ssa_name (vec_dest, new_stmt);
5392 gimple_assign_set_lhs (new_stmt, new_temp);
5393 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5395 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5402 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5404 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5405 prev_stmt_info = vinfo_for_stmt (new_stmt);
5408 vec_oprnds0.release ();
5409 vec_oprnds1.release ();
5410 vec_oprnds2.release ();
5415 /* A helper function to ensure data reference DR's base alignment
5419 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5424 if (DR_VECT_AUX (dr)->base_misaligned)
5426 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5427 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5429 if (decl_in_symtab_p (base_decl))
5430 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5433 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5434 DECL_USER_ALIGN (base_decl) = 1;
5436 DR_VECT_AUX (dr)->base_misaligned = false;
5441 /* Function get_group_alias_ptr_type.
5443 Return the alias type for the group starting at FIRST_STMT. */
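/* E.g. (illustrative), if the members of an interleaved group access
   objects with different alias sets, ptr_type_node is returned below so
   that the vector accesses built for the group conservatively alias
   anything.  */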
5446 get_group_alias_ptr_type (gimple *first_stmt)
5448 struct data_reference *first_dr, *next_dr;
5451 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5452 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5455 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5456 if (get_alias_set (DR_REF (first_dr))
5457 != get_alias_set (DR_REF (next_dr)))
5459 if (dump_enabled_p ())
5460 dump_printf_loc (MSG_NOTE, vect_location,
5461 "conflicting alias set types.\n");
5462 return ptr_type_node;
5464 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5466 return reference_alias_ptr_type (DR_REF (first_dr));
5470 /* Function vectorizable_store.
5472 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5474 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5475 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5476 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
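/* E.g. (illustrative), a contiguous store of a loop-invariant value

     for (i = 0; i < n; i++)
       a[i] = x;

   becomes, per copy, a single vector store of the splatted value

     MEM_REF[vectp_a] = { x, x, x, x };

   while grouped, strided and scatter stores take the more specialised
   paths further down.  */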
5479 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5485 tree vec_oprnd = NULL_TREE;
5486 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5487 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5489 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5490 struct loop *loop = NULL;
5491 machine_mode vec_mode;
5493 enum dr_alignment_support alignment_support_scheme;
5495 enum vect_def_type dt;
5496 stmt_vec_info prev_stmt_info = NULL;
5497 tree dataref_ptr = NULL_TREE;
5498 tree dataref_offset = NULL_TREE;
5499 gimple *ptr_incr = NULL;
5502 gimple *next_stmt, *first_stmt;
5504 unsigned int group_size, i;
5505 vec<tree> oprnds = vNULL;
5506 vec<tree> result_chain = vNULL;
5508 tree offset = NULL_TREE;
5509 vec<tree> vec_oprnds = vNULL;
5510 bool slp = (slp_node != NULL);
5511 unsigned int vec_num;
5512 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5513 vec_info *vinfo = stmt_info->vinfo;
5515 gather_scatter_info gs_info;
5516 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5519 vec_load_store_type vls_type;
5522 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5525 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5529 /* Is vectorizable store? */
5531 if (!is_gimple_assign (stmt))
5534 scalar_dest = gimple_assign_lhs (stmt);
5535 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5536 && is_pattern_stmt_p (stmt_info))
5537 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5538 if (TREE_CODE (scalar_dest) != ARRAY_REF
5539 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5540 && TREE_CODE (scalar_dest) != INDIRECT_REF
5541 && TREE_CODE (scalar_dest) != COMPONENT_REF
5542 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5543 && TREE_CODE (scalar_dest) != REALPART_EXPR
5544 && TREE_CODE (scalar_dest) != MEM_REF)
5547 /* Cannot have hybrid store SLP -- that would mean storing to the
5548 same location twice. */
5549 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5551 gcc_assert (gimple_assign_single_p (stmt));
5553 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5554 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5558 loop = LOOP_VINFO_LOOP (loop_vinfo);
5559 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5564 /* Multiple types in SLP are handled by creating the appropriate number of
5565 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5570 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5572 gcc_assert (ncopies >= 1);
5574 /* FORNOW. This restriction should be relaxed. */
5575 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5577 if (dump_enabled_p ())
5578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5579 "multiple types in nested loop.\n");
5583 op = gimple_assign_rhs1 (stmt);
5585 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5587 if (dump_enabled_p ())
5588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5589 "use not simple.\n");
5593 if (dt == vect_constant_def || dt == vect_external_def)
5594 vls_type = VLS_STORE_INVARIANT;
5596 vls_type = VLS_STORE;
5598 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5601 elem_type = TREE_TYPE (vectype);
5602 vec_mode = TYPE_MODE (vectype);
5604 /* FORNOW. In some cases we can vectorize even if the data type is not
5605 supported (e.g. array initialization with 0). */
5606 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5609 if (!STMT_VINFO_DATA_REF (stmt_info))
5612 vect_memory_access_type memory_access_type;
5613 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5614 &memory_access_type, &gs_info))
5617 if (!vec_stmt) /* transformation not required. */
5619 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5620 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5621 /* The SLP costs are calculated during SLP analysis. */
5622 if (!PURE_SLP_STMT (stmt_info))
5623 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5627 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5631 ensure_base_align (stmt_info, dr);
5633 if (memory_access_type == VMAT_GATHER_SCATTER)
5635 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5636 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5637 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5638 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5639 edge pe = loop_preheader_edge (loop);
5642 enum { NARROW, NONE, WIDEN } modifier;
5643 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5645 if (nunits == (unsigned int) scatter_off_nunits)
5647 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5649 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5652 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5653 sel[i] = i | nunits;
5655 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5656 gcc_assert (perm_mask != NULL_TREE);
5658 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5660 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5663 for (i = 0; i < (unsigned int) nunits; ++i)
5664 sel[i] = i | scatter_off_nunits;
5666 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5667 gcc_assert (perm_mask != NULL_TREE);
5673 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5674 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5675 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5676 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5677 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5678 scaletype = TREE_VALUE (arglist);
5680 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5681 && TREE_CODE (rettype) == VOID_TYPE);
5683 ptr = fold_convert (ptrtype, gs_info.base);
5684 if (!is_gimple_min_invariant (ptr))
5686 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5687 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5688 gcc_assert (!new_bb);
5691 /* Currently we support only unconditional scatter stores,
5692 so mask should be all ones. */
5693 mask = build_int_cst (masktype, -1);
5694 mask = vect_init_vector (stmt, mask, masktype, NULL);
5696 scale = build_int_cst (scaletype, gs_info.scale);
5698 prev_stmt_info = NULL;
5699 for (j = 0; j < ncopies; ++j)
5704 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5706 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5708 else if (modifier != NONE && (j & 1))
5710 if (modifier == WIDEN)
5713 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5714 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5717 else if (modifier == NARROW)
5719 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5722 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5731 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5733 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5737 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5739 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5740 == TYPE_VECTOR_SUBPARTS (srctype));
5741 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5742 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5743 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5744 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5748 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5750 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5751 == TYPE_VECTOR_SUBPARTS (idxtype));
5752 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5753 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5754 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5755 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5760 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5762 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5764 if (prev_stmt_info == NULL)
5765 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5767 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5768 prev_stmt_info = vinfo_for_stmt (new_stmt);
5773 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5776 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5777 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5778 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5780 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5783 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5785 /* We vectorize all the stmts of the interleaving group when we
5786 reach the last stmt in the group. */
5787 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5788 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5797 grouped_store = false;
5798 /* VEC_NUM is the number of vect stmts to be created for this
5800 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5801 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5802 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5803 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5804 op = gimple_assign_rhs1 (first_stmt);
5807 /* VEC_NUM is the number of vect stmts to be created for this
5809 vec_num = group_size;
5811 ref_type = get_group_alias_ptr_type (first_stmt);
5817 group_size = vec_num = 1;
5818 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5821 if (dump_enabled_p ())
5822 dump_printf_loc (MSG_NOTE, vect_location,
5823 "transform store. ncopies = %d\n", ncopies);
5825 if (memory_access_type == VMAT_ELEMENTWISE
5826 || memory_access_type == VMAT_STRIDED_SLP)
5828 gimple_stmt_iterator incr_gsi;
5834 gimple_seq stmts = NULL;
5835 tree stride_base, stride_step, alias_off;
5839 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5842 = fold_build_pointer_plus
5843 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5844 size_binop (PLUS_EXPR,
5845 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5846 convert_to_ptrofftype (DR_INIT (first_dr))));
5847 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5849 /* For a store with loop-invariant (but other than power-of-2)
5850 stride (i.e. not a grouped access) like so:
5852 for (i = 0; i < n; i += stride)
5855 we generate a new induction variable and new stores from
5856 the components of the (vectorized) rhs:
5858 for (j = 0; ; j += VF*stride)
5863 array[j + stride] = tmp2;
5867 unsigned nstores = nunits;
5869 tree ltype = elem_type;
5872 if (group_size < nunits
5873 && nunits % group_size == 0)
5875 nstores = nunits / group_size;
5877 ltype = build_vector_type (elem_type, group_size);
5879 else if (group_size >= nunits
5880 && group_size % nunits == 0)
5886 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5887 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5890 ivstep = stride_step;
5891 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5892 build_int_cst (TREE_TYPE (ivstep), vf));
5894 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5896 create_iv (stride_base, ivstep, NULL,
5897 loop, &incr_gsi, insert_after,
5899 incr = gsi_stmt (incr_gsi);
5900 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5902 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5904 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5906 prev_stmt_info = NULL;
5907 alias_off = build_int_cst (ref_type, 0);
5908 next_stmt = first_stmt;
5909 for (g = 0; g < group_size; g++)
5911 running_off = offvar;
5914 tree size = TYPE_SIZE_UNIT (ltype);
5915 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5917 tree newoff = copy_ssa_name (running_off, NULL);
5918 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5920 vect_finish_stmt_generation (stmt, incr, gsi);
5921 running_off = newoff;
5923 unsigned int group_el = 0;
5924 unsigned HOST_WIDE_INT
5925 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
5926 for (j = 0; j < ncopies; j++)
5928 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5929 and first_stmt == stmt. */
5934 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5936 vec_oprnd = vec_oprnds[0];
5940 gcc_assert (gimple_assign_single_p (next_stmt));
5941 op = gimple_assign_rhs1 (next_stmt);
5942 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5948 vec_oprnd = vec_oprnds[j];
5951 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5952 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5956 for (i = 0; i < nstores; i++)
5958 tree newref, newoff;
5959 gimple *incr, *assign;
5960 tree size = TYPE_SIZE (ltype);
5961 /* Extract the i'th component. */
5962 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5963 bitsize_int (i), size);
5964 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5967 elem = force_gimple_operand_gsi (gsi, elem, true,
5971 tree this_off = build_int_cst (TREE_TYPE (alias_off),
5973 newref = build2 (MEM_REF, ltype,
5974 running_off, this_off);
5976 /* And store it to *running_off. */
5977 assign = gimple_build_assign (newref, elem);
5978 vect_finish_stmt_generation (stmt, assign, gsi);
5982 || group_el == group_size)
5984 newoff = copy_ssa_name (running_off, NULL);
5985 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5986 running_off, stride_step);
5987 vect_finish_stmt_generation (stmt, incr, gsi);
5989 running_off = newoff;
5992 if (g == group_size - 1
5995 if (j == 0 && i == 0)
5996 STMT_VINFO_VEC_STMT (stmt_info)
5997 = *vec_stmt = assign;
5999 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6000 prev_stmt_info = vinfo_for_stmt (assign);
6004 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6011 auto_vec<tree> dr_chain (group_size);
6012 oprnds.create (group_size);
6014 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6015 gcc_assert (alignment_support_scheme);
6016 /* Targets with store-lane instructions must not require explicit
6018 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6019 || alignment_support_scheme == dr_aligned
6020 || alignment_support_scheme == dr_unaligned_supported);
6022 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6023 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6024 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6026 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6027 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6029 aggr_type = vectype;
6031 /* In case the vectorization factor (VF) is bigger than the number
6032 of elements that we can fit in a vectype (nunits), we have to generate
6033 more than one vector stmt - i.e - we need to "unroll" the
6034 vector stmt by a factor VF/nunits. For more details see documentation in
6035 vect_get_vec_def_for_copy_stmt. */
6037 /* In case of interleaving (non-unit grouped access):
6044 We create vectorized stores starting from the base address (the access of
6045 the first stmt in the chain, S2 in the above example) when the last store
6046 stmt of the chain (S4) is reached:
6049 VS2: &base + vec_size*1 = vx0
6050 VS3: &base + vec_size*2 = vx1
6051 VS4: &base + vec_size*3 = vx3
6053 Then permutation statements are generated:
6055 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6056 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6059 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6060 (the order of the data-refs in the output of vect_permute_store_chain
6061 corresponds to the order of scalar stmts in the interleaving chain - see
6062 the documentation of vect_permute_store_chain()).
6064 In case of both multiple types and interleaving, above vector stores and
6065 permutation stmts are created for every copy. The result vector stmts are
6066 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6067 STMT_VINFO_RELATED_STMT for the next copies.
6070 prev_stmt_info = NULL;
6071 for (j = 0; j < ncopies; j++)
6078 /* Get vectorized arguments for SLP_NODE. */
6079 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6080 NULL, slp_node, -1);
6082 vec_oprnd = vec_oprnds[0];
6086 /* For interleaved stores we collect vectorized defs for all the
6087 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6088 used as an input to vect_permute_store_chain(), and OPRNDS as
6089 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6091 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6092 OPRNDS are of size 1. */
6093 next_stmt = first_stmt;
6094 for (i = 0; i < group_size; i++)
6096 /* Since gaps are not supported for interleaved stores,
6097 GROUP_SIZE is the exact number of stmts in the chain.
6098 Therefore, NEXT_STMT can't be NULL_TREE. If
6099 there is no interleaving, GROUP_SIZE is 1, and only one
6100 iteration of the loop will be executed. */
6101 gcc_assert (next_stmt
6102 && gimple_assign_single_p (next_stmt));
6103 op = gimple_assign_rhs1 (next_stmt);
6105 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6106 dr_chain.quick_push (vec_oprnd);
6107 oprnds.quick_push (vec_oprnd);
6108 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6112 /* We should have caught mismatched types earlier. */
6113 gcc_assert (useless_type_conversion_p (vectype,
6114 TREE_TYPE (vec_oprnd)));
6115 bool simd_lane_access_p
6116 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6117 if (simd_lane_access_p
6118 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6119 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6120 && integer_zerop (DR_OFFSET (first_dr))
6121 && integer_zerop (DR_INIT (first_dr))
6122 && alias_sets_conflict_p (get_alias_set (aggr_type),
6123 get_alias_set (TREE_TYPE (ref_type))))
6125 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6126 dataref_offset = build_int_cst (ref_type, 0);
6131 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6132 simd_lane_access_p ? loop : NULL,
6133 offset, &dummy, gsi, &ptr_incr,
6134 simd_lane_access_p, &inv_p);
6135 gcc_assert (bb_vinfo || !inv_p);
6139 /* For interleaved stores we created vectorized defs for all the
6140 defs stored in OPRNDS in the previous iteration (previous copy).
6141 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6142 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6144 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6145 OPRNDS are of size 1. */
6146 for (i = 0; i < group_size; i++)
6149 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6150 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6151 dr_chain[i] = vec_oprnd;
6152 oprnds[i] = vec_oprnd;
6156 = int_const_binop (PLUS_EXPR, dataref_offset,
6157 TYPE_SIZE_UNIT (aggr_type));
6159 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6160 TYPE_SIZE_UNIT (aggr_type));
6163 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6167 /* Combine all the vectors into an array. */
6168 vec_array = create_vector_array (vectype, vec_num);
6169 for (i = 0; i < vec_num; i++)
6171 vec_oprnd = dr_chain[i];
6172 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6176 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6177 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6178 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6179 gimple_call_set_lhs (new_stmt, data_ref);
6180 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6188 result_chain.create (group_size);
6190 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6194 next_stmt = first_stmt;
6195 for (i = 0; i < vec_num; i++)
6197 unsigned align, misalign;
6200 /* Bump the vector pointer. */
6201 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6205 vec_oprnd = vec_oprnds[i];
6206 else if (grouped_store)
6207 /* For grouped stores vectorized defs are interleaved in
6208 vect_permute_store_chain(). */
6209 vec_oprnd = result_chain[i];
6211 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
6215 : build_int_cst (ref_type, 0));
6216 align = TYPE_ALIGN_UNIT (vectype);
6217 if (aligned_access_p (first_dr))
6219 else if (DR_MISALIGNMENT (first_dr) == -1)
6221 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6222 align = TYPE_ALIGN_UNIT (elem_type);
6224 align = get_object_alignment (DR_REF (first_dr))
6227 TREE_TYPE (data_ref)
6228 = build_aligned_type (TREE_TYPE (data_ref),
6229 align * BITS_PER_UNIT);
6233 TREE_TYPE (data_ref)
6234 = build_aligned_type (TREE_TYPE (data_ref),
6235 TYPE_ALIGN (elem_type));
6236 misalign = DR_MISALIGNMENT (first_dr);
6238 if (dataref_offset == NULL_TREE
6239 && TREE_CODE (dataref_ptr) == SSA_NAME)
6240 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6243 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6245 tree perm_mask = perm_mask_for_reverse (vectype);
6247 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6249 tree new_temp = make_ssa_name (perm_dest);
6251 /* Generate the permute statement. */
6253 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6254 vec_oprnd, perm_mask);
6255 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6257 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6258 vec_oprnd = new_temp;
6261 /* Arguments are ready. Create the new vector stmt. */
6262 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6263 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6268 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6276 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6278 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6279 prev_stmt_info = vinfo_for_stmt (new_stmt);
6284 result_chain.release ();
6285 vec_oprnds.release ();
6290 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6291 VECTOR_CST mask. No checks are made that the target platform supports the
6292 mask, so callers may wish to test can_vec_perm_p separately, or use
6293 vect_gen_perm_mask_checked. */
6296 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6298 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6301 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6303 mask_elt_type = lang_hooks.types.type_for_mode
6304 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6305 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6307 mask_elts = XALLOCAVEC (tree, nunits);
6308 for (i = nunits - 1; i >= 0; i--)
6309 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6310 mask_vec = build_vector (mask_type, mask_elts);
6315 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6316 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6319 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6321 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6322 return vect_gen_perm_mask_any (vectype, sel);
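/* Usage sketch (hypothetical caller, illustration only): building the
   element-reversal mask for a four-element vector type:

     unsigned char sel[4];
     for (unsigned int i = 0; i < 4; ++i)
       sel[i] = 3 - i;                            sel is { 3, 2, 1, 0 }
     if (can_vec_perm_p (TYPE_MODE (vectype), false, sel))
       mask = vect_gen_perm_mask_checked (vectype, sel);  */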
6325 /* Given vector variables X and Y that were generated for the scalar
6326 STMT, generate instructions to permute the vector elements of X and Y
6327 using permutation mask MASK_VEC, insert them at *GSI and return the
6328 permuted vector variable.
6331 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6332 gimple_stmt_iterator *gsi)
6334 tree vectype = TREE_TYPE (x);
6335 tree perm_dest, data_ref;
6338 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6339 data_ref = make_ssa_name (perm_dest);
6341 /* Generate the permute statement. */
6342 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6343 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6348 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6349 inserting them on the loop's preheader edge. Returns true if we
6350 were successful in doing so (and thus STMT can then be moved),
6351 otherwise returns false. */
6354 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6360 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6362 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6363 if (!gimple_nop_p (def_stmt)
6364 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6366 /* Make sure we don't need to recurse. While we could do
6367 so in simple cases, when there are more complex use webs
6368 we don't have an easy way to preserve stmt order to fulfil
6369 dependencies within them. */
6372 if (gimple_code (def_stmt) == GIMPLE_PHI)
6374 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6376 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6377 if (!gimple_nop_p (def_stmt2)
6378 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6388 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6390 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6391 if (!gimple_nop_p (def_stmt)
6392 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6394 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6395 gsi_remove (&gsi, false);
6396 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
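/* Illustration (hypothetical GIMPLE, not from the sources): for

     loop:
       _1 = p_6(D) + 16;
       x_2 = *_1;

   the definition of _1 uses only values defined outside the loop, so it is
   moved onto the preheader edge; the invariant load x_2 = *_1 can then be
   hoisted by the caller as well.  */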
6403 /* vectorizable_load.
6405 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that can be vectorized.
6407 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6408 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6409 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6412 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6413 slp_tree slp_node, slp_instance slp_node_instance)
6416 tree vec_dest = NULL;
6417 tree data_ref = NULL;
6418 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6419 stmt_vec_info prev_stmt_info;
6420 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6421 struct loop *loop = NULL;
6422 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6423 bool nested_in_vect_loop = false;
6424 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6428 gimple *new_stmt = NULL;
6430 enum dr_alignment_support alignment_support_scheme;
6431 tree dataref_ptr = NULL_TREE;
6432 tree dataref_offset = NULL_TREE;
6433 gimple *ptr_incr = NULL;
6435 int i, j, group_size, group_gap_adj;
6436 tree msq = NULL_TREE, lsq;
6437 tree offset = NULL_TREE;
6438 tree byte_offset = NULL_TREE;
6439 tree realignment_token = NULL_TREE;
6441 vec<tree> dr_chain = vNULL;
6442 bool grouped_load = false;
6444 gimple *first_stmt_for_drptr = NULL;
6446 bool compute_in_loop = false;
6447 struct loop *at_loop;
6449 bool slp = (slp_node != NULL);
6450 bool slp_perm = false;
6451 enum tree_code code;
6452 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6455 gather_scatter_info gs_info;
6456 vec_info *vinfo = stmt_info->vinfo;
6459 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6462 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6466 /* Is vectorizable load? */
6467 if (!is_gimple_assign (stmt))
6470 scalar_dest = gimple_assign_lhs (stmt);
6471 if (TREE_CODE (scalar_dest) != SSA_NAME)
6474 code = gimple_assign_rhs_code (stmt);
6475 if (code != ARRAY_REF
6476 && code != BIT_FIELD_REF
6477 && code != INDIRECT_REF
6478 && code != COMPONENT_REF
6479 && code != IMAGPART_EXPR
6480 && code != REALPART_EXPR
6482 && TREE_CODE_CLASS (code) != tcc_declaration)
6485 if (!STMT_VINFO_DATA_REF (stmt_info))
6488 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6489 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6493 loop = LOOP_VINFO_LOOP (loop_vinfo);
6494 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6495 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6500 /* Multiple types in SLP are handled by creating the appropriate number of
6501 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP.
6506 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6508 gcc_assert (ncopies >= 1);
6510 /* FORNOW. This restriction should be relaxed. */
6511 if (nested_in_vect_loop && ncopies > 1)
6513 if (dump_enabled_p ())
6514 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6515 "multiple types in nested loop.\n");
6519 /* Invalidate assumptions made by dependence analysis when vectorization
6520 on the unrolled body effectively re-orders stmts. */
6522 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6523 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6524 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6526 if (dump_enabled_p ())
6527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6528 "cannot perform implicit CSE when unrolling "
6529 "with negative dependence distance\n");
6533 elem_type = TREE_TYPE (vectype);
6534 mode = TYPE_MODE (vectype);
6536 /* FORNOW. In some cases we can vectorize even if the data type is not
6537 supported (e.g. data copies). */
6538 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6540 if (dump_enabled_p ())
6541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6542 "Aligned load, but unsupported type.\n");
6546 /* Check if the load is a part of an interleaving chain. */
6547 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6549 grouped_load = true;
6551 gcc_assert (!nested_in_vect_loop);
6552 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6554 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6555 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6557 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6560 /* Invalidate assumptions made by dependence analysis when vectorization
6561 on the unrolled body effectively re-orders stmts. */
6562 if (!PURE_SLP_STMT (stmt_info)
6563 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6564 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6565 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6567 if (dump_enabled_p ())
6568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6569 "cannot perform implicit CSE when performing "
6570 "group loads with negative dependence distance\n");
6574 /* Similarly, when the stmt is a load that is both part of an SLP
6575 instance and a loop-vectorized stmt via the same-dr mechanism,
6576 we have to give up. */
6577 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6578 && (STMT_SLP_TYPE (stmt_info)
6579 != STMT_SLP_TYPE (vinfo_for_stmt
6580 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6582 if (dump_enabled_p ())
6583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6584 "conflicting SLP types for CSEd load\n");
6589 vect_memory_access_type memory_access_type;
6590 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6591 &memory_access_type, &gs_info))
6594 if (!vec_stmt) /* transformation not required. */
6597 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6598 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6599 /* The SLP costs are calculated during SLP analysis. */
6600 if (!PURE_SLP_STMT (stmt_info))
6601 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6607 gcc_assert (memory_access_type
6608 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6610 if (dump_enabled_p ())
6611 dump_printf_loc (MSG_NOTE, vect_location,
6612 "transform load. ncopies = %d\n", ncopies);
6616 ensure_base_align (stmt_info, dr);
6618 if (memory_access_type == VMAT_GATHER_SCATTER)
6620 tree vec_oprnd0 = NULL_TREE, op;
6621 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6622 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6623 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6624 edge pe = loop_preheader_edge (loop);
6627 enum { NARROW, NONE, WIDEN } modifier;
6628 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6630 if (nunits == gather_off_nunits)
6632 else if (nunits == gather_off_nunits / 2)
6634 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6637 for (i = 0; i < gather_off_nunits; ++i)
6638 sel[i] = i | nunits;
6640 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6642 else if (nunits == gather_off_nunits * 2)
6644 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6647 for (i = 0; i < nunits; ++i)
6648 sel[i] = i < gather_off_nunits
6649 ? i : i + nunits - gather_off_nunits;
6651 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
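/* Illustration (example values, not from the sources): with nunits == 4 and
   gather_off_nunits == 2 the mask built above is { 0, 1, 4, 5 }, combining
   the low halves of two consecutive gather results into one result vector.
   In the WIDEN case (nunits == 2, gather_off_nunits == 4) the mask is
   { 2, 3, 2, 3 } and selects the upper half of the offset vector for the
   odd-numbered copies.  */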
6657 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6658 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6659 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6660 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6661 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6662 scaletype = TREE_VALUE (arglist);
6663 gcc_checking_assert (types_compatible_p (srctype, rettype));
6665 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6667 ptr = fold_convert (ptrtype, gs_info.base);
6668 if (!is_gimple_min_invariant (ptr))
6670 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6671 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6672 gcc_assert (!new_bb);
6675 /* Currently we support only unconditional gather loads,
6676 so mask should be all ones. */
6677 if (TREE_CODE (masktype) == INTEGER_TYPE)
6678 mask = build_int_cst (masktype, -1);
6679 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6681 mask = build_int_cst (TREE_TYPE (masktype), -1);
6682 mask = build_vector_from_val (masktype, mask);
6683 mask = vect_init_vector (stmt, mask, masktype, NULL);
6685 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6689 for (j = 0; j < 6; ++j)
6691 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6692 mask = build_real (TREE_TYPE (masktype), r);
6693 mask = build_vector_from_val (masktype, mask);
6694 mask = vect_init_vector (stmt, mask, masktype, NULL);
6699 scale = build_int_cst (scaletype, gs_info.scale);
6701 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6702 merge = build_int_cst (TREE_TYPE (rettype), 0);
6703 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6707 for (j = 0; j < 6; ++j)
6709 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6710 merge = build_real (TREE_TYPE (rettype), r);
6714 merge = build_vector_from_val (rettype, merge);
6715 merge = vect_init_vector (stmt, merge, rettype, NULL);
6717 prev_stmt_info = NULL;
6718 for (j = 0; j < ncopies; ++j)
6720 if (modifier == WIDEN && (j & 1))
6721 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6722 perm_mask, stmt, gsi);
6725 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6728 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6730 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6732 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6733 == TYPE_VECTOR_SUBPARTS (idxtype));
6734 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6735 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6737 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6743 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6745 if (!useless_type_conversion_p (vectype, rettype))
6747 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6748 == TYPE_VECTOR_SUBPARTS (rettype));
6749 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6750 gimple_call_set_lhs (new_stmt, op);
6751 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6752 var = make_ssa_name (vec_dest);
6753 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6755 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6759 var = make_ssa_name (vec_dest, new_stmt);
6760 gimple_call_set_lhs (new_stmt, var);
6763 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6765 if (modifier == NARROW)
6772 var = permute_vec_elements (prev_res, var,
6773 perm_mask, stmt, gsi);
6774 new_stmt = SSA_NAME_DEF_STMT (var);
6777 if (prev_stmt_info == NULL)
6778 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6780 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6781 prev_stmt_info = vinfo_for_stmt (new_stmt);
6786 if (memory_access_type == VMAT_ELEMENTWISE
6787 || memory_access_type == VMAT_STRIDED_SLP)
6789 gimple_stmt_iterator incr_gsi;
6795 vec<constructor_elt, va_gc> *v = NULL;
6796 gimple_seq stmts = NULL;
6797 tree stride_base, stride_step, alias_off;
6799 gcc_assert (!nested_in_vect_loop);
6801 if (slp && grouped_load)
6803 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6804 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6805 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6806 ref_type = get_group_alias_ptr_type (first_stmt);
6813 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6817 = fold_build_pointer_plus
6818 (DR_BASE_ADDRESS (first_dr),
6819 size_binop (PLUS_EXPR,
6820 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6821 convert_to_ptrofftype (DR_INIT (first_dr))));
6822 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6824 /* For a load with loop-invariant (but other than power-of-2)
6825 stride (i.e. not a grouped access) like so:
6827 for (i = 0; i < n; i += stride)   ... = array[i];
6830 we generate a new induction variable and new accesses to
6831 form a new vector (or vectors, depending on ncopies):
6833 for (j = 0; ; j += VF*stride)
6835 tmp1 = array[j];  tmp2 = array[j + stride];  ...
6837 vectemp = {tmp1, tmp2, ...}  */
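/* Concretely (illustrative numbers only): with stride == 3 and VF == 4 one
   copy loads array[j], array[j + 3], array[j + 6] and array[j + 9] and
   builds  vectemp = { tmp1, tmp2, tmp3, tmp4 }  from them.  */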
6840 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6841 build_int_cst (TREE_TYPE (stride_step), vf));
6843 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6845 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6846 loop, &incr_gsi, insert_after,
6848 incr = gsi_stmt (incr_gsi);
6849 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6851 stride_step = force_gimple_operand (unshare_expr (stride_step),
6852 &stmts, true, NULL_TREE);
6854 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6856 prev_stmt_info = NULL;
6857 running_off = offvar;
6858 alias_off = build_int_cst (ref_type, 0);
6859 int nloads = nunits;
6861 tree ltype = TREE_TYPE (vectype);
6862 tree lvectype = vectype;
6863 auto_vec<tree> dr_chain;
6864 if (memory_access_type == VMAT_STRIDED_SLP)
6866 if (group_size < nunits)
6868 /* Avoid emitting a constructor of vector elements by performing
6869 the loads using an integer type of the same size,
6870 constructing a vector of those and then re-interpreting it
6871 as the original vector type. This works around the fact
6872 that the vec_init optab was only designed for scalar
6873 element modes and thus expansion goes through memory.
6874 This avoids a huge runtime penalty due to the general
6875 inability to perform store forwarding from smaller stores
6876 to a larger load. */
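/* Hedged example (not from the sources): for a group of two ints inside a
   V4SI vector, lsize is 64, so the two loads are done as DImode integers,
   combined into a two-element integer vector and re-interpreted:

     tmp0 = MEM <uint64> [ptr];
     tmp1 = MEM <uint64> [ptr + stride];
     v2di = { tmp0, tmp1 };
     v4si = VIEW_CONVERT_EXPR <vector(4) int> (v2di);  */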
6878 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
6879 enum machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
6880 enum machine_mode vmode = mode_for_vector (elmode,
6881 nunits / group_size);
6882 /* If we can't construct such a vector, fall back to
6883 element loads of the original vector type. */
6884 if (VECTOR_MODE_P (vmode)
6885 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
6887 nloads = nunits / group_size;
6889 ltype = build_nonstandard_integer_type (lsize, 1);
6890 lvectype = build_vector_type (ltype, nloads);
6899 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6903 /* For SLP permutation support we need to load the whole group,
6904 not only the number of vector stmts the permutation result fits in. */
6908 ncopies = (group_size * vf + nunits - 1) / nunits;
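/* For instance (illustrative numbers): with group_size == 3, vf == 4 and
   nunits == 4 this loads (3*4 + 3) / 4 == 3 whole vectors covering all
   twelve scalar elements of the group, even if the permutation result
   itself needs fewer vector stmts.  */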
6909 dr_chain.create (ncopies);
6912 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6915 unsigned HOST_WIDE_INT
6916 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6917 for (j = 0; j < ncopies; j++)
6920 vec_alloc (v, nloads);
6921 for (i = 0; i < nloads; i++)
6923 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6925 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6926 build2 (MEM_REF, ltype,
6927 running_off, this_off));
6928 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6930 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
6931 gimple_assign_lhs (new_stmt));
6935 || group_el == group_size)
6937 tree newoff = copy_ssa_name (running_off);
6938 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6939 running_off, stride_step);
6940 vect_finish_stmt_generation (stmt, incr, gsi);
6942 running_off = newoff;
6948 tree vec_inv = build_constructor (lvectype, v);
6949 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
6950 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6951 if (lvectype != vectype)
6953 new_stmt = gimple_build_assign (make_ssa_name (vectype),
6955 build1 (VIEW_CONVERT_EXPR,
6956 vectype, new_temp));
6957 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6964 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6966 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6971 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6973 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6974 prev_stmt_info = vinfo_for_stmt (new_stmt);
6980 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6981 slp_node_instance, false, &n_perms);
6988 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6989 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6990 /* For SLP vectorization we directly vectorize a subchain
6991 without permutation. */
6992 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6993 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6994 /* For BB vectorization always use the first stmt to base
6995 the data ref pointer on. */
6997 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6999 /* Check if the chain of loads is already vectorized. */
7000 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7001 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7002 ??? But we can only do so if there is exactly one
7003 as we have no way to get at the rest. Leave the CSE opportunity alone.
7005 ??? With the group load eventually participating
7006 in multiple different permutations (having multiple
7007 slp nodes which refer to the same group) the CSE
7008 is even wrong code. See PR56270. */
7011 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7014 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7017 /* VEC_NUM is the number of vect stmts to be created for this group. */
7020 grouped_load = false;
7021 /* For SLP permutation support we need to load the whole group,
7022 not only the number of vector stmts the permutation result fits in. */
7025 vec_num = (group_size * vf + nunits - 1) / nunits;
7027 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7028 group_gap_adj = vf * group_size - nunits * vec_num;
7031 vec_num = group_size;
7033 ref_type = get_group_alias_ptr_type (first_stmt);
7039 group_size = vec_num = 1;
7041 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7044 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7045 gcc_assert (alignment_support_scheme);
7046 /* Targets with load-lane instructions must not require explicit realignment. */
7048 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7049 || alignment_support_scheme == dr_aligned
7050 || alignment_support_scheme == dr_unaligned_supported);
7052 /* In case the vectorization factor (VF) is bigger than the number
7053 of elements that we can fit in a vectype (nunits), we have to generate
7054 more than one vector stmt - i.e. we need to "unroll" the
7055 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7056 from one copy of the vector stmt to the next, in the field
7057 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7058 stages to find the correct vector defs to be used when vectorizing
7059 stmts that use the defs of the current stmt. The example below
7060 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7061 need to create 4 vectorized stmts):
7063 before vectorization:
7064 RELATED_STMT VEC_STMT
7068 step 1: vectorize stmt S1:
7069 We first create the vector stmt VS1_0, and, as usual, record a
7070 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7071 Next, we create the vector stmt VS1_1, and record a pointer to
7072 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7073 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7075 RELATED_STMT VEC_STMT
7076 VS1_0: vx0 = memref0 VS1_1 -
7077 VS1_1: vx1 = memref1 VS1_2 -
7078 VS1_2: vx2 = memref2 VS1_3 -
7079 VS1_3: vx3 = memref3 - -
7080 S1: x = load - VS1_0
7083 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7084 information we recorded in the RELATED_STMT field is used to vectorize stmt S2. */
7087 /* In case of interleaving (non-unit grouped access):
7094 Vectorized loads are created in the order of memory accesses
7095 starting from the access of the first stmt of the chain:
7098 VS2: vx1 = &base + vec_size*1
7099 VS3: vx3 = &base + vec_size*2
7100 VS4: vx4 = &base + vec_size*3
7102 Then permutation statements are generated:
7104 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7105 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7108 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7109 (the order of the data-refs in the output of vect_permute_load_chain
7110 corresponds to the order of scalar stmts in the interleaving chain - see
7111 the documentation of vect_permute_load_chain()).
7112 The generation of permutation stmts and recording them in
7113 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7115 In case of both multiple types and interleaving, the vector loads and
7116 permutation stmts above are created for every copy. The result vector
7117 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7118 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7120 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7121 on a target that supports unaligned accesses (dr_unaligned_supported)
7122 we generate the following code:
7126 p = p + indx * vectype_size;
7131 Otherwise, the data reference is potentially unaligned on a target that
7132 does not support unaligned accesses (dr_explicit_realign_optimized) -
7133 then generate the following code, in which the data in each iteration is
7134 obtained by two vector loads, one from the previous iteration, and one
7135 from the current iteration:
7137 msq_init = *(floor(p1))
7138 p2 = initial_addr + VS - 1;
7139 realignment_token = call target_builtin;
7142 p2 = p2 + indx * vectype_size
7144 vec_dest = realign_load (msq, lsq, realignment_token)
7149 /* If the misalignment remains the same throughout the execution of the
7150 loop, we can create the init_addr and permutation mask at the loop
7151 preheader. Otherwise, it needs to be created inside the loop.
7152 This can only occur when vectorizing memory accesses in the inner-loop
7153 nested within an outer-loop that is being vectorized. */
7155 if (nested_in_vect_loop
7156 && (TREE_INT_CST_LOW (DR_STEP (dr))
7157 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7159 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7160 compute_in_loop = true;
7163 if ((alignment_support_scheme == dr_explicit_realign_optimized
7164 || alignment_support_scheme == dr_explicit_realign)
7165 && !compute_in_loop)
7167 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7168 alignment_support_scheme, NULL_TREE,
7170 if (alignment_support_scheme == dr_explicit_realign_optimized)
7172 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7173 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7180 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7181 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7183 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7184 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7186 aggr_type = vectype;
7188 prev_stmt_info = NULL;
7189 for (j = 0; j < ncopies; j++)
7191 /* 1. Create the vector or array pointer update chain. */
7194 bool simd_lane_access_p
7195 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7196 if (simd_lane_access_p
7197 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7198 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7199 && integer_zerop (DR_OFFSET (first_dr))
7200 && integer_zerop (DR_INIT (first_dr))
7201 && alias_sets_conflict_p (get_alias_set (aggr_type),
7202 get_alias_set (TREE_TYPE (ref_type)))
7203 && (alignment_support_scheme == dr_aligned
7204 || alignment_support_scheme == dr_unaligned_supported))
7206 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7207 dataref_offset = build_int_cst (ref_type, 0);
7210 else if (first_stmt_for_drptr
7211 && first_stmt != first_stmt_for_drptr)
7214 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7215 at_loop, offset, &dummy, gsi,
7216 &ptr_incr, simd_lane_access_p,
7217 &inv_p, byte_offset);
7218 /* Adjust the pointer by the difference to first_stmt. */
7219 data_reference_p ptrdr
7220 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7221 tree diff = fold_convert (sizetype,
7222 size_binop (MINUS_EXPR,
7225 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7230 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7231 offset, &dummy, gsi, &ptr_incr,
7232 simd_lane_access_p, &inv_p,
7235 else if (dataref_offset)
7236 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7237 TYPE_SIZE_UNIT (aggr_type));
7239 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7240 TYPE_SIZE_UNIT (aggr_type));
7242 if (grouped_load || slp_perm)
7243 dr_chain.create (vec_num);
7245 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7249 vec_array = create_vector_array (vectype, vec_num);
7252 /* Emit: VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7253 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7254 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7255 gimple_call_set_lhs (new_stmt, vec_array);
7256 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7258 /* Extract each vector into an SSA_NAME. */
7259 for (i = 0; i < vec_num; i++)
7261 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7263 dr_chain.quick_push (new_temp);
7266 /* Record the mapping between SSA_NAMEs and statements. */
7267 vect_record_grouped_load_vectors (stmt, dr_chain);
7271 for (i = 0; i < vec_num; i++)
7274 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7277 /* 2. Create the vector-load in the loop. */
7278 switch (alignment_support_scheme)
7281 case dr_unaligned_supported:
7283 unsigned int align, misalign;
7286 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7289 : build_int_cst (ref_type, 0));
7290 align = TYPE_ALIGN_UNIT (vectype);
7291 if (alignment_support_scheme == dr_aligned)
7293 gcc_assert (aligned_access_p (first_dr));
7296 else if (DR_MISALIGNMENT (first_dr) == -1)
7298 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7299 align = TYPE_ALIGN_UNIT (elem_type);
7301 align = (get_object_alignment (DR_REF (first_dr))
7304 TREE_TYPE (data_ref)
7305 = build_aligned_type (TREE_TYPE (data_ref),
7306 align * BITS_PER_UNIT);
7310 TREE_TYPE (data_ref)
7311 = build_aligned_type (TREE_TYPE (data_ref),
7312 TYPE_ALIGN (elem_type));
7313 misalign = DR_MISALIGNMENT (first_dr);
7315 if (dataref_offset == NULL_TREE
7316 && TREE_CODE (dataref_ptr) == SSA_NAME)
7317 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7321 case dr_explicit_realign:
7325 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7327 if (compute_in_loop)
7328 msq = vect_setup_realignment (first_stmt, gsi,
7330 dr_explicit_realign,
7333 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7334 ptr = copy_ssa_name (dataref_ptr);
7336 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7337 new_stmt = gimple_build_assign
7338 (ptr, BIT_AND_EXPR, dataref_ptr,
7340 (TREE_TYPE (dataref_ptr),
7341 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7342 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7344 = build2 (MEM_REF, vectype, ptr,
7345 build_int_cst (ref_type, 0));
7346 vec_dest = vect_create_destination_var (scalar_dest,
7348 new_stmt = gimple_build_assign (vec_dest, data_ref);
7349 new_temp = make_ssa_name (vec_dest, new_stmt);
7350 gimple_assign_set_lhs (new_stmt, new_temp);
7351 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7352 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7356 bump = size_binop (MULT_EXPR, vs,
7357 TYPE_SIZE_UNIT (elem_type));
7358 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7359 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7360 new_stmt = gimple_build_assign
7361 (NULL_TREE, BIT_AND_EXPR, ptr,
7364 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7365 ptr = copy_ssa_name (ptr, new_stmt);
7366 gimple_assign_set_lhs (new_stmt, ptr);
7367 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7369 = build2 (MEM_REF, vectype, ptr,
7370 build_int_cst (ref_type, 0));
7373 case dr_explicit_realign_optimized:
7374 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7375 new_temp = copy_ssa_name (dataref_ptr);
7377 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7378 new_stmt = gimple_build_assign
7379 (new_temp, BIT_AND_EXPR, dataref_ptr,
7381 (TREE_TYPE (dataref_ptr),
7382 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7383 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7385 = build2 (MEM_REF, vectype, new_temp,
7386 build_int_cst (ref_type, 0));
7391 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7392 new_stmt = gimple_build_assign (vec_dest, data_ref);
7393 new_temp = make_ssa_name (vec_dest, new_stmt);
7394 gimple_assign_set_lhs (new_stmt, new_temp);
7395 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7397 /* 3. Handle explicit realignment if necessary/supported.
7399 vec_dest = realign_load (msq, lsq, realignment_token) */
7400 if (alignment_support_scheme == dr_explicit_realign_optimized
7401 || alignment_support_scheme == dr_explicit_realign)
7403 lsq = gimple_assign_lhs (new_stmt);
7404 if (!realignment_token)
7405 realignment_token = dataref_ptr;
7406 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7407 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7408 msq, lsq, realignment_token);
7409 new_temp = make_ssa_name (vec_dest, new_stmt);
7410 gimple_assign_set_lhs (new_stmt, new_temp);
7411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7413 if (alignment_support_scheme == dr_explicit_realign_optimized)
7416 if (i == vec_num - 1 && j == ncopies - 1)
7417 add_phi_arg (phi, lsq,
7418 loop_latch_edge (containing_loop),
7424 /* 4. Handle invariant-load. */
7425 if (inv_p && !bb_vinfo)
7427 gcc_assert (!grouped_load);
7428 /* If we have versioned for aliasing or the loop doesn't
7429 have any data dependencies that would preclude this,
7430 then we are sure this is a loop invariant load and
7431 thus we can insert it on the preheader edge. */
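/* Sketch (illustrative GIMPLE, not from the sources): for an invariant load
   x_4 = *q_5 inside the loop, the scalar load is placed on the preheader
   edge as
     tem_6 = *q_5;
   and the vector value is then built once outside the loop as
     vectx_7 = { tem_6, tem_6, ... };  */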
7432 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7433 && !nested_in_vect_loop
7434 && hoist_defs_of_uses (stmt, loop))
7436 if (dump_enabled_p ())
7438 dump_printf_loc (MSG_NOTE, vect_location,
7439 "hoisting out of the vectorized "
7441 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7443 tree tem = copy_ssa_name (scalar_dest);
7444 gsi_insert_on_edge_immediate
7445 (loop_preheader_edge (loop),
7446 gimple_build_assign (tem,
7448 (gimple_assign_rhs1 (stmt))));
7449 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7450 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7451 set_vinfo_for_stmt (new_stmt,
7452 new_stmt_vec_info (new_stmt, vinfo));
7456 gimple_stmt_iterator gsi2 = *gsi;
7458 new_temp = vect_init_vector (stmt, scalar_dest,
7460 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7464 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7466 tree perm_mask = perm_mask_for_reverse (vectype);
7467 new_temp = permute_vec_elements (new_temp, new_temp,
7468 perm_mask, stmt, gsi);
7469 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7472 /* Collect vector loads and later create their permutation in
7473 vect_transform_grouped_load (). */
7474 if (grouped_load || slp_perm)
7475 dr_chain.quick_push (new_temp);
7477 /* Store vector loads in the corresponding SLP_NODE. */
7478 if (slp && !slp_perm)
7479 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7481 /* Bump the vector pointer to account for a gap or for excess
7482 elements loaded for a permuted SLP load. */
7483 if (group_gap_adj != 0)
7487 = wide_int_to_tree (sizetype,
7488 wi::smul (TYPE_SIZE_UNIT (elem_type),
7489 group_gap_adj, &ovf));
7490 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7495 if (slp && !slp_perm)
7501 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7502 slp_node_instance, false,
7505 dr_chain.release ();
7513 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7514 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7515 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7520 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7522 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7523 prev_stmt_info = vinfo_for_stmt (new_stmt);
7526 dr_chain.release ();
7532 /* Function vect_is_simple_cond.
7535 VINFO - the vec_info for the loop or basic block that is being vectorized.
7536 COND - Condition that is checked for simple use.
7539 *COMP_VECTYPE - the vector type for the comparison.
7541 Returns whether COND can be vectorized. Checks whether
7542 condition operands are supportable using vect_is_simple_use. */
7545 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7548 enum vect_def_type dt;
7549 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7552 if (TREE_CODE (cond) == SSA_NAME
7553 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7555 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7556 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7559 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7564 if (!COMPARISON_CLASS_P (cond))
7567 lhs = TREE_OPERAND (cond, 0);
7568 rhs = TREE_OPERAND (cond, 1);
7570 if (TREE_CODE (lhs) == SSA_NAME)
7572 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7573 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7576 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7577 && TREE_CODE (lhs) != FIXED_CST)
7580 if (TREE_CODE (rhs) == SSA_NAME)
7582 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7583 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7586 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7587 && TREE_CODE (rhs) != FIXED_CST)
7590 if (vectype1 && vectype2
7591 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7594 *comp_vectype = vectype1 ? vectype1 : vectype2;
7598 /* vectorizable_condition.
7600 Check if STMT is a conditional modify expression that can be vectorized.
7601 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7602 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it at GSI.
7605 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7606 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7607 the else clause if it is 2).
7609 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
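/* Illustrative example (not from the sources): a scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by a vector statement of the form

     vx_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10>;  */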
7612 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7613 gimple **vec_stmt, tree reduc_def, int reduc_index,
7616 tree scalar_dest = NULL_TREE;
7617 tree vec_dest = NULL_TREE;
7618 tree cond_expr, then_clause, else_clause;
7619 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7620 tree comp_vectype = NULL_TREE;
7621 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7622 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7625 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7626 enum vect_def_type dt, dts[4];
7628 enum tree_code code;
7629 stmt_vec_info prev_stmt_info = NULL;
7631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7632 vec<tree> vec_oprnds0 = vNULL;
7633 vec<tree> vec_oprnds1 = vNULL;
7634 vec<tree> vec_oprnds2 = vNULL;
7635 vec<tree> vec_oprnds3 = vNULL;
7637 bool masked = false;
7639 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7642 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7644 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7647 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7648 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7652 /* FORNOW: not yet supported. */
7653 if (STMT_VINFO_LIVE_P (stmt_info))
7655 if (dump_enabled_p ())
7656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7657 "value used after loop.\n");
7662 /* Is vectorizable conditional operation? */
7663 if (!is_gimple_assign (stmt))
7666 code = gimple_assign_rhs_code (stmt);
7668 if (code != COND_EXPR)
7671 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7672 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7673 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7678 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7680 gcc_assert (ncopies >= 1);
7681 if (reduc_index && ncopies > 1)
7682 return false; /* FORNOW */
7684 cond_expr = gimple_assign_rhs1 (stmt);
7685 then_clause = gimple_assign_rhs2 (stmt);
7686 else_clause = gimple_assign_rhs3 (stmt);
7688 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7693 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7696 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7700 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7703 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7706 masked = !COMPARISON_CLASS_P (cond_expr);
7707 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7709 if (vec_cmp_type == NULL_TREE)
7714 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7715 return expand_vec_cond_expr_p (vectype, comp_vectype,
7716 TREE_CODE (cond_expr));
7723 vec_oprnds0.create (1);
7724 vec_oprnds1.create (1);
7725 vec_oprnds2.create (1);
7726 vec_oprnds3.create (1);
7730 scalar_dest = gimple_assign_lhs (stmt);
7731 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7733 /* Handle cond expr. */
7734 for (j = 0; j < ncopies; j++)
7736 gassign *new_stmt = NULL;
7741 auto_vec<tree, 4> ops;
7742 auto_vec<vec<tree>, 4> vec_defs;
7745 ops.safe_push (cond_expr);
7748 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7749 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7751 ops.safe_push (then_clause);
7752 ops.safe_push (else_clause);
7753 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7754 vec_oprnds3 = vec_defs.pop ();
7755 vec_oprnds2 = vec_defs.pop ();
7757 vec_oprnds1 = vec_defs.pop ();
7758 vec_oprnds0 = vec_defs.pop ();
7766 = vect_get_vec_def_for_operand (cond_expr, stmt,
7768 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7774 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7775 stmt, comp_vectype);
7776 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7777 loop_vinfo, &gtemp, &dts[0]);
7780 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7781 stmt, comp_vectype);
7782 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7783 loop_vinfo, &gtemp, &dts[1]);
7785 if (reduc_index == 1)
7786 vec_then_clause = reduc_def;
7789 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7791 vect_is_simple_use (then_clause, loop_vinfo,
7794 if (reduc_index == 2)
7795 vec_else_clause = reduc_def;
7798 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7800 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7807 = vect_get_vec_def_for_stmt_copy (dts[0],
7808 vec_oprnds0.pop ());
7811 = vect_get_vec_def_for_stmt_copy (dts[1],
7812 vec_oprnds1.pop ());
7814 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7815 vec_oprnds2.pop ());
7816 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7817 vec_oprnds3.pop ());
7822 vec_oprnds0.quick_push (vec_cond_lhs);
7824 vec_oprnds1.quick_push (vec_cond_rhs);
7825 vec_oprnds2.quick_push (vec_then_clause);
7826 vec_oprnds3.quick_push (vec_else_clause);
7829 /* Arguments are ready. Create the new vector stmt. */
7830 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7832 vec_then_clause = vec_oprnds2[i];
7833 vec_else_clause = vec_oprnds3[i];
7836 vec_compare = vec_cond_lhs;
7839 vec_cond_rhs = vec_oprnds1[i];
7840 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7841 vec_cond_lhs, vec_cond_rhs);
7843 new_temp = make_ssa_name (vec_dest);
7844 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7845 vec_compare, vec_then_clause,
7847 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7849 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7856 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7858 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7860 prev_stmt_info = vinfo_for_stmt (new_stmt);
7863 vec_oprnds0.release ();
7864 vec_oprnds1.release ();
7865 vec_oprnds2.release ();
7866 vec_oprnds3.release ();
7871 /* vectorizable_comparison.
7873 Check if STMT is a comparison expression that can be vectorized.
7874 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7875 comparison, put it in VEC_STMT, and insert it at GSI.
7877 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7880 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7881 gimple **vec_stmt, tree reduc_def,
7884 tree lhs, rhs1, rhs2;
7885 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7886 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7887 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7888 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7890 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7891 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7894 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7895 stmt_vec_info prev_stmt_info = NULL;
7897 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7898 vec<tree> vec_oprnds0 = vNULL;
7899 vec<tree> vec_oprnds1 = vNULL;
7904 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7907 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7910 mask_type = vectype;
7911 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7916 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7918 gcc_assert (ncopies >= 1);
7919 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7920 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7924 if (STMT_VINFO_LIVE_P (stmt_info))
7926 if (dump_enabled_p ())
7927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7928 "value used after loop.\n");
7932 if (!is_gimple_assign (stmt))
7935 code = gimple_assign_rhs_code (stmt);
7937 if (TREE_CODE_CLASS (code) != tcc_comparison)
7940 rhs1 = gimple_assign_rhs1 (stmt);
7941 rhs2 = gimple_assign_rhs2 (stmt);
7943 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7944 &dts[0], &vectype1))
7947 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7948 &dts[1], &vectype2))
7951 if (vectype1 && vectype2
7952 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7955 vectype = vectype1 ? vectype1 : vectype2;
7957 /* Invariant comparison. */
7960 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7961 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7964 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7967 /* Can't compare mask and non-mask types. */
7968 if (vectype1 && vectype2
7969 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
7972 /* Boolean values may have another representation in vectors
7973 and therefore we prefer bit operations over comparison for
7974 them (which also works for scalar masks). We store opcodes
7975 to use in bitop1 and bitop2. Statement is vectorized as
7976 BITOP2 (rhs1 BITOP1 rhs2) or
7977 rhs1 BITOP2 (BITOP1 rhs2)
7978 depending on bitop1 and bitop2 arity. */
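/* For example (illustration only): on boolean vector masks a GT comparison
     m = m1 > m2
   is emitted as
     tmp = ~m2;          bitop1 = BIT_NOT_EXPR
     m   = m1 & tmp;     bitop2 = BIT_AND_EXPR
   and an EQ comparison as
     tmp = m1 ^ m2;      bitop1 = BIT_XOR_EXPR
     m   = ~tmp;          bitop2 = BIT_NOT_EXPR  */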
7979 if (VECTOR_BOOLEAN_TYPE_P (vectype))
7981 if (code == GT_EXPR)
7983 bitop1 = BIT_NOT_EXPR;
7984 bitop2 = BIT_AND_EXPR;
7986 else if (code == GE_EXPR)
7988 bitop1 = BIT_NOT_EXPR;
7989 bitop2 = BIT_IOR_EXPR;
7991 else if (code == LT_EXPR)
7993 bitop1 = BIT_NOT_EXPR;
7994 bitop2 = BIT_AND_EXPR;
7995 std::swap (rhs1, rhs2);
7996 std::swap (dts[0], dts[1]);
7998 else if (code == LE_EXPR)
8000 bitop1 = BIT_NOT_EXPR;
8001 bitop2 = BIT_IOR_EXPR;
8002 std::swap (rhs1, rhs2);
8003 std::swap (dts[0], dts[1]);
8007 bitop1 = BIT_XOR_EXPR;
8008 if (code == EQ_EXPR)
8009 bitop2 = BIT_NOT_EXPR;
8015 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8016 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8018 if (bitop1 == NOP_EXPR)
8019 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8022 machine_mode mode = TYPE_MODE (vectype);
8025 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8026 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8029 if (bitop2 != NOP_EXPR)
8031 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8032 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8042 vec_oprnds0.create (1);
8043 vec_oprnds1.create (1);
8047 lhs = gimple_assign_lhs (stmt);
8048 mask = vect_create_destination_var (lhs, mask_type);
8050 /* Handle cmp expr. */
8051 for (j = 0; j < ncopies; j++)
8053 gassign *new_stmt = NULL;
8058 auto_vec<tree, 2> ops;
8059 auto_vec<vec<tree>, 2> vec_defs;
8061 ops.safe_push (rhs1);
8062 ops.safe_push (rhs2);
8063 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
8064 vec_oprnds1 = vec_defs.pop ();
8065 vec_oprnds0 = vec_defs.pop ();
8069 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8070 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8075 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8076 vec_oprnds0.pop ());
8077 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8078 vec_oprnds1.pop ());
8083 vec_oprnds0.quick_push (vec_rhs1);
8084 vec_oprnds1.quick_push (vec_rhs2);
8087 /* Arguments are ready. Create the new vector stmt. */
8088 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8090 vec_rhs2 = vec_oprnds1[i];
8092 new_temp = make_ssa_name (mask);
8093 if (bitop1 == NOP_EXPR)
8095 new_stmt = gimple_build_assign (new_temp, code,
8096 vec_rhs1, vec_rhs2);
8097 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8101 if (bitop1 == BIT_NOT_EXPR)
8102 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8104 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8106 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8107 if (bitop2 != NOP_EXPR)
8109 tree res = make_ssa_name (mask);
8110 if (bitop2 == BIT_NOT_EXPR)
8111 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8113 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8115 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8119 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8126 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8128 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8130 prev_stmt_info = vinfo_for_stmt (new_stmt);
8133 vec_oprnds0.release ();
8134 vec_oprnds1.release ();
8139 /* Make sure the statement is vectorizable. */
8142 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8144 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8145 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8146 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8148 tree scalar_type, vectype;
8149 gimple *pattern_stmt;
8150 gimple_seq pattern_def_seq;
8152 if (dump_enabled_p ())
8154 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8155 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8158 if (gimple_has_volatile_ops (stmt))
8160 if (dump_enabled_p ())
8161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8162 "not vectorized: stmt has volatile operands\n");
8167 /* Skip stmts that do not need to be vectorized. In loops this is expected to include:
8169 - the COND_EXPR which is the loop exit condition
8170 - any LABEL_EXPRs in the loop
8171 - computations that are used only for array indexing or loop control.
8172 In basic blocks we only analyze statements that are a part of some SLP
8173 instance; therefore, all the statements are relevant.
8175 A pattern statement needs to be analyzed instead of the original statement
8176 if the original statement is not relevant. Otherwise, we analyze both
8177 statements. In basic blocks we are called from some SLP instance
8178 traversal; in that case don't analyze pattern stmts instead, since the
8179 pattern stmts will already be part of an SLP instance. */
8181 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8182 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8183 && !STMT_VINFO_LIVE_P (stmt_info))
8185 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8187 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8188 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8190 /* Analyze PATTERN_STMT instead of the original stmt. */
8191 stmt = pattern_stmt;
8192 stmt_info = vinfo_for_stmt (pattern_stmt);
8193 if (dump_enabled_p ())
8195 dump_printf_loc (MSG_NOTE, vect_location,
8196 "==> examining pattern statement: ");
8197 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8202 if (dump_enabled_p ())
8203 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8208 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8211 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8212 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8214 /* Analyze PATTERN_STMT too. */
8215 if (dump_enabled_p ())
8217 dump_printf_loc (MSG_NOTE, vect_location,
8218 "==> examining pattern statement: ");
8219 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8222 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8226 if (is_pattern_stmt_p (stmt_info)
8228 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8230 gimple_stmt_iterator si;
8232 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8234 gimple *pattern_def_stmt = gsi_stmt (si);
8235 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8236 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8238 /* Analyze def stmt of STMT if it's a pattern stmt. */
8239 if (dump_enabled_p ())
8241 dump_printf_loc (MSG_NOTE, vect_location,
8242 "==> examining pattern def statement: ");
8243 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8246 if (!vect_analyze_stmt (pattern_def_stmt,
8247 need_to_vectorize, node))
8253 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8255 case vect_internal_def:
8258 case vect_reduction_def:
8259 case vect_nested_cycle:
8260 gcc_assert (!bb_vinfo
8261 && (relevance == vect_used_in_outer
8262 || relevance == vect_used_in_outer_by_reduction
8263 || relevance == vect_used_by_reduction
8264 || relevance == vect_unused_in_scope
8265 || relevance == vect_used_only_live));
8268 case vect_induction_def:
8269 case vect_constant_def:
8270 case vect_external_def:
8271 case vect_unknown_def_type:
8278 gcc_assert (PURE_SLP_STMT (stmt_info));
8280 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8281 if (dump_enabled_p ())
8283 dump_printf_loc (MSG_NOTE, vect_location,
8284 "get vectype for scalar type: ");
8285 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8286 dump_printf (MSG_NOTE, "\n");
8289 vectype = get_vectype_for_scalar_type (scalar_type);
8292 if (dump_enabled_p ())
8294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8295 "not SLPed: unsupported data-type ");
8296 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8298 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8303 if (dump_enabled_p ())
8305 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8306 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8307 dump_printf (MSG_NOTE, "\n");
8310 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8313 if (STMT_VINFO_RELEVANT_P (stmt_info))
8315 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8316 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8317 || (is_gimple_call (stmt)
8318 && gimple_call_lhs (stmt) == NULL_TREE));
8319 *need_to_vectorize = true;
8322 if (PURE_SLP_STMT (stmt_info) && !node)
8324 dump_printf_loc (MSG_NOTE, vect_location,
8325 "handled only by SLP analysis\n");
8331 && (STMT_VINFO_RELEVANT_P (stmt_info)
8332 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8333 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8334 || vectorizable_conversion (stmt, NULL, NULL, node)
8335 || vectorizable_shift (stmt, NULL, NULL, node)
8336 || vectorizable_operation (stmt, NULL, NULL, node)
8337 || vectorizable_assignment (stmt, NULL, NULL, node)
8338 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8339 || vectorizable_call (stmt, NULL, NULL, node)
8340 || vectorizable_store (stmt, NULL, NULL, node)
8341 || vectorizable_reduction (stmt, NULL, NULL, node)
8342 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8343 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8347 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8348 || vectorizable_conversion (stmt, NULL, NULL, node)
8349 || vectorizable_shift (stmt, NULL, NULL, node)
8350 || vectorizable_operation (stmt, NULL, NULL, node)
8351 || vectorizable_assignment (stmt, NULL, NULL, node)
8352 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8353 || vectorizable_call (stmt, NULL, NULL, node)
8354 || vectorizable_store (stmt, NULL, NULL, node)
8355 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8356 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8361 if (dump_enabled_p ())
8363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8364 "not vectorized: relevant stmt not ");
8365 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8366 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8375 /* Stmts that are (also) "live" (i.e. used outside of the loop)
8376 need extra handling, except for vectorizable reductions. */
8377 if (STMT_VINFO_LIVE_P (stmt_info)
8378 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8379 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8383 if (dump_enabled_p ())
8385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8386 "not vectorized: live stmt not ");
8387 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8388 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8398 /* Function vect_transform_stmt.
8400 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8403 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8404 bool *grouped_store, slp_tree slp_node,
8405 slp_instance slp_node_instance)
8407 bool is_store = false;
8408 gimple *vec_stmt = NULL;
8409 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8412 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8413 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8415 switch (STMT_VINFO_TYPE (stmt_info))
8417 case type_demotion_vec_info_type:
8418 case type_promotion_vec_info_type:
8419 case type_conversion_vec_info_type:
8420 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8424 case induc_vec_info_type:
8425 gcc_assert (!slp_node);
8426 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8430 case shift_vec_info_type:
8431 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8435 case op_vec_info_type:
8436 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8440 case assignment_vec_info_type:
8441 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8445 case load_vec_info_type:
8446 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8451 case store_vec_info_type:
8452 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8454 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8456 /* In case of interleaving, the whole chain is vectorized when the
8457 last store in the chain is reached. Store stmts before the last
8458 one are skipped, and their vec_stmt_info shouldn't be freed
8460 *grouped_store = true;
8461 if (STMT_VINFO_VEC_STMT (stmt_info))
8468 case condition_vec_info_type:
8469 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8473 case comparison_vec_info_type:
8474 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8478 case call_vec_info_type:
8479 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8480 stmt = gsi_stmt (*gsi);
8481 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8485 case call_simd_clone_vec_info_type:
8486 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8487 stmt = gsi_stmt (*gsi);
8490 case reduc_vec_info_type:
8491 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8496 if (!STMT_VINFO_LIVE_P (stmt_info))
8498 if (dump_enabled_p ())
8499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8500 "stmt not supported.\n");
8505 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8506 This would break hybrid SLP vectorization. */
8508 gcc_assert (!vec_stmt
8509 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8511 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8512 is being vectorized, but outside the immediately enclosing loop. */
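/* A rough example of the situation handled here (hypothetical code,
   assuming the outer loop is the one being vectorized):

     for (i = 0; i < n; i++)
       {
         s = 0;
         for (j = 0; j < m; j++)
           s += b[i][j];        <-- inner-loop def of S
         a[i] = s;              <-- use of S outside the inner loop
       }

   The inner-loop value of S reaches its outer-loop use through the
   inner-loop exit phi, so the vectorized def is recorded on that exit
   phi below.  */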
8514 && STMT_VINFO_LOOP_VINFO (stmt_info)
8515 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8516 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8517 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8518 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8519 || STMT_VINFO_RELEVANT (stmt_info) ==
8520 vect_used_in_outer_by_reduction))
8522 struct loop *innerloop = LOOP_VINFO_LOOP (
8523 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8524 imm_use_iterator imm_iter;
8525 use_operand_p use_p;
8529 if (dump_enabled_p ())
8530 dump_printf_loc (MSG_NOTE, vect_location,
8531 "Record the vdef for outer-loop vectorization.\n");
8533 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8534 (to be used when vectorizing outer-loop stmts that use the DEF of
8536 if (gimple_code (stmt) == GIMPLE_PHI)
8537 scalar_dest = PHI_RESULT (stmt);
8539 scalar_dest = gimple_assign_lhs (stmt);
8541 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8543 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8545 exit_phi = USE_STMT (use_p);
8546 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8551 /* Handle stmts whose DEF is used outside the loop-nest that is
8552 being vectorized. */
8557 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8559 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8560 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8561 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8563 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8569 else if (STMT_VINFO_LIVE_P (stmt_info)
8570 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8572 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8577 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8583 /* Remove a group of stores (for SLP or interleaving), free their
8587 vect_remove_stores (gimple *first_stmt)
8589 gimple *next = first_stmt;
8591 gimple_stmt_iterator next_si;
8595 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8597 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8598 if (is_pattern_stmt_p (stmt_info))
8599 next = STMT_VINFO_RELATED_STMT (stmt_info);
8600 /* Free the attached stmt_vec_info and remove the stmt. */
8601 next_si = gsi_for_stmt (next);
8602 unlink_stmt_vdef (next);
8603 gsi_remove (&next_si, true);
8604 release_defs (next);
8605 free_stmt_vec_info (next);
8611 /* Function new_stmt_vec_info.
8613 Create and initialize a new stmt_vec_info struct for STMT. */
8616 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8619 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8621 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8622 STMT_VINFO_STMT (res) = stmt;
8624 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8625 STMT_VINFO_LIVE_P (res) = false;
8626 STMT_VINFO_VECTYPE (res) = NULL;
8627 STMT_VINFO_VEC_STMT (res) = NULL;
8628 STMT_VINFO_VECTORIZABLE (res) = true;
8629 STMT_VINFO_IN_PATTERN_P (res) = false;
8630 STMT_VINFO_RELATED_STMT (res) = NULL;
8631 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8632 STMT_VINFO_DATA_REF (res) = NULL;
8633 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8634 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8636 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8637 STMT_VINFO_DR_OFFSET (res) = NULL;
8638 STMT_VINFO_DR_INIT (res) = NULL;
8639 STMT_VINFO_DR_STEP (res) = NULL;
8640 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8642 if (gimple_code (stmt) == GIMPLE_PHI
8643 && is_loop_header_bb_p (gimple_bb (stmt)))
8644 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8646 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8648 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8649 STMT_SLP_TYPE (res) = loop_vect;
8650 STMT_VINFO_NUM_SLP_USES (res) = 0;
8652 GROUP_FIRST_ELEMENT (res) = NULL;
8653 GROUP_NEXT_ELEMENT (res) = NULL;
8654 GROUP_SIZE (res) = 0;
8655 GROUP_STORE_COUNT (res) = 0;
8656 GROUP_GAP (res) = 0;
8657 GROUP_SAME_DR_STMT (res) = NULL;
8663 /* Create the vector that maps statements to their stmt_vec_info. */
8666 init_stmt_vec_info_vec (void)
8668 gcc_assert (!stmt_vec_info_vec.exists ());
8669 stmt_vec_info_vec.create (50);
8673 /* Free the vector that maps statements to their stmt_vec_info. */
8676 free_stmt_vec_info_vec (void)
8680 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8682 free_stmt_vec_info (STMT_VINFO_STMT (info));
8683 gcc_assert (stmt_vec_info_vec.exists ());
8684 stmt_vec_info_vec.release ();
8688 /* Free stmt vectorization related info. */
8691 free_stmt_vec_info (gimple *stmt)
8693 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8698 /* Check if this statement has a related "pattern stmt"
8699 (introduced by the vectorizer during the pattern recognition
8700 pass). Free the pattern's stmt_vec_info and the def stmt's stmt_vec_info
8702 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8704 stmt_vec_info patt_info
8705 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8708 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8709 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8710 gimple_set_bb (patt_stmt, NULL);
8711 tree lhs = gimple_get_lhs (patt_stmt);
8712 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8713 release_ssa_name (lhs);
8716 gimple_stmt_iterator si;
8717 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8719 gimple *seq_stmt = gsi_stmt (si);
8720 gimple_set_bb (seq_stmt, NULL);
8721 lhs = gimple_get_lhs (seq_stmt);
8722 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8723 release_ssa_name (lhs);
8724 free_stmt_vec_info (seq_stmt);
8727 free_stmt_vec_info (patt_stmt);
8731 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8732 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8733 set_vinfo_for_stmt (stmt, NULL);
8738 /* Function get_vectype_for_scalar_type_and_size.
8740 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8744 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8746 machine_mode inner_mode = TYPE_MODE (scalar_type);
8747 machine_mode simd_mode;
8748 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8755 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8756 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8759 /* For vector types of elements whose mode precision doesn't
8760 match their type's precision we use an element type of mode
8761 precision. The vectorization routines will have to make sure
8762 they support the proper result truncation/extension.
8763 We also make sure to build vector types with INTEGER_TYPE
8764 component type only. */
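/* For instance, a C _Bool typically has QImode but TYPE_PRECISION 1; under
   the rule above its vector elements would be built from the unsigned
   8-bit integer type matching QImode rather than from _Bool itself.  */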
8765 if (INTEGRAL_TYPE_P (scalar_type)
8766 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8767 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8768 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8769 TYPE_UNSIGNED (scalar_type));
8771 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8772 When the component mode passes the above test, simply use a type
8773 corresponding to that mode. The theory is that any use that
8774 would cause problems with this will disable vectorization anyway. */
8775 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8776 && !INTEGRAL_TYPE_P (scalar_type))
8777 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8779 /* We can't build a vector type of elements with alignment bigger than
8781 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8782 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8783 TYPE_UNSIGNED (scalar_type));
8785 /* If we fell back to using the mode, fail if there was
8786 no scalar type for it. */
8787 if (scalar_type == NULL_TREE)
8790 /* If no size was supplied use the mode the target prefers. Otherwise
8791 look up a vector mode of the specified size. */
8793 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8795 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8796 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8800 vectype = build_vector_type (scalar_type, nunits);
8802 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8803 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8809 unsigned int current_vector_size;
8811 /* Function get_vectype_for_scalar_type.
8813 Returns the vector type corresponding to SCALAR_TYPE as supported
8817 get_vectype_for_scalar_type (tree scalar_type)
8820 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8821 current_vector_size);
8823 && current_vector_size == 0)
8824 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8828 /* Function get_mask_type_for_scalar_type.
8830 Returns the mask type corresponding to the result of a comparison
8831 of vectors of the specified SCALAR_TYPE, as supported by the target. */
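/* E.g. for SCALAR_TYPE int on a target whose preferred vector for int is
   V4SI, the result describes a boolean vector with four elements, suitable
   for holding the outcome of comparing two such int vectors (a sketch; the
   exact mode of the mask type is target dependent).  */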
8834 get_mask_type_for_scalar_type (tree scalar_type)
8836 tree vectype = get_vectype_for_scalar_type (scalar_type);
8841 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8842 current_vector_size);
8845 /* Function get_same_sized_vectype
8847 Returns a vector type corresponding to SCALAR_TYPE of the same size
8848 as VECTOR_TYPE, if supported by the target. */
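/* E.g. given SCALAR_TYPE int and a V4SF VECTOR_TYPE this would return the
   V4SI vector type (assuming the target supports it), since both occupy
   the same number of bytes.  */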
8851 get_same_sized_vectype (tree scalar_type, tree vector_type)
8853 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8854 return build_same_sized_truth_vector_type (vector_type);
8856 return get_vectype_for_scalar_type_and_size
8857 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8860 /* Function vect_is_simple_use.
8863 VINFO - the vect info of the loop or basic block that is being vectorized.
8864 OPERAND - operand in the loop or bb.
8866 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8867 DT - the type of definition
8869 Returns whether a stmt with OPERAND can be vectorized.
8870 For loops, supportable operands are constants, loop invariants, and operands
8871 that are defined by the current iteration of the loop. Unsupportable
8872 operands are those that are defined by a previous iteration of the loop (as
8873 is the case in reduction/induction computations).
8874 For basic blocks, supportable operands are constants and bb invariants.
8875 For now, operands defined outside the basic block are not supported. */
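/* As a rough illustration (hypothetical loop, not from this file), when
   analyzing the operands of the statement in

     for (i = 0; i < n; i++)
       a[i] = b[i] * 4 + s;

   the operand 4 is classified as vect_constant_def, S (defined before the
   loop) as vect_external_def, and the value loaded from B[I] as
   vect_internal_def; an operand carried around the loop by a phi, as in a
   reduction, gets the def type that was recorded on that phi instead.  */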
8878 vect_is_simple_use (tree operand, vec_info *vinfo,
8879 gimple **def_stmt, enum vect_def_type *dt)
8882 *dt = vect_unknown_def_type;
8884 if (dump_enabled_p ())
8886 dump_printf_loc (MSG_NOTE, vect_location,
8887 "vect_is_simple_use: operand ");
8888 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8889 dump_printf (MSG_NOTE, "\n");
8892 if (CONSTANT_CLASS_P (operand))
8894 *dt = vect_constant_def;
8898 if (is_gimple_min_invariant (operand))
8900 *dt = vect_external_def;
8904 if (TREE_CODE (operand) != SSA_NAME)
8906 if (dump_enabled_p ())
8907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8912 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8914 *dt = vect_external_def;
8918 *def_stmt = SSA_NAME_DEF_STMT (operand);
8919 if (dump_enabled_p ())
8921 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8922 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8925 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8926 *dt = vect_external_def;
8929 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8930 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8933 if (dump_enabled_p ())
8935 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8938 case vect_uninitialized_def:
8939 dump_printf (MSG_NOTE, "uninitialized\n");
8941 case vect_constant_def:
8942 dump_printf (MSG_NOTE, "constant\n");
8944 case vect_external_def:
8945 dump_printf (MSG_NOTE, "external\n");
8947 case vect_internal_def:
8948 dump_printf (MSG_NOTE, "internal\n");
8950 case vect_induction_def:
8951 dump_printf (MSG_NOTE, "induction\n");
8953 case vect_reduction_def:
8954 dump_printf (MSG_NOTE, "reduction\n");
8956 case vect_double_reduction_def:
8957 dump_printf (MSG_NOTE, "double reduction\n");
8959 case vect_nested_cycle:
8960 dump_printf (MSG_NOTE, "nested cycle\n");
8962 case vect_unknown_def_type:
8963 dump_printf (MSG_NOTE, "unknown\n");
8968 if (*dt == vect_unknown_def_type)
8970 if (dump_enabled_p ())
8971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8972 "Unsupported pattern.\n");
8976 switch (gimple_code (*def_stmt))
8983 if (dump_enabled_p ())
8984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8985 "unsupported defining stmt:\n");
8992 /* Function vect_is_simple_use.
8994 Same as vect_is_simple_use but also determines the vector operand
8995 type of OPERAND and stores it to *VECTYPE. If the definition of
8996 OPERAND is vect_uninitialized_def, vect_constant_def or
8997 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8998 is responsible for computing the best-suited vector type for the
9002 vect_is_simple_use (tree operand, vec_info *vinfo,
9003 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9005 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9008 /* Now get a vector type if the def is internal, otherwise supply
9009 NULL_TREE and leave it up to the caller to figure out a proper
9010 type for the use stmt. */
9011 if (*dt == vect_internal_def
9012 || *dt == vect_induction_def
9013 || *dt == vect_reduction_def
9014 || *dt == vect_double_reduction_def
9015 || *dt == vect_nested_cycle)
9017 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9019 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9020 && !STMT_VINFO_RELEVANT (stmt_info)
9021 && !STMT_VINFO_LIVE_P (stmt_info))
9022 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9024 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9025 gcc_assert (*vectype != NULL_TREE);
9027 else if (*dt == vect_uninitialized_def
9028 || *dt == vect_constant_def
9029 || *dt == vect_external_def)
9030 *vectype = NULL_TREE;
9038 /* Function supportable_widening_operation
9040 Check whether an operation represented by the code CODE is a
9041 widening operation that is supported by the target platform in
9042 vector form (i.e., when operating on arguments of type VECTYPE_IN
9043 producing a result of type VECTYPE_OUT).
9045 Widening operations we currently support are NOP (CONVERT), FLOAT
9046 and WIDEN_MULT. This function checks if these operations are supported
9047 by the target platform either directly (via vector tree-codes), or via
9051 - CODE1 and CODE2 are codes of vector operations to be used when
9052 vectorizing the operation, if available.
9053 - MULTI_STEP_CVT determines the number of required intermediate steps in
9054 case of multi-step conversion (like char->short->int - in that case
9055 MULTI_STEP_CVT will be 1).
9056 - INTERM_TYPES contains the intermediate type required to perform the
9057 widening operation (short in the above example). */
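/* For instance, widening a V16QI char operand all the way to int on a
   typical 128-bit target takes two steps: unpack the V16QI vector into two
   V8HI vectors, then unpack each V8HI vector into two V4SI vectors.  For a
   plain conversion *CODE1/*CODE2 would then be the
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR pair, *MULTI_STEP_CVT would be 1
   and *INTERM_TYPES would hold the V8HI intermediate type (a sketch only;
   the exact modes depend on the target).  */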
9060 supportable_widening_operation (enum tree_code code, gimple *stmt,
9061 tree vectype_out, tree vectype_in,
9062 enum tree_code *code1, enum tree_code *code2,
9063 int *multi_step_cvt,
9064 vec<tree> *interm_types)
9066 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9067 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9068 struct loop *vect_loop = NULL;
9069 machine_mode vec_mode;
9070 enum insn_code icode1, icode2;
9071 optab optab1, optab2;
9072 tree vectype = vectype_in;
9073 tree wide_vectype = vectype_out;
9074 enum tree_code c1, c2;
9076 tree prev_type, intermediate_type;
9077 machine_mode intermediate_mode, prev_mode;
9078 optab optab3, optab4;
9080 *multi_step_cvt = 0;
9082 vect_loop = LOOP_VINFO_LOOP (loop_info);
9086 case WIDEN_MULT_EXPR:
9087 /* The result of a vectorized widening operation usually requires
9088 two vectors (because the widened results do not fit into one vector).
9089 The generated vector results would normally be expected to be
9090 generated in the same order as in the original scalar computation,
9091 i.e. if 8 results are generated in each vector iteration, they are
9092 to be organized as follows:
9093 vect1: [res1,res2,res3,res4],
9094 vect2: [res5,res6,res7,res8].
9096 However, in the special case that the result of the widening
9097 operation is used in a reduction computation only, the order doesn't
9098 matter (because when vectorizing a reduction we change the order of
9099 the computation). Some targets can take advantage of this and
9100 generate more efficient code. For example, targets like Altivec,
9101 that support widen_mult using a sequence of {mult_even,mult_odd}
9102 generate the following vectors:
9103 vect1: [res1,res3,res5,res7],
9104 vect2: [res2,res4,res6,res8].
9106 When vectorizing outer-loops, we execute the inner-loop sequentially
9107 (each vectorized inner-loop iteration contributes to VF outer-loop
9108 iterations in parallel). We therefore don't allow changing the
9109 order of the computation in the inner-loop during outer-loop
9111 /* TODO: Another case in which order doesn't *really* matter is when we
9112 widen and then contract again, e.g. (short)((int)x * y >> 8).
9113 Normally, pack_trunc performs an even/odd permute, whereas the
9114 repack from an even/odd expansion would be an interleave, which
9115 would be significantly simpler for e.g. AVX2. */
9116 /* In any case, in order to avoid duplicating the code below, recurse
9117 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9118 are properly set up for the caller. If we fail, we'll continue with
9119 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9121 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9122 && !nested_in_vect_loop_p (vect_loop, stmt)
9123 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9124 stmt, vectype_out, vectype_in,
9125 code1, code2, multi_step_cvt,
9128 /* Elements in a vector with vect_used_by_reduction property cannot
9129 be reordered if the use chain with this property does not have the
9130 same operation. One such example is s += a * b, where elements
9131 in a and b cannot be reordered. Here we check if the vector defined
9132 by STMT is only directly used in the reduction statement. */
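/* E.g. if the product had another use besides the reduction, as in

     tmp = a[i] * b[i];
     s += tmp;
     c[i] = tmp;

   reordering the widened results would be visible through C[], so the
   check below requires the single immediate use of the def to be the
   reduction statement itself.  */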
9133 tree lhs = gimple_assign_lhs (stmt);
9134 use_operand_p dummy;
9136 stmt_vec_info use_stmt_info = NULL;
9137 if (single_imm_use (lhs, &dummy, &use_stmt)
9138 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9139 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9142 c1 = VEC_WIDEN_MULT_LO_EXPR;
9143 c2 = VEC_WIDEN_MULT_HI_EXPR;
9156 case VEC_WIDEN_MULT_EVEN_EXPR:
9157 /* Support the recursion induced just above. */
9158 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9159 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9162 case WIDEN_LSHIFT_EXPR:
9163 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9164 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9168 c1 = VEC_UNPACK_LO_EXPR;
9169 c2 = VEC_UNPACK_HI_EXPR;
9173 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9174 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9177 case FIX_TRUNC_EXPR:
9178 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9179 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9180 computing the operation. */
9187 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9190 if (code == FIX_TRUNC_EXPR)
9192 /* The signedness is determined from output operand. */
9193 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9194 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9198 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9199 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9202 if (!optab1 || !optab2)
9205 vec_mode = TYPE_MODE (vectype);
9206 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9207 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9213 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9214 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9215 /* For scalar masks we may have different boolean
9216 vector types having the same QImode. Thus we
9217 add an additional check on the number of elements. */
9218 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9219 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9220 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9222 /* Check if it's a multi-step conversion that can be done using intermediate
9225 prev_type = vectype;
9226 prev_mode = vec_mode;
9228 if (!CONVERT_EXPR_CODE_P (code))
9231 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9232 intermediate steps in the promotion sequence. We try
9233 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do not. */
9235 interm_types->create (MAX_INTERM_CVT_STEPS);
9236 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9238 intermediate_mode = insn_data[icode1].operand[0].mode;
9239 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9242 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9243 current_vector_size);
9244 if (intermediate_mode != TYPE_MODE (intermediate_type))
9249 = lang_hooks.types.type_for_mode (intermediate_mode,
9250 TYPE_UNSIGNED (prev_type));
9252 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9253 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9255 if (!optab3 || !optab4
9256 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9257 || insn_data[icode1].operand[0].mode != intermediate_mode
9258 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9259 || insn_data[icode2].operand[0].mode != intermediate_mode
9260 || ((icode1 = optab_handler (optab3, intermediate_mode))
9261 == CODE_FOR_nothing)
9262 || ((icode2 = optab_handler (optab4, intermediate_mode))
9263 == CODE_FOR_nothing))
9266 interm_types->quick_push (intermediate_type);
9267 (*multi_step_cvt)++;
9269 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9270 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9271 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9272 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9273 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9275 prev_type = intermediate_type;
9276 prev_mode = intermediate_mode;
9279 interm_types->release ();
9284 /* Function supportable_narrowing_operation
9286 Check whether an operation represented by the code CODE is a
9287 narrowing operation that is supported by the target platform in
9288 vector form (i.e., when operating on arguments of type VECTYPE_IN
9289 and producing a result of type VECTYPE_OUT).
9291 Narrowing operations we currently support are NOP (CONVERT) and
9292 FIX_TRUNC. This function checks if these operations are supported by
9293 the target platform directly via vector tree-codes.
9296 - CODE1 is the code of a vector operation to be used when
9297 vectorizing the operation, if available.
9298 - MULTI_STEP_CVT determines the number of required intermediate steps in
9299 case of multi-step conversion (like int->short->char - in that case
9300 MULTI_STEP_CVT will be 1).
9301 - INTERM_TYPES contains the intermediate type required to perform the
9302 narrowing operation (short in the above example). */
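/* For instance, narrowing V4SI int operands down to char on a typical
   128-bit target takes two steps: pack pairs of V4SI vectors into V8HI
   vectors, then pack pairs of V8HI vectors into V16QI.  For a plain
   conversion *CODE1 would then be VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT
   would be 1 and *INTERM_TYPES would hold the V8HI intermediate type (a
   sketch only; the exact modes depend on the target).  */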
9305 supportable_narrowing_operation (enum tree_code code,
9306 tree vectype_out, tree vectype_in,
9307 enum tree_code *code1, int *multi_step_cvt,
9308 vec<tree> *interm_types)
9310 machine_mode vec_mode;
9311 enum insn_code icode1;
9312 optab optab1, interm_optab;
9313 tree vectype = vectype_in;
9314 tree narrow_vectype = vectype_out;
9316 tree intermediate_type, prev_type;
9317 machine_mode intermediate_mode, prev_mode;
9321 *multi_step_cvt = 0;
9325 c1 = VEC_PACK_TRUNC_EXPR;
9328 case FIX_TRUNC_EXPR:
9329 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9333 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9334 tree code and optabs used for computing the operation. */
9341 if (code == FIX_TRUNC_EXPR)
9342 /* The signedness is determined from output operand. */
9343 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9345 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9350 vec_mode = TYPE_MODE (vectype);
9351 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9356 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9357 /* For scalar masks we may have different boolean
9358 vector types having the same QImode. Thus we
9359 add an additional check on the number of elements. */
9360 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9361 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9362 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9364 /* Check if it's a multi-step conversion that can be done using intermediate
9366 prev_mode = vec_mode;
9367 prev_type = vectype;
9368 if (code == FIX_TRUNC_EXPR)
9369 uns = TYPE_UNSIGNED (vectype_out);
9371 uns = TYPE_UNSIGNED (vectype);
9373 /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to integer
9374 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
9375 costly than signed. */
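/* E.g. to turn a vector of double into unsigned short it can be cheaper to
   convert to signed int first and let the subsequent pack-trunc steps chop
   the result down, since for in-range values the final truncation makes
   the signedness of the wide conversion irrelevant.  */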
9376 if (code == FIX_TRUNC_EXPR && uns)
9378 enum insn_code icode2;
9381 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9383 = optab_for_tree_code (c1, intermediate_type, optab_default);
9384 if (interm_optab != unknown_optab
9385 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9386 && insn_data[icode1].operand[0].mode
9387 == insn_data[icode2].operand[0].mode)
9390 optab1 = interm_optab;
9395 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9396 intermediate steps in the narrowing sequence. We try
9397 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9398 interm_types->create (MAX_INTERM_CVT_STEPS);
9399 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9401 intermediate_mode = insn_data[icode1].operand[0].mode;
9402 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9405 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9406 current_vector_size);
9407 if (intermediate_mode != TYPE_MODE (intermediate_type))
9412 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9414 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9417 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9418 || insn_data[icode1].operand[0].mode != intermediate_mode
9419 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9420 == CODE_FOR_nothing))
9423 interm_types->quick_push (intermediate_type);
9424 (*multi_step_cvt)++;
9426 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9427 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9428 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9429 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9431 prev_mode = intermediate_mode;
9432 prev_type = intermediate_type;
9433 optab1 = interm_optab;
9436 interm_types->release ();