1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
58 stmt_vectype (struct _stmt_vec_info *stmt_info)
60 return STMT_VINFO_VECTYPE (stmt_info);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
66 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
68 gimple *stmt = STMT_VINFO_STMT (stmt_info);
69 basic_block bb = gimple_bb (stmt);
70 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
76 loop = LOOP_VINFO_LOOP (loop_vinfo);
78 return (bb->loop_father == loop->inner);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
86 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
87 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
88 int misalign, enum vect_cost_model_location where)
92 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
93 stmt_info_for_cost si = { count, kind,
94 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
96 body_cost_vec->safe_push (si);
98 (builtin_vectorization_cost (kind, vectype, misalign) * count);
101 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
102 count, kind, stmt_info, misalign, where);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
120 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
146 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
165 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
169 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
170 /* Arrays have the same alignment as their type. */
171 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
175 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
177 /* Function vect_mark_relevant.
179 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
183 enum vect_relevant relevant, bool live_p)
185 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
186 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
187 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
188 gimple *pattern_stmt;
190 if (dump_enabled_p ())
192 dump_printf_loc (MSG_NOTE, vect_location,
193 "mark relevant %d, live %d: ", relevant, live_p);
194 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
197 /* If this stmt is an original stmt in a pattern, we might need to mark its
198 related pattern stmt instead of the original stmt. However, such stmts
199 may have their own uses that are not in any pattern, in such cases the
200 stmt itself should be marked. */
201 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
203 /* This is the last stmt in a sequence that was detected as a
204 pattern that can potentially be vectorized. Don't mark the stmt
205 as relevant/live because it's not going to be vectorized.
206 Instead mark the pattern-stmt that replaces it. */
208 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
210 if (dump_enabled_p ())
211 dump_printf_loc (MSG_NOTE, vect_location,
212 "last stmt in pattern. don't mark"
213 " relevant/live.\n");
214 stmt_info = vinfo_for_stmt (pattern_stmt);
215 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
216 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
217 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
221 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
222 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
223 STMT_VINFO_RELEVANT (stmt_info) = relevant;
225 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
226 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
228 if (dump_enabled_p ())
229 dump_printf_loc (MSG_NOTE, vect_location,
230 "already marked relevant/live.\n");
234 worklist->safe_push (stmt);
238 /* Function vect_stmt_relevant_p.
240 Return true if STMT in loop that is represented by LOOP_VINFO is
241 "relevant for vectorization".
243 A stmt is considered "relevant for vectorization" if:
244 - it has uses outside the loop.
245 - it has vdefs (it alters memory).
246 - control stmts in the loop (except for the exit condition).
248 CHECKME: what other side effects would the vectorizer allow? */
251 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
252 enum vect_relevant *relevant, bool *live_p)
254 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
256 imm_use_iterator imm_iter;
260 *relevant = vect_unused_in_scope;
263 /* cond stmt other than loop exit cond. */
264 if (is_ctrl_stmt (stmt)
265 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
266 != loop_exit_ctrl_vec_info_type)
267 *relevant = vect_used_in_scope;
269 /* changing memory. */
270 if (gimple_code (stmt) != GIMPLE_PHI)
271 if (gimple_vdef (stmt)
272 && !gimple_clobber_p (stmt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_NOTE, vect_location,
276 "vec_stmt_relevant_p: stmt has vdefs.\n");
277 *relevant = vect_used_in_scope;
280 /* uses outside the loop. */
281 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
283 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
285 basic_block bb = gimple_bb (USE_STMT (use_p));
286 if (!flow_bb_inside_loop_p (loop, bb))
288 if (dump_enabled_p ())
289 dump_printf_loc (MSG_NOTE, vect_location,
290 "vec_stmt_relevant_p: used out of loop.\n");
292 if (is_gimple_debug (USE_STMT (use_p)))
295 /* We expect all such uses to be in the loop exit phis
296 (because of loop closed form) */
297 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
298 gcc_assert (bb == single_exit (loop)->dest);
305 return (*live_p || *relevant);
309 /* Function exist_non_indexing_operands_for_use_p
311 USE is one of the uses attached to STMT. Check if USE is
312 used in STMT for anything other than indexing an array. */
315 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
318 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
320 /* USE corresponds to some operand in STMT. If there is no data
321 reference in STMT, then any operand that corresponds to USE
322 is not indexing an array. */
323 if (!STMT_VINFO_DATA_REF (stmt_info))
326 /* STMT has a data_ref. FORNOW this means that its of one of
330 (This should have been verified in analyze_data_refs).
332 'var' in the second case corresponds to a def, not a use,
333 so USE cannot correspond to any operands that are not used
336 Therefore, all we need to check is if STMT falls into the
337 first case, and whether var corresponds to USE. */
339 if (!gimple_assign_copy_p (stmt))
341 if (is_gimple_call (stmt)
342 && gimple_call_internal_p (stmt))
343 switch (gimple_call_internal_fn (stmt))
346 operand = gimple_call_arg (stmt, 3);
351 operand = gimple_call_arg (stmt, 2);
361 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
363 operand = gimple_assign_rhs1 (stmt);
364 if (TREE_CODE (operand) != SSA_NAME)
375 Function process_use.
378 - a USE in STMT in a loop represented by LOOP_VINFO
379 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
380 that defined USE. This is done by calling mark_relevant and passing it
381 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
382 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
386 Generally, LIVE_P and RELEVANT are used to define the liveness and
387 relevance info of the DEF_STMT of this USE:
388 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
389 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
391 - case 1: If USE is used only for address computations (e.g. array indexing),
392 which does not need to be directly vectorized, then the liveness/relevance
393 of the respective DEF_STMT is left unchanged.
394 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
395 skip DEF_STMT cause it had already been processed.
396 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
397 be modified accordingly.
399 Return true if everything is as expected. Return false otherwise. */
402 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
403 enum vect_relevant relevant, vec<gimple *> *worklist,
406 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
407 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
408 stmt_vec_info dstmt_vinfo;
409 basic_block bb, def_bb;
411 enum vect_def_type dt;
413 /* case 1: we are only interested in uses that need to be vectorized. Uses
414 that are used for address computation are not considered relevant. */
415 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
418 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
420 if (dump_enabled_p ())
421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
422 "not vectorized: unsupported use in stmt.\n");
426 if (!def_stmt || gimple_nop_p (def_stmt))
429 def_bb = gimple_bb (def_stmt);
430 if (!flow_bb_inside_loop_p (loop, def_bb))
432 if (dump_enabled_p ())
433 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
437 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
438 DEF_STMT must have already been processed, because this should be the
439 only way that STMT, which is a reduction-phi, was put in the worklist,
440 as there should be no other uses for DEF_STMT in the loop. So we just
441 check that everything is as expected, and we are done. */
442 dstmt_vinfo = vinfo_for_stmt (def_stmt);
443 bb = gimple_bb (stmt);
444 if (gimple_code (stmt) == GIMPLE_PHI
445 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
446 && gimple_code (def_stmt) != GIMPLE_PHI
447 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
448 && bb->loop_father == def_bb->loop_father)
450 if (dump_enabled_p ())
451 dump_printf_loc (MSG_NOTE, vect_location,
452 "reduc-stmt defining reduc-phi in the same nest.\n");
453 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
454 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
455 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
456 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
457 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
461 /* case 3a: outer-loop stmt defining an inner-loop stmt:
462 outer-loop-header-bb:
468 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_NOTE, vect_location,
472 "outer-loop def-stmt defining inner-loop stmt.\n");
476 case vect_unused_in_scope:
477 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
478 vect_used_in_scope : vect_unused_in_scope;
481 case vect_used_in_outer_by_reduction:
482 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
483 relevant = vect_used_by_reduction;
486 case vect_used_in_outer:
487 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
488 relevant = vect_used_in_scope;
491 case vect_used_in_scope:
499 /* case 3b: inner-loop stmt defining an outer-loop stmt:
500 outer-loop-header-bb:
504 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
506 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
508 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE, vect_location,
510 "inner-loop def-stmt defining outer-loop stmt.\n");
514 case vect_unused_in_scope:
515 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
516 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
517 vect_used_in_outer_by_reduction : vect_unused_in_scope;
520 case vect_used_by_reduction:
521 relevant = vect_used_in_outer_by_reduction;
524 case vect_used_in_scope:
525 relevant = vect_used_in_outer;
533 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
538 /* Function vect_mark_stmts_to_be_vectorized.
540 Not all stmts in the loop need to be vectorized. For example:
549 Stmt 1 and 3 do not need to be vectorized, because loop control and
550 addressing of vectorized data-refs are handled differently.
552 This pass detects such stmts. */
555 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
557 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
558 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
559 unsigned int nbbs = loop->num_nodes;
560 gimple_stmt_iterator si;
563 stmt_vec_info stmt_vinfo;
567 enum vect_relevant relevant, tmp_relevant;
568 enum vect_def_type def_type;
570 if (dump_enabled_p ())
571 dump_printf_loc (MSG_NOTE, vect_location,
572 "=== vect_mark_stmts_to_be_vectorized ===\n");
574 auto_vec<gimple *, 64> worklist;
576 /* 1. Init worklist. */
577 for (i = 0; i < nbbs; i++)
580 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
583 if (dump_enabled_p ())
585 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
586 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
589 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
590 vect_mark_relevant (&worklist, phi, relevant, live_p);
592 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
594 stmt = gsi_stmt (si);
595 if (dump_enabled_p ())
597 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
598 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
601 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
602 vect_mark_relevant (&worklist, stmt, relevant, live_p);
606 /* 2. Process_worklist */
607 while (worklist.length () > 0)
612 stmt = worklist.pop ();
613 if (dump_enabled_p ())
615 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
616 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
619 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
620 (DEF_STMT) as relevant/irrelevant and live/dead according to the
621 liveness and relevance properties of STMT. */
622 stmt_vinfo = vinfo_for_stmt (stmt);
623 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
624 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
626 /* Generally, the liveness and relevance properties of STMT are
627 propagated as is to the DEF_STMTs of its USEs:
628 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
629 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
631 One exception is when STMT has been identified as defining a reduction
632 variable; in this case we set the liveness/relevance as follows:
634 relevant = vect_used_by_reduction
635 This is because we distinguish between two kinds of relevant stmts -
636 those that are used by a reduction computation, and those that are
637 (also) used by a regular computation. This allows us later on to
638 identify stmts that are used solely by a reduction, and therefore the
639 order of the results that they produce does not have to be kept. */
641 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
642 tmp_relevant = relevant;
645 case vect_reduction_def:
646 switch (tmp_relevant)
648 case vect_unused_in_scope:
649 relevant = vect_used_by_reduction;
652 case vect_used_by_reduction:
653 if (gimple_code (stmt) == GIMPLE_PHI)
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
660 "unsupported use of reduction.\n");
667 case vect_nested_cycle:
668 if (tmp_relevant != vect_unused_in_scope
669 && tmp_relevant != vect_used_in_outer_by_reduction
670 && tmp_relevant != vect_used_in_outer)
672 if (dump_enabled_p ())
673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
674 "unsupported use of nested cycle.\n");
682 case vect_double_reduction_def:
683 if (tmp_relevant != vect_unused_in_scope
684 && tmp_relevant != vect_used_by_reduction)
686 if (dump_enabled_p ())
687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
688 "unsupported use of double reduction.\n");
700 if (is_pattern_stmt_p (stmt_vinfo))
702 /* Pattern statements are not inserted into the code, so
703 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
704 have to scan the RHS or function arguments instead. */
705 if (is_gimple_assign (stmt))
707 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
708 tree op = gimple_assign_rhs1 (stmt);
711 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
713 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
714 live_p, relevant, &worklist, false)
715 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
716 live_p, relevant, &worklist, false))
720 for (; i < gimple_num_ops (stmt); i++)
722 op = gimple_op (stmt, i);
723 if (TREE_CODE (op) == SSA_NAME
724 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
729 else if (is_gimple_call (stmt))
731 for (i = 0; i < gimple_call_num_args (stmt); i++)
733 tree arg = gimple_call_arg (stmt, i);
734 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
741 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
743 tree op = USE_FROM_PTR (use_p);
744 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
749 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
752 tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
754 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
758 } /* while worklist */
764 /* Function vect_model_simple_cost.
766 Models cost for simple operations, i.e. those that only emit ncopies of a
767 single op. Right now, this does not account for multiple insns that could
768 be generated for the single vector op. We will handle that shortly. */
771 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
772 enum vect_def_type *dt,
773 stmt_vector_for_cost *prologue_cost_vec,
774 stmt_vector_for_cost *body_cost_vec)
777 int inside_cost = 0, prologue_cost = 0;
779 /* The SLP costs were already calculated during SLP tree build. */
780 if (PURE_SLP_STMT (stmt_info))
783 /* FORNOW: Assuming maximum 2 args per stmts. */
784 for (i = 0; i < 2; i++)
785 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
786 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
787 stmt_info, 0, vect_prologue);
789 /* Pass the inside-of-loop statements to the target-specific cost model. */
790 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
791 stmt_info, 0, vect_body);
793 if (dump_enabled_p ())
794 dump_printf_loc (MSG_NOTE, vect_location,
795 "vect_model_simple_cost: inside_cost = %d, "
796 "prologue_cost = %d .\n", inside_cost, prologue_cost);
800 /* Model cost for type demotion and promotion operations. PWR is normally
801 zero for single-step promotions and demotions. It will be one if
802 two-step promotion/demotion is required, and so on. Each additional
803 step doubles the number of instructions required. */
806 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
807 enum vect_def_type *dt, int pwr)
810 int inside_cost = 0, prologue_cost = 0;
811 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
812 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
813 void *target_cost_data;
815 /* The SLP costs were already calculated during SLP tree build. */
816 if (PURE_SLP_STMT (stmt_info))
820 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
822 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
824 for (i = 0; i < pwr + 1; i++)
826 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
828 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
829 vec_promote_demote, stmt_info, 0,
833 /* FORNOW: Assuming maximum 2 args per stmts. */
834 for (i = 0; i < 2; i++)
835 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
837 stmt_info, 0, vect_prologue);
839 if (dump_enabled_p ())
840 dump_printf_loc (MSG_NOTE, vect_location,
841 "vect_model_promotion_demotion_cost: inside_cost = %d, "
842 "prologue_cost = %d .\n", inside_cost, prologue_cost);
845 /* Function vect_cost_group_size
847 For grouped load or store, return the group_size only if it is the first
848 load or store of a group, else return 1. This ensures that group size is
849 only returned once per group. */
852 vect_cost_group_size (stmt_vec_info stmt_info)
854 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
856 if (first_stmt == STMT_VINFO_STMT (stmt_info))
857 return GROUP_SIZE (stmt_info);
863 /* Function vect_model_store_cost
865 Models cost for stores. In the case of grouped accesses, one access
866 has the overhead of the grouped access attributed to it. */
869 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
870 bool store_lanes_p, enum vect_def_type dt,
872 stmt_vector_for_cost *prologue_cost_vec,
873 stmt_vector_for_cost *body_cost_vec)
876 unsigned int inside_cost = 0, prologue_cost = 0;
877 struct data_reference *first_dr;
880 if (dt == vect_constant_def || dt == vect_external_def)
881 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
882 stmt_info, 0, vect_prologue);
884 /* Grouped access? */
885 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
889 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
894 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
895 group_size = vect_cost_group_size (stmt_info);
898 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
900 /* Not a grouped access. */
904 first_dr = STMT_VINFO_DATA_REF (stmt_info);
907 /* We assume that the cost of a single store-lanes instruction is
908 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
909 access is instead being provided by a permute-and-store operation,
910 include the cost of the permutes. */
911 if (!store_lanes_p && group_size > 1
912 && !STMT_VINFO_STRIDED_P (stmt_info))
914 /* Uses a high and low interleave or shuffle operations for each
916 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
917 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
918 stmt_info, 0, vect_body);
920 if (dump_enabled_p ())
921 dump_printf_loc (MSG_NOTE, vect_location,
922 "vect_model_store_cost: strided group_size = %d .\n",
926 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
927 /* Costs of the stores. */
928 if (STMT_VINFO_STRIDED_P (stmt_info)
929 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
931 /* N scalar stores plus extracting the elements. */
932 inside_cost += record_stmt_cost (body_cost_vec,
933 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
934 scalar_store, stmt_info, 0, vect_body);
937 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
939 if (STMT_VINFO_STRIDED_P (stmt_info))
940 inside_cost += record_stmt_cost (body_cost_vec,
941 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
942 vec_to_scalar, stmt_info, 0, vect_body);
944 if (dump_enabled_p ())
945 dump_printf_loc (MSG_NOTE, vect_location,
946 "vect_model_store_cost: inside_cost = %d, "
947 "prologue_cost = %d .\n", inside_cost, prologue_cost);
951 /* Calculate cost of DR's memory access. */
953 vect_get_store_cost (struct data_reference *dr, int ncopies,
954 unsigned int *inside_cost,
955 stmt_vector_for_cost *body_cost_vec)
957 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
958 gimple *stmt = DR_STMT (dr);
959 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
961 switch (alignment_support_scheme)
965 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
966 vector_store, stmt_info, 0,
969 if (dump_enabled_p ())
970 dump_printf_loc (MSG_NOTE, vect_location,
971 "vect_model_store_cost: aligned.\n");
975 case dr_unaligned_supported:
977 /* Here, we assign an additional cost for the unaligned store. */
978 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
979 unaligned_store, stmt_info,
980 DR_MISALIGNMENT (dr), vect_body);
981 if (dump_enabled_p ())
982 dump_printf_loc (MSG_NOTE, vect_location,
983 "vect_model_store_cost: unaligned supported by "
988 case dr_unaligned_unsupported:
990 *inside_cost = VECT_MAX_COST;
992 if (dump_enabled_p ())
993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
994 "vect_model_store_cost: unsupported access.\n");
1004 /* Function vect_model_load_cost
1006 Models cost for loads. In the case of grouped accesses, the last access
1007 has the overhead of the grouped access attributed to it. Since unaligned
1008 accesses are supported for loads, we also account for the costs of the
1009 access scheme chosen. */
1012 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1013 bool load_lanes_p, slp_tree slp_node,
1014 stmt_vector_for_cost *prologue_cost_vec,
1015 stmt_vector_for_cost *body_cost_vec)
1019 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1020 unsigned int inside_cost = 0, prologue_cost = 0;
1022 /* Grouped accesses? */
1023 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1024 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1026 group_size = vect_cost_group_size (stmt_info);
1027 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1029 /* Not a grouped access. */
1036 /* We assume that the cost of a single load-lanes instruction is
1037 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1038 access is instead being provided by a load-and-permute operation,
1039 include the cost of the permutes. */
1040 if (!load_lanes_p && group_size > 1
1041 && !STMT_VINFO_STRIDED_P (stmt_info))
1043 /* Uses an even and odd extract operations or shuffle operations
1044 for each needed permute. */
1045 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1046 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1047 stmt_info, 0, vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_load_cost: strided group_size = %d .\n",
1055 /* The loads themselves. */
1056 if (STMT_VINFO_STRIDED_P (stmt_info)
1057 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1059 /* N scalar loads plus gathering them into a vector. */
1060 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1061 inside_cost += record_stmt_cost (body_cost_vec,
1062 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1063 scalar_load, stmt_info, 0, vect_body);
1066 vect_get_load_cost (first_dr, ncopies,
1067 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1068 || group_size > 1 || slp_node),
1069 &inside_cost, &prologue_cost,
1070 prologue_cost_vec, body_cost_vec, true);
1071 if (STMT_VINFO_STRIDED_P (stmt_info))
1072 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1073 stmt_info, 0, vect_body);
1075 if (dump_enabled_p ())
1076 dump_printf_loc (MSG_NOTE, vect_location,
1077 "vect_model_load_cost: inside_cost = %d, "
1078 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1082 /* Calculate cost of DR's memory access. */
1084 vect_get_load_cost (struct data_reference *dr, int ncopies,
1085 bool add_realign_cost, unsigned int *inside_cost,
1086 unsigned int *prologue_cost,
1087 stmt_vector_for_cost *prologue_cost_vec,
1088 stmt_vector_for_cost *body_cost_vec,
1089 bool record_prologue_costs)
1091 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1092 gimple *stmt = DR_STMT (dr);
1093 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1095 switch (alignment_support_scheme)
1099 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1100 stmt_info, 0, vect_body);
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE, vect_location,
1104 "vect_model_load_cost: aligned.\n");
1108 case dr_unaligned_supported:
1110 /* Here, we assign an additional cost for the unaligned load. */
1111 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1112 unaligned_load, stmt_info,
1113 DR_MISALIGNMENT (dr), vect_body);
1115 if (dump_enabled_p ())
1116 dump_printf_loc (MSG_NOTE, vect_location,
1117 "vect_model_load_cost: unaligned supported by "
1122 case dr_explicit_realign:
1124 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1125 vector_load, stmt_info, 0, vect_body);
1126 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1127 vec_perm, stmt_info, 0, vect_body);
1129 /* FIXME: If the misalignment remains fixed across the iterations of
1130 the containing loop, the following cost should be added to the
1132 if (targetm.vectorize.builtin_mask_for_load)
1133 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1134 stmt_info, 0, vect_body);
1136 if (dump_enabled_p ())
1137 dump_printf_loc (MSG_NOTE, vect_location,
1138 "vect_model_load_cost: explicit realign\n");
1142 case dr_explicit_realign_optimized:
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: unaligned software "
1149 /* Unaligned software pipeline has a load of an address, an initial
1150 load, and possibly a mask operation to "prime" the loop. However,
1151 if this is an access in a group of loads, which provide grouped
1152 access, then the above cost should only be considered for one
1153 access in the group. Inside the loop, there is a load op
1154 and a realignment op. */
1156 if (add_realign_cost && record_prologue_costs)
1158 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1159 vector_stmt, stmt_info,
1161 if (targetm.vectorize.builtin_mask_for_load)
1162 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1163 vector_stmt, stmt_info,
1167 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1168 stmt_info, 0, vect_body);
1169 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1170 stmt_info, 0, vect_body);
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE, vect_location,
1174 "vect_model_load_cost: explicit realign optimized"
1180 case dr_unaligned_unsupported:
1182 *inside_cost = VECT_MAX_COST;
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1186 "vect_model_load_cost: unsupported access.\n");
1195 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1196 the loop preheader for the vectorized stmt STMT. */
1199 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1202 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1205 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1206 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1210 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1214 if (nested_in_vect_loop_p (loop, stmt))
1217 pe = loop_preheader_edge (loop);
1218 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1219 gcc_assert (!new_bb);
1223 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1225 gimple_stmt_iterator gsi_bb_start;
1227 gcc_assert (bb_vinfo);
1228 bb = BB_VINFO_BB (bb_vinfo);
1229 gsi_bb_start = gsi_after_labels (bb);
1230 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1234 if (dump_enabled_p ())
1236 dump_printf_loc (MSG_NOTE, vect_location,
1237 "created new init_stmt: ");
1238 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1242 /* Function vect_init_vector.
1244 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1245 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1246 vector type a vector with all elements equal to VAL is created first.
1247 Place the initialization at BSI if it is not NULL. Otherwise, place the
1248 initialization at the loop preheader.
1249 Return the DEF of INIT_STMT.
1250 It will be used in the vectorization of STMT. */
1253 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1258 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1259 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1261 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1262 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1264 /* Scalar boolean value should be transformed into
1265 all zeros or all ones value before building a vector. */
1266 if (VECTOR_BOOLEAN_TYPE_P (type))
1268 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1269 tree false_val = build_zero_cst (TREE_TYPE (type));
1271 if (CONSTANT_CLASS_P (val))
1272 val = integer_zerop (val) ? false_val : true_val;
1275 new_temp = make_ssa_name (TREE_TYPE (type));
1276 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1277 val, true_val, false_val);
1278 vect_init_vector_1 (stmt, init_stmt, gsi);
1282 else if (CONSTANT_CLASS_P (val))
1283 val = fold_convert (TREE_TYPE (type), val);
1286 new_temp = make_ssa_name (TREE_TYPE (type));
1287 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1288 init_stmt = gimple_build_assign (new_temp,
1289 fold_build1 (VIEW_CONVERT_EXPR,
1293 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1294 vect_init_vector_1 (stmt, init_stmt, gsi);
1298 val = build_vector_from_val (type, val);
1301 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1302 init_stmt = gimple_build_assign (new_temp, val);
1303 vect_init_vector_1 (stmt, init_stmt, gsi);
1308 /* Function vect_get_vec_def_for_operand.
1310 OP is an operand in STMT. This function returns a (vector) def that will be
1311 used in the vectorized stmt for STMT.
1313 In the case that OP is an SSA_NAME which is defined in the loop, then
1314 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1316 In case OP is an invariant or constant, a new stmt that creates a vector def
1317 needs to be introduced. VECTYPE may be used to specify a required type for
1318 vector invariant. */
1321 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1326 stmt_vec_info def_stmt_info = NULL;
1327 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1328 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1329 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1330 enum vect_def_type dt;
1334 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_NOTE, vect_location,
1337 "vect_get_vec_def_for_operand: ");
1338 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1339 dump_printf (MSG_NOTE, "\n");
1342 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1343 gcc_assert (is_simple_use);
1344 if (dump_enabled_p ())
1346 int loc_printed = 0;
1350 dump_printf (MSG_NOTE, " def_stmt = ");
1352 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1353 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1359 /* operand is a constant or a loop invariant. */
1360 case vect_constant_def:
1361 case vect_external_def:
1364 vector_type = vectype;
1365 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1366 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1367 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1369 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1371 gcc_assert (vector_type);
1372 return vect_init_vector (stmt, op, vector_type, NULL);
1375 /* operand is defined inside the loop. */
1376 case vect_internal_def:
1378 /* Get the def from the vectorized stmt. */
1379 def_stmt_info = vinfo_for_stmt (def_stmt);
1381 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1382 /* Get vectorized pattern statement. */
1384 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1385 && !STMT_VINFO_RELEVANT (def_stmt_info))
1386 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1387 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1388 gcc_assert (vec_stmt);
1389 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1390 vec_oprnd = PHI_RESULT (vec_stmt);
1391 else if (is_gimple_call (vec_stmt))
1392 vec_oprnd = gimple_call_lhs (vec_stmt);
1394 vec_oprnd = gimple_assign_lhs (vec_stmt);
1398 /* operand is defined by a loop header phi - reduction */
1399 case vect_reduction_def:
1400 case vect_double_reduction_def:
1401 case vect_nested_cycle:
1402 /* Code should use get_initial_def_for_reduction. */
1405 /* operand is defined by loop-header phi - induction. */
1406 case vect_induction_def:
1408 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1410 /* Get the def from the vectorized stmt. */
1411 def_stmt_info = vinfo_for_stmt (def_stmt);
1412 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1413 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1414 vec_oprnd = PHI_RESULT (vec_stmt);
1416 vec_oprnd = gimple_get_lhs (vec_stmt);
1426 /* Function vect_get_vec_def_for_stmt_copy
1428 Return a vector-def for an operand. This function is used when the
1429 vectorized stmt to be created (by the caller to this function) is a "copy"
1430 created in case the vectorized result cannot fit in one vector, and several
1431 copies of the vector-stmt are required. In this case the vector-def is
1432 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1433 of the stmt that defines VEC_OPRND.
1434 DT is the type of the vector def VEC_OPRND.
1437 In case the vectorization factor (VF) is bigger than the number
1438 of elements that can fit in a vectype (nunits), we have to generate
1439 more than one vector stmt to vectorize the scalar stmt. This situation
1440 arises when there are multiple data-types operated upon in the loop; the
1441 smallest data-type determines the VF, and as a result, when vectorizing
1442 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1443 vector stmt (each computing a vector of 'nunits' results, and together
1444 computing 'VF' results in each iteration). This function is called when
1445 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1446 which VF=16 and nunits=4, so the number of copies required is 4):
1448 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1450 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1451 VS1.1: vx.1 = memref1 VS1.2
1452 VS1.2: vx.2 = memref2 VS1.3
1453 VS1.3: vx.3 = memref3
1455 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1456 VSnew.1: vz1 = vx.1 + ... VSnew.2
1457 VSnew.2: vz2 = vx.2 + ... VSnew.3
1458 VSnew.3: vz3 = vx.3 + ...
1460 The vectorization of S1 is explained in vectorizable_load.
1461 The vectorization of S2:
1462 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1463 the function 'vect_get_vec_def_for_operand' is called to
1464 get the relevant vector-def for each operand of S2. For operand x it
1465 returns the vector-def 'vx.0'.
1467 To create the remaining copies of the vector-stmt (VSnew.j), this
1468 function is called to get the relevant vector-def for each operand. It is
1469 obtained from the respective VS1.j stmt, which is recorded in the
1470 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1472 For example, to obtain the vector-def 'vx.1' in order to create the
1473 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1474 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1475 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1476 and return its def ('vx.1').
1477 Overall, to create the above sequence this function will be called 3 times:
1478 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1479 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1480 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1483 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1485 gimple *vec_stmt_for_operand;
1486 stmt_vec_info def_stmt_info;
1488 /* Do nothing; can reuse same def. */
1489 if (dt == vect_external_def || dt == vect_constant_def )
1492 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1493 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1494 gcc_assert (def_stmt_info);
1495 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1496 gcc_assert (vec_stmt_for_operand);
1497 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1498 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1500 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1505 /* Get vectorized definitions for the operands to create a copy of an original
1506 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1509 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1510 vec<tree> *vec_oprnds0,
1511 vec<tree> *vec_oprnds1)
1513 tree vec_oprnd = vec_oprnds0->pop ();
1515 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1516 vec_oprnds0->quick_push (vec_oprnd);
1518 if (vec_oprnds1 && vec_oprnds1->length ())
1520 vec_oprnd = vec_oprnds1->pop ();
1521 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1522 vec_oprnds1->quick_push (vec_oprnd);
1527 /* Get vectorized definitions for OP0 and OP1.
1528 REDUC_INDEX is the index of reduction operand in case of reduction,
1529 and -1 otherwise. */
1532 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1533 vec<tree> *vec_oprnds0,
1534 vec<tree> *vec_oprnds1,
1535 slp_tree slp_node, int reduc_index)
1539 int nops = (op1 == NULL_TREE) ? 1 : 2;
1540 auto_vec<tree> ops (nops);
1541 auto_vec<vec<tree> > vec_defs (nops);
1543 ops.quick_push (op0);
1545 ops.quick_push (op1);
1547 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1549 *vec_oprnds0 = vec_defs[0];
1551 *vec_oprnds1 = vec_defs[1];
1557 vec_oprnds0->create (1);
1558 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1559 vec_oprnds0->quick_push (vec_oprnd);
1563 vec_oprnds1->create (1);
1564 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1565 vec_oprnds1->quick_push (vec_oprnd);
1571 /* Function vect_finish_stmt_generation.
1573 Insert a new stmt. */
1576 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1577 gimple_stmt_iterator *gsi)
1579 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1580 vec_info *vinfo = stmt_info->vinfo;
1582 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1584 if (!gsi_end_p (*gsi)
1585 && gimple_has_mem_ops (vec_stmt))
1587 gimple *at_stmt = gsi_stmt (*gsi);
1588 tree vuse = gimple_vuse (at_stmt);
1589 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1591 tree vdef = gimple_vdef (at_stmt);
1592 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1593 /* If we have an SSA vuse and insert a store, update virtual
1594 SSA form to avoid triggering the renamer. Do so only
1595 if we can easily see all uses - which is what almost always
1596 happens with the way vectorized stmts are inserted. */
1597 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1598 && ((is_gimple_assign (vec_stmt)
1599 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1600 || (is_gimple_call (vec_stmt)
1601 && !(gimple_call_flags (vec_stmt)
1602 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1604 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1605 gimple_set_vdef (vec_stmt, new_vdef);
1606 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1610 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1612 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1614 if (dump_enabled_p ())
1616 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1617 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1620 gimple_set_location (vec_stmt, gimple_location (stmt));
1622 /* While EH edges will generally prevent vectorization, stmt might
1623 e.g. be in a must-not-throw region. Ensure newly created stmts
1624 that could throw are part of the same region. */
1625 int lp_nr = lookup_stmt_eh_lp (stmt);
1626 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1627 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1630 /* We want to vectorize a call to combined function CFN with function
1631 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1632 as the types of all inputs. Check whether this is possible using
1633 an internal function, returning its code if so or IFN_LAST if not. */
1636 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1637 tree vectype_out, tree vectype_in)
1640 if (internal_fn_p (cfn))
1641 ifn = as_internal_fn (cfn);
1643 ifn = associated_internal_fn (fndecl);
1644 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1646 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1647 if (info.vectorizable)
1649 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1650 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1651 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1652 OPTIMIZE_FOR_SPEED))
1660 static tree permute_vec_elements (tree, tree, tree, gimple *,
1661 gimple_stmt_iterator *);
1664 /* Function vectorizable_mask_load_store.
1666 Check if STMT performs a conditional load or store that can be vectorized.
1667 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1668 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1669 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1672 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1673 gimple **vec_stmt, slp_tree slp_node)
1675 tree vec_dest = NULL;
1676 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1677 stmt_vec_info prev_stmt_info;
1678 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1679 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1680 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1681 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1682 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1683 tree rhs_vectype = NULL_TREE;
1688 tree dataref_ptr = NULL_TREE;
1690 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1694 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1695 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1696 int gather_scale = 1;
1697 enum vect_def_type gather_dt = vect_unknown_def_type;
1701 enum vect_def_type dt;
1703 if (slp_node != NULL)
1706 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1707 gcc_assert (ncopies >= 1);
1709 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1710 mask = gimple_call_arg (stmt, 2);
1712 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
1715 /* FORNOW. This restriction should be relaxed. */
1716 if (nested_in_vect_loop && ncopies > 1)
1718 if (dump_enabled_p ())
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1720 "multiple types in nested loop.");
1724 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1727 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
1731 if (!STMT_VINFO_DATA_REF (stmt_info))
1734 elem_type = TREE_TYPE (vectype);
1736 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1739 if (STMT_VINFO_STRIDED_P (stmt_info))
1742 if (TREE_CODE (mask) != SSA_NAME)
1745 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
1749 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
1751 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
1752 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
1757 tree rhs = gimple_call_arg (stmt, 3);
1758 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
1762 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1765 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
1766 &gather_off, &gather_scale);
1767 gcc_assert (gather_decl);
1768 if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
1769 &gather_off_vectype))
1771 if (dump_enabled_p ())
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1773 "gather index use not simple.");
1777 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1779 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1780 if (TREE_CODE (masktype) == INTEGER_TYPE)
1782 if (dump_enabled_p ())
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1784 "masked gather with integer mask not supported.");
1788 else if (tree_int_cst_compare (nested_in_vect_loop
1789 ? STMT_VINFO_DR_STEP (stmt_info)
1790 : DR_STEP (dr), size_zero_node) <= 0)
1792 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1793 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
1794 TYPE_MODE (mask_vectype),
1797 && !useless_type_conversion_p (vectype, rhs_vectype)))
1800 if (!vec_stmt) /* transformation not required. */
1802 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1804 vect_model_store_cost (stmt_info, ncopies, false, dt,
1807 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1813 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1815 tree vec_oprnd0 = NULL_TREE, op;
1816 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1817 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1818 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1819 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1820 tree mask_perm_mask = NULL_TREE;
1821 edge pe = loop_preheader_edge (loop);
1824 enum { NARROW, NONE, WIDEN } modifier;
1825 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1827 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1828 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1829 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1830 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1831 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1832 scaletype = TREE_VALUE (arglist);
1833 gcc_checking_assert (types_compatible_p (srctype, rettype)
1834 && types_compatible_p (srctype, masktype));
1836 if (nunits == gather_off_nunits)
1838 else if (nunits == gather_off_nunits / 2)
1840 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1843 for (i = 0; i < gather_off_nunits; ++i)
1844 sel[i] = i | nunits;
1846 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1848 else if (nunits == gather_off_nunits * 2)
1850 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1853 for (i = 0; i < nunits; ++i)
1854 sel[i] = i < gather_off_nunits
1855 ? i : i + nunits - gather_off_nunits;
1857 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1859 for (i = 0; i < nunits; ++i)
1860 sel[i] = i | gather_off_nunits;
1861 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1866 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1868 ptr = fold_convert (ptrtype, gather_base);
1869 if (!is_gimple_min_invariant (ptr))
1871 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1872 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1873 gcc_assert (!new_bb);
1876 scale = build_int_cst (scaletype, gather_scale);
1878 prev_stmt_info = NULL;
1879 for (j = 0; j < ncopies; ++j)
1881 if (modifier == WIDEN && (j & 1))
1882 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1883 perm_mask, stmt, gsi);
1886 = vect_get_vec_def_for_operand (gather_off, stmt);
1889 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1891 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1893 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1894 == TYPE_VECTOR_SUBPARTS (idxtype));
1895 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
1896 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1898 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1903 if (mask_perm_mask && (j & 1))
1904 mask_op = permute_vec_elements (mask_op, mask_op,
1905 mask_perm_mask, stmt, gsi);
1909 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
1912 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
1913 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1917 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1919 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1920 == TYPE_VECTOR_SUBPARTS (masktype));
1921 var = vect_get_new_ssa_name (masktype, vect_simple_var);
1922 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1924 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1925 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1931 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1934 if (!useless_type_conversion_p (vectype, rettype))
1936 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1937 == TYPE_VECTOR_SUBPARTS (rettype));
1938 op = vect_get_new_ssa_name (rettype, vect_simple_var);
1939 gimple_call_set_lhs (new_stmt, op);
1940 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1941 var = make_ssa_name (vec_dest);
1942 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
1943 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1947 var = make_ssa_name (vec_dest, new_stmt);
1948 gimple_call_set_lhs (new_stmt, var);
1951 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1953 if (modifier == NARROW)
1960 var = permute_vec_elements (prev_res, var,
1961 perm_mask, stmt, gsi);
1962 new_stmt = SSA_NAME_DEF_STMT (var);
1965 if (prev_stmt_info == NULL)
1966 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1968 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1969 prev_stmt_info = vinfo_for_stmt (new_stmt);
1972 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
1974 if (STMT_VINFO_RELATED_STMT (stmt_info))
1976 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1977 stmt_info = vinfo_for_stmt (stmt);
1979 tree lhs = gimple_call_lhs (stmt);
1980 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
1981 set_vinfo_for_stmt (new_stmt, stmt_info);
1982 set_vinfo_for_stmt (stmt, NULL);
1983 STMT_VINFO_STMT (stmt_info) = new_stmt;
1984 gsi_replace (gsi, new_stmt, true);
1989 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
1990 prev_stmt_info = NULL;
1991 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
1992 for (i = 0; i < ncopies; i++)
1994 unsigned align, misalign;
1998 tree rhs = gimple_call_arg (stmt, 3);
1999 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2000 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2001 /* We should have catched mismatched types earlier. */
2002 gcc_assert (useless_type_conversion_p (vectype,
2003 TREE_TYPE (vec_rhs)));
2004 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2005 NULL_TREE, &dummy, gsi,
2006 &ptr_incr, false, &inv_p);
2007 gcc_assert (!inv_p);
2011 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2012 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2013 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2014 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2015 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2016 TYPE_SIZE_UNIT (vectype));
2019 align = TYPE_ALIGN_UNIT (vectype);
2020 if (aligned_access_p (dr))
2022 else if (DR_MISALIGNMENT (dr) == -1)
2024 align = TYPE_ALIGN_UNIT (elem_type);
2028 misalign = DR_MISALIGNMENT (dr);
2029 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2031 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2032 misalign ? misalign & -misalign : align);
2034 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2035 ptr, vec_mask, vec_rhs);
2036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2038 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2040 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2041 prev_stmt_info = vinfo_for_stmt (new_stmt);
2046 tree vec_mask = NULL_TREE;
2047 prev_stmt_info = NULL;
2048 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2049 for (i = 0; i < ncopies; i++)
2051 unsigned align, misalign;
2055 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2056 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2057 NULL_TREE, &dummy, gsi,
2058 &ptr_incr, false, &inv_p);
2059 gcc_assert (!inv_p);
2063 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2064 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2065 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2066 TYPE_SIZE_UNIT (vectype));
2069 align = TYPE_ALIGN_UNIT (vectype);
2070 if (aligned_access_p (dr))
2072 else if (DR_MISALIGNMENT (dr) == -1)
2074 align = TYPE_ALIGN_UNIT (elem_type);
2078 misalign = DR_MISALIGNMENT (dr);
2079 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2081 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2082 misalign ? misalign & -misalign : align);
2084 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2086 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2089 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2091 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2092 prev_stmt_info = vinfo_for_stmt (new_stmt);
2098 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2100 if (STMT_VINFO_RELATED_STMT (stmt_info))
2102 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2103 stmt_info = vinfo_for_stmt (stmt);
2105 tree lhs = gimple_call_lhs (stmt);
2106 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2107 set_vinfo_for_stmt (new_stmt, stmt_info);
2108 set_vinfo_for_stmt (stmt, NULL);
2109 STMT_VINFO_STMT (stmt_info) = new_stmt;
2110 gsi_replace (gsi, new_stmt, true);
2116 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2117 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2118 in a single step. On success, store the binary pack code in
2122 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2123 tree_code *convert_code)
2125 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2126 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2130 int multi_step_cvt = 0;
2131 auto_vec <tree, 8> interm_types;
2132 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2133 &code, &multi_step_cvt,
2138 *convert_code = code;
2142 /* Function vectorizable_call.
2144 Check if GS performs a function call that can be vectorized.
2145 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2146 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2147 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2150 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2157 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2158 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2159 tree vectype_out, vectype_in;
2162 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2163 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2164 vec_info *vinfo = stmt_info->vinfo;
2165 tree fndecl, new_temp, rhs_type;
2167 enum vect_def_type dt[3]
2168 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2169 gimple *new_stmt = NULL;
2171 vec<tree> vargs = vNULL;
2172 enum { NARROW, NONE, WIDEN } modifier;
2176 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2179 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2183 /* Is GS a vectorizable call? */
2184 stmt = dyn_cast <gcall *> (gs);
2188 if (gimple_call_internal_p (stmt)
2189 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2190 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2191 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2194 if (gimple_call_lhs (stmt) == NULL_TREE
2195 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2198 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2200 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2202 /* Process function arguments. */
2203 rhs_type = NULL_TREE;
2204 vectype_in = NULL_TREE;
2205 nargs = gimple_call_num_args (stmt);
2207 /* Bail out if the function has more than three arguments, we do not have
2208 interesting builtin functions to vectorize with more than two arguments
2209 except for fma. No arguments is also not good. */
2210 if (nargs == 0 || nargs > 3)
2213 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2214 if (gimple_call_internal_p (stmt)
2215 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2218 rhs_type = unsigned_type_node;
2221 for (i = 0; i < nargs; i++)
2225 op = gimple_call_arg (stmt, i);
2227 /* We can only handle calls with arguments of the same type. */
2229 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2231 if (dump_enabled_p ())
2232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2233 "argument types differ.\n");
2237 rhs_type = TREE_TYPE (op);
2239 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "use not simple.\n");
2248 vectype_in = opvectype;
2250 && opvectype != vectype_in)
2252 if (dump_enabled_p ())
2253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254 "argument vector types differ.\n");
2258 /* If all arguments are external or constant defs use a vector type with
2259 the same size as the output vector type. */
2261 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2263 gcc_assert (vectype_in);
2266 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2269 "no vectype for scalar type ");
2270 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2271 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2278 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2279 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2280 if (nunits_in == nunits_out / 2)
2282 else if (nunits_out == nunits_in)
2284 else if (nunits_out == nunits_in / 2)
2289 /* We only handle functions that do not read or clobber memory. */
2290 if (gimple_vuse (stmt))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "function reads from or writes to memory.\n");
2298 /* For now, we only vectorize functions if a target specific builtin
2299 is available. TODO -- in some cases, it might be profitable to
2300 insert the calls for pieces of the vector, in order to be able
2301 to vectorize other operations in the loop. */
2303 internal_fn ifn = IFN_LAST;
2304 combined_fn cfn = gimple_call_combined_fn (stmt);
2305 tree callee = gimple_call_fndecl (stmt);
2307 /* First try using an internal function. */
2308 tree_code convert_code = ERROR_MARK;
2310 && (modifier == NONE
2311 || (modifier == NARROW
2312 && simple_integer_narrowing (vectype_out, vectype_in,
2314 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2317 /* If that fails, try asking for a target-specific built-in function. */
2318 if (ifn == IFN_LAST)
2320 if (cfn != CFN_LAST)
2321 fndecl = targetm.vectorize.builtin_vectorized_function
2322 (cfn, vectype_out, vectype_in);
2324 fndecl = targetm.vectorize.builtin_md_vectorized_function
2325 (callee, vectype_out, vectype_in);
2328 if (ifn == IFN_LAST && !fndecl)
2330 if (cfn == CFN_GOMP_SIMD_LANE
2333 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2334 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2335 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2336 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2338 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2339 { 0, 1, 2, ... vf - 1 } vector. */
2340 gcc_assert (nargs == 0);
2344 if (dump_enabled_p ())
2345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2346 "function is not vectorizable.\n");
2351 if (slp_node || PURE_SLP_STMT (stmt_info))
2353 else if (modifier == NARROW && ifn == IFN_LAST)
2354 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2356 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2358 /* Sanity check: make sure that at least one copy of the vectorized stmt
2359 needs to be generated. */
2360 gcc_assert (ncopies >= 1);
2362 if (!vec_stmt) /* transformation not required. */
2364 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2368 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2369 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2370 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2371 vec_promote_demote, stmt_info, 0, vect_body);
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2382 scalar_dest = gimple_call_lhs (stmt);
2383 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2385 prev_stmt_info = NULL;
2386 if (modifier == NONE || ifn != IFN_LAST)
2388 tree prev_res = NULL_TREE;
2389 for (j = 0; j < ncopies; ++j)
2391 /* Build argument list for the vectorized call. */
2393 vargs.create (nargs);
2399 auto_vec<vec<tree> > vec_defs (nargs);
2400 vec<tree> vec_oprnds0;
2402 for (i = 0; i < nargs; i++)
2403 vargs.quick_push (gimple_call_arg (stmt, i));
2404 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2405 vec_oprnds0 = vec_defs[0];
2407 /* Arguments are ready. Create the new vector stmt. */
2408 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2411 for (k = 0; k < nargs; k++)
2413 vec<tree> vec_oprndsk = vec_defs[k];
2414 vargs[k] = vec_oprndsk[i];
2416 if (modifier == NARROW)
2418 tree half_res = make_ssa_name (vectype_in);
2419 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2420 gimple_call_set_lhs (new_stmt, half_res);
2421 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2424 prev_res = half_res;
2427 new_temp = make_ssa_name (vec_dest);
2428 new_stmt = gimple_build_assign (new_temp, convert_code,
2429 prev_res, half_res);
2433 if (ifn != IFN_LAST)
2434 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2436 new_stmt = gimple_build_call_vec (fndecl, vargs);
2437 new_temp = make_ssa_name (vec_dest, new_stmt);
2438 gimple_call_set_lhs (new_stmt, new_temp);
2440 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2441 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2444 for (i = 0; i < nargs; i++)
2446 vec<tree> vec_oprndsi = vec_defs[i];
2447 vec_oprndsi.release ();
2452 for (i = 0; i < nargs; i++)
2454 op = gimple_call_arg (stmt, i);
2457 = vect_get_vec_def_for_operand (op, stmt);
2460 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2462 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2465 vargs.quick_push (vec_oprnd0);
2468 if (gimple_call_internal_p (stmt)
2469 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2471 tree *v = XALLOCAVEC (tree, nunits_out);
2473 for (k = 0; k < nunits_out; ++k)
2474 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2475 tree cst = build_vector (vectype_out, v);
2477 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2478 gimple *init_stmt = gimple_build_assign (new_var, cst);
2479 vect_init_vector_1 (stmt, init_stmt, NULL);
2480 new_temp = make_ssa_name (vec_dest);
2481 new_stmt = gimple_build_assign (new_temp, new_var);
2483 else if (modifier == NARROW)
2485 tree half_res = make_ssa_name (vectype_in);
2486 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2487 gimple_call_set_lhs (new_stmt, half_res);
2488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2491 prev_res = half_res;
2494 new_temp = make_ssa_name (vec_dest);
2495 new_stmt = gimple_build_assign (new_temp, convert_code,
2496 prev_res, half_res);
2500 if (ifn != IFN_LAST)
2501 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2503 new_stmt = gimple_build_call_vec (fndecl, vargs);
2504 new_temp = make_ssa_name (vec_dest, new_stmt);
2505 gimple_call_set_lhs (new_stmt, new_temp);
2507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2509 if (j == (modifier == NARROW ? 1 : 0))
2510 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2512 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2514 prev_stmt_info = vinfo_for_stmt (new_stmt);
2517 else if (modifier == NARROW)
2519 for (j = 0; j < ncopies; ++j)
2521 /* Build argument list for the vectorized call. */
2523 vargs.create (nargs * 2);
2529 auto_vec<vec<tree> > vec_defs (nargs);
2530 vec<tree> vec_oprnds0;
2532 for (i = 0; i < nargs; i++)
2533 vargs.quick_push (gimple_call_arg (stmt, i));
2534 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2535 vec_oprnds0 = vec_defs[0];
2537 /* Arguments are ready. Create the new vector stmt. */
2538 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2542 for (k = 0; k < nargs; k++)
2544 vec<tree> vec_oprndsk = vec_defs[k];
2545 vargs.quick_push (vec_oprndsk[i]);
2546 vargs.quick_push (vec_oprndsk[i + 1]);
2548 if (ifn != IFN_LAST)
2549 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2551 new_stmt = gimple_build_call_vec (fndecl, vargs);
2552 new_temp = make_ssa_name (vec_dest, new_stmt);
2553 gimple_call_set_lhs (new_stmt, new_temp);
2554 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2555 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2558 for (i = 0; i < nargs; i++)
2560 vec<tree> vec_oprndsi = vec_defs[i];
2561 vec_oprndsi.release ();
2566 for (i = 0; i < nargs; i++)
2568 op = gimple_call_arg (stmt, i);
2572 = vect_get_vec_def_for_operand (op, stmt);
2574 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2578 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2580 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2582 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2585 vargs.quick_push (vec_oprnd0);
2586 vargs.quick_push (vec_oprnd1);
2589 new_stmt = gimple_build_call_vec (fndecl, vargs);
2590 new_temp = make_ssa_name (vec_dest, new_stmt);
2591 gimple_call_set_lhs (new_stmt, new_temp);
2592 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2595 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2597 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2599 prev_stmt_info = vinfo_for_stmt (new_stmt);
2602 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2605 /* No current target implements this case. */
2610 /* The call in STMT might prevent it from being removed in dce.
2611 We however cannot remove it here, due to the way the ssa name
2612 it defines is mapped to the new definition. So just replace
2613 rhs of the statement with something harmless. */
2618 type = TREE_TYPE (scalar_dest);
2619 if (is_pattern_stmt_p (stmt_info))
2620 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2622 lhs = gimple_call_lhs (stmt);
2624 if (gimple_call_internal_p (stmt)
2625 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2627 /* Replace uses of the lhs of GOMP_SIMD_LANE call outside the loop
2628 with vf - 1 rather than 0, that is the last iteration of the
2630 imm_use_iterator iter;
2631 use_operand_p use_p;
2633 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2635 basic_block use_bb = gimple_bb (use_stmt);
2637 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
2639 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2640 SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
2641 ncopies * nunits_out - 1));
2642 update_stmt (use_stmt);
2647 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2648 set_vinfo_for_stmt (new_stmt, stmt_info);
2649 set_vinfo_for_stmt (stmt, NULL);
2650 STMT_VINFO_STMT (stmt_info) = new_stmt;
2651 gsi_replace (gsi, new_stmt, false);
2657 struct simd_call_arg_info
2661 enum vect_def_type dt;
2662 HOST_WIDE_INT linear_step;
2664 bool simd_lane_linear;
2667 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2668 is linear within simd lane (but not within whole loop), note it in
2672 vect_simd_lane_linear (tree op, struct loop *loop,
2673 struct simd_call_arg_info *arginfo)
2675 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2677 if (!is_gimple_assign (def_stmt)
2678 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2679 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2682 tree base = gimple_assign_rhs1 (def_stmt);
2683 HOST_WIDE_INT linear_step = 0;
2684 tree v = gimple_assign_rhs2 (def_stmt);
2685 while (TREE_CODE (v) == SSA_NAME)
2688 def_stmt = SSA_NAME_DEF_STMT (v);
2689 if (is_gimple_assign (def_stmt))
2690 switch (gimple_assign_rhs_code (def_stmt))
2693 t = gimple_assign_rhs2 (def_stmt);
2694 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2696 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2697 v = gimple_assign_rhs1 (def_stmt);
2700 t = gimple_assign_rhs2 (def_stmt);
2701 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2703 linear_step = tree_to_shwi (t);
2704 v = gimple_assign_rhs1 (def_stmt);
2707 t = gimple_assign_rhs1 (def_stmt);
2708 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2709 || (TYPE_PRECISION (TREE_TYPE (v))
2710 < TYPE_PRECISION (TREE_TYPE (t))))
2719 else if (is_gimple_call (def_stmt)
2720 && gimple_call_internal_p (def_stmt)
2721 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2723 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2724 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2729 arginfo->linear_step = linear_step;
2731 arginfo->simd_lane_linear = true;
2737 /* Function vectorizable_simd_clone_call.
2739 Check if STMT performs a function call that can be vectorized
2740 by calling a simd clone of the function.
2741 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2742 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2743 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2746 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2747 gimple **vec_stmt, slp_tree slp_node)
2752 tree vec_oprnd0 = NULL_TREE;
2753 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2755 unsigned int nunits;
2756 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2757 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2758 vec_info *vinfo = stmt_info->vinfo;
2759 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2760 tree fndecl, new_temp;
2762 gimple *new_stmt = NULL;
2764 vec<simd_call_arg_info> arginfo = vNULL;
2765 vec<tree> vargs = vNULL;
2767 tree lhs, rtype, ratype;
2768 vec<constructor_elt, va_gc> *ret_ctor_elts;
2770 /* Is STMT a vectorizable call? */
2771 if (!is_gimple_call (stmt))
2774 fndecl = gimple_call_fndecl (stmt);
2775 if (fndecl == NULL_TREE)
2778 struct cgraph_node *node = cgraph_node::get (fndecl);
2779 if (node == NULL || node->simd_clones == NULL)
2782 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2785 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2789 if (gimple_call_lhs (stmt)
2790 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2793 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2795 vectype = STMT_VINFO_VECTYPE (stmt_info);
2797 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2801 if (slp_node || PURE_SLP_STMT (stmt_info))
2804 /* Process function arguments. */
2805 nargs = gimple_call_num_args (stmt);
2807 /* Bail out if the function has zero arguments. */
2811 arginfo.create (nargs);
2813 for (i = 0; i < nargs; i++)
2815 simd_call_arg_info thisarginfo;
2818 thisarginfo.linear_step = 0;
2819 thisarginfo.align = 0;
2820 thisarginfo.op = NULL_TREE;
2821 thisarginfo.simd_lane_linear = false;
2823 op = gimple_call_arg (stmt, i);
2824 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
2825 &thisarginfo.vectype)
2826 || thisarginfo.dt == vect_uninitialized_def)
2828 if (dump_enabled_p ())
2829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2830 "use not simple.\n");
2835 if (thisarginfo.dt == vect_constant_def
2836 || thisarginfo.dt == vect_external_def)
2837 gcc_assert (thisarginfo.vectype == NULL_TREE);
2839 gcc_assert (thisarginfo.vectype != NULL_TREE);
2841 /* For linear arguments, the analyze phase should have saved
2842 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2843 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2844 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2846 gcc_assert (vec_stmt);
2847 thisarginfo.linear_step
2848 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2850 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2851 thisarginfo.simd_lane_linear
2852 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2853 == boolean_true_node);
2854 /* If loop has been peeled for alignment, we need to adjust it. */
2855 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2856 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2857 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2859 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2860 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2861 tree opt = TREE_TYPE (thisarginfo.op);
2862 bias = fold_convert (TREE_TYPE (step), bias);
2863 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2865 = fold_build2 (POINTER_TYPE_P (opt)
2866 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2867 thisarginfo.op, bias);
2871 && thisarginfo.dt != vect_constant_def
2872 && thisarginfo.dt != vect_external_def
2874 && TREE_CODE (op) == SSA_NAME
2875 && simple_iv (loop, loop_containing_stmt (stmt), op,
2877 && tree_fits_shwi_p (iv.step))
2879 thisarginfo.linear_step = tree_to_shwi (iv.step);
2880 thisarginfo.op = iv.base;
2882 else if ((thisarginfo.dt == vect_constant_def
2883 || thisarginfo.dt == vect_external_def)
2884 && POINTER_TYPE_P (TREE_TYPE (op)))
2885 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2886 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2888 if (POINTER_TYPE_P (TREE_TYPE (op))
2889 && !thisarginfo.linear_step
2891 && thisarginfo.dt != vect_constant_def
2892 && thisarginfo.dt != vect_external_def
2895 && TREE_CODE (op) == SSA_NAME)
2896 vect_simd_lane_linear (op, loop, &thisarginfo);
2898 arginfo.quick_push (thisarginfo);
2901 unsigned int badness = 0;
2902 struct cgraph_node *bestn = NULL;
2903 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2904 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2906 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2907 n = n->simdclone->next_clone)
2909 unsigned int this_badness = 0;
2910 if (n->simdclone->simdlen
2911 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2912 || n->simdclone->nargs != nargs)
2914 if (n->simdclone->simdlen
2915 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2916 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2917 - exact_log2 (n->simdclone->simdlen)) * 1024;
2918 if (n->simdclone->inbranch)
2919 this_badness += 2048;
2920 int target_badness = targetm.simd_clone.usable (n);
2921 if (target_badness < 0)
2923 this_badness += target_badness * 512;
2924 /* FORNOW: Have to add code to add the mask argument. */
2925 if (n->simdclone->inbranch)
2927 for (i = 0; i < nargs; i++)
2929 switch (n->simdclone->args[i].arg_type)
2931 case SIMD_CLONE_ARG_TYPE_VECTOR:
2932 if (!useless_type_conversion_p
2933 (n->simdclone->args[i].orig_type,
2934 TREE_TYPE (gimple_call_arg (stmt, i))))
2936 else if (arginfo[i].dt == vect_constant_def
2937 || arginfo[i].dt == vect_external_def
2938 || arginfo[i].linear_step)
2941 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2942 if (arginfo[i].dt != vect_constant_def
2943 && arginfo[i].dt != vect_external_def)
2946 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2947 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
2948 if (arginfo[i].dt == vect_constant_def
2949 || arginfo[i].dt == vect_external_def
2950 || (arginfo[i].linear_step
2951 != n->simdclone->args[i].linear_step))
2954 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2955 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
2956 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
2957 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
2958 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
2959 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
2963 case SIMD_CLONE_ARG_TYPE_MASK:
2966 if (i == (size_t) -1)
2968 if (n->simdclone->args[i].alignment > arginfo[i].align)
2973 if (arginfo[i].align)
2974 this_badness += (exact_log2 (arginfo[i].align)
2975 - exact_log2 (n->simdclone->args[i].alignment));
2977 if (i == (size_t) -1)
2979 if (bestn == NULL || this_badness < badness)
2982 badness = this_badness;
2992 for (i = 0; i < nargs; i++)
2993 if ((arginfo[i].dt == vect_constant_def
2994 || arginfo[i].dt == vect_external_def)
2995 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2998 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3000 if (arginfo[i].vectype == NULL
3001 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3002 > bestn->simdclone->simdlen))
3009 fndecl = bestn->decl;
3010 nunits = bestn->simdclone->simdlen;
3011 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3013 /* If the function isn't const, only allow it in simd loops where user
3014 has asserted that at least nunits consecutive iterations can be
3015 performed using SIMD instructions. */
3016 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3017 && gimple_vuse (stmt))
3023 /* Sanity check: make sure that at least one copy of the vectorized stmt
3024 needs to be generated. */
3025 gcc_assert (ncopies >= 1);
3027 if (!vec_stmt) /* transformation not required. */
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3030 for (i = 0; i < nargs; i++)
3031 if ((bestn->simdclone->args[i].arg_type
3032 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3033 || (bestn->simdclone->args[i].arg_type
3034 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3039 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3040 ? size_type_node : TREE_TYPE (arginfo[i].op);
3041 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3042 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3043 tree sll = arginfo[i].simd_lane_linear
3044 ? boolean_true_node : boolean_false_node;
3045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3047 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE, vect_location,
3050 "=== vectorizable_simd_clone_call ===\n");
3051 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3058 if (dump_enabled_p ())
3059 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3062 scalar_dest = gimple_call_lhs (stmt);
3063 vec_dest = NULL_TREE;
3068 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3069 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3070 if (TREE_CODE (rtype) == ARRAY_TYPE)
3073 rtype = TREE_TYPE (ratype);
3077 prev_stmt_info = NULL;
3078 for (j = 0; j < ncopies; ++j)
3080 /* Build argument list for the vectorized call. */
3082 vargs.create (nargs);
3086 for (i = 0; i < nargs; i++)
3088 unsigned int k, l, m, o;
3090 op = gimple_call_arg (stmt, i);
3091 switch (bestn->simdclone->args[i].arg_type)
3093 case SIMD_CLONE_ARG_TYPE_VECTOR:
3094 atype = bestn->simdclone->args[i].vector_type;
3095 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3096 for (m = j * o; m < (j + 1) * o; m++)
3098 if (TYPE_VECTOR_SUBPARTS (atype)
3099 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3101 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3102 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3103 / TYPE_VECTOR_SUBPARTS (atype));
3104 gcc_assert ((k & (k - 1)) == 0);
3107 = vect_get_vec_def_for_operand (op, stmt);
3110 vec_oprnd0 = arginfo[i].op;
3111 if ((m & (k - 1)) == 0)
3113 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3116 arginfo[i].op = vec_oprnd0;
3118 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3120 bitsize_int ((m & (k - 1)) * prec));
3122 = gimple_build_assign (make_ssa_name (atype),
3124 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3125 vargs.safe_push (gimple_assign_lhs (new_stmt));
3129 k = (TYPE_VECTOR_SUBPARTS (atype)
3130 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3131 gcc_assert ((k & (k - 1)) == 0);
3132 vec<constructor_elt, va_gc> *ctor_elts;
3134 vec_alloc (ctor_elts, k);
3137 for (l = 0; l < k; l++)
3139 if (m == 0 && l == 0)
3141 = vect_get_vec_def_for_operand (op, stmt);
3144 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3146 arginfo[i].op = vec_oprnd0;
3149 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3153 vargs.safe_push (vec_oprnd0);
3156 vec_oprnd0 = build_constructor (atype, ctor_elts);
3158 = gimple_build_assign (make_ssa_name (atype),
3160 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3161 vargs.safe_push (gimple_assign_lhs (new_stmt));
3166 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3167 vargs.safe_push (op);
3169 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3170 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3175 = force_gimple_operand (arginfo[i].op, &stmts, true,
3180 edge pe = loop_preheader_edge (loop);
3181 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3182 gcc_assert (!new_bb);
3184 if (arginfo[i].simd_lane_linear)
3186 vargs.safe_push (arginfo[i].op);
3189 tree phi_res = copy_ssa_name (op);
3190 gphi *new_phi = create_phi_node (phi_res, loop->header);
3191 set_vinfo_for_stmt (new_phi,
3192 new_stmt_vec_info (new_phi, loop_vinfo));
3193 add_phi_arg (new_phi, arginfo[i].op,
3194 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3196 = POINTER_TYPE_P (TREE_TYPE (op))
3197 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3198 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3199 ? sizetype : TREE_TYPE (op);
3201 = wi::mul (bestn->simdclone->args[i].linear_step,
3203 tree tcst = wide_int_to_tree (type, cst);
3204 tree phi_arg = copy_ssa_name (op);
3206 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3207 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3208 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3209 set_vinfo_for_stmt (new_stmt,
3210 new_stmt_vec_info (new_stmt, loop_vinfo));
3211 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3213 arginfo[i].op = phi_res;
3214 vargs.safe_push (phi_res);
3219 = POINTER_TYPE_P (TREE_TYPE (op))
3220 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3221 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3222 ? sizetype : TREE_TYPE (op);
3224 = wi::mul (bestn->simdclone->args[i].linear_step,
3226 tree tcst = wide_int_to_tree (type, cst);
3227 new_temp = make_ssa_name (TREE_TYPE (op));
3228 new_stmt = gimple_build_assign (new_temp, code,
3229 arginfo[i].op, tcst);
3230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3231 vargs.safe_push (new_temp);
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3235 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3236 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3237 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3238 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3239 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3245 new_stmt = gimple_build_call_vec (fndecl, vargs);
3248 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3250 new_temp = create_tmp_var (ratype);
3251 else if (TYPE_VECTOR_SUBPARTS (vectype)
3252 == TYPE_VECTOR_SUBPARTS (rtype))
3253 new_temp = make_ssa_name (vec_dest, new_stmt);
3255 new_temp = make_ssa_name (rtype, new_stmt);
3256 gimple_call_set_lhs (new_stmt, new_temp);
3258 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3262 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3265 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3266 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3267 gcc_assert ((k & (k - 1)) == 0);
3268 for (l = 0; l < k; l++)
3273 t = build_fold_addr_expr (new_temp);
3274 t = build2 (MEM_REF, vectype, t,
3275 build_int_cst (TREE_TYPE (t),
3276 l * prec / BITS_PER_UNIT));
3279 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3280 size_int (prec), bitsize_int (l * prec));
3282 = gimple_build_assign (make_ssa_name (vectype), t);
3283 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3284 if (j == 0 && l == 0)
3285 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3287 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3289 prev_stmt_info = vinfo_for_stmt (new_stmt);
3294 tree clobber = build_constructor (ratype, NULL);
3295 TREE_THIS_VOLATILE (clobber) = 1;
3296 new_stmt = gimple_build_assign (new_temp, clobber);
3297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3301 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3303 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3304 / TYPE_VECTOR_SUBPARTS (rtype));
3305 gcc_assert ((k & (k - 1)) == 0);
3306 if ((j & (k - 1)) == 0)
3307 vec_alloc (ret_ctor_elts, k);
3310 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3311 for (m = 0; m < o; m++)
3313 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3314 size_int (m), NULL_TREE, NULL_TREE);
3316 = gimple_build_assign (make_ssa_name (rtype), tem);
3317 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3318 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3319 gimple_assign_lhs (new_stmt));
3321 tree clobber = build_constructor (ratype, NULL);
3322 TREE_THIS_VOLATILE (clobber) = 1;
3323 new_stmt = gimple_build_assign (new_temp, clobber);
3324 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3327 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3328 if ((j & (k - 1)) != k - 1)
3330 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3332 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3333 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3335 if ((unsigned) j == k - 1)
3336 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3338 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3340 prev_stmt_info = vinfo_for_stmt (new_stmt);
3345 tree t = build_fold_addr_expr (new_temp);
3346 t = build2 (MEM_REF, vectype, t,
3347 build_int_cst (TREE_TYPE (t), 0));
3349 = gimple_build_assign (make_ssa_name (vec_dest), t);
3350 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3351 tree clobber = build_constructor (ratype, NULL);
3352 TREE_THIS_VOLATILE (clobber) = 1;
3353 vect_finish_stmt_generation (stmt,
3354 gimple_build_assign (new_temp,
3360 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3362 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3364 prev_stmt_info = vinfo_for_stmt (new_stmt);
3369 /* The call in STMT might prevent it from being removed in dce.
3370 We however cannot remove it here, due to the way the ssa name
3371 it defines is mapped to the new definition. So just replace
3372 rhs of the statement with something harmless. */
3379 type = TREE_TYPE (scalar_dest);
3380 if (is_pattern_stmt_p (stmt_info))
3381 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3383 lhs = gimple_call_lhs (stmt);
3384 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3387 new_stmt = gimple_build_nop ();
3388 set_vinfo_for_stmt (new_stmt, stmt_info);
3389 set_vinfo_for_stmt (stmt, NULL);
3390 STMT_VINFO_STMT (stmt_info) = new_stmt;
3391 gsi_replace (gsi, new_stmt, true);
3392 unlink_stmt_vdef (stmt);
3398 /* Function vect_gen_widened_results_half
3400 Create a vector stmt whose code, type, number of arguments, and result
3401 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3402 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3403 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3404 needs to be created (DECL is a function-decl of a target-builtin).
3405 STMT is the original scalar stmt that we are vectorizing. */
3408 vect_gen_widened_results_half (enum tree_code code,
3410 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3411 tree vec_dest, gimple_stmt_iterator *gsi,
3417 /* Generate half of the widened result: */
3418 if (code == CALL_EXPR)
3420 /* Target specific support */
3421 if (op_type == binary_op)
3422 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3424 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3425 new_temp = make_ssa_name (vec_dest, new_stmt);
3426 gimple_call_set_lhs (new_stmt, new_temp);
3430 /* Generic support */
3431 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3432 if (op_type != binary_op)
3434 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3435 new_temp = make_ssa_name (vec_dest, new_stmt);
3436 gimple_assign_set_lhs (new_stmt, new_temp);
3438 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3444 /* Get vectorized definitions for loop-based vectorization. For the first
3445 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3446 scalar operand), and for the rest we get a copy with
3447 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3448 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3449 The vectors are collected into VEC_OPRNDS. */
3452 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3453 vec<tree> *vec_oprnds, int multi_step_cvt)
3457 /* Get first vector operand. */
3458 /* All the vector operands except the very first one (that is scalar oprnd)
3460 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3461 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3463 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3465 vec_oprnds->quick_push (vec_oprnd);
3467 /* Get second vector operand. */
3468 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3469 vec_oprnds->quick_push (vec_oprnd);
3473 /* For conversion in multiple steps, continue to get operands
3476 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3480 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3481 For multi-step conversions store the resulting vectors and call the function
3485 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3486 int multi_step_cvt, gimple *stmt,
3488 gimple_stmt_iterator *gsi,
3489 slp_tree slp_node, enum tree_code code,
3490 stmt_vec_info *prev_stmt_info)
3493 tree vop0, vop1, new_tmp, vec_dest;
3495 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3497 vec_dest = vec_dsts.pop ();
3499 for (i = 0; i < vec_oprnds->length (); i += 2)
3501 /* Create demotion operation. */
3502 vop0 = (*vec_oprnds)[i];
3503 vop1 = (*vec_oprnds)[i + 1];
3504 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3505 new_tmp = make_ssa_name (vec_dest, new_stmt);
3506 gimple_assign_set_lhs (new_stmt, new_tmp);
3507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3510 /* Store the resulting vector for next recursive call. */
3511 (*vec_oprnds)[i/2] = new_tmp;
3514 /* This is the last step of the conversion sequence. Store the
3515 vectors in SLP_NODE or in vector info of the scalar statement
3516 (or in STMT_VINFO_RELATED_STMT chain). */
3518 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3521 if (!*prev_stmt_info)
3522 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3524 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3526 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3531 /* For multi-step demotion operations we first generate demotion operations
3532 from the source type to the intermediate types, and then combine the
3533 results (stored in VEC_OPRNDS) in demotion operation to the destination
3537 /* At each level of recursion we have half of the operands we had at the
3539 vec_oprnds->truncate ((i+1)/2);
3540 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3541 stmt, vec_dsts, gsi, slp_node,
3542 VEC_PACK_TRUNC_EXPR,
3546 vec_dsts.quick_push (vec_dest);
3550 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3551 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3552 the resulting vectors and call the function recursively. */
3555 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3556 vec<tree> *vec_oprnds1,
3557 gimple *stmt, tree vec_dest,
3558 gimple_stmt_iterator *gsi,
3559 enum tree_code code1,
3560 enum tree_code code2, tree decl1,
3561 tree decl2, int op_type)
3564 tree vop0, vop1, new_tmp1, new_tmp2;
3565 gimple *new_stmt1, *new_stmt2;
3566 vec<tree> vec_tmp = vNULL;
3568 vec_tmp.create (vec_oprnds0->length () * 2);
3569 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3571 if (op_type == binary_op)
3572 vop1 = (*vec_oprnds1)[i];
3576 /* Generate the two halves of promotion operation. */
3577 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3578 op_type, vec_dest, gsi, stmt);
3579 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3580 op_type, vec_dest, gsi, stmt);
3581 if (is_gimple_call (new_stmt1))
3583 new_tmp1 = gimple_call_lhs (new_stmt1);
3584 new_tmp2 = gimple_call_lhs (new_stmt2);
3588 new_tmp1 = gimple_assign_lhs (new_stmt1);
3589 new_tmp2 = gimple_assign_lhs (new_stmt2);
3592 /* Store the results for the next step. */
3593 vec_tmp.quick_push (new_tmp1);
3594 vec_tmp.quick_push (new_tmp2);
3597 vec_oprnds0->release ();
3598 *vec_oprnds0 = vec_tmp;
3602 /* Check if STMT performs a conversion operation, that can be vectorized.
3603 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3604 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3605 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3608 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3609 gimple **vec_stmt, slp_tree slp_node)
3613 tree op0, op1 = NULL_TREE;
3614 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3615 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3616 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3617 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3618 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3619 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3622 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3623 gimple *new_stmt = NULL;
3624 stmt_vec_info prev_stmt_info;
3627 tree vectype_out, vectype_in;
3629 tree lhs_type, rhs_type;
3630 enum { NARROW, NONE, WIDEN } modifier;
3631 vec<tree> vec_oprnds0 = vNULL;
3632 vec<tree> vec_oprnds1 = vNULL;
3634 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3635 vec_info *vinfo = stmt_info->vinfo;
3636 int multi_step_cvt = 0;
3637 vec<tree> vec_dsts = vNULL;
3638 vec<tree> interm_types = vNULL;
3639 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3641 machine_mode rhs_mode;
3642 unsigned short fltsz;
3644 /* Is STMT a vectorizable conversion? */
3646 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3649 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3653 if (!is_gimple_assign (stmt))
3656 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3659 code = gimple_assign_rhs_code (stmt);
3660 if (!CONVERT_EXPR_CODE_P (code)
3661 && code != FIX_TRUNC_EXPR
3662 && code != FLOAT_EXPR
3663 && code != WIDEN_MULT_EXPR
3664 && code != WIDEN_LSHIFT_EXPR)
3667 op_type = TREE_CODE_LENGTH (code);
3669 /* Check types of lhs and rhs. */
3670 scalar_dest = gimple_assign_lhs (stmt);
3671 lhs_type = TREE_TYPE (scalar_dest);
3672 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3674 op0 = gimple_assign_rhs1 (stmt);
3675 rhs_type = TREE_TYPE (op0);
3677 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3678 && !((INTEGRAL_TYPE_P (lhs_type)
3679 && INTEGRAL_TYPE_P (rhs_type))
3680 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3681 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3684 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3685 && ((INTEGRAL_TYPE_P (lhs_type)
3686 && (TYPE_PRECISION (lhs_type)
3687 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3688 || (INTEGRAL_TYPE_P (rhs_type)
3689 && (TYPE_PRECISION (rhs_type)
3690 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3692 if (dump_enabled_p ())
3693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3694 "type conversion to/from bit-precision unsupported."
3699 /* Check the operands of the operation. */
3700 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3702 if (dump_enabled_p ())
3703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3704 "use not simple.\n");
3707 if (op_type == binary_op)
3711 op1 = gimple_assign_rhs2 (stmt);
3712 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3713 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3715 if (CONSTANT_CLASS_P (op0))
3716 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3718 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3722 if (dump_enabled_p ())
3723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3724 "use not simple.\n");
3729 /* If op0 is an external or constant defs use a vector type of
3730 the same size as the output vector type. */
3732 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3734 gcc_assert (vectype_in);
3737 if (dump_enabled_p ())
3739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3740 "no vectype for scalar type ");
3741 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3742 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3748 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3749 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3751 if (dump_enabled_p ())
3753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3754 "can't convert between boolean and non "
3756 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3757 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3763 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3764 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3765 if (nunits_in < nunits_out)
3767 else if (nunits_out == nunits_in)
3772 /* Multiple types in SLP are handled by creating the appropriate number of
3773 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3775 if (slp_node || PURE_SLP_STMT (stmt_info))
3777 else if (modifier == NARROW)
3778 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3780 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3782 /* Sanity check: make sure that at least one copy of the vectorized stmt
3783 needs to be generated. */
3784 gcc_assert (ncopies >= 1);
3786 /* Supportable by target? */
3790 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3792 if (supportable_convert_operation (code, vectype_out, vectype_in,
3797 if (dump_enabled_p ())
3798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3799 "conversion not supported by target.\n");
3803 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3804 &code1, &code2, &multi_step_cvt,
3807 /* Binary widening operation can only be supported directly by the
3809 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3813 if (code != FLOAT_EXPR
3814 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3815 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3818 rhs_mode = TYPE_MODE (rhs_type);
3819 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3820 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3821 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3822 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3825 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3826 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3827 if (cvt_type == NULL_TREE)
3830 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3832 if (!supportable_convert_operation (code, vectype_out,
3833 cvt_type, &decl1, &codecvt1))
3836 else if (!supportable_widening_operation (code, stmt, vectype_out,
3837 cvt_type, &codecvt1,
3838 &codecvt2, &multi_step_cvt,
3842 gcc_assert (multi_step_cvt == 0);
3844 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3845 vectype_in, &code1, &code2,
3846 &multi_step_cvt, &interm_types))
3850 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3853 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3854 codecvt2 = ERROR_MARK;
3858 interm_types.safe_push (cvt_type);
3859 cvt_type = NULL_TREE;
3864 gcc_assert (op_type == unary_op);
3865 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3866 &code1, &multi_step_cvt,
3870 if (code != FIX_TRUNC_EXPR
3871 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3872 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3875 rhs_mode = TYPE_MODE (rhs_type);
3877 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3878 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3879 if (cvt_type == NULL_TREE)
3881 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3884 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3885 &code1, &multi_step_cvt,
3894 if (!vec_stmt) /* transformation not required. */
3896 if (dump_enabled_p ())
3897 dump_printf_loc (MSG_NOTE, vect_location,
3898 "=== vectorizable_conversion ===\n");
3899 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3901 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3902 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3904 else if (modifier == NARROW)
3906 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3907 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3911 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3912 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3914 interm_types.release ();
3919 if (dump_enabled_p ())
3920 dump_printf_loc (MSG_NOTE, vect_location,
3921 "transform conversion. ncopies = %d.\n", ncopies);
3923 if (op_type == binary_op)
3925 if (CONSTANT_CLASS_P (op0))
3926 op0 = fold_convert (TREE_TYPE (op1), op0);
3927 else if (CONSTANT_CLASS_P (op1))
3928 op1 = fold_convert (TREE_TYPE (op0), op1);
3931 /* In case of multi-step conversion, we first generate conversion operations
3932 to the intermediate types, and then from that types to the final one.
3933 We create vector destinations for the intermediate type (TYPES) received
3934 from supportable_*_operation, and store them in the correct order
3935 for future use in vect_create_vectorized_*_stmts (). */
3936 vec_dsts.create (multi_step_cvt + 1);
3937 vec_dest = vect_create_destination_var (scalar_dest,
3938 (cvt_type && modifier == WIDEN)
3939 ? cvt_type : vectype_out);
3940 vec_dsts.quick_push (vec_dest);
3944 for (i = interm_types.length () - 1;
3945 interm_types.iterate (i, &intermediate_type); i--)
3947 vec_dest = vect_create_destination_var (scalar_dest,
3949 vec_dsts.quick_push (vec_dest);
3954 vec_dest = vect_create_destination_var (scalar_dest,
3956 ? vectype_out : cvt_type);
3960 if (modifier == WIDEN)
3962 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3963 if (op_type == binary_op)
3964 vec_oprnds1.create (1);
3966 else if (modifier == NARROW)
3967 vec_oprnds0.create (
3968 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3970 else if (code == WIDEN_LSHIFT_EXPR)
3971 vec_oprnds1.create (slp_node->vec_stmts_size);
3974 prev_stmt_info = NULL;
3978 for (j = 0; j < ncopies; j++)
3981 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3984 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3986 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3988 /* Arguments are ready, create the new vector stmt. */
3989 if (code1 == CALL_EXPR)
3991 new_stmt = gimple_build_call (decl1, 1, vop0);
3992 new_temp = make_ssa_name (vec_dest, new_stmt);
3993 gimple_call_set_lhs (new_stmt, new_temp);
3997 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3998 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3999 new_temp = make_ssa_name (vec_dest, new_stmt);
4000 gimple_assign_set_lhs (new_stmt, new_temp);
4003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4005 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4008 if (!prev_stmt_info)
4009 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4011 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4012 prev_stmt_info = vinfo_for_stmt (new_stmt);
4019 /* In case the vectorization factor (VF) is bigger than the number
4020 of elements that we can fit in a vectype (nunits), we have to
4021 generate more than one vector stmt - i.e - we need to "unroll"
4022 the vector stmt by a factor VF/nunits. */
4023 for (j = 0; j < ncopies; j++)
4030 if (code == WIDEN_LSHIFT_EXPR)
4035 /* Store vec_oprnd1 for every vector stmt to be created
4036 for SLP_NODE. We check during the analysis that all
4037 the shift arguments are the same. */
4038 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4039 vec_oprnds1.quick_push (vec_oprnd1);
4041 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4045 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4046 &vec_oprnds1, slp_node, -1);
4050 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4051 vec_oprnds0.quick_push (vec_oprnd0);
4052 if (op_type == binary_op)
4054 if (code == WIDEN_LSHIFT_EXPR)
4057 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4058 vec_oprnds1.quick_push (vec_oprnd1);
4064 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4065 vec_oprnds0.truncate (0);
4066 vec_oprnds0.quick_push (vec_oprnd0);
4067 if (op_type == binary_op)
4069 if (code == WIDEN_LSHIFT_EXPR)
4072 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4074 vec_oprnds1.truncate (0);
4075 vec_oprnds1.quick_push (vec_oprnd1);
4079 /* Arguments are ready. Create the new vector stmts. */
4080 for (i = multi_step_cvt; i >= 0; i--)
4082 tree this_dest = vec_dsts[i];
4083 enum tree_code c1 = code1, c2 = code2;
4084 if (i == 0 && codecvt2 != ERROR_MARK)
4089 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4091 stmt, this_dest, gsi,
4092 c1, c2, decl1, decl2,
4096 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4100 if (codecvt1 == CALL_EXPR)
4102 new_stmt = gimple_build_call (decl1, 1, vop0);
4103 new_temp = make_ssa_name (vec_dest, new_stmt);
4104 gimple_call_set_lhs (new_stmt, new_temp);
4108 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4109 new_temp = make_ssa_name (vec_dest);
4110 new_stmt = gimple_build_assign (new_temp, codecvt1,
4114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4117 new_stmt = SSA_NAME_DEF_STMT (vop0);
4120 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4123 if (!prev_stmt_info)
4124 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4126 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4127 prev_stmt_info = vinfo_for_stmt (new_stmt);
4132 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4136 /* In case the vectorization factor (VF) is bigger than the number
4137 of elements that we can fit in a vectype (nunits), we have to
4138 generate more than one vector stmt - i.e - we need to "unroll"
4139 the vector stmt by a factor VF/nunits. */
4140 for (j = 0; j < ncopies; j++)
4144 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4148 vec_oprnds0.truncate (0);
4149 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4150 vect_pow2 (multi_step_cvt) - 1);
4153 /* Arguments are ready. Create the new vector stmts. */
4155 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4157 if (codecvt1 == CALL_EXPR)
4159 new_stmt = gimple_build_call (decl1, 1, vop0);
4160 new_temp = make_ssa_name (vec_dest, new_stmt);
4161 gimple_call_set_lhs (new_stmt, new_temp);
4165 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4166 new_temp = make_ssa_name (vec_dest);
4167 new_stmt = gimple_build_assign (new_temp, codecvt1,
4171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4172 vec_oprnds0[i] = new_temp;
4175 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4176 stmt, vec_dsts, gsi,
4181 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4185 vec_oprnds0.release ();
4186 vec_oprnds1.release ();
4187 vec_dsts.release ();
4188 interm_types.release ();
4194 /* Function vectorizable_assignment.
4196 Check if STMT performs an assignment (copy) that can be vectorized.
4197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4198 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4202 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4203 gimple **vec_stmt, slp_tree slp_node)
4208 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4212 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4215 vec<tree> vec_oprnds = vNULL;
4217 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4218 vec_info *vinfo = stmt_info->vinfo;
4219 gimple *new_stmt = NULL;
4220 stmt_vec_info prev_stmt_info = NULL;
4221 enum tree_code code;
4224 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4227 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4231 /* Is vectorizable assignment? */
4232 if (!is_gimple_assign (stmt))
4235 scalar_dest = gimple_assign_lhs (stmt);
4236 if (TREE_CODE (scalar_dest) != SSA_NAME)
4239 code = gimple_assign_rhs_code (stmt);
4240 if (gimple_assign_single_p (stmt)
4241 || code == PAREN_EXPR
4242 || CONVERT_EXPR_CODE_P (code))
4243 op = gimple_assign_rhs1 (stmt);
4247 if (code == VIEW_CONVERT_EXPR)
4248 op = TREE_OPERAND (op, 0);
4250 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4251 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4253 /* Multiple types in SLP are handled by creating the appropriate number of
4254 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4256 if (slp_node || PURE_SLP_STMT (stmt_info))
4259 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4261 gcc_assert (ncopies >= 1);
4263 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4265 if (dump_enabled_p ())
4266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4267 "use not simple.\n");
4271 /* We can handle NOP_EXPR conversions that do not change the number
4272 of elements or the vector size. */
4273 if ((CONVERT_EXPR_CODE_P (code)
4274 || code == VIEW_CONVERT_EXPR)
4276 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4277 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4278 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4281 /* We do not handle bit-precision changes. */
4282 if ((CONVERT_EXPR_CODE_P (code)
4283 || code == VIEW_CONVERT_EXPR)
4284 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4285 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4286 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4287 || ((TYPE_PRECISION (TREE_TYPE (op))
4288 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4289 /* But a conversion that does not change the bit-pattern is ok. */
4290 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4291 > TYPE_PRECISION (TREE_TYPE (op)))
4292 && TYPE_UNSIGNED (TREE_TYPE (op)))
4293 /* Conversion between boolean types of different sizes is
4294 a simple assignment in case their vectypes are same
4296 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4297 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4299 if (dump_enabled_p ())
4300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4301 "type conversion to/from bit-precision "
4306 if (!vec_stmt) /* transformation not required. */
4308 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4309 if (dump_enabled_p ())
4310 dump_printf_loc (MSG_NOTE, vect_location,
4311 "=== vectorizable_assignment ===\n");
4312 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4321 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4324 for (j = 0; j < ncopies; j++)
4328 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4330 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4332 /* Arguments are ready. create the new vector stmt. */
4333 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4335 if (CONVERT_EXPR_CODE_P (code)
4336 || code == VIEW_CONVERT_EXPR)
4337 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4338 new_stmt = gimple_build_assign (vec_dest, vop);
4339 new_temp = make_ssa_name (vec_dest, new_stmt);
4340 gimple_assign_set_lhs (new_stmt, new_temp);
4341 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4343 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4350 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4352 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4354 prev_stmt_info = vinfo_for_stmt (new_stmt);
4357 vec_oprnds.release ();
4362 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4363 either as shift by a scalar or by a vector. */
4366 vect_supportable_shift (enum tree_code code, tree scalar_type)
4369 machine_mode vec_mode;
4374 vectype = get_vectype_for_scalar_type (scalar_type);
4378 optab = optab_for_tree_code (code, vectype, optab_scalar);
4380 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4382 optab = optab_for_tree_code (code, vectype, optab_vector);
4384 || (optab_handler (optab, TYPE_MODE (vectype))
4385 == CODE_FOR_nothing))
4389 vec_mode = TYPE_MODE (vectype);
4390 icode = (int) optab_handler (optab, vec_mode);
4391 if (icode == CODE_FOR_nothing)
4398 /* Function vectorizable_shift.
4400 Check if STMT performs a shift operation that can be vectorized.
4401 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4402 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4403 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4406 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4407 gimple **vec_stmt, slp_tree slp_node)
4411 tree op0, op1 = NULL;
4412 tree vec_oprnd1 = NULL_TREE;
4413 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4415 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4416 enum tree_code code;
4417 machine_mode vec_mode;
4421 machine_mode optab_op2_mode;
4423 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4424 gimple *new_stmt = NULL;
4425 stmt_vec_info prev_stmt_info;
4432 vec<tree> vec_oprnds0 = vNULL;
4433 vec<tree> vec_oprnds1 = vNULL;
4436 bool scalar_shift_arg = true;
4437 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4438 vec_info *vinfo = stmt_info->vinfo;
4441 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4444 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4448 /* Is STMT a vectorizable binary/unary operation? */
4449 if (!is_gimple_assign (stmt))
4452 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4455 code = gimple_assign_rhs_code (stmt);
4457 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4458 || code == RROTATE_EXPR))
4461 scalar_dest = gimple_assign_lhs (stmt);
4462 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4463 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4464 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4466 if (dump_enabled_p ())
4467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4468 "bit-precision shifts not supported.\n");
4472 op0 = gimple_assign_rhs1 (stmt);
4473 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4475 if (dump_enabled_p ())
4476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4477 "use not simple.\n");
4480 /* If op0 is an external or constant def use a vector type with
4481 the same size as the output vector type. */
4483 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4485 gcc_assert (vectype);
4488 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4490 "no vectype for scalar type\n");
4494 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4495 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4496 if (nunits_out != nunits_in)
4499 op1 = gimple_assign_rhs2 (stmt);
4500 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4502 if (dump_enabled_p ())
4503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4504 "use not simple.\n");
4509 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4513 /* Multiple types in SLP are handled by creating the appropriate number of
4514 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4516 if (slp_node || PURE_SLP_STMT (stmt_info))
4519 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4521 gcc_assert (ncopies >= 1);
4523 /* Determine whether the shift amount is a vector, or scalar. If the
4524 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4526 if ((dt[1] == vect_internal_def
4527 || dt[1] == vect_induction_def)
4529 scalar_shift_arg = false;
4530 else if (dt[1] == vect_constant_def
4531 || dt[1] == vect_external_def
4532 || dt[1] == vect_internal_def)
4534 /* In SLP, need to check whether the shift count is the same,
4535 in loops if it is a constant or invariant, it is always
4539 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4542 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4543 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4544 scalar_shift_arg = false;
4547 /* If the shift amount is computed by a pattern stmt we cannot
4548 use the scalar amount directly thus give up and use a vector
4550 if (dt[1] == vect_internal_def)
4552 gimple *def = SSA_NAME_DEF_STMT (op1);
4553 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4554 scalar_shift_arg = false;
4559 if (dump_enabled_p ())
4560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4561 "operand mode requires invariant argument.\n");
4565 /* Vector shifted by vector. */
4566 if (!scalar_shift_arg)
4568 optab = optab_for_tree_code (code, vectype, optab_vector);
4569 if (dump_enabled_p ())
4570 dump_printf_loc (MSG_NOTE, vect_location,
4571 "vector/vector shift/rotate found.\n");
4574 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4575 if (op1_vectype == NULL_TREE
4576 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4580 "unusable type for last operand in"
4581 " vector/vector shift/rotate.\n");
4585 /* See if the machine has a vector shifted by scalar insn and if not
4586 then see if it has a vector shifted by vector insn. */
4589 optab = optab_for_tree_code (code, vectype, optab_scalar);
4591 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4593 if (dump_enabled_p ())
4594 dump_printf_loc (MSG_NOTE, vect_location,
4595 "vector/scalar shift/rotate found.\n");
4599 optab = optab_for_tree_code (code, vectype, optab_vector);
4601 && (optab_handler (optab, TYPE_MODE (vectype))
4602 != CODE_FOR_nothing))
4604 scalar_shift_arg = false;
4606 if (dump_enabled_p ())
4607 dump_printf_loc (MSG_NOTE, vect_location,
4608 "vector/vector shift/rotate found.\n");
4610 /* Unlike the other binary operators, shifts/rotates have
4611 the rhs being int, instead of the same type as the lhs,
4612 so make sure the scalar is the right type if we are
4613 dealing with vectors of long long/long/short/char. */
4614 if (dt[1] == vect_constant_def)
4615 op1 = fold_convert (TREE_TYPE (vectype), op1);
4616 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4620 && TYPE_MODE (TREE_TYPE (vectype))
4621 != TYPE_MODE (TREE_TYPE (op1)))
4623 if (dump_enabled_p ())
4624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4625 "unusable type for last operand in"
4626 " vector/vector shift/rotate.\n");
4629 if (vec_stmt && !slp_node)
4631 op1 = fold_convert (TREE_TYPE (vectype), op1);
4632 op1 = vect_init_vector (stmt, op1,
4633 TREE_TYPE (vectype), NULL);
4640 /* Supportable by target? */
4643 if (dump_enabled_p ())
4644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4648 vec_mode = TYPE_MODE (vectype);
4649 icode = (int) optab_handler (optab, vec_mode);
4650 if (icode == CODE_FOR_nothing)
4652 if (dump_enabled_p ())
4653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4654 "op not supported by target.\n");
4655 /* Check only during analysis. */
4656 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4657 || (vf < vect_min_worthwhile_factor (code)
4660 if (dump_enabled_p ())
4661 dump_printf_loc (MSG_NOTE, vect_location,
4662 "proceeding using word mode.\n");
4665 /* Worthwhile without SIMD support? Check only during analysis. */
4666 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4667 && vf < vect_min_worthwhile_factor (code)
4670 if (dump_enabled_p ())
4671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4672 "not worthwhile without SIMD support.\n");
4676 if (!vec_stmt) /* transformation not required. */
4678 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_NOTE, vect_location,
4681 "=== vectorizable_shift ===\n");
4682 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_NOTE, vect_location,
4690 "transform binary/unary operation.\n");
4693 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4695 prev_stmt_info = NULL;
4696 for (j = 0; j < ncopies; j++)
4701 if (scalar_shift_arg)
4703 /* Vector shl and shr insn patterns can be defined with scalar
4704 operand 2 (shift operand). In this case, use constant or loop
4705 invariant op1 directly, without extending it to vector mode
4707 optab_op2_mode = insn_data[icode].operand[2].mode;
4708 if (!VECTOR_MODE_P (optab_op2_mode))
4710 if (dump_enabled_p ())
4711 dump_printf_loc (MSG_NOTE, vect_location,
4712 "operand 1 using scalar mode.\n");
4714 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4715 vec_oprnds1.quick_push (vec_oprnd1);
4718 /* Store vec_oprnd1 for every vector stmt to be created
4719 for SLP_NODE. We check during the analysis that all
4720 the shift arguments are the same.
4721 TODO: Allow different constants for different vector
4722 stmts generated for an SLP instance. */
4723 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4724 vec_oprnds1.quick_push (vec_oprnd1);
4729 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4730 (a special case for certain kind of vector shifts); otherwise,
4731 operand 1 should be of a vector type (the usual case). */
4733 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4736 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4740 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4742 /* Arguments are ready. Create the new vector stmt. */
4743 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4745 vop1 = vec_oprnds1[i];
4746 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4747 new_temp = make_ssa_name (vec_dest, new_stmt);
4748 gimple_assign_set_lhs (new_stmt, new_temp);
4749 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4751 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4758 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4760 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4761 prev_stmt_info = vinfo_for_stmt (new_stmt);
4764 vec_oprnds0.release ();
4765 vec_oprnds1.release ();
4771 /* Function vectorizable_operation.
4773 Check if STMT performs a binary, unary or ternary operation that can
4775 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4776 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4777 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4780 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4781 gimple **vec_stmt, slp_tree slp_node)
4785 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4786 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4788 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4789 enum tree_code code;
4790 machine_mode vec_mode;
4794 bool target_support_p;
4796 enum vect_def_type dt[3]
4797 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4798 gimple *new_stmt = NULL;
4799 stmt_vec_info prev_stmt_info;
4805 vec<tree> vec_oprnds0 = vNULL;
4806 vec<tree> vec_oprnds1 = vNULL;
4807 vec<tree> vec_oprnds2 = vNULL;
4808 tree vop0, vop1, vop2;
4809 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4810 vec_info *vinfo = stmt_info->vinfo;
4813 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4816 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4820 /* Is STMT a vectorizable binary/unary operation? */
4821 if (!is_gimple_assign (stmt))
4824 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4827 code = gimple_assign_rhs_code (stmt);
4829 /* For pointer addition, we should use the normal plus for
4830 the vector addition. */
4831 if (code == POINTER_PLUS_EXPR)
4834 /* Support only unary or binary operations. */
4835 op_type = TREE_CODE_LENGTH (code);
4836 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4838 if (dump_enabled_p ())
4839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4840 "num. args = %d (not unary/binary/ternary op).\n",
4845 scalar_dest = gimple_assign_lhs (stmt);
4846 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4848 /* Most operations cannot handle bit-precision types without extra
4850 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4851 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4852 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4853 /* Exception are bitwise binary operations. */
4854 && code != BIT_IOR_EXPR
4855 && code != BIT_XOR_EXPR
4856 && code != BIT_AND_EXPR)
4858 if (dump_enabled_p ())
4859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4860 "bit-precision arithmetic not supported.\n");
4864 op0 = gimple_assign_rhs1 (stmt);
4865 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4867 if (dump_enabled_p ())
4868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4869 "use not simple.\n");
4872 /* If op0 is an external or constant def use a vector type with
4873 the same size as the output vector type. */
4876 /* For boolean type we cannot determine vectype by
4877 invariant value (don't know whether it is a vector
4878 of booleans or vector of integers). We use output
4879 vectype because operations on boolean don't change
4881 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4883 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4885 if (dump_enabled_p ())
4886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4887 "not supported operation on bool value.\n");
4890 vectype = vectype_out;
4893 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4896 gcc_assert (vectype);
4899 if (dump_enabled_p ())
4901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4902 "no vectype for scalar type ");
4903 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4905 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4911 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4912 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4913 if (nunits_out != nunits_in)
4916 if (op_type == binary_op || op_type == ternary_op)
4918 op1 = gimple_assign_rhs2 (stmt);
4919 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4921 if (dump_enabled_p ())
4922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4923 "use not simple.\n");
4927 if (op_type == ternary_op)
4929 op2 = gimple_assign_rhs3 (stmt);
4930 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4934 "use not simple.\n");
4940 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4944 /* Multiple types in SLP are handled by creating the appropriate number of
4945 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4947 if (slp_node || PURE_SLP_STMT (stmt_info))
4950 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4952 gcc_assert (ncopies >= 1);
4954 /* Shifts are handled in vectorizable_shift (). */
4955 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4956 || code == RROTATE_EXPR)
4959 /* Supportable by target? */
4961 vec_mode = TYPE_MODE (vectype);
4962 if (code == MULT_HIGHPART_EXPR)
4963 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4966 optab = optab_for_tree_code (code, vectype, optab_default);
4969 if (dump_enabled_p ())
4970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4974 target_support_p = (optab_handler (optab, vec_mode)
4975 != CODE_FOR_nothing);
4978 if (!target_support_p)
4980 if (dump_enabled_p ())
4981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4982 "op not supported by target.\n");
4983 /* Check only during analysis. */
4984 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4985 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4987 if (dump_enabled_p ())
4988 dump_printf_loc (MSG_NOTE, vect_location,
4989 "proceeding using word mode.\n");
4992 /* Worthwhile without SIMD support? Check only during analysis. */
4993 if (!VECTOR_MODE_P (vec_mode)
4995 && vf < vect_min_worthwhile_factor (code))
4997 if (dump_enabled_p ())
4998 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4999 "not worthwhile without SIMD support.\n");
5003 if (!vec_stmt) /* transformation not required. */
5005 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE, vect_location,
5008 "=== vectorizable_operation ===\n");
5009 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5015 if (dump_enabled_p ())
5016 dump_printf_loc (MSG_NOTE, vect_location,
5017 "transform binary/unary operation.\n");
5020 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5022 /* In case the vectorization factor (VF) is bigger than the number
5023 of elements that we can fit in a vectype (nunits), we have to generate
5024 more than one vector stmt - i.e - we need to "unroll" the
5025 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5026 from one copy of the vector stmt to the next, in the field
5027 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5028 stages to find the correct vector defs to be used when vectorizing
5029 stmts that use the defs of the current stmt. The example below
5030 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5031 we need to create 4 vectorized stmts):
5033 before vectorization:
5034 RELATED_STMT VEC_STMT
5038 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5040 RELATED_STMT VEC_STMT
5041 VS1_0: vx0 = memref0 VS1_1 -
5042 VS1_1: vx1 = memref1 VS1_2 -
5043 VS1_2: vx2 = memref2 VS1_3 -
5044 VS1_3: vx3 = memref3 - -
5045 S1: x = load - VS1_0
5048 step2: vectorize stmt S2 (done here):
5049 To vectorize stmt S2 we first need to find the relevant vector
5050 def for the first operand 'x'. This is, as usual, obtained from
5051 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5052 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5053 relevant vector def 'vx0'. Having found 'vx0' we can generate
5054 the vector stmt VS2_0, and as usual, record it in the
5055 STMT_VINFO_VEC_STMT of stmt S2.
5056 When creating the second copy (VS2_1), we obtain the relevant vector
5057 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5058 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5059 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5060 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5061 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5062 chain of stmts and pointers:
5063 RELATED_STMT VEC_STMT
5064 VS1_0: vx0 = memref0 VS1_1 -
5065 VS1_1: vx1 = memref1 VS1_2 -
5066 VS1_2: vx2 = memref2 VS1_3 -
5067 VS1_3: vx3 = memref3 - -
5068 S1: x = load - VS1_0
5069 VS2_0: vz0 = vx0 + v1 VS2_1 -
5070 VS2_1: vz1 = vx1 + v1 VS2_2 -
5071 VS2_2: vz2 = vx2 + v1 VS2_3 -
5072 VS2_3: vz3 = vx3 + v1 - -
5073 S2: z = x + 1 - VS2_0 */
5075 prev_stmt_info = NULL;
5076 for (j = 0; j < ncopies; j++)
5081 if (op_type == binary_op || op_type == ternary_op)
5082 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5085 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5087 if (op_type == ternary_op)
5088 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5093 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5094 if (op_type == ternary_op)
5096 tree vec_oprnd = vec_oprnds2.pop ();
5097 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5102 /* Arguments are ready. Create the new vector stmt. */
5103 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5105 vop1 = ((op_type == binary_op || op_type == ternary_op)
5106 ? vec_oprnds1[i] : NULL_TREE);
5107 vop2 = ((op_type == ternary_op)
5108 ? vec_oprnds2[i] : NULL_TREE);
5109 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5110 new_temp = make_ssa_name (vec_dest, new_stmt);
5111 gimple_assign_set_lhs (new_stmt, new_temp);
5112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5114 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5121 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5123 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5124 prev_stmt_info = vinfo_for_stmt (new_stmt);
5127 vec_oprnds0.release ();
5128 vec_oprnds1.release ();
5129 vec_oprnds2.release ();
5134 /* A helper function to ensure data reference DR's base alignment
5138 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5143 if (DR_VECT_AUX (dr)->base_misaligned)
5145 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5146 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5148 if (decl_in_symtab_p (base_decl))
5149 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5152 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
5153 DECL_USER_ALIGN (base_decl) = 1;
5155 DR_VECT_AUX (dr)->base_misaligned = false;
5160 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5161 reversal of the vector elements. If that is impossible to do,
5165 perm_mask_for_reverse (tree vectype)
5170 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5171 sel = XALLOCAVEC (unsigned char, nunits);
5173 for (i = 0; i < nunits; ++i)
5174 sel[i] = nunits - 1 - i;
5176 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5178 return vect_gen_perm_mask_checked (vectype, sel);
5181 /* Function get_group_alias_ptr_type.
5183 Return the alias type for the group starting at FIRST_STMT. */
5186 get_group_alias_ptr_type (gimple *first_stmt)
5188 struct data_reference *first_dr, *next_dr;
5191 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5192 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5195 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5196 if (get_alias_set (DR_REF (first_dr))
5197 != get_alias_set (DR_REF (next_dr)))
5199 if (dump_enabled_p ())
5200 dump_printf_loc (MSG_NOTE, vect_location,
5201 "conflicting alias set types.\n");
5202 return ptr_type_node;
5204 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5206 return reference_alias_ptr_type (DR_REF (first_dr));
5210 /* Function vectorizable_store.
5212 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5214 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5215 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5216 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5219 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5225 tree vec_oprnd = NULL_TREE;
5226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5227 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5229 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5230 struct loop *loop = NULL;
5231 machine_mode vec_mode;
5233 enum dr_alignment_support alignment_support_scheme;
5235 enum vect_def_type dt;
5236 stmt_vec_info prev_stmt_info = NULL;
5237 tree dataref_ptr = NULL_TREE;
5238 tree dataref_offset = NULL_TREE;
5239 gimple *ptr_incr = NULL;
5242 gimple *next_stmt, *first_stmt = NULL;
5243 bool grouped_store = false;
5244 bool store_lanes_p = false;
5245 unsigned int group_size, i;
5246 vec<tree> dr_chain = vNULL;
5247 vec<tree> oprnds = vNULL;
5248 vec<tree> result_chain = vNULL;
5250 bool negative = false;
5251 tree offset = NULL_TREE;
5252 vec<tree> vec_oprnds = vNULL;
5253 bool slp = (slp_node != NULL);
5254 unsigned int vec_num;
5255 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5256 vec_info *vinfo = stmt_info->vinfo;
5258 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5259 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5260 int scatter_scale = 1;
5261 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5262 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5266 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5269 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5273 /* Is vectorizable store? */
5275 if (!is_gimple_assign (stmt))
5278 scalar_dest = gimple_assign_lhs (stmt);
5279 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5280 && is_pattern_stmt_p (stmt_info))
5281 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5282 if (TREE_CODE (scalar_dest) != ARRAY_REF
5283 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5284 && TREE_CODE (scalar_dest) != INDIRECT_REF
5285 && TREE_CODE (scalar_dest) != COMPONENT_REF
5286 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5287 && TREE_CODE (scalar_dest) != REALPART_EXPR
5288 && TREE_CODE (scalar_dest) != MEM_REF)
5291 gcc_assert (gimple_assign_single_p (stmt));
5293 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5294 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5297 loop = LOOP_VINFO_LOOP (loop_vinfo);
5299 /* Multiple types in SLP are handled by creating the appropriate number of
5300 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5302 if (slp || PURE_SLP_STMT (stmt_info))
5305 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5307 gcc_assert (ncopies >= 1);
5309 /* FORNOW. This restriction should be relaxed. */
5310 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5312 if (dump_enabled_p ())
5313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5314 "multiple types in nested loop.\n");
5318 op = gimple_assign_rhs1 (stmt);
5320 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5324 "use not simple.\n");
5328 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5331 elem_type = TREE_TYPE (vectype);
5332 vec_mode = TYPE_MODE (vectype);
5334 /* FORNOW. In some cases can vectorize even if data-type not supported
5335 (e.g. - array initialization with 0). */
5336 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5339 if (!STMT_VINFO_DATA_REF (stmt_info))
5342 if (!STMT_VINFO_STRIDED_P (stmt_info))
5345 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5346 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5347 size_zero_node) < 0;
5348 if (negative && ncopies > 1)
5350 if (dump_enabled_p ())
5351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5352 "multiple types with negative step.\n");
5357 gcc_assert (!grouped_store);
5358 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5359 if (alignment_support_scheme != dr_aligned
5360 && alignment_support_scheme != dr_unaligned_supported)
5362 if (dump_enabled_p ())
5363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5364 "negative step but alignment required.\n");
5367 if (dt != vect_constant_def
5368 && dt != vect_external_def
5369 && !perm_mask_for_reverse (vectype))
5371 if (dump_enabled_p ())
5372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5373 "negative step and reversing not supported.\n");
5379 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5381 grouped_store = true;
5382 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5383 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5385 && !PURE_SLP_STMT (stmt_info)
5386 && !STMT_VINFO_STRIDED_P (stmt_info))
5388 if (vect_store_lanes_supported (vectype, group_size))
5389 store_lanes_p = true;
5390 else if (!vect_grouped_store_supported (vectype, group_size))
5394 if (STMT_VINFO_STRIDED_P (stmt_info)
5395 && (slp || PURE_SLP_STMT (stmt_info))
5396 && (group_size > nunits
5397 || nunits % group_size != 0))
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5400 "unhandled strided group store\n");
5404 if (first_stmt == stmt)
5406 /* STMT is the leader of the group. Check the operands of all the
5407 stmts of the group. */
5408 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5411 gcc_assert (gimple_assign_single_p (next_stmt));
5412 op = gimple_assign_rhs1 (next_stmt);
5413 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5417 "use not simple.\n");
5420 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5425 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5428 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5429 &scatter_off, &scatter_scale);
5430 gcc_assert (scatter_decl);
5431 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5432 &scatter_off_vectype))
5434 if (dump_enabled_p ())
5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5436 "scatter index use not simple.");
5441 if (!vec_stmt) /* transformation not required. */
5443 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5444 /* The SLP costs are calculated during SLP analysis. */
5445 if (!PURE_SLP_STMT (stmt_info))
5446 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5453 ensure_base_align (stmt_info, dr);
5455 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5457 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5458 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5459 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5460 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5461 edge pe = loop_preheader_edge (loop);
5464 enum { NARROW, NONE, WIDEN } modifier;
5465 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5467 if (nunits == (unsigned int) scatter_off_nunits)
5469 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5471 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5474 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5475 sel[i] = i | nunits;
5477 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5478 gcc_assert (perm_mask != NULL_TREE);
5480 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5482 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5485 for (i = 0; i < (unsigned int) nunits; ++i)
5486 sel[i] = i | scatter_off_nunits;
5488 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5489 gcc_assert (perm_mask != NULL_TREE);
5495 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5496 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5497 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5498 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5499 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5500 scaletype = TREE_VALUE (arglist);
5502 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5503 && TREE_CODE (rettype) == VOID_TYPE);
5505 ptr = fold_convert (ptrtype, scatter_base);
5506 if (!is_gimple_min_invariant (ptr))
5508 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5509 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5510 gcc_assert (!new_bb);
5513 /* Currently we support only unconditional scatter stores,
5514 so mask should be all ones. */
5515 mask = build_int_cst (masktype, -1);
5516 mask = vect_init_vector (stmt, mask, masktype, NULL);
5518 scale = build_int_cst (scaletype, scatter_scale);
5520 prev_stmt_info = NULL;
5521 for (j = 0; j < ncopies; ++j)
5526 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5528 = vect_get_vec_def_for_operand (scatter_off, stmt);
5530 else if (modifier != NONE && (j & 1))
5532 if (modifier == WIDEN)
5535 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5536 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5539 else if (modifier == NARROW)
5541 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5544 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5552 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5554 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5557 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5559 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5560 == TYPE_VECTOR_SUBPARTS (srctype));
5561 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5562 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5563 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5568 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5570 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5571 == TYPE_VECTOR_SUBPARTS (idxtype));
5572 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5573 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5574 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5575 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5580 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5582 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5584 if (prev_stmt_info == NULL)
5585 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5587 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5588 prev_stmt_info = vinfo_for_stmt (new_stmt);
5595 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5596 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5598 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5601 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5603 /* We vectorize all the stmts of the interleaving group when we
5604 reach the last stmt in the group. */
5605 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5606 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5615 grouped_store = false;
5616 /* VEC_NUM is the number of vect stmts to be created for this
5618 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5619 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5620 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5621 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5622 op = gimple_assign_rhs1 (first_stmt);
5625 /* VEC_NUM is the number of vect stmts to be created for this
5627 vec_num = group_size;
5629 ref_type = get_group_alias_ptr_type (first_stmt);
5635 group_size = vec_num = 1;
5636 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5639 if (dump_enabled_p ())
5640 dump_printf_loc (MSG_NOTE, vect_location,
5641 "transform store. ncopies = %d\n", ncopies);
5643 if (STMT_VINFO_STRIDED_P (stmt_info))
5645 gimple_stmt_iterator incr_gsi;
5651 gimple_seq stmts = NULL;
5652 tree stride_base, stride_step, alias_off;
5656 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5659 = fold_build_pointer_plus
5660 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5661 size_binop (PLUS_EXPR,
5662 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5663 convert_to_ptrofftype (DR_INIT (first_dr))));
5664 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5666 /* For a store with loop-invariant (but other than power-of-2)
5667 stride (i.e. not a grouped access) like so:
5669 for (i = 0; i < n; i += stride)
5672 we generate a new induction variable and new stores from
5673 the components of the (vectorized) rhs:
5675 for (j = 0; ; j += VF*stride)
5680 array[j + stride] = tmp2;
5684 unsigned nstores = nunits;
5685 tree ltype = elem_type;
5688 nstores = nunits / group_size;
5689 if (group_size < nunits)
5690 ltype = build_vector_type (elem_type, group_size);
5693 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5694 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5698 ivstep = stride_step;
5699 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5700 build_int_cst (TREE_TYPE (ivstep),
5701 ncopies * nstores));
5703 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5705 create_iv (stride_base, ivstep, NULL,
5706 loop, &incr_gsi, insert_after,
5708 incr = gsi_stmt (incr_gsi);
5709 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5711 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5713 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5715 prev_stmt_info = NULL;
5716 alias_off = build_int_cst (ref_type, 0);
5717 next_stmt = first_stmt;
5718 for (g = 0; g < group_size; g++)
5720 running_off = offvar;
5723 tree size = TYPE_SIZE_UNIT (ltype);
5724 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5726 tree newoff = copy_ssa_name (running_off, NULL);
5727 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5729 vect_finish_stmt_generation (stmt, incr, gsi);
5730 running_off = newoff;
5732 for (j = 0; j < ncopies; j++)
5734 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5735 and first_stmt == stmt. */
5740 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5742 vec_oprnd = vec_oprnds[0];
5746 gcc_assert (gimple_assign_single_p (next_stmt));
5747 op = gimple_assign_rhs1 (next_stmt);
5748 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5754 vec_oprnd = vec_oprnds[j];
5757 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5758 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5762 for (i = 0; i < nstores; i++)
5764 tree newref, newoff;
5765 gimple *incr, *assign;
5766 tree size = TYPE_SIZE (ltype);
5767 /* Extract the i'th component. */
5768 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5769 bitsize_int (i), size);
5770 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5773 elem = force_gimple_operand_gsi (gsi, elem, true,
5777 newref = build2 (MEM_REF, ltype,
5778 running_off, alias_off);
5780 /* And store it to *running_off. */
5781 assign = gimple_build_assign (newref, elem);
5782 vect_finish_stmt_generation (stmt, assign, gsi);
5784 newoff = copy_ssa_name (running_off, NULL);
5785 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5786 running_off, stride_step);
5787 vect_finish_stmt_generation (stmt, incr, gsi);
5789 running_off = newoff;
5790 if (g == group_size - 1
5793 if (j == 0 && i == 0)
5794 STMT_VINFO_VEC_STMT (stmt_info)
5795 = *vec_stmt = assign;
5797 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5798 prev_stmt_info = vinfo_for_stmt (assign);
5802 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5807 dr_chain.create (group_size);
5808 oprnds.create (group_size);
5810 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5811 gcc_assert (alignment_support_scheme);
5812 /* Targets with store-lane instructions must not require explicit
5814 gcc_assert (!store_lanes_p
5815 || alignment_support_scheme == dr_aligned
5816 || alignment_support_scheme == dr_unaligned_supported);
5819 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5822 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5824 aggr_type = vectype;
5826 /* In case the vectorization factor (VF) is bigger than the number
5827 of elements that we can fit in a vectype (nunits), we have to generate
5828 more than one vector stmt - i.e - we need to "unroll" the
5829 vector stmt by a factor VF/nunits. For more details see documentation in
5830 vect_get_vec_def_for_copy_stmt. */
5832 /* In case of interleaving (non-unit grouped access):
5839 We create vectorized stores starting from base address (the access of the
5840 first stmt in the chain (S2 in the above example), when the last store stmt
5841 of the chain (S4) is reached:
5844 VS2: &base + vec_size*1 = vx0
5845 VS3: &base + vec_size*2 = vx1
5846 VS4: &base + vec_size*3 = vx3
5848 Then permutation statements are generated:
5850 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5851 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5854 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5855 (the order of the data-refs in the output of vect_permute_store_chain
5856 corresponds to the order of scalar stmts in the interleaving chain - see
5857 the documentation of vect_permute_store_chain()).
5859 In case of both multiple types and interleaving, above vector stores and
5860 permutation stmts are created for every copy. The result vector stmts are
5861 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5862 STMT_VINFO_RELATED_STMT for the next copies.
5865 prev_stmt_info = NULL;
5866 for (j = 0; j < ncopies; j++)
5873 /* Get vectorized arguments for SLP_NODE. */
5874 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5875 NULL, slp_node, -1);
5877 vec_oprnd = vec_oprnds[0];
5881 /* For interleaved stores we collect vectorized defs for all the
5882 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5883 used as an input to vect_permute_store_chain(), and OPRNDS as
5884 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5886 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5887 OPRNDS are of size 1. */
5888 next_stmt = first_stmt;
5889 for (i = 0; i < group_size; i++)
5891 /* Since gaps are not supported for interleaved stores,
5892 GROUP_SIZE is the exact number of stmts in the chain.
5893 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5894 there is no interleaving, GROUP_SIZE is 1, and only one
5895 iteration of the loop will be executed. */
5896 gcc_assert (next_stmt
5897 && gimple_assign_single_p (next_stmt));
5898 op = gimple_assign_rhs1 (next_stmt);
5900 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5901 dr_chain.quick_push (vec_oprnd);
5902 oprnds.quick_push (vec_oprnd);
5903 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5907 /* We should have catched mismatched types earlier. */
5908 gcc_assert (useless_type_conversion_p (vectype,
5909 TREE_TYPE (vec_oprnd)));
5910 bool simd_lane_access_p
5911 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5912 if (simd_lane_access_p
5913 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5914 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5915 && integer_zerop (DR_OFFSET (first_dr))
5916 && integer_zerop (DR_INIT (first_dr))
5917 && alias_sets_conflict_p (get_alias_set (aggr_type),
5918 get_alias_set (TREE_TYPE (ref_type))))
5920 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5921 dataref_offset = build_int_cst (ref_type, 0);
5926 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5927 simd_lane_access_p ? loop : NULL,
5928 offset, &dummy, gsi, &ptr_incr,
5929 simd_lane_access_p, &inv_p);
5930 gcc_assert (bb_vinfo || !inv_p);
5934 /* For interleaved stores we created vectorized defs for all the
5935 defs stored in OPRNDS in the previous iteration (previous copy).
5936 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5937 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5939 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5940 OPRNDS are of size 1. */
5941 for (i = 0; i < group_size; i++)
5944 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5945 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5946 dr_chain[i] = vec_oprnd;
5947 oprnds[i] = vec_oprnd;
5951 = int_const_binop (PLUS_EXPR, dataref_offset,
5952 TYPE_SIZE_UNIT (aggr_type));
5954 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5955 TYPE_SIZE_UNIT (aggr_type));
5962 /* Combine all the vectors into an array. */
5963 vec_array = create_vector_array (vectype, vec_num);
5964 for (i = 0; i < vec_num; i++)
5966 vec_oprnd = dr_chain[i];
5967 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5971 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5972 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
5973 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5974 gimple_call_set_lhs (new_stmt, data_ref);
5975 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5983 result_chain.create (group_size);
5985 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5989 next_stmt = first_stmt;
5990 for (i = 0; i < vec_num; i++)
5992 unsigned align, misalign;
5995 /* Bump the vector pointer. */
5996 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6000 vec_oprnd = vec_oprnds[i];
6001 else if (grouped_store)
6002 /* For grouped stores vectorized defs are interleaved in
6003 vect_permute_store_chain(). */
6004 vec_oprnd = result_chain[i];
6006 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
6010 : build_int_cst (ref_type, 0));
6011 align = TYPE_ALIGN_UNIT (vectype);
6012 if (aligned_access_p (first_dr))
6014 else if (DR_MISALIGNMENT (first_dr) == -1)
6016 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6017 align = TYPE_ALIGN_UNIT (elem_type);
6019 align = get_object_alignment (DR_REF (first_dr))
6022 TREE_TYPE (data_ref)
6023 = build_aligned_type (TREE_TYPE (data_ref),
6024 align * BITS_PER_UNIT);
6028 TREE_TYPE (data_ref)
6029 = build_aligned_type (TREE_TYPE (data_ref),
6030 TYPE_ALIGN (elem_type));
6031 misalign = DR_MISALIGNMENT (first_dr);
6033 if (dataref_offset == NULL_TREE
6034 && TREE_CODE (dataref_ptr) == SSA_NAME)
6035 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6039 && dt != vect_constant_def
6040 && dt != vect_external_def)
6042 tree perm_mask = perm_mask_for_reverse (vectype);
6044 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6046 tree new_temp = make_ssa_name (perm_dest);
6048 /* Generate the permute statement. */
6050 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6051 vec_oprnd, perm_mask);
6052 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6054 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6055 vec_oprnd = new_temp;
6058 /* Arguments are ready. Create the new vector stmt. */
6059 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6060 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6065 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6073 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6075 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6076 prev_stmt_info = vinfo_for_stmt (new_stmt);
6080 dr_chain.release ();
6082 result_chain.release ();
6083 vec_oprnds.release ();
6088 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6089 VECTOR_CST mask. No checks are made that the target platform supports the
6090 mask, so callers may wish to test can_vec_perm_p separately, or use
6091 vect_gen_perm_mask_checked. */
6094 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6096 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6099 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6101 mask_elt_type = lang_hooks.types.type_for_mode
6102 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6103 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6105 mask_elts = XALLOCAVEC (tree, nunits);
6106 for (i = nunits - 1; i >= 0; i--)
6107 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6108 mask_vec = build_vector (mask_type, mask_elts);
6113 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6114 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6117 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6119 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6120 return vect_gen_perm_mask_any (vectype, sel);
6123 /* Given a vector variable X and Y, that was generated for the scalar
6124 STMT, generate instructions to permute the vector elements of X and Y
6125 using permutation mask MASK_VEC, insert them at *GSI and return the
6126 permuted vector variable. */
6129 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6130 gimple_stmt_iterator *gsi)
6132 tree vectype = TREE_TYPE (x);
6133 tree perm_dest, data_ref;
6136 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6137 data_ref = make_ssa_name (perm_dest);
6139 /* Generate the permute statement. */
6140 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6141 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6146 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6147 inserting them on the loops preheader edge. Returns true if we
6148 were successful in doing so (and thus STMT can be moved then),
6149 otherwise returns false. */
6152 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6158 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6160 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6161 if (!gimple_nop_p (def_stmt)
6162 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6164 /* Make sure we don't need to recurse. While we could do
6165 so in simple cases when there are more complex use webs
6166 we don't have an easy way to preserve stmt order to fulfil
6167 dependencies within them. */
6170 if (gimple_code (def_stmt) == GIMPLE_PHI)
6172 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6174 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6175 if (!gimple_nop_p (def_stmt2)
6176 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6186 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6188 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6189 if (!gimple_nop_p (def_stmt)
6190 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6192 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6193 gsi_remove (&gsi, false);
6194 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6201 /* vectorizable_load.
6203 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6206 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6210 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6211 slp_tree slp_node, slp_instance slp_node_instance)
6214 tree vec_dest = NULL;
6215 tree data_ref = NULL;
6216 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6217 stmt_vec_info prev_stmt_info;
6218 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6219 struct loop *loop = NULL;
6220 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6221 bool nested_in_vect_loop = false;
6222 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6226 gimple *new_stmt = NULL;
6228 enum dr_alignment_support alignment_support_scheme;
6229 tree dataref_ptr = NULL_TREE;
6230 tree dataref_offset = NULL_TREE;
6231 gimple *ptr_incr = NULL;
6233 int i, j, group_size, group_gap_adj;
6234 tree msq = NULL_TREE, lsq;
6235 tree offset = NULL_TREE;
6236 tree byte_offset = NULL_TREE;
6237 tree realignment_token = NULL_TREE;
6239 vec<tree> dr_chain = vNULL;
6240 bool grouped_load = false;
6241 bool load_lanes_p = false;
6243 gimple *first_stmt_for_drptr = NULL;
6245 bool negative = false;
6246 bool compute_in_loop = false;
6247 struct loop *at_loop;
6249 bool slp = (slp_node != NULL);
6250 bool slp_perm = false;
6251 enum tree_code code;
6252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6255 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6256 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6257 int gather_scale = 1;
6258 enum vect_def_type gather_dt = vect_unknown_def_type;
6259 vec_info *vinfo = stmt_info->vinfo;
6262 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6265 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6269 /* Is vectorizable load? */
6270 if (!is_gimple_assign (stmt))
6273 scalar_dest = gimple_assign_lhs (stmt);
6274 if (TREE_CODE (scalar_dest) != SSA_NAME)
6277 code = gimple_assign_rhs_code (stmt);
6278 if (code != ARRAY_REF
6279 && code != BIT_FIELD_REF
6280 && code != INDIRECT_REF
6281 && code != COMPONENT_REF
6282 && code != IMAGPART_EXPR
6283 && code != REALPART_EXPR
6285 && TREE_CODE_CLASS (code) != tcc_declaration)
6288 if (!STMT_VINFO_DATA_REF (stmt_info))
6291 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6292 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6296 loop = LOOP_VINFO_LOOP (loop_vinfo);
6297 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6298 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6303 /* Multiple types in SLP are handled by creating the appropriate number of
6304 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6306 if (slp || PURE_SLP_STMT (stmt_info))
6309 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6311 gcc_assert (ncopies >= 1);
6313 /* FORNOW. This restriction should be relaxed. */
6314 if (nested_in_vect_loop && ncopies > 1)
6316 if (dump_enabled_p ())
6317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6318 "multiple types in nested loop.\n");
6322 /* Invalidate assumptions made by dependence analysis when vectorization
6323 on the unrolled body effectively re-orders stmts. */
6325 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6326 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6327 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6329 if (dump_enabled_p ())
6330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6331 "cannot perform implicit CSE when unrolling "
6332 "with negative dependence distance\n");
6336 elem_type = TREE_TYPE (vectype);
6337 mode = TYPE_MODE (vectype);
6339 /* FORNOW. In some cases can vectorize even if data-type not supported
6340 (e.g. - data copies). */
6341 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6343 if (dump_enabled_p ())
6344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6345 "Aligned load, but unsupported type.\n");
6349 /* Check if the load is a part of an interleaving chain. */
6350 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6352 grouped_load = true;
6354 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6356 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6357 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6360 && !PURE_SLP_STMT (stmt_info)
6361 && !STMT_VINFO_STRIDED_P (stmt_info))
6363 if (vect_load_lanes_supported (vectype, group_size))
6364 load_lanes_p = true;
6365 else if (!vect_grouped_load_supported (vectype, group_size))
6369 /* If this is single-element interleaving with an element distance
6370 that leaves unused vector loads around punt - we at least create
6371 very sub-optimal code in that case (and blow up memory,
6373 if (first_stmt == stmt
6374 && !GROUP_NEXT_ELEMENT (stmt_info))
6376 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6378 if (dump_enabled_p ())
6379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6380 "single-element interleaving not supported "
6381 "for not adjacent vector loads\n");
6385 /* Single-element interleaving requires peeling for gaps. */
6386 gcc_assert (GROUP_GAP (stmt_info));
6389 /* If there is a gap in the end of the group or the group size cannot
6390 be made a multiple of the vector element count then we access excess
6391 elements in the last iteration and thus need to peel that off. */
6393 && ! STMT_VINFO_STRIDED_P (stmt_info)
6394 && (GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
6395 || (!slp && !load_lanes_p && vf % group_size != 0)))
6397 if (dump_enabled_p ())
6398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6399 "Data access with gaps requires scalar "
6403 if (dump_enabled_p ())
6404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6405 "Peeling for outer loop is not supported\n");
6409 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
6412 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6415 /* ??? The following is overly pessimistic (as well as the loop
6416 case above) in the case we can statically determine the excess
6417 elements loaded are within the bounds of a decl that is accessed.
6418 Likewise for BB vectorizations using masked loads is a possibility. */
6419 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6422 "BB vectorization with gaps at the end of a load "
6423 "is not supported\n");
6427 /* Invalidate assumptions made by dependence analysis when vectorization
6428 on the unrolled body effectively re-orders stmts. */
6429 if (!PURE_SLP_STMT (stmt_info)
6430 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6431 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6432 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6434 if (dump_enabled_p ())
6435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6436 "cannot perform implicit CSE when performing "
6437 "group loads with negative dependence distance\n");
6441 /* Similarly when the stmt is a load that is both part of a SLP
6442 instance and a loop vectorized stmt via the same-dr mechanism
6443 we have to give up. */
6444 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6445 && (STMT_SLP_TYPE (stmt_info)
6446 != STMT_SLP_TYPE (vinfo_for_stmt
6447 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6449 if (dump_enabled_p ())
6450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6451 "conflicting SLP types for CSEd load\n");
6457 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6460 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6461 &gather_off, &gather_scale);
6462 gcc_assert (gather_decl);
6463 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6464 &gather_off_vectype))
6466 if (dump_enabled_p ())
6467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6468 "gather index use not simple.\n");
6472 else if (STMT_VINFO_STRIDED_P (stmt_info))
6475 && (slp || PURE_SLP_STMT (stmt_info)))
6476 && (group_size > nunits
6477 || nunits % group_size != 0))
6479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6480 "unhandled strided group load\n");
6486 negative = tree_int_cst_compare (nested_in_vect_loop
6487 ? STMT_VINFO_DR_STEP (stmt_info)
6489 size_zero_node) < 0;
6490 if (negative && ncopies > 1)
6492 if (dump_enabled_p ())
6493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6494 "multiple types with negative step.\n");
6502 if (dump_enabled_p ())
6503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6504 "negative step for group load not supported"
6508 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6509 if (alignment_support_scheme != dr_aligned
6510 && alignment_support_scheme != dr_unaligned_supported)
6512 if (dump_enabled_p ())
6513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6514 "negative step but alignment required.\n");
6517 if (!perm_mask_for_reverse (vectype))
6519 if (dump_enabled_p ())
6520 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6521 "negative step and reversing not supported."
6528 if (!vec_stmt) /* transformation not required. */
6530 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6531 /* The SLP costs are calculated during SLP analysis. */
6532 if (!PURE_SLP_STMT (stmt_info))
6533 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6538 if (dump_enabled_p ())
6539 dump_printf_loc (MSG_NOTE, vect_location,
6540 "transform load. ncopies = %d\n", ncopies);
6544 ensure_base_align (stmt_info, dr);
6546 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6548 tree vec_oprnd0 = NULL_TREE, op;
6549 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6550 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6551 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6552 edge pe = loop_preheader_edge (loop);
6555 enum { NARROW, NONE, WIDEN } modifier;
6556 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6558 if (nunits == gather_off_nunits)
6560 else if (nunits == gather_off_nunits / 2)
6562 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6565 for (i = 0; i < gather_off_nunits; ++i)
6566 sel[i] = i | nunits;
6568 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6570 else if (nunits == gather_off_nunits * 2)
6572 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6575 for (i = 0; i < nunits; ++i)
6576 sel[i] = i < gather_off_nunits
6577 ? i : i + nunits - gather_off_nunits;
6579 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6585 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6586 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6587 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6588 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6589 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6590 scaletype = TREE_VALUE (arglist);
6591 gcc_checking_assert (types_compatible_p (srctype, rettype));
6593 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6595 ptr = fold_convert (ptrtype, gather_base);
6596 if (!is_gimple_min_invariant (ptr))
6598 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6599 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6600 gcc_assert (!new_bb);
6603 /* Currently we support only unconditional gather loads,
6604 so mask should be all ones. */
6605 if (TREE_CODE (masktype) == INTEGER_TYPE)
6606 mask = build_int_cst (masktype, -1);
6607 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6609 mask = build_int_cst (TREE_TYPE (masktype), -1);
6610 mask = build_vector_from_val (masktype, mask);
6611 mask = vect_init_vector (stmt, mask, masktype, NULL);
6613 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6617 for (j = 0; j < 6; ++j)
6619 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6620 mask = build_real (TREE_TYPE (masktype), r);
6621 mask = build_vector_from_val (masktype, mask);
6622 mask = vect_init_vector (stmt, mask, masktype, NULL);
6627 scale = build_int_cst (scaletype, gather_scale);
6629 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6630 merge = build_int_cst (TREE_TYPE (rettype), 0);
6631 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6635 for (j = 0; j < 6; ++j)
6637 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6638 merge = build_real (TREE_TYPE (rettype), r);
6642 merge = build_vector_from_val (rettype, merge);
6643 merge = vect_init_vector (stmt, merge, rettype, NULL);
6645 prev_stmt_info = NULL;
6646 for (j = 0; j < ncopies; ++j)
6648 if (modifier == WIDEN && (j & 1))
6649 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6650 perm_mask, stmt, gsi);
6653 = vect_get_vec_def_for_operand (gather_off, stmt);
6656 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6658 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6660 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6661 == TYPE_VECTOR_SUBPARTS (idxtype));
6662 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6663 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6665 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6666 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6671 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6673 if (!useless_type_conversion_p (vectype, rettype))
6675 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6676 == TYPE_VECTOR_SUBPARTS (rettype));
6677 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6678 gimple_call_set_lhs (new_stmt, op);
6679 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6680 var = make_ssa_name (vec_dest);
6681 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6683 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6687 var = make_ssa_name (vec_dest, new_stmt);
6688 gimple_call_set_lhs (new_stmt, var);
6691 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6693 if (modifier == NARROW)
6700 var = permute_vec_elements (prev_res, var,
6701 perm_mask, stmt, gsi);
6702 new_stmt = SSA_NAME_DEF_STMT (var);
6705 if (prev_stmt_info == NULL)
6706 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6708 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6709 prev_stmt_info = vinfo_for_stmt (new_stmt);
6713 else if (STMT_VINFO_STRIDED_P (stmt_info))
6715 gimple_stmt_iterator incr_gsi;
6721 vec<constructor_elt, va_gc> *v = NULL;
6722 gimple_seq stmts = NULL;
6723 tree stride_base, stride_step, alias_off;
6725 gcc_assert (!nested_in_vect_loop);
6727 if (slp && grouped_load)
6729 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6730 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6731 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6732 ref_type = get_group_alias_ptr_type (first_stmt);
6739 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6743 = fold_build_pointer_plus
6744 (DR_BASE_ADDRESS (first_dr),
6745 size_binop (PLUS_EXPR,
6746 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6747 convert_to_ptrofftype (DR_INIT (first_dr))));
6748 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6750 /* For a load with loop-invariant (but other than power-of-2)
6751 stride (i.e. not a grouped access) like so:
6753 for (i = 0; i < n; i += stride)
6756 we generate a new induction variable and new accesses to
6757 form a new vector (or vectors, depending on ncopies):
6759 for (j = 0; ; j += VF*stride)
6761 tmp2 = array[j + stride];
6763 vectemp = {tmp1, tmp2, ...}
6766 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6767 build_int_cst (TREE_TYPE (stride_step), vf));
6769 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6771 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6772 loop, &incr_gsi, insert_after,
6774 incr = gsi_stmt (incr_gsi);
6775 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6777 stride_step = force_gimple_operand (unshare_expr (stride_step),
6778 &stmts, true, NULL_TREE);
6780 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6782 prev_stmt_info = NULL;
6783 running_off = offvar;
6784 alias_off = build_int_cst (ref_type, 0);
6785 int nloads = nunits;
6786 tree ltype = TREE_TYPE (vectype);
6787 auto_vec<tree> dr_chain;
6790 nloads = nunits / group_size;
6791 if (group_size < nunits)
6792 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6795 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6796 /* For SLP permutation support we need to load the whole group,
6797 not only the number of vector stmts the permutation result
6801 ncopies = (group_size * vf + nunits - 1) / nunits;
6802 dr_chain.create (ncopies);
6805 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6807 for (j = 0; j < ncopies; j++)
6813 vec_alloc (v, nloads);
6814 for (i = 0; i < nloads; i++)
6816 tree newref, newoff;
6818 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6820 newref = force_gimple_operand_gsi (gsi, newref, true,
6823 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6824 newoff = copy_ssa_name (running_off);
6825 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6826 running_off, stride_step);
6827 vect_finish_stmt_generation (stmt, incr, gsi);
6829 running_off = newoff;
6832 vec_inv = build_constructor (vectype, v);
6833 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6834 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6838 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6839 build2 (MEM_REF, ltype,
6840 running_off, alias_off));
6841 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6843 tree newoff = copy_ssa_name (running_off);
6844 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6845 running_off, stride_step);
6846 vect_finish_stmt_generation (stmt, incr, gsi);
6848 running_off = newoff;
6854 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6856 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6861 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6863 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6864 prev_stmt_info = vinfo_for_stmt (new_stmt);
6868 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6869 slp_node_instance, false);
6875 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6876 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6877 /* For SLP vectorization we directly vectorize a subchain
6878 without permutation. */
6879 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6880 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6881 /* For BB vectorization always use the first stmt to base
6882 the data ref pointer on. */
6884 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6886 /* Check if the chain of loads is already vectorized. */
6887 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6888 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6889 ??? But we can only do so if there is exactly one
6890 as we have no way to get at the rest. Leave the CSE
6892 ??? With the group load eventually participating
6893 in multiple different permutations (having multiple
6894 slp nodes which refer to the same group) the CSE
6895 is even wrong code. See PR56270. */
6898 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6901 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6904 /* VEC_NUM is the number of vect stmts to be created for this group. */
6907 grouped_load = false;
6908 /* For SLP permutation support we need to load the whole group,
6909 not only the number of vector stmts the permutation result
6912 vec_num = (group_size * vf + nunits - 1) / nunits;
6914 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6915 group_gap_adj = vf * group_size - nunits * vec_num;
6918 vec_num = group_size;
6920 ref_type = get_group_alias_ptr_type (first_stmt);
6926 group_size = vec_num = 1;
6928 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6931 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6932 gcc_assert (alignment_support_scheme);
6933 /* Targets with load-lane instructions must not require explicit
6935 gcc_assert (!load_lanes_p
6936 || alignment_support_scheme == dr_aligned
6937 || alignment_support_scheme == dr_unaligned_supported);
6939 /* In case the vectorization factor (VF) is bigger than the number
6940 of elements that we can fit in a vectype (nunits), we have to generate
6941 more than one vector stmt - i.e - we need to "unroll" the
6942 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6943 from one copy of the vector stmt to the next, in the field
6944 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6945 stages to find the correct vector defs to be used when vectorizing
6946 stmts that use the defs of the current stmt. The example below
6947 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6948 need to create 4 vectorized stmts):
6950 before vectorization:
6951 RELATED_STMT VEC_STMT
6955 step 1: vectorize stmt S1:
6956 We first create the vector stmt VS1_0, and, as usual, record a
6957 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6958 Next, we create the vector stmt VS1_1, and record a pointer to
6959 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6960 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6962 RELATED_STMT VEC_STMT
6963 VS1_0: vx0 = memref0 VS1_1 -
6964 VS1_1: vx1 = memref1 VS1_2 -
6965 VS1_2: vx2 = memref2 VS1_3 -
6966 VS1_3: vx3 = memref3 - -
6967 S1: x = load - VS1_0
6970 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6971 information we recorded in RELATED_STMT field is used to vectorize
6974 /* In case of interleaving (non-unit grouped access):
6981 Vectorized loads are created in the order of memory accesses
6982 starting from the access of the first stmt of the chain:
6985 VS2: vx1 = &base + vec_size*1
6986 VS3: vx3 = &base + vec_size*2
6987 VS4: vx4 = &base + vec_size*3
6989 Then permutation statements are generated:
6991 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6992 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6995 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6996 (the order of the data-refs in the output of vect_permute_load_chain
6997 corresponds to the order of scalar stmts in the interleaving chain - see
6998 the documentation of vect_permute_load_chain()).
6999 The generation of permutation stmts and recording them in
7000 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7002 In case of both multiple types and interleaving, the vector loads and
7003 permutation stmts above are created for every copy. The result vector
7004 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7005 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7007 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7008 on a target that supports unaligned accesses (dr_unaligned_supported)
7009 we generate the following code:
7013 p = p + indx * vectype_size;
7018 Otherwise, the data reference is potentially unaligned on a target that
7019 does not support unaligned accesses (dr_explicit_realign_optimized) -
7020 then generate the following code, in which the data in each iteration is
7021 obtained by two vector loads, one from the previous iteration, and one
7022 from the current iteration:
7024 msq_init = *(floor(p1))
7025 p2 = initial_addr + VS - 1;
7026 realignment_token = call target_builtin;
7029 p2 = p2 + indx * vectype_size
7031 vec_dest = realign_load (msq, lsq, realignment_token)
7036 /* If the misalignment remains the same throughout the execution of the
7037 loop, we can create the init_addr and permutation mask at the loop
7038 preheader. Otherwise, it needs to be created inside the loop.
7039 This can only occur when vectorizing memory accesses in the inner-loop
7040 nested within an outer-loop that is being vectorized. */
7042 if (nested_in_vect_loop
7043 && (TREE_INT_CST_LOW (DR_STEP (dr))
7044 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7046 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7047 compute_in_loop = true;
7050 if ((alignment_support_scheme == dr_explicit_realign_optimized
7051 || alignment_support_scheme == dr_explicit_realign)
7052 && !compute_in_loop)
7054 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7055 alignment_support_scheme, NULL_TREE,
7057 if (alignment_support_scheme == dr_explicit_realign_optimized)
7059 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7060 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7068 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7071 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7073 aggr_type = vectype;
7075 prev_stmt_info = NULL;
7076 for (j = 0; j < ncopies; j++)
7078 /* 1. Create the vector or array pointer update chain. */
7081 bool simd_lane_access_p
7082 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7083 if (simd_lane_access_p
7084 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7085 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7086 && integer_zerop (DR_OFFSET (first_dr))
7087 && integer_zerop (DR_INIT (first_dr))
7088 && alias_sets_conflict_p (get_alias_set (aggr_type),
7089 get_alias_set (TREE_TYPE (ref_type)))
7090 && (alignment_support_scheme == dr_aligned
7091 || alignment_support_scheme == dr_unaligned_supported))
7093 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7094 dataref_offset = build_int_cst (ref_type, 0);
7097 else if (first_stmt_for_drptr
7098 && first_stmt != first_stmt_for_drptr)
7101 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7102 at_loop, offset, &dummy, gsi,
7103 &ptr_incr, simd_lane_access_p,
7104 &inv_p, byte_offset);
7105 /* Adjust the pointer by the difference to first_stmt. */
7106 data_reference_p ptrdr
7107 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7108 tree diff = fold_convert (sizetype,
7109 size_binop (MINUS_EXPR,
7112 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7117 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7118 offset, &dummy, gsi, &ptr_incr,
7119 simd_lane_access_p, &inv_p,
7122 else if (dataref_offset)
7123 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7124 TYPE_SIZE_UNIT (aggr_type));
7126 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7127 TYPE_SIZE_UNIT (aggr_type));
7129 if (grouped_load || slp_perm)
7130 dr_chain.create (vec_num);
7136 vec_array = create_vector_array (vectype, vec_num);
7139 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7140 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7141 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7142 gimple_call_set_lhs (new_stmt, vec_array);
7143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7145 /* Extract each vector into an SSA_NAME. */
7146 for (i = 0; i < vec_num; i++)
7148 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7150 dr_chain.quick_push (new_temp);
7153 /* Record the mapping between SSA_NAMEs and statements. */
7154 vect_record_grouped_load_vectors (stmt, dr_chain);
7158 for (i = 0; i < vec_num; i++)
7161 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7164 /* 2. Create the vector-load in the loop. */
7165 switch (alignment_support_scheme)
7168 case dr_unaligned_supported:
7170 unsigned int align, misalign;
7173 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7176 : build_int_cst (ref_type, 0));
7177 align = TYPE_ALIGN_UNIT (vectype);
7178 if (alignment_support_scheme == dr_aligned)
7180 gcc_assert (aligned_access_p (first_dr));
7183 else if (DR_MISALIGNMENT (first_dr) == -1)
7185 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7186 align = TYPE_ALIGN_UNIT (elem_type);
7188 align = (get_object_alignment (DR_REF (first_dr))
7191 TREE_TYPE (data_ref)
7192 = build_aligned_type (TREE_TYPE (data_ref),
7193 align * BITS_PER_UNIT);
7197 TREE_TYPE (data_ref)
7198 = build_aligned_type (TREE_TYPE (data_ref),
7199 TYPE_ALIGN (elem_type));
7200 misalign = DR_MISALIGNMENT (first_dr);
7202 if (dataref_offset == NULL_TREE
7203 && TREE_CODE (dataref_ptr) == SSA_NAME)
7204 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7208 case dr_explicit_realign:
7212 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7214 if (compute_in_loop)
7215 msq = vect_setup_realignment (first_stmt, gsi,
7217 dr_explicit_realign,
7220 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7221 ptr = copy_ssa_name (dataref_ptr);
7223 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7224 new_stmt = gimple_build_assign
7225 (ptr, BIT_AND_EXPR, dataref_ptr,
7227 (TREE_TYPE (dataref_ptr),
7228 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7231 = build2 (MEM_REF, vectype, ptr,
7232 build_int_cst (ref_type, 0));
7233 vec_dest = vect_create_destination_var (scalar_dest,
7235 new_stmt = gimple_build_assign (vec_dest, data_ref);
7236 new_temp = make_ssa_name (vec_dest, new_stmt);
7237 gimple_assign_set_lhs (new_stmt, new_temp);
7238 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7239 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7240 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7243 bump = size_binop (MULT_EXPR, vs,
7244 TYPE_SIZE_UNIT (elem_type));
7245 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7246 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7247 new_stmt = gimple_build_assign
7248 (NULL_TREE, BIT_AND_EXPR, ptr,
7251 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7252 ptr = copy_ssa_name (ptr, new_stmt);
7253 gimple_assign_set_lhs (new_stmt, ptr);
7254 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7256 = build2 (MEM_REF, vectype, ptr,
7257 build_int_cst (ref_type, 0));
7260 case dr_explicit_realign_optimized:
7261 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7262 new_temp = copy_ssa_name (dataref_ptr);
7264 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7265 new_stmt = gimple_build_assign
7266 (new_temp, BIT_AND_EXPR, dataref_ptr,
7268 (TREE_TYPE (dataref_ptr),
7269 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7272 = build2 (MEM_REF, vectype, new_temp,
7273 build_int_cst (ref_type, 0));
7278 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7279 new_stmt = gimple_build_assign (vec_dest, data_ref);
7280 new_temp = make_ssa_name (vec_dest, new_stmt);
7281 gimple_assign_set_lhs (new_stmt, new_temp);
7282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7284 /* 3. Handle explicit realignment if necessary/supported.
7286 vec_dest = realign_load (msq, lsq, realignment_token) */
7287 if (alignment_support_scheme == dr_explicit_realign_optimized
7288 || alignment_support_scheme == dr_explicit_realign)
7290 lsq = gimple_assign_lhs (new_stmt);
7291 if (!realignment_token)
7292 realignment_token = dataref_ptr;
7293 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7294 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7295 msq, lsq, realignment_token);
7296 new_temp = make_ssa_name (vec_dest, new_stmt);
7297 gimple_assign_set_lhs (new_stmt, new_temp);
7298 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7300 if (alignment_support_scheme == dr_explicit_realign_optimized)
7303 if (i == vec_num - 1 && j == ncopies - 1)
7304 add_phi_arg (phi, lsq,
7305 loop_latch_edge (containing_loop),
7311 /* 4. Handle invariant-load. */
7312 if (inv_p && !bb_vinfo)
7314 gcc_assert (!grouped_load);
7315 /* If we have versioned for aliasing or the loop doesn't
7316 have any data dependencies that would preclude this,
7317 then we are sure this is a loop invariant load and
7318 thus we can insert it on the preheader edge. */
7319 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7320 && !nested_in_vect_loop
7321 && hoist_defs_of_uses (stmt, loop))
7323 if (dump_enabled_p ())
7325 dump_printf_loc (MSG_NOTE, vect_location,
7326 "hoisting out of the vectorized "
7328 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7330 tree tem = copy_ssa_name (scalar_dest);
7331 gsi_insert_on_edge_immediate
7332 (loop_preheader_edge (loop),
7333 gimple_build_assign (tem,
7335 (gimple_assign_rhs1 (stmt))));
7336 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7337 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7338 set_vinfo_for_stmt (new_stmt,
7339 new_stmt_vec_info (new_stmt, vinfo));
7343 gimple_stmt_iterator gsi2 = *gsi;
7345 new_temp = vect_init_vector (stmt, scalar_dest,
7347 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7353 tree perm_mask = perm_mask_for_reverse (vectype);
7354 new_temp = permute_vec_elements (new_temp, new_temp,
7355 perm_mask, stmt, gsi);
7356 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7359 /* Collect vector loads and later create their permutation in
7360 vect_transform_grouped_load (). */
7361 if (grouped_load || slp_perm)
7362 dr_chain.quick_push (new_temp);
7364 /* Store vector loads in the corresponding SLP_NODE. */
7365 if (slp && !slp_perm)
7366 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7368 /* Bump the vector pointer to account for a gap or for excess
7369 elements loaded for a permuted SLP load. */
7370 if (group_gap_adj != 0)
7374 = wide_int_to_tree (sizetype,
7375 wi::smul (TYPE_SIZE_UNIT (elem_type),
7376 group_gap_adj, &ovf));
7377 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7382 if (slp && !slp_perm)
7387 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7388 slp_node_instance, false))
7390 dr_chain.release ();
7399 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7400 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7405 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7407 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7408 prev_stmt_info = vinfo_for_stmt (new_stmt);
7411 dr_chain.release ();
7417 /* Function vect_is_simple_cond.
7420 LOOP - the loop that is being vectorized.
7421 COND - Condition that is checked for simple use.
7424 *COMP_VECTYPE - the vector type for the comparison.
7426 Returns whether a COND can be vectorized. Checks whether
7427 condition operands are supportable using vec_is_simple_use. */
7430 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7433 enum vect_def_type dt;
7434 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7437 if (TREE_CODE (cond) == SSA_NAME
7438 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7440 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7441 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7444 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7449 if (!COMPARISON_CLASS_P (cond))
7452 lhs = TREE_OPERAND (cond, 0);
7453 rhs = TREE_OPERAND (cond, 1);
7455 if (TREE_CODE (lhs) == SSA_NAME)
7457 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7458 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7461 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7462 && TREE_CODE (lhs) != FIXED_CST)
7465 if (TREE_CODE (rhs) == SSA_NAME)
7467 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7468 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7471 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7472 && TREE_CODE (rhs) != FIXED_CST)
7475 if (vectype1 && vectype2
7476 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7479 *comp_vectype = vectype1 ? vectype1 : vectype2;
7483 /* vectorizable_condition.
7485 Check if STMT is conditional modify expression that can be vectorized.
7486 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7487 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7490 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7491 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7492 else clause if it is 2).
7494 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7497 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7498 gimple **vec_stmt, tree reduc_def, int reduc_index,
7501 tree scalar_dest = NULL_TREE;
7502 tree vec_dest = NULL_TREE;
7503 tree cond_expr, then_clause, else_clause;
7504 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7505 tree comp_vectype = NULL_TREE;
7506 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7507 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7510 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7511 enum vect_def_type dt, dts[4];
7513 enum tree_code code;
7514 stmt_vec_info prev_stmt_info = NULL;
7516 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7517 vec<tree> vec_oprnds0 = vNULL;
7518 vec<tree> vec_oprnds1 = vNULL;
7519 vec<tree> vec_oprnds2 = vNULL;
7520 vec<tree> vec_oprnds3 = vNULL;
7522 bool masked = false;
7524 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7527 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7529 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7532 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7533 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7537 /* FORNOW: not yet supported. */
7538 if (STMT_VINFO_LIVE_P (stmt_info))
7540 if (dump_enabled_p ())
7541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7542 "value used after loop.\n");
7547 /* Is vectorizable conditional operation? */
7548 if (!is_gimple_assign (stmt))
7551 code = gimple_assign_rhs_code (stmt);
7553 if (code != COND_EXPR)
7556 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7557 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7558 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7560 if (slp_node || PURE_SLP_STMT (stmt_info))
7563 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7565 gcc_assert (ncopies >= 1);
7566 if (reduc_index && ncopies > 1)
7567 return false; /* FORNOW */
7569 cond_expr = gimple_assign_rhs1 (stmt);
7570 then_clause = gimple_assign_rhs2 (stmt);
7571 else_clause = gimple_assign_rhs3 (stmt);
7573 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7578 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7581 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7585 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7588 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7591 masked = !COMPARISON_CLASS_P (cond_expr);
7592 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7594 if (vec_cmp_type == NULL_TREE)
7599 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7600 return expand_vec_cond_expr_p (vectype, comp_vectype);
7607 vec_oprnds0.create (1);
7608 vec_oprnds1.create (1);
7609 vec_oprnds2.create (1);
7610 vec_oprnds3.create (1);
7614 scalar_dest = gimple_assign_lhs (stmt);
7615 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7617 /* Handle cond expr. */
7618 for (j = 0; j < ncopies; j++)
7620 gassign *new_stmt = NULL;
7625 auto_vec<tree, 4> ops;
7626 auto_vec<vec<tree>, 4> vec_defs;
7629 ops.safe_push (cond_expr);
7632 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7633 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7635 ops.safe_push (then_clause);
7636 ops.safe_push (else_clause);
7637 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7638 vec_oprnds3 = vec_defs.pop ();
7639 vec_oprnds2 = vec_defs.pop ();
7641 vec_oprnds1 = vec_defs.pop ();
7642 vec_oprnds0 = vec_defs.pop ();
7645 vec_defs.release ();
7653 = vect_get_vec_def_for_operand (cond_expr, stmt,
7655 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7661 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7662 stmt, comp_vectype);
7663 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7664 loop_vinfo, >emp, &dts[0]);
7667 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7668 stmt, comp_vectype);
7669 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7670 loop_vinfo, >emp, &dts[1]);
7672 if (reduc_index == 1)
7673 vec_then_clause = reduc_def;
7676 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7678 vect_is_simple_use (then_clause, loop_vinfo,
7681 if (reduc_index == 2)
7682 vec_else_clause = reduc_def;
7685 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7687 vect_is_simple_use (else_clause, loop_vinfo, >emp, &dts[3]);
7694 = vect_get_vec_def_for_stmt_copy (dts[0],
7695 vec_oprnds0.pop ());
7698 = vect_get_vec_def_for_stmt_copy (dts[1],
7699 vec_oprnds1.pop ());
7701 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7702 vec_oprnds2.pop ());
7703 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7704 vec_oprnds3.pop ());
7709 vec_oprnds0.quick_push (vec_cond_lhs);
7711 vec_oprnds1.quick_push (vec_cond_rhs);
7712 vec_oprnds2.quick_push (vec_then_clause);
7713 vec_oprnds3.quick_push (vec_else_clause);
7716 /* Arguments are ready. Create the new vector stmt. */
7717 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7719 vec_then_clause = vec_oprnds2[i];
7720 vec_else_clause = vec_oprnds3[i];
7723 vec_compare = vec_cond_lhs;
7726 vec_cond_rhs = vec_oprnds1[i];
7727 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7728 vec_cond_lhs, vec_cond_rhs);
7730 new_temp = make_ssa_name (vec_dest);
7731 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7732 vec_compare, vec_then_clause,
7734 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7736 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7743 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7745 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7747 prev_stmt_info = vinfo_for_stmt (new_stmt);
7750 vec_oprnds0.release ();
7751 vec_oprnds1.release ();
7752 vec_oprnds2.release ();
7753 vec_oprnds3.release ();
7758 /* vectorizable_comparison.
7760 Check if STMT is comparison expression that can be vectorized.
7761 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7762 comparison, put it in VEC_STMT, and insert it at GSI.
7764 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7767 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7768 gimple **vec_stmt, tree reduc_def,
7771 tree lhs, rhs1, rhs2;
7772 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7773 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7774 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7775 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7777 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7778 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7781 enum tree_code code;
7782 stmt_vec_info prev_stmt_info = NULL;
7784 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7785 vec<tree> vec_oprnds0 = vNULL;
7786 vec<tree> vec_oprnds1 = vNULL;
7791 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7794 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7797 mask_type = vectype;
7798 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7800 if (slp_node || PURE_SLP_STMT (stmt_info))
7803 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7805 gcc_assert (ncopies >= 1);
7806 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7807 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7811 if (STMT_VINFO_LIVE_P (stmt_info))
7813 if (dump_enabled_p ())
7814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7815 "value used after loop.\n");
7819 if (!is_gimple_assign (stmt))
7822 code = gimple_assign_rhs_code (stmt);
7824 if (TREE_CODE_CLASS (code) != tcc_comparison)
7827 rhs1 = gimple_assign_rhs1 (stmt);
7828 rhs2 = gimple_assign_rhs2 (stmt);
7830 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7831 &dts[0], &vectype1))
7834 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7835 &dts[1], &vectype2))
7838 if (vectype1 && vectype2
7839 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7842 vectype = vectype1 ? vectype1 : vectype2;
7844 /* Invariant comparison. */
7847 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7848 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7851 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7856 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7857 vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
7858 return expand_vec_cmp_expr_p (vectype, mask_type);
7864 vec_oprnds0.create (1);
7865 vec_oprnds1.create (1);
7869 lhs = gimple_assign_lhs (stmt);
7870 mask = vect_create_destination_var (lhs, mask_type);
7872 /* Handle cmp expr. */
7873 for (j = 0; j < ncopies; j++)
7875 gassign *new_stmt = NULL;
7880 auto_vec<tree, 2> ops;
7881 auto_vec<vec<tree>, 2> vec_defs;
7883 ops.safe_push (rhs1);
7884 ops.safe_push (rhs2);
7885 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7886 vec_oprnds1 = vec_defs.pop ();
7887 vec_oprnds0 = vec_defs.pop ();
7891 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
7892 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
7897 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
7898 vec_oprnds0.pop ());
7899 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
7900 vec_oprnds1.pop ());
7905 vec_oprnds0.quick_push (vec_rhs1);
7906 vec_oprnds1.quick_push (vec_rhs2);
7909 /* Arguments are ready. Create the new vector stmt. */
7910 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
7912 vec_rhs2 = vec_oprnds1[i];
7914 new_temp = make_ssa_name (mask);
7915 new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
7916 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7918 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7925 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7927 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7929 prev_stmt_info = vinfo_for_stmt (new_stmt);
7932 vec_oprnds0.release ();
7933 vec_oprnds1.release ();
7938 /* Make sure the statement is vectorizable. */
7941 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
7943 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7944 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7945 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7947 tree scalar_type, vectype;
7948 gimple *pattern_stmt;
7949 gimple_seq pattern_def_seq;
7951 if (dump_enabled_p ())
7953 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7954 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7957 if (gimple_has_volatile_ops (stmt))
7959 if (dump_enabled_p ())
7960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7961 "not vectorized: stmt has volatile operands\n");
7966 /* Skip stmts that do not need to be vectorized. In loops this is expected
7968 - the COND_EXPR which is the loop exit condition
7969 - any LABEL_EXPRs in the loop
7970 - computations that are used only for array indexing or loop control.
7971 In basic blocks we only analyze statements that are a part of some SLP
7972 instance, therefore, all the statements are relevant.
7974 Pattern statement needs to be analyzed instead of the original statement
7975 if the original statement is not relevant. Otherwise, we analyze both
7976 statements. In basic blocks we are called from some SLP instance
7977 traversal, don't analyze pattern stmts instead, the pattern stmts
7978 already will be part of SLP instance. */
7980 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7981 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7982 && !STMT_VINFO_LIVE_P (stmt_info))
7984 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7986 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7987 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7989 /* Analyze PATTERN_STMT instead of the original stmt. */
7990 stmt = pattern_stmt;
7991 stmt_info = vinfo_for_stmt (pattern_stmt);
7992 if (dump_enabled_p ())
7994 dump_printf_loc (MSG_NOTE, vect_location,
7995 "==> examining pattern statement: ");
7996 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8001 if (dump_enabled_p ())
8002 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8007 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8010 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8011 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8013 /* Analyze PATTERN_STMT too. */
8014 if (dump_enabled_p ())
8016 dump_printf_loc (MSG_NOTE, vect_location,
8017 "==> examining pattern statement: ");
8018 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8021 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8025 if (is_pattern_stmt_p (stmt_info)
8027 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8029 gimple_stmt_iterator si;
8031 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8033 gimple *pattern_def_stmt = gsi_stmt (si);
8034 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8035 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8037 /* Analyze def stmt of STMT if it's a pattern stmt. */
8038 if (dump_enabled_p ())
8040 dump_printf_loc (MSG_NOTE, vect_location,
8041 "==> examining pattern def statement: ");
8042 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8045 if (!vect_analyze_stmt (pattern_def_stmt,
8046 need_to_vectorize, node))
8052 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8054 case vect_internal_def:
8057 case vect_reduction_def:
8058 case vect_nested_cycle:
8059 gcc_assert (!bb_vinfo
8060 && (relevance == vect_used_in_outer
8061 || relevance == vect_used_in_outer_by_reduction
8062 || relevance == vect_used_by_reduction
8063 || relevance == vect_unused_in_scope));
8066 case vect_induction_def:
8067 case vect_constant_def:
8068 case vect_external_def:
8069 case vect_unknown_def_type:
8076 gcc_assert (PURE_SLP_STMT (stmt_info));
8078 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8079 if (dump_enabled_p ())
8081 dump_printf_loc (MSG_NOTE, vect_location,
8082 "get vectype for scalar type: ");
8083 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8084 dump_printf (MSG_NOTE, "\n");
8087 vectype = get_vectype_for_scalar_type (scalar_type);
8090 if (dump_enabled_p ())
8092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8093 "not SLPed: unsupported data-type ");
8094 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8096 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8101 if (dump_enabled_p ())
8103 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8104 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8105 dump_printf (MSG_NOTE, "\n");
8108 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8111 if (STMT_VINFO_RELEVANT_P (stmt_info))
8113 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8114 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8115 || (is_gimple_call (stmt)
8116 && gimple_call_lhs (stmt) == NULL_TREE));
8117 *need_to_vectorize = true;
8120 if (PURE_SLP_STMT (stmt_info) && !node)
8122 dump_printf_loc (MSG_NOTE, vect_location,
8123 "handled only by SLP analysis\n");
8129 && (STMT_VINFO_RELEVANT_P (stmt_info)
8130 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8131 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8132 || vectorizable_conversion (stmt, NULL, NULL, node)
8133 || vectorizable_shift (stmt, NULL, NULL, node)
8134 || vectorizable_operation (stmt, NULL, NULL, node)
8135 || vectorizable_assignment (stmt, NULL, NULL, node)
8136 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8137 || vectorizable_call (stmt, NULL, NULL, node)
8138 || vectorizable_store (stmt, NULL, NULL, node)
8139 || vectorizable_reduction (stmt, NULL, NULL, node)
8140 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8141 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8145 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8146 || vectorizable_conversion (stmt, NULL, NULL, node)
8147 || vectorizable_shift (stmt, NULL, NULL, node)
8148 || vectorizable_operation (stmt, NULL, NULL, node)
8149 || vectorizable_assignment (stmt, NULL, NULL, node)
8150 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8151 || vectorizable_call (stmt, NULL, NULL, node)
8152 || vectorizable_store (stmt, NULL, NULL, node)
8153 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8154 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8159 if (dump_enabled_p ())
8161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8162 "not vectorized: relevant stmt not ");
8163 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8164 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8173 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8174 need extra handling, except for vectorizable reductions. */
8175 if (STMT_VINFO_LIVE_P (stmt_info)
8176 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8177 ok = vectorizable_live_operation (stmt, NULL, NULL);
8181 if (dump_enabled_p ())
8183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8184 "not vectorized: live stmt not ");
8185 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8186 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8196 /* Function vect_transform_stmt.
8198 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8201 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8202 bool *grouped_store, slp_tree slp_node,
8203 slp_instance slp_node_instance)
8205 bool is_store = false;
8206 gimple *vec_stmt = NULL;
8207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8210 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8212 switch (STMT_VINFO_TYPE (stmt_info))
8214 case type_demotion_vec_info_type:
8215 case type_promotion_vec_info_type:
8216 case type_conversion_vec_info_type:
8217 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8221 case induc_vec_info_type:
8222 gcc_assert (!slp_node);
8223 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8227 case shift_vec_info_type:
8228 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8232 case op_vec_info_type:
8233 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8237 case assignment_vec_info_type:
8238 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8242 case load_vec_info_type:
8243 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8248 case store_vec_info_type:
8249 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8251 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8253 /* In case of interleaving, the whole chain is vectorized when the
8254 last store in the chain is reached. Store stmts before the last
8255 one are skipped, and there vec_stmt_info shouldn't be freed
8257 *grouped_store = true;
8258 if (STMT_VINFO_VEC_STMT (stmt_info))
8265 case condition_vec_info_type:
8266 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8270 case comparison_vec_info_type:
8271 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8275 case call_vec_info_type:
8276 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8277 stmt = gsi_stmt (*gsi);
8278 if (is_gimple_call (stmt)
8279 && gimple_call_internal_p (stmt)
8280 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8284 case call_simd_clone_vec_info_type:
8285 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8286 stmt = gsi_stmt (*gsi);
8289 case reduc_vec_info_type:
8290 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8295 if (!STMT_VINFO_LIVE_P (stmt_info))
8297 if (dump_enabled_p ())
8298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8299 "stmt not supported.\n");
8304 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8305 This would break hybrid SLP vectorization. */
8307 gcc_assert (!vec_stmt
8308 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8310 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8311 is being vectorized, but outside the immediately enclosing loop. */
8313 && STMT_VINFO_LOOP_VINFO (stmt_info)
8314 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8315 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8316 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8317 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8318 || STMT_VINFO_RELEVANT (stmt_info) ==
8319 vect_used_in_outer_by_reduction))
8321 struct loop *innerloop = LOOP_VINFO_LOOP (
8322 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8323 imm_use_iterator imm_iter;
8324 use_operand_p use_p;
8328 if (dump_enabled_p ())
8329 dump_printf_loc (MSG_NOTE, vect_location,
8330 "Record the vdef for outer-loop vectorization.\n");
8332 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8333 (to be used when vectorizing outer-loop stmts that use the DEF of
8335 if (gimple_code (stmt) == GIMPLE_PHI)
8336 scalar_dest = PHI_RESULT (stmt);
8338 scalar_dest = gimple_assign_lhs (stmt);
8340 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8342 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8344 exit_phi = USE_STMT (use_p);
8345 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8350 /* Handle stmts whose DEF is used outside the loop-nest that is
8351 being vectorized. */
8352 if (STMT_VINFO_LIVE_P (stmt_info)
8353 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8355 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
8360 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8366 /* Remove a group of stores (for SLP or interleaving), free their
8370 vect_remove_stores (gimple *first_stmt)
8372 gimple *next = first_stmt;
8374 gimple_stmt_iterator next_si;
8378 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8380 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8381 if (is_pattern_stmt_p (stmt_info))
8382 next = STMT_VINFO_RELATED_STMT (stmt_info);
8383 /* Free the attached stmt_vec_info and remove the stmt. */
8384 next_si = gsi_for_stmt (next);
8385 unlink_stmt_vdef (next);
8386 gsi_remove (&next_si, true);
8387 release_defs (next);
8388 free_stmt_vec_info (next);
8394 /* Function new_stmt_vec_info.
8396 Create and initialize a new stmt_vec_info struct for STMT. */
8399 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8402 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8404 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8405 STMT_VINFO_STMT (res) = stmt;
8407 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8408 STMT_VINFO_LIVE_P (res) = false;
8409 STMT_VINFO_VECTYPE (res) = NULL;
8410 STMT_VINFO_VEC_STMT (res) = NULL;
8411 STMT_VINFO_VECTORIZABLE (res) = true;
8412 STMT_VINFO_IN_PATTERN_P (res) = false;
8413 STMT_VINFO_RELATED_STMT (res) = NULL;
8414 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8415 STMT_VINFO_DATA_REF (res) = NULL;
8416 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8418 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8419 STMT_VINFO_DR_OFFSET (res) = NULL;
8420 STMT_VINFO_DR_INIT (res) = NULL;
8421 STMT_VINFO_DR_STEP (res) = NULL;
8422 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8424 if (gimple_code (stmt) == GIMPLE_PHI
8425 && is_loop_header_bb_p (gimple_bb (stmt)))
8426 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8428 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8430 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8431 STMT_SLP_TYPE (res) = loop_vect;
8432 STMT_VINFO_NUM_SLP_USES (res) = 0;
8434 GROUP_FIRST_ELEMENT (res) = NULL;
8435 GROUP_NEXT_ELEMENT (res) = NULL;
8436 GROUP_SIZE (res) = 0;
8437 GROUP_STORE_COUNT (res) = 0;
8438 GROUP_GAP (res) = 0;
8439 GROUP_SAME_DR_STMT (res) = NULL;
8445 /* Create a hash table for stmt_vec_info. */
8448 init_stmt_vec_info_vec (void)
8450 gcc_assert (!stmt_vec_info_vec.exists ());
8451 stmt_vec_info_vec.create (50);
8455 /* Free hash table for stmt_vec_info. */
8458 free_stmt_vec_info_vec (void)
8462 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8464 free_stmt_vec_info (STMT_VINFO_STMT (info));
8465 gcc_assert (stmt_vec_info_vec.exists ());
8466 stmt_vec_info_vec.release ();
8470 /* Free stmt vectorization related info. */
8473 free_stmt_vec_info (gimple *stmt)
8475 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8480 /* Check if this statement has a related "pattern stmt"
8481 (introduced by the vectorizer during the pattern recognition
8482 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8484 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8486 stmt_vec_info patt_info
8487 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8490 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8491 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8492 gimple_set_bb (patt_stmt, NULL);
8493 tree lhs = gimple_get_lhs (patt_stmt);
8494 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8495 release_ssa_name (lhs);
8498 gimple_stmt_iterator si;
8499 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8501 gimple *seq_stmt = gsi_stmt (si);
8502 gimple_set_bb (seq_stmt, NULL);
8503 lhs = gimple_get_lhs (seq_stmt);
8504 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8505 release_ssa_name (lhs);
8506 free_stmt_vec_info (seq_stmt);
8509 free_stmt_vec_info (patt_stmt);
8513 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8514 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8515 set_vinfo_for_stmt (stmt, NULL);
8520 /* Function get_vectype_for_scalar_type_and_size.
8522 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8526 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8528 machine_mode inner_mode = TYPE_MODE (scalar_type);
8529 machine_mode simd_mode;
8530 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8537 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8538 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8541 /* For vector types of elements whose mode precision doesn't
8542 match their types precision we use a element type of mode
8543 precision. The vectorization routines will have to make sure
8544 they support the proper result truncation/extension.
8545 We also make sure to build vector types with INTEGER_TYPE
8546 component type only. */
8547 if (INTEGRAL_TYPE_P (scalar_type)
8548 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8549 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8550 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8551 TYPE_UNSIGNED (scalar_type));
8553 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8554 When the component mode passes the above test simply use a type
8555 corresponding to that mode. The theory is that any use that
8556 would cause problems with this will disable vectorization anyway. */
8557 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8558 && !INTEGRAL_TYPE_P (scalar_type))
8559 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8561 /* We can't build a vector type of elements with alignment bigger than
8563 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8564 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8565 TYPE_UNSIGNED (scalar_type));
8567 /* If we felt back to using the mode fail if there was
8568 no scalar type for it. */
8569 if (scalar_type == NULL_TREE)
8572 /* If no size was supplied use the mode the target prefers. Otherwise
8573 lookup a vector mode of the specified size. */
8575 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8577 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8578 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8582 vectype = build_vector_type (scalar_type, nunits);
8584 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8585 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8591 unsigned int current_vector_size;
8593 /* Function get_vectype_for_scalar_type.
8595 Returns the vector type corresponding to SCALAR_TYPE as supported
8599 get_vectype_for_scalar_type (tree scalar_type)
8602 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8603 current_vector_size);
8605 && current_vector_size == 0)
8606 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8610 /* Function get_mask_type_for_scalar_type.
8612 Returns the mask type corresponding to a result of comparison
8613 of vectors of specified SCALAR_TYPE as supported by target. */
8616 get_mask_type_for_scalar_type (tree scalar_type)
8618 tree vectype = get_vectype_for_scalar_type (scalar_type);
8623 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8624 current_vector_size);
8627 /* Function get_same_sized_vectype
8629 Returns a vector type corresponding to SCALAR_TYPE of size
8630 VECTOR_TYPE if supported by the target. */
8633 get_same_sized_vectype (tree scalar_type, tree vector_type)
8635 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8636 return build_same_sized_truth_vector_type (vector_type);
8638 return get_vectype_for_scalar_type_and_size
8639 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8642 /* Function vect_is_simple_use.
8645 VINFO - the vect info of the loop or basic block that is being vectorized.
8646 OPERAND - operand in the loop or bb.
8648 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8649 DT - the type of definition
8651 Returns whether a stmt with OPERAND can be vectorized.
8652 For loops, supportable operands are constants, loop invariants, and operands
8653 that are defined by the current iteration of the loop. Unsupportable
8654 operands are those that are defined by a previous iteration of the loop (as
8655 is the case in reduction/induction computations).
8656 For basic blocks, supportable operands are constants and bb invariants.
8657 For now, operands defined outside the basic block are not supported. */
8660 vect_is_simple_use (tree operand, vec_info *vinfo,
8661 gimple **def_stmt, enum vect_def_type *dt)
8664 *dt = vect_unknown_def_type;
8666 if (dump_enabled_p ())
8668 dump_printf_loc (MSG_NOTE, vect_location,
8669 "vect_is_simple_use: operand ");
8670 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8671 dump_printf (MSG_NOTE, "\n");
8674 if (CONSTANT_CLASS_P (operand))
8676 *dt = vect_constant_def;
8680 if (is_gimple_min_invariant (operand))
8682 *dt = vect_external_def;
8686 if (TREE_CODE (operand) != SSA_NAME)
8688 if (dump_enabled_p ())
8689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8694 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8696 *dt = vect_external_def;
8700 *def_stmt = SSA_NAME_DEF_STMT (operand);
8701 if (dump_enabled_p ())
8703 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8704 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8707 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8708 *dt = vect_external_def;
8711 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8712 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8715 if (dump_enabled_p ())
8717 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8720 case vect_uninitialized_def:
8721 dump_printf (MSG_NOTE, "uninitialized\n");
8723 case vect_constant_def:
8724 dump_printf (MSG_NOTE, "constant\n");
8726 case vect_external_def:
8727 dump_printf (MSG_NOTE, "external\n");
8729 case vect_internal_def:
8730 dump_printf (MSG_NOTE, "internal\n");
8732 case vect_induction_def:
8733 dump_printf (MSG_NOTE, "induction\n");
8735 case vect_reduction_def:
8736 dump_printf (MSG_NOTE, "reduction\n");
8738 case vect_double_reduction_def:
8739 dump_printf (MSG_NOTE, "double reduction\n");
8741 case vect_nested_cycle:
8742 dump_printf (MSG_NOTE, "nested cycle\n");
8744 case vect_unknown_def_type:
8745 dump_printf (MSG_NOTE, "unknown\n");
8750 if (*dt == vect_unknown_def_type)
8752 if (dump_enabled_p ())
8753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8754 "Unsupported pattern.\n");
8758 switch (gimple_code (*def_stmt))
8765 if (dump_enabled_p ())
8766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8767 "unsupported defining stmt:\n");
8774 /* Function vect_is_simple_use.
8776 Same as vect_is_simple_use but also determines the vector operand
8777 type of OPERAND and stores it to *VECTYPE. If the definition of
8778 OPERAND is vect_uninitialized_def, vect_constant_def or
8779 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8780 is responsible to compute the best suited vector type for the
8784 vect_is_simple_use (tree operand, vec_info *vinfo,
8785 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
8787 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
8790 /* Now get a vector type if the def is internal, otherwise supply
8791 NULL_TREE and leave it up to the caller to figure out a proper
8792 type for the use stmt. */
8793 if (*dt == vect_internal_def
8794 || *dt == vect_induction_def
8795 || *dt == vect_reduction_def
8796 || *dt == vect_double_reduction_def
8797 || *dt == vect_nested_cycle)
8799 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8801 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8802 && !STMT_VINFO_RELEVANT (stmt_info)
8803 && !STMT_VINFO_LIVE_P (stmt_info))
8804 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8806 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8807 gcc_assert (*vectype != NULL_TREE);
8809 else if (*dt == vect_uninitialized_def
8810 || *dt == vect_constant_def
8811 || *dt == vect_external_def)
8812 *vectype = NULL_TREE;
8820 /* Function supportable_widening_operation
8822 Check whether an operation represented by the code CODE is a
8823 widening operation that is supported by the target platform in
8824 vector form (i.e., when operating on arguments of type VECTYPE_IN
8825 producing a result of type VECTYPE_OUT).
8827 Widening operations we currently support are NOP (CONVERT), FLOAT
8828 and WIDEN_MULT. This function checks if these operations are supported
8829 by the target platform either directly (via vector tree-codes), or via
8833 - CODE1 and CODE2 are codes of vector operations to be used when
8834 vectorizing the operation, if available.
8835 - MULTI_STEP_CVT determines the number of required intermediate steps in
8836 case of multi-step conversion (like char->short->int - in that case
8837 MULTI_STEP_CVT will be 1).
8838 - INTERM_TYPES contains the intermediate type required to perform the
8839 widening operation (short in the above example). */
8842 supportable_widening_operation (enum tree_code code, gimple *stmt,
8843 tree vectype_out, tree vectype_in,
8844 enum tree_code *code1, enum tree_code *code2,
8845 int *multi_step_cvt,
8846 vec<tree> *interm_types)
8848 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8849 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8850 struct loop *vect_loop = NULL;
8851 machine_mode vec_mode;
8852 enum insn_code icode1, icode2;
8853 optab optab1, optab2;
8854 tree vectype = vectype_in;
8855 tree wide_vectype = vectype_out;
8856 enum tree_code c1, c2;
8858 tree prev_type, intermediate_type;
8859 machine_mode intermediate_mode, prev_mode;
8860 optab optab3, optab4;
8862 *multi_step_cvt = 0;
8864 vect_loop = LOOP_VINFO_LOOP (loop_info);
8868 case WIDEN_MULT_EXPR:
8869 /* The result of a vectorized widening operation usually requires
8870 two vectors (because the widened results do not fit into one vector).
8871 The generated vector results would normally be expected to be
8872 generated in the same order as in the original scalar computation,
8873 i.e. if 8 results are generated in each vector iteration, they are
8874 to be organized as follows:
8875 vect1: [res1,res2,res3,res4],
8876 vect2: [res5,res6,res7,res8].
8878 However, in the special case that the result of the widening
8879 operation is used in a reduction computation only, the order doesn't
8880 matter (because when vectorizing a reduction we change the order of
8881 the computation). Some targets can take advantage of this and
8882 generate more efficient code. For example, targets like Altivec,
8883 that support widen_mult using a sequence of {mult_even,mult_odd}
8884 generate the following vectors:
8885 vect1: [res1,res3,res5,res7],
8886 vect2: [res2,res4,res6,res8].
8888 When vectorizing outer-loops, we execute the inner-loop sequentially
8889 (each vectorized inner-loop iteration contributes to VF outer-loop
8890 iterations in parallel). We therefore don't allow to change the
8891 order of the computation in the inner-loop during outer-loop
8893 /* TODO: Another case in which order doesn't *really* matter is when we
8894 widen and then contract again, e.g. (short)((int)x * y >> 8).
8895 Normally, pack_trunc performs an even/odd permute, whereas the
8896 repack from an even/odd expansion would be an interleave, which
8897 would be significantly simpler for e.g. AVX2. */
8898 /* In any case, in order to avoid duplicating the code below, recurse
8899 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8900 are properly set up for the caller. If we fail, we'll continue with
8901 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8903 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8904 && !nested_in_vect_loop_p (vect_loop, stmt)
8905 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8906 stmt, vectype_out, vectype_in,
8907 code1, code2, multi_step_cvt,
8910 /* Elements in a vector with vect_used_by_reduction property cannot
8911 be reordered if the use chain with this property does not have the
8912 same operation. One such an example is s += a * b, where elements
8913 in a and b cannot be reordered. Here we check if the vector defined
8914 by STMT is only directly used in the reduction statement. */
8915 tree lhs = gimple_assign_lhs (stmt);
8916 use_operand_p dummy;
8918 stmt_vec_info use_stmt_info = NULL;
8919 if (single_imm_use (lhs, &dummy, &use_stmt)
8920 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8921 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8924 c1 = VEC_WIDEN_MULT_LO_EXPR;
8925 c2 = VEC_WIDEN_MULT_HI_EXPR;
8938 case VEC_WIDEN_MULT_EVEN_EXPR:
8939 /* Support the recursion induced just above. */
8940 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8941 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8944 case WIDEN_LSHIFT_EXPR:
8945 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8946 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8950 c1 = VEC_UNPACK_LO_EXPR;
8951 c2 = VEC_UNPACK_HI_EXPR;
8955 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8956 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8959 case FIX_TRUNC_EXPR:
8960 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8961 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8962 computing the operation. */
8969 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8972 if (code == FIX_TRUNC_EXPR)
8974 /* The signedness is determined from output operand. */
8975 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8976 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8980 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8981 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8984 if (!optab1 || !optab2)
8987 vec_mode = TYPE_MODE (vectype);
8988 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8989 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8995 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8996 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8997 /* For scalar masks we may have different boolean
8998 vector types having the same QImode. Thus we
8999 add additional check for elements number. */
9000 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9001 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9002 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9004 /* Check if it's a multi-step conversion that can be done using intermediate
9007 prev_type = vectype;
9008 prev_mode = vec_mode;
9010 if (!CONVERT_EXPR_CODE_P (code))
9013 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9014 intermediate steps in promotion sequence. We try
9015 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9017 interm_types->create (MAX_INTERM_CVT_STEPS);
9018 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9020 intermediate_mode = insn_data[icode1].operand[0].mode;
9021 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9024 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9025 current_vector_size);
9026 if (intermediate_mode != TYPE_MODE (intermediate_type))
9031 = lang_hooks.types.type_for_mode (intermediate_mode,
9032 TYPE_UNSIGNED (prev_type));
9034 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9035 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9037 if (!optab3 || !optab4
9038 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9039 || insn_data[icode1].operand[0].mode != intermediate_mode
9040 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9041 || insn_data[icode2].operand[0].mode != intermediate_mode
9042 || ((icode1 = optab_handler (optab3, intermediate_mode))
9043 == CODE_FOR_nothing)
9044 || ((icode2 = optab_handler (optab4, intermediate_mode))
9045 == CODE_FOR_nothing))
9048 interm_types->quick_push (intermediate_type);
9049 (*multi_step_cvt)++;
9051 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9052 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9053 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9054 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9055 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9057 prev_type = intermediate_type;
9058 prev_mode = intermediate_mode;
9061 interm_types->release ();
9066 /* Function supportable_narrowing_operation
9068 Check whether an operation represented by the code CODE is a
9069 narrowing operation that is supported by the target platform in
9070 vector form (i.e., when operating on arguments of type VECTYPE_IN
9071 and producing a result of type VECTYPE_OUT).
9073 Narrowing operations we currently support are NOP (CONVERT) and
9074 FIX_TRUNC. This function checks if these operations are supported by
9075 the target platform directly via vector tree-codes.
9078 - CODE1 is the code of a vector operation to be used when
9079 vectorizing the operation, if available.
9080 - MULTI_STEP_CVT determines the number of required intermediate steps in
9081 case of multi-step conversion (like int->short->char - in that case
9082 MULTI_STEP_CVT will be 1).
9083 - INTERM_TYPES contains the intermediate type required to perform the
9084 narrowing operation (short in the above example). */
9087 supportable_narrowing_operation (enum tree_code code,
9088 tree vectype_out, tree vectype_in,
9089 enum tree_code *code1, int *multi_step_cvt,
9090 vec<tree> *interm_types)
9092 machine_mode vec_mode;
9093 enum insn_code icode1;
9094 optab optab1, interm_optab;
9095 tree vectype = vectype_in;
9096 tree narrow_vectype = vectype_out;
9098 tree intermediate_type, prev_type;
9099 machine_mode intermediate_mode, prev_mode;
9103 *multi_step_cvt = 0;
9107 c1 = VEC_PACK_TRUNC_EXPR;
9110 case FIX_TRUNC_EXPR:
9111 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9115 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9116 tree code and optabs used for computing the operation. */
9123 if (code == FIX_TRUNC_EXPR)
9124 /* The signedness is determined from output operand. */
9125 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9127 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9132 vec_mode = TYPE_MODE (vectype);
9133 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9138 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9139 /* For scalar masks we may have different boolean
9140 vector types having the same QImode. Thus we
9141 add additional check for elements number. */
9142 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9143 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9144 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9146 /* Check if it's a multi-step conversion that can be done using intermediate
9148 prev_mode = vec_mode;
9149 prev_type = vectype;
9150 if (code == FIX_TRUNC_EXPR)
9151 uns = TYPE_UNSIGNED (vectype_out);
9153 uns = TYPE_UNSIGNED (vectype);
9155 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9156 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9157 costly than signed. */
9158 if (code == FIX_TRUNC_EXPR && uns)
9160 enum insn_code icode2;
9163 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9165 = optab_for_tree_code (c1, intermediate_type, optab_default);
9166 if (interm_optab != unknown_optab
9167 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9168 && insn_data[icode1].operand[0].mode
9169 == insn_data[icode2].operand[0].mode)
9172 optab1 = interm_optab;
9177 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9178 intermediate steps in promotion sequence. We try
9179 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9180 interm_types->create (MAX_INTERM_CVT_STEPS);
9181 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9183 intermediate_mode = insn_data[icode1].operand[0].mode;
9184 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9187 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9188 current_vector_size);
9189 if (intermediate_mode != TYPE_MODE (intermediate_type))
9194 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9196 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9199 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9200 || insn_data[icode1].operand[0].mode != intermediate_mode
9201 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9202 == CODE_FOR_nothing))
9205 interm_types->quick_push (intermediate_type);
9206 (*multi_step_cvt)++;
9208 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9209 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9210 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9211 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9213 prev_mode = intermediate_mode;
9214 prev_type = intermediate_type;
9215 optab1 = interm_optab;
9218 interm_types->release ();