/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
                                stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                                misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
                          count, kind, stmt_info, misalign, where);
}
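
/* Illustrative usage sketch (added for exposition, not part of the
   original sources): costing two copies of a vector store into the
   body cost vector might look like

     unsigned cost = record_stmt_cost (&body_cost_vec, 2, vector_store,
                                       stmt_info, 0, vect_body);

   With a non-NULL cost vector the entry is only queued and the return
   value is the generic builtin_vectorization_cost estimate; with a
   NULL vector the cost goes straight to the target's add_stmt_cost
   hook.  */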
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
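
/* Illustration (added for exposition, not part of the original
   comment): in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;    <-- relevant: alters memory (has a vdef)
         s = s + b[i];       <-- live if s is used after the loop
       }

   the store makes the first stmt vect_used_in_scope, while the final
   value of s escaping through the loop-closed exit phi makes the
   second one live.  */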
static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;

  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
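
/* Illustration of case 1 (an assumed example, added for exposition):
   in x = a[i], the use of i only feeds the address computation of the
   data reference, so the relevance of i's defining stmt is left
   unchanged unless FORCE is set.  */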
static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...             */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */
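
      /* Illustrative example (added for exposition): for a sum
         reduction s += a[i], the partial sums may be combined in any
         order by the vector epilogue, so stmts used only by the
         reduction need a weaker ordering guarantee than stmts whose
         results are used directly.  */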
      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");

              return false;
            }
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");

              return false;
            }
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, relevant, &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
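
/* Worked example (added for exposition, assuming vect_pow2 (n) is
   2**n): a two-step promotion (PWR = 1) is charged
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote
   operations by the loop below, matching the doubling per additional
   step described above.  */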
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */
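
/* E.g. (illustration): for a group of four interleaved stores, the
   call on the GROUP_FIRST_ELEMENT stmt returns 4 and the calls on the
   other three group members return 1, so the whole group is costed
   exactly once.  */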
static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple *first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
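
  /* Worked example (added for exposition): interleaving a group of
     GROUP_SIZE = 4 stores with NCOPIES = 1 is costed below as
     1 * ceil_log2 (4) * 4 = 8 vec_perm operations.  */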
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple *first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");
        break;
      }

    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
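
/* For instance (an assumed example, added for exposition): called with
   an integer constant 3 and a four-element vector type, this emits an
   init stmt building { 3, 3, 3, 3 } in the loop preheader (GSI ==
   NULL) and returns the new SSA name holding that vector.  */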
tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                init_stmt = gimple_build_assign (new_temp,
                                                 fold_build1 (VIEW_CONVERT_EXPR,
                                                              TREE_TYPE (type),
                                                              val));
              else
                init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      /* Code should use get_initial_def_for_reduction.  */
      gcc_unreachable ();

    /* operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   the vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0     VS1.1
                        VS1.1: vx.1 = memref1     VS1.2
                        VS1.2: vx.2 = memref2     VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ... VSnew.1
                        VSnew.1: vz1 = vx.1 + ... VSnew.2
                        VSnew.2: vz2 = vx.2 + ... VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
   To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

   To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

   For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */
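
/* For example (illustration only): a call to sqrtf corresponds to
   CFN_SQRT; if the target has a direct optab for IFN_SQRT on the
   vector type in question, the function below returns IFN_SQRT,
   otherwise IFN_LAST.  */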
static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}


static tree permute_vec_elements (tree, tree, tree, gimple *,
                                  gimple_stmt_iterator *);
/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
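
/* The scalar calls handled here have the form

     MASK_LOAD (ptr, align, mask)               or
     MASK_STORE (ptr, align, mask, value)

   which is why the mask is fetched from argument 2 and the stored
   value from argument 3 below.  */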
static bool
vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree rhs_vectype = NULL_TREE;
  tree mask_vectype;
  tree elem_type;
  gimple *new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple *ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple *def_stmt;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);

  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !vec_stmt)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_P (stmt_info))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
    return false;

  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
      || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
        return false;
    }

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                               &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
                               &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
        = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "masked gather with integer mask not supported.");
          return false;
        }
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
                                 ? STMT_VINFO_DR_STEP (stmt_info)
                                 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
           || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
                                          TYPE_MODE (mask_vectype),
                                          !is_store)
           || (rhs_vectype
               && !useless_type_conversion_p (vectype, rhs_vectype)))
    return false;
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
        vect_model_store_cost (stmt_info, ncopies, false, dt,
                               NULL, NULL, NULL);
      else
        vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /* Transform.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
      tree mask_perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
          for (i = 0; i < nunits; ++i)
            sel[i] = i | gather_off_nunits;
          mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
        }
      else
        gcc_unreachable ();

      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          if (mask_perm_mask && (j & 1))
            mask_op = permute_vec_elements (mask_op, mask_op,
                                            mask_perm_mask, stmt, gsi);
          else
            {
              if (j == 0)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              else
                {
                  vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
                  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
                }

              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
                {
                  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
                              == TYPE_VECTOR_SUBPARTS (masktype));
                  var = vect_get_new_ssa_name (masktype, vect_simple_var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mask_op = var;
                }
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
                                 scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
        {
          stmt = STMT_VINFO_RELATED_STMT (stmt_info);
          stmt_info = vinfo_for_stmt (stmt);
        }
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              tree rhs = gimple_call_arg (stmt, 3);
              vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              /* We should have caught mismatched types earlier.  */
              gcc_assert (useless_type_conversion_p (vectype,
                                                     TREE_TYPE (vec_rhs)));
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
              vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
                                    misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
                                          ptr, vec_mask, vec_rhs);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else
    {
2079 tree vec_mask = NULL_TREE;
2080 prev_stmt_info = NULL;
2081 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2082 for (i = 0; i < ncopies; i++)
2084 unsigned align, misalign;
2088 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2089 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2090 NULL_TREE, &dummy, gsi,
2091 &ptr_incr, false, &inv_p);
2092 gcc_assert (!inv_p);
2096 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2097 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2098 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2099 TYPE_SIZE_UNIT (vectype));
2102 align = TYPE_ALIGN_UNIT (vectype);
2103 if (aligned_access_p (dr))
2105 else if (DR_MISALIGNMENT (dr) == -1)
2107 align = TYPE_ALIGN_UNIT (elem_type);
2111 misalign = DR_MISALIGNMENT (dr);
2112 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2114 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2115 misalign ? misalign & -misalign : align);
2117 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2119 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2120 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2122 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2124 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2125 prev_stmt_info = vinfo_for_stmt (new_stmt);
2131 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2132 from the IL.  */
2133 if (STMT_VINFO_RELATED_STMT (stmt_info))
2135 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2136 stmt_info = vinfo_for_stmt (stmt);
2138 tree lhs = gimple_call_lhs (stmt);
2139 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2140 set_vinfo_for_stmt (new_stmt, stmt_info);
2141 set_vinfo_for_stmt (stmt, NULL);
2142 STMT_VINFO_STMT (stmt_info) = new_stmt;
2143 gsi_replace (gsi, new_stmt, true);
2149 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2150 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2151 in a single step.  On success, store the binary pack code in
2152 *CONVERT_CODE.  */
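/* Illustrative sketch (hypothetical types, not from the original
   sources): with VECTYPE_IN of V2DI and VECTYPE_OUT of V4SI, the
   narrowing is a single pack step, so *CONVERT_CODE would be set to
   VEC_PACK_TRUNC_EXPR; narrowing V2DI all the way down to V8QI needs
   intermediate types (multi_step_cvt != 0), so this function would
   return false for that pair.  */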
2155 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2156 tree_code *convert_code)
2158 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2159 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2163 int multi_step_cvt = 0;
2164 auto_vec <tree, 8> interm_types;
2165 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2166 &code, &multi_step_cvt,
2171 *convert_code = code;
2175 /* Function vectorizable_call.
2177 Check if GS performs a function call that can be vectorized.
2178 If VEC_STMT is also passed, vectorize GS: create a vectorized
2179 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2180 Return FALSE if GS is not a vectorizable stmt, TRUE otherwise. */
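/* For illustration (a sketch; SSA names hypothetical): a relevant call
     x_1 = sqrtf (y_2);
   in a loop vectorized with V4SF vectors becomes an internal-function
   or target-builtin call on vectors, roughly
     vect_x_1.3 = .SQRT (vect_y_2.4);
   with NCOPIES such statements chained through STMT_VINFO_RELATED_STMT
   when the vectorization factor exceeds the vector width.  */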
2183 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2190 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2191 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2192 tree vectype_out, vectype_in;
2195 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2196 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2197 vec_info *vinfo = stmt_info->vinfo;
2198 tree fndecl, new_temp, rhs_type;
2200 enum vect_def_type dt[3]
2201 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2202 gimple *new_stmt = NULL;
2204 vec<tree> vargs = vNULL;
2205 enum { NARROW, NONE, WIDEN } modifier;
2209 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2212 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2216 /* Is GS a vectorizable call? */
2217 stmt = dyn_cast <gcall *> (gs);
2221 if (gimple_call_internal_p (stmt)
2222 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2223 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2224 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2227 if (gimple_call_lhs (stmt) == NULL_TREE
2228 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2231 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2233 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2235 /* Process function arguments. */
2236 rhs_type = NULL_TREE;
2237 vectype_in = NULL_TREE;
2238 nargs = gimple_call_num_args (stmt);
2240 /* Bail out if the function has more than three arguments; we do not have
2241 interesting builtin functions to vectorize with more than two arguments
2242 except for fma.  A call with no arguments is not vectorizable either. */
2243 if (nargs == 0 || nargs > 3)
2246 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2247 if (gimple_call_internal_p (stmt)
2248 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2251 rhs_type = unsigned_type_node;
2254 for (i = 0; i < nargs; i++)
2258 op = gimple_call_arg (stmt, i);
2260 /* We can only handle calls with arguments of the same type. */
2262 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2264 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2266 "argument types differ.\n");
2270 rhs_type = TREE_TYPE (op);
2272 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "use not simple.\n");
2281 vectype_in = opvectype;
2283 && opvectype != vectype_in)
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 "argument vector types differ.\n");
2291 /* If all arguments are external or constant defs, use a vector type with
2292 the same size as the output vector type. */
2294 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2296 gcc_assert (vectype_in);
2299 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 "no vectype for scalar type ");
2303 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2304 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2311 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2312 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2313 if (nunits_in == nunits_out / 2)
2315 else if (nunits_out == nunits_in)
2317 else if (nunits_out == nunits_in / 2)
2322 /* We only handle functions that do not read or clobber memory. */
2323 if (gimple_vuse (stmt))
2325 if (dump_enabled_p ())
2326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2327 "function reads from or writes to memory.\n");
2331 /* For now, we only vectorize functions if a target specific builtin
2332 is available. TODO -- in some cases, it might be profitable to
2333 insert the calls for pieces of the vector, in order to be able
2334 to vectorize other operations in the loop. */
2336 internal_fn ifn = IFN_LAST;
2337 combined_fn cfn = gimple_call_combined_fn (stmt);
2338 tree callee = gimple_call_fndecl (stmt);
2340 /* First try using an internal function. */
2341 tree_code convert_code = ERROR_MARK;
2343 && (modifier == NONE
2344 || (modifier == NARROW
2345 && simple_integer_narrowing (vectype_out, vectype_in,
2347 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2350 /* If that fails, try asking for a target-specific built-in function. */
2351 if (ifn == IFN_LAST)
2353 if (cfn != CFN_LAST)
2354 fndecl = targetm.vectorize.builtin_vectorized_function
2355 (cfn, vectype_out, vectype_in);
2357 fndecl = targetm.vectorize.builtin_md_vectorized_function
2358 (callee, vectype_out, vectype_in);
2361 if (ifn == IFN_LAST && !fndecl)
2363 if (cfn == CFN_GOMP_SIMD_LANE
2366 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2367 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2368 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2369 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2371 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2372 { 0, 1, 2, ... vf - 1 } vector. */
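/* E.g. with nunits_out == 4 the first copy (j == 0) materializes the
   constant { 0, 1, 2, 3 } and the second copy (j == 1) materializes
   { 4, 5, 6, 7 }, per the j * nunits_out + k computation below.  */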
2373 gcc_assert (nargs == 0);
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2379 "function is not vectorizable.\n");
2386 else if (modifier == NARROW && ifn == IFN_LAST)
2387 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2389 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2391 /* Sanity check: make sure that at least one copy of the vectorized stmt
2392 needs to be generated. */
2393 gcc_assert (ncopies >= 1);
2395 if (!vec_stmt) /* transformation not required. */
2397 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2398 if (dump_enabled_p ())
2399 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2400 "\n");
2401 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2402 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2403 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2404 vec_promote_demote, stmt_info, 0, vect_body);
2411 if (dump_enabled_p ())
2412 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2415 scalar_dest = gimple_call_lhs (stmt);
2416 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2418 prev_stmt_info = NULL;
2419 if (modifier == NONE || ifn != IFN_LAST)
2421 tree prev_res = NULL_TREE;
2422 for (j = 0; j < ncopies; ++j)
2424 /* Build argument list for the vectorized call. */
2426 vargs.create (nargs);
2432 auto_vec<vec<tree> > vec_defs (nargs);
2433 vec<tree> vec_oprnds0;
2435 for (i = 0; i < nargs; i++)
2436 vargs.quick_push (gimple_call_arg (stmt, i));
2437 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2438 vec_oprnds0 = vec_defs[0];
2440 /* Arguments are ready. Create the new vector stmt. */
2441 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2444 for (k = 0; k < nargs; k++)
2446 vec<tree> vec_oprndsk = vec_defs[k];
2447 vargs[k] = vec_oprndsk[i];
2449 if (modifier == NARROW)
2451 tree half_res = make_ssa_name (vectype_in);
2452 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2453 gimple_call_set_lhs (new_stmt, half_res);
2454 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2457 prev_res = half_res;
2460 new_temp = make_ssa_name (vec_dest);
2461 new_stmt = gimple_build_assign (new_temp, convert_code,
2462 prev_res, half_res);
2466 if (ifn != IFN_LAST)
2467 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2469 new_stmt = gimple_build_call_vec (fndecl, vargs);
2470 new_temp = make_ssa_name (vec_dest, new_stmt);
2471 gimple_call_set_lhs (new_stmt, new_temp);
2473 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2474 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2477 for (i = 0; i < nargs; i++)
2479 vec<tree> vec_oprndsi = vec_defs[i];
2480 vec_oprndsi.release ();
2485 for (i = 0; i < nargs; i++)
2487 op = gimple_call_arg (stmt, i);
2490 = vect_get_vec_def_for_operand (op, stmt);
2493 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2495 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2498 vargs.quick_push (vec_oprnd0);
2501 if (gimple_call_internal_p (stmt)
2502 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2504 tree *v = XALLOCAVEC (tree, nunits_out);
2506 for (k = 0; k < nunits_out; ++k)
2507 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2508 tree cst = build_vector (vectype_out, v);
2510 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2511 gimple *init_stmt = gimple_build_assign (new_var, cst);
2512 vect_init_vector_1 (stmt, init_stmt, NULL);
2513 new_temp = make_ssa_name (vec_dest);
2514 new_stmt = gimple_build_assign (new_temp, new_var);
2516 else if (modifier == NARROW)
2518 tree half_res = make_ssa_name (vectype_in);
2519 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2520 gimple_call_set_lhs (new_stmt, half_res);
2521 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2524 prev_res = half_res;
2527 new_temp = make_ssa_name (vec_dest);
2528 new_stmt = gimple_build_assign (new_temp, convert_code,
2529 prev_res, half_res);
2533 if (ifn != IFN_LAST)
2534 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2536 new_stmt = gimple_build_call_vec (fndecl, vargs);
2537 new_temp = make_ssa_name (vec_dest, new_stmt);
2538 gimple_call_set_lhs (new_stmt, new_temp);
2540 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2542 if (j == (modifier == NARROW ? 1 : 0))
2543 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2545 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2547 prev_stmt_info = vinfo_for_stmt (new_stmt);
2550 else if (modifier == NARROW)
2552 for (j = 0; j < ncopies; ++j)
2554 /* Build argument list for the vectorized call. */
2556 vargs.create (nargs * 2);
2562 auto_vec<vec<tree> > vec_defs (nargs);
2563 vec<tree> vec_oprnds0;
2565 for (i = 0; i < nargs; i++)
2566 vargs.quick_push (gimple_call_arg (stmt, i));
2567 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2568 vec_oprnds0 = vec_defs[0];
2570 /* Arguments are ready. Create the new vector stmt. */
2571 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2575 for (k = 0; k < nargs; k++)
2577 vec<tree> vec_oprndsk = vec_defs[k];
2578 vargs.quick_push (vec_oprndsk[i]);
2579 vargs.quick_push (vec_oprndsk[i + 1]);
2581 if (ifn != IFN_LAST)
2582 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2584 new_stmt = gimple_build_call_vec (fndecl, vargs);
2585 new_temp = make_ssa_name (vec_dest, new_stmt);
2586 gimple_call_set_lhs (new_stmt, new_temp);
2587 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2588 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2591 for (i = 0; i < nargs; i++)
2593 vec<tree> vec_oprndsi = vec_defs[i];
2594 vec_oprndsi.release ();
2599 for (i = 0; i < nargs; i++)
2601 op = gimple_call_arg (stmt, i);
2605 = vect_get_vec_def_for_operand (op, stmt);
2607 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2611 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2613 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2615 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2618 vargs.quick_push (vec_oprnd0);
2619 vargs.quick_push (vec_oprnd1);
2622 new_stmt = gimple_build_call_vec (fndecl, vargs);
2623 new_temp = make_ssa_name (vec_dest, new_stmt);
2624 gimple_call_set_lhs (new_stmt, new_temp);
2625 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2628 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2630 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2632 prev_stmt_info = vinfo_for_stmt (new_stmt);
2635 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2638 /* No current target implements this case. */
2643 /* The call in STMT might prevent it from being removed by DCE.
2644 We cannot remove it here, however, because of the way the SSA name
2645 it defines is mapped to the new definition.  So just replace
2646 the RHS of the statement with something harmless. */
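/* For example (hypothetical names): a scalar call
     x_5 = foo (y_6);
   whose result has been fully vectorized is rewritten as
     x_5 = 0;
   i.e. build_zero_cst of the scalar result type, which keeps the SSA
   definition alive but is trivially dead.  */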
2651 type = TREE_TYPE (scalar_dest);
2652 if (is_pattern_stmt_p (stmt_info))
2653 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2655 lhs = gimple_call_lhs (stmt);
2657 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2658 set_vinfo_for_stmt (new_stmt, stmt_info);
2659 set_vinfo_for_stmt (stmt, NULL);
2660 STMT_VINFO_STMT (stmt_info) = new_stmt;
2661 gsi_replace (gsi, new_stmt, false);
2667 struct simd_call_arg_info
2668 {
2669 tree vectype;
2670 tree op;
2671 enum vect_def_type dt;
2672 HOST_WIDE_INT linear_step;
2673 unsigned int align;
2674 bool simd_lane_linear;
2675 };
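/* Usage sketch (hypothetical example): for a pointer argument advanced
   as  p_7 = p_6 + 16  on every iteration, dt is vect_internal_def, op
   records the initial value of the pointer and linear_step is 16;
   simd_lane_linear instead marks values that advance linearly only
   within one batch of simd lanes, not across the whole loop.  */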
2677 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2678 is linear within simd lane (but not within whole loop), note it in
2679 ARGINFO.  */
2681 static void
2682 vect_simd_lane_linear (tree op, struct loop *loop,
2683 struct simd_call_arg_info *arginfo)
2685 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2687 if (!is_gimple_assign (def_stmt)
2688 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2689 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2692 tree base = gimple_assign_rhs1 (def_stmt);
2693 HOST_WIDE_INT linear_step = 0;
2694 tree v = gimple_assign_rhs2 (def_stmt);
2695 while (TREE_CODE (v) == SSA_NAME)
2698 def_stmt = SSA_NAME_DEF_STMT (v);
2699 if (is_gimple_assign (def_stmt))
2700 switch (gimple_assign_rhs_code (def_stmt))
2703 t = gimple_assign_rhs2 (def_stmt);
2704 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2706 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2707 v = gimple_assign_rhs1 (def_stmt);
2710 t = gimple_assign_rhs2 (def_stmt);
2711 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2713 linear_step = tree_to_shwi (t);
2714 v = gimple_assign_rhs1 (def_stmt);
2717 t = gimple_assign_rhs1 (def_stmt);
2718 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2719 || (TYPE_PRECISION (TREE_TYPE (v))
2720 < TYPE_PRECISION (TREE_TYPE (t))))
2729 else if (is_gimple_call (def_stmt)
2730 && gimple_call_internal_p (def_stmt)
2731 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2733 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2734 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2739 arginfo->linear_step = linear_step;
2741 arginfo->simd_lane_linear = true;
2747 /* Function vectorizable_simd_clone_call.
2749 Check if STMT performs a function call that can be vectorized
2750 by calling a simd clone of the function.
2751 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2752 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2753 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
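/* For illustration (a sketch): given
     #pragma omp declare simd notinbranch
     int foo (int x);
   the compiler emits simd clones of foo, e.g. a simdlen-4 variant
   taking a V4SI argument and returning a V4SI result; the code below
   scores the available clones and replaces the scalar calls in the
   loop with calls to the best one.  */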
2756 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2757 gimple **vec_stmt, slp_tree slp_node)
2762 tree vec_oprnd0 = NULL_TREE;
2763 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2765 unsigned int nunits;
2766 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2767 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2768 vec_info *vinfo = stmt_info->vinfo;
2769 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2770 tree fndecl, new_temp;
2772 gimple *new_stmt = NULL;
2774 auto_vec<simd_call_arg_info> arginfo;
2775 vec<tree> vargs = vNULL;
2777 tree lhs, rtype, ratype;
2778 vec<constructor_elt, va_gc> *ret_ctor_elts;
2780 /* Is STMT a vectorizable call? */
2781 if (!is_gimple_call (stmt))
2784 fndecl = gimple_call_fndecl (stmt);
2785 if (fndecl == NULL_TREE)
2788 struct cgraph_node *node = cgraph_node::get (fndecl);
2789 if (node == NULL || node->simd_clones == NULL)
2792 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2795 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2799 if (gimple_call_lhs (stmt)
2800 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2803 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2805 vectype = STMT_VINFO_VECTYPE (stmt_info);
2807 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2814 /* Process function arguments. */
2815 nargs = gimple_call_num_args (stmt);
2817 /* Bail out if the function has zero arguments. */
2821 arginfo.reserve (nargs, true);
2823 for (i = 0; i < nargs; i++)
2825 simd_call_arg_info thisarginfo;
2828 thisarginfo.linear_step = 0;
2829 thisarginfo.align = 0;
2830 thisarginfo.op = NULL_TREE;
2831 thisarginfo.simd_lane_linear = false;
2833 op = gimple_call_arg (stmt, i);
2834 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
2835 &thisarginfo.vectype)
2836 || thisarginfo.dt == vect_uninitialized_def)
2838 if (dump_enabled_p ())
2839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2840 "use not simple.\n");
2844 if (thisarginfo.dt == vect_constant_def
2845 || thisarginfo.dt == vect_external_def)
2846 gcc_assert (thisarginfo.vectype == NULL_TREE);
2848 gcc_assert (thisarginfo.vectype != NULL_TREE);
2850 /* For linear arguments, the analysis phase should have saved
2851 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2852 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2853 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2855 gcc_assert (vec_stmt);
2856 thisarginfo.linear_step
2857 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2859 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2860 thisarginfo.simd_lane_linear
2861 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2862 == boolean_true_node);
2863 /* If the loop has been peeled for alignment, we need to adjust the base accordingly. */
2864 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2865 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2866 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2868 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2869 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2870 tree opt = TREE_TYPE (thisarginfo.op);
2871 bias = fold_convert (TREE_TYPE (step), bias);
2872 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2874 = fold_build2 (POINTER_TYPE_P (opt)
2875 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2876 thisarginfo.op, bias);
2880 && thisarginfo.dt != vect_constant_def
2881 && thisarginfo.dt != vect_external_def
2883 && TREE_CODE (op) == SSA_NAME
2884 && simple_iv (loop, loop_containing_stmt (stmt), op,
2886 && tree_fits_shwi_p (iv.step))
2888 thisarginfo.linear_step = tree_to_shwi (iv.step);
2889 thisarginfo.op = iv.base;
2891 else if ((thisarginfo.dt == vect_constant_def
2892 || thisarginfo.dt == vect_external_def)
2893 && POINTER_TYPE_P (TREE_TYPE (op)))
2894 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2895 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2896 linear too.  */
2897 if (POINTER_TYPE_P (TREE_TYPE (op))
2898 && !thisarginfo.linear_step
2900 && thisarginfo.dt != vect_constant_def
2901 && thisarginfo.dt != vect_external_def
2904 && TREE_CODE (op) == SSA_NAME)
2905 vect_simd_lane_linear (op, loop, &thisarginfo);
2907 arginfo.quick_push (thisarginfo);
2910 unsigned int badness = 0;
2911 struct cgraph_node *bestn = NULL;
2912 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2913 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2915 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2916 n = n->simdclone->next_clone)
2918 unsigned int this_badness = 0;
2919 if (n->simdclone->simdlen
2920 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2921 || n->simdclone->nargs != nargs)
2923 if (n->simdclone->simdlen
2924 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2925 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2926 - exact_log2 (n->simdclone->simdlen)) * 1024;
2927 if (n->simdclone->inbranch)
2928 this_badness += 2048;
2929 int target_badness = targetm.simd_clone.usable (n);
2930 if (target_badness < 0)
2932 this_badness += target_badness * 512;
2933 /* FORNOW: Have to add code to add the mask argument. */
2934 if (n->simdclone->inbranch)
2936 for (i = 0; i < nargs; i++)
2938 switch (n->simdclone->args[i].arg_type)
2940 case SIMD_CLONE_ARG_TYPE_VECTOR:
2941 if (!useless_type_conversion_p
2942 (n->simdclone->args[i].orig_type,
2943 TREE_TYPE (gimple_call_arg (stmt, i))))
2945 else if (arginfo[i].dt == vect_constant_def
2946 || arginfo[i].dt == vect_external_def
2947 || arginfo[i].linear_step)
2950 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2951 if (arginfo[i].dt != vect_constant_def
2952 && arginfo[i].dt != vect_external_def)
2955 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2956 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
2957 if (arginfo[i].dt == vect_constant_def
2958 || arginfo[i].dt == vect_external_def
2959 || (arginfo[i].linear_step
2960 != n->simdclone->args[i].linear_step))
2963 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2964 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
2965 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
2966 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
2967 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
2968 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
2972 case SIMD_CLONE_ARG_TYPE_MASK:
2975 if (i == (size_t) -1)
2977 if (n->simdclone->args[i].alignment > arginfo[i].align)
2982 if (arginfo[i].align)
2983 this_badness += (exact_log2 (arginfo[i].align)
2984 - exact_log2 (n->simdclone->args[i].alignment));
2986 if (i == (size_t) -1)
2988 if (bestn == NULL || this_badness < badness)
2991 badness = this_badness;
2998 for (i = 0; i < nargs; i++)
2999 if ((arginfo[i].dt == vect_constant_def
3000 || arginfo[i].dt == vect_external_def)
3001 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3004 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3006 if (arginfo[i].vectype == NULL
3007 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3008 > bestn->simdclone->simdlen))
3012 fndecl = bestn->decl;
3013 nunits = bestn->simdclone->simdlen;
3014 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3016 /* If the function isn't const, only allow it in simd loops where the user
3017 has asserted that at least nunits consecutive iterations can be
3018 performed using SIMD instructions. */
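/* E.g. (a sketch): for a loop annotated with
     #pragma omp simd safelen(8)
   and a chosen clone with simdlen 4, loop->safelen >= nunits holds,
   so even a non-const clone may be called here.  */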
3019 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3020 && gimple_vuse (stmt))
3023 /* Sanity check: make sure that at least one copy of the vectorized stmt
3024 needs to be generated. */
3025 gcc_assert (ncopies >= 1);
3027 if (!vec_stmt) /* transformation not required. */
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3030 for (i = 0; i < nargs; i++)
3031 if ((bestn->simdclone->args[i].arg_type
3032 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3033 || (bestn->simdclone->args[i].arg_type
3034 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3039 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3040 ? size_type_node : TREE_TYPE (arginfo[i].op);
3041 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3042 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3043 tree sll = arginfo[i].simd_lane_linear
3044 ? boolean_true_node : boolean_false_node;
3045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3047 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE, vect_location,
3050 "=== vectorizable_simd_clone_call ===\n");
3051 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3057 if (dump_enabled_p ())
3058 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3061 scalar_dest = gimple_call_lhs (stmt);
3062 vec_dest = NULL_TREE;
3067 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3068 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3069 if (TREE_CODE (rtype) == ARRAY_TYPE)
3072 rtype = TREE_TYPE (ratype);
3076 prev_stmt_info = NULL;
3077 for (j = 0; j < ncopies; ++j)
3079 /* Build argument list for the vectorized call. */
3081 vargs.create (nargs);
3085 for (i = 0; i < nargs; i++)
3087 unsigned int k, l, m, o;
3089 op = gimple_call_arg (stmt, i);
3090 switch (bestn->simdclone->args[i].arg_type)
3092 case SIMD_CLONE_ARG_TYPE_VECTOR:
3093 atype = bestn->simdclone->args[i].vector_type;
3094 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3095 for (m = j * o; m < (j + 1) * o; m++)
3097 if (TYPE_VECTOR_SUBPARTS (atype)
3098 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3100 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3101 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3102 / TYPE_VECTOR_SUBPARTS (atype));
3103 gcc_assert ((k & (k - 1)) == 0);
3106 = vect_get_vec_def_for_operand (op, stmt);
3109 vec_oprnd0 = arginfo[i].op;
3110 if ((m & (k - 1)) == 0)
3112 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3115 arginfo[i].op = vec_oprnd0;
3117 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3119 bitsize_int ((m & (k - 1)) * prec));
3121 = gimple_build_assign (make_ssa_name (atype),
3123 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3124 vargs.safe_push (gimple_assign_lhs (new_stmt));
3128 k = (TYPE_VECTOR_SUBPARTS (atype)
3129 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3130 gcc_assert ((k & (k - 1)) == 0);
3131 vec<constructor_elt, va_gc> *ctor_elts;
3133 vec_alloc (ctor_elts, k);
3136 for (l = 0; l < k; l++)
3138 if (m == 0 && l == 0)
3140 = vect_get_vec_def_for_operand (op, stmt);
3143 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3145 arginfo[i].op = vec_oprnd0;
3148 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3152 vargs.safe_push (vec_oprnd0);
3155 vec_oprnd0 = build_constructor (atype, ctor_elts);
3157 = gimple_build_assign (make_ssa_name (atype),
3159 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3160 vargs.safe_push (gimple_assign_lhs (new_stmt));
3165 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3166 vargs.safe_push (op);
3168 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3169 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3174 = force_gimple_operand (arginfo[i].op, &stmts, true,
3179 edge pe = loop_preheader_edge (loop);
3180 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3181 gcc_assert (!new_bb);
3183 if (arginfo[i].simd_lane_linear)
3185 vargs.safe_push (arginfo[i].op);
3188 tree phi_res = copy_ssa_name (op);
3189 gphi *new_phi = create_phi_node (phi_res, loop->header);
3190 set_vinfo_for_stmt (new_phi,
3191 new_stmt_vec_info (new_phi, loop_vinfo));
3192 add_phi_arg (new_phi, arginfo[i].op,
3193 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3195 = POINTER_TYPE_P (TREE_TYPE (op))
3196 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3197 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3198 ? sizetype : TREE_TYPE (op);
3200 = wi::mul (bestn->simdclone->args[i].linear_step,
3202 tree tcst = wide_int_to_tree (type, cst);
3203 tree phi_arg = copy_ssa_name (op);
3205 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3206 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3207 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3208 set_vinfo_for_stmt (new_stmt,
3209 new_stmt_vec_info (new_stmt, loop_vinfo));
3210 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3212 arginfo[i].op = phi_res;
3213 vargs.safe_push (phi_res);
3218 = POINTER_TYPE_P (TREE_TYPE (op))
3219 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3220 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3221 ? sizetype : TREE_TYPE (op);
3223 = wi::mul (bestn->simdclone->args[i].linear_step,
3225 tree tcst = wide_int_to_tree (type, cst);
3226 new_temp = make_ssa_name (TREE_TYPE (op));
3227 new_stmt = gimple_build_assign (new_temp, code,
3228 arginfo[i].op, tcst);
3229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3230 vargs.safe_push (new_temp);
3233 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3235 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3236 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3237 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3238 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3244 new_stmt = gimple_build_call_vec (fndecl, vargs);
3247 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3249 new_temp = create_tmp_var (ratype);
3250 else if (TYPE_VECTOR_SUBPARTS (vectype)
3251 == TYPE_VECTOR_SUBPARTS (rtype))
3252 new_temp = make_ssa_name (vec_dest, new_stmt);
3254 new_temp = make_ssa_name (rtype, new_stmt);
3255 gimple_call_set_lhs (new_stmt, new_temp);
3257 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3261 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3264 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3265 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3266 gcc_assert ((k & (k - 1)) == 0);
3267 for (l = 0; l < k; l++)
3272 t = build_fold_addr_expr (new_temp);
3273 t = build2 (MEM_REF, vectype, t,
3274 build_int_cst (TREE_TYPE (t),
3275 l * prec / BITS_PER_UNIT));
3278 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3279 size_int (prec), bitsize_int (l * prec));
3281 = gimple_build_assign (make_ssa_name (vectype), t);
3282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3283 if (j == 0 && l == 0)
3284 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3288 prev_stmt_info = vinfo_for_stmt (new_stmt);
3293 tree clobber = build_constructor (ratype, NULL);
3294 TREE_THIS_VOLATILE (clobber) = 1;
3295 new_stmt = gimple_build_assign (new_temp, clobber);
3296 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3300 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3302 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3303 / TYPE_VECTOR_SUBPARTS (rtype));
3304 gcc_assert ((k & (k - 1)) == 0);
3305 if ((j & (k - 1)) == 0)
3306 vec_alloc (ret_ctor_elts, k);
3309 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3310 for (m = 0; m < o; m++)
3312 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3313 size_int (m), NULL_TREE, NULL_TREE);
3315 = gimple_build_assign (make_ssa_name (rtype), tem);
3316 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3317 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3318 gimple_assign_lhs (new_stmt));
3320 tree clobber = build_constructor (ratype, NULL);
3321 TREE_THIS_VOLATILE (clobber) = 1;
3322 new_stmt = gimple_build_assign (new_temp, clobber);
3323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3326 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3327 if ((j & (k - 1)) != k - 1)
3329 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3331 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3332 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3334 if ((unsigned) j == k - 1)
3335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3339 prev_stmt_info = vinfo_for_stmt (new_stmt);
3344 tree t = build_fold_addr_expr (new_temp);
3345 t = build2 (MEM_REF, vectype, t,
3346 build_int_cst (TREE_TYPE (t), 0));
3348 = gimple_build_assign (make_ssa_name (vec_dest), t);
3349 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3350 tree clobber = build_constructor (ratype, NULL);
3351 TREE_THIS_VOLATILE (clobber) = 1;
3352 vect_finish_stmt_generation (stmt,
3353 gimple_build_assign (new_temp,
3359 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3361 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3363 prev_stmt_info = vinfo_for_stmt (new_stmt);
3368 /* The call in STMT might prevent it from being removed by DCE.
3369 We cannot remove it here, however, because of the way the SSA name
3370 it defines is mapped to the new definition.  So just replace
3371 the RHS of the statement with something harmless. */
3378 type = TREE_TYPE (scalar_dest);
3379 if (is_pattern_stmt_p (stmt_info))
3380 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3382 lhs = gimple_call_lhs (stmt);
3383 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3386 new_stmt = gimple_build_nop ();
3387 set_vinfo_for_stmt (new_stmt, stmt_info);
3388 set_vinfo_for_stmt (stmt, NULL);
3389 STMT_VINFO_STMT (stmt_info) = new_stmt;
3390 gsi_replace (gsi, new_stmt, true);
3391 unlink_stmt_vdef (stmt);
3397 /* Function vect_gen_widened_results_half
3399 Create a vector stmt whose code, number of arguments, and result
3400 variable are CODE, OP_TYPE, and VEC_DEST respectively, and whose arguments
3401 are VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
3402 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3403 needs to be created (DECL is a function-decl of a target-builtin).
3404 STMT is the original scalar stmt that we are vectorizing. */
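/* E.g. (a sketch): a widening multiplication of two V8HI operands is
   generated as the two halves
     low_result  = VEC_WIDEN_MULT_LO_EXPR <vop0, vop1>;
     high_result = VEC_WIDEN_MULT_HI_EXPR <vop0, vop1>;
   each producing a V4SI vector, with one call to this function per
   half.  */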
3407 vect_gen_widened_results_half (enum tree_code code,
3409 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3410 tree vec_dest, gimple_stmt_iterator *gsi,
3416 /* Generate half of the widened result: */
3417 if (code == CALL_EXPR)
3419 /* Target specific support */
3420 if (op_type == binary_op)
3421 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3423 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3424 new_temp = make_ssa_name (vec_dest, new_stmt);
3425 gimple_call_set_lhs (new_stmt, new_temp);
3429 /* Generic support */
3430 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3431 if (op_type != binary_op)
3433 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3434 new_temp = make_ssa_name (vec_dest, new_stmt);
3435 gimple_assign_set_lhs (new_stmt, new_temp);
3437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3443 /* Get vectorized definitions for loop-based vectorization. For the first
3444 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3445 the scalar operand), and for the rest we get a copy with
3446 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3447 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3448 The vectors are collected into VEC_OPRNDS. */
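/* E.g. (a sketch): with MULTI_STEP_CVT == 1 this collects four defs in
   VEC_OPRNDS (the def for the scalar OPRND, its stmt copy, and one
   more pair from the recursive call), matching the two operands that
   each demotion statement consumes.  */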
3451 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3452 vec<tree> *vec_oprnds, int multi_step_cvt)
3456 /* Get first vector operand. */
3457 /* All the vector operands except the very first one (that is scalar oprnd)
3458 are stmt copies.  */
3459 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3460 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3462 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3464 vec_oprnds->quick_push (vec_oprnd);
3466 /* Get second vector operand. */
3467 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3468 vec_oprnds->quick_push (vec_oprnd);
3472 /* For conversion in multiple steps, continue to get operands
3473 recursively.  */
3474 if (multi_step_cvt)
3475 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3479 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3480 For multi-step conversions store the resulting vectors and call the function
3481 recursively.  */
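/* E.g. (a sketch): four V4SI operands in VEC_OPRNDS are packed
   pairwise into two V8HI vectors with VEC_PACK_TRUNC_EXPR; a recursive
   invocation can then pack those two into a single V16QI vector.  */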
3484 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3485 int multi_step_cvt, gimple *stmt,
3487 gimple_stmt_iterator *gsi,
3488 slp_tree slp_node, enum tree_code code,
3489 stmt_vec_info *prev_stmt_info)
3492 tree vop0, vop1, new_tmp, vec_dest;
3494 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3496 vec_dest = vec_dsts.pop ();
3498 for (i = 0; i < vec_oprnds->length (); i += 2)
3500 /* Create demotion operation. */
3501 vop0 = (*vec_oprnds)[i];
3502 vop1 = (*vec_oprnds)[i + 1];
3503 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3504 new_tmp = make_ssa_name (vec_dest, new_stmt);
3505 gimple_assign_set_lhs (new_stmt, new_tmp);
3506 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3509 /* Store the resulting vector for next recursive call. */
3510 (*vec_oprnds)[i/2] = new_tmp;
3513 /* This is the last step of the conversion sequence. Store the
3514 vectors in SLP_NODE or in vector info of the scalar statement
3515 (or in STMT_VINFO_RELATED_STMT chain). */
3517 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3520 if (!*prev_stmt_info)
3521 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3523 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3525 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3530 /* For multi-step demotion operations we first generate demotion operations
3531 from the source type to the intermediate types, and then combine the
3532 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3533 type.  */
3536 /* At each level of recursion we have half of the operands we had at the
3537 previous level.  */
3538 vec_oprnds->truncate ((i+1)/2);
3539 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3540 stmt, vec_dsts, gsi, slp_node,
3541 VEC_PACK_TRUNC_EXPR,
3545 vec_dsts.quick_push (vec_dest);
3549 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3550 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3551 the resulting vectors and call the function recursively. */
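/* E.g. (a sketch): promoting a V8HI operand produces two V4SI results
   via the LO/HI widening codes, so each step doubles the number of
   entries in VEC_OPRNDS0.  */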
3554 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3555 vec<tree> *vec_oprnds1,
3556 gimple *stmt, tree vec_dest,
3557 gimple_stmt_iterator *gsi,
3558 enum tree_code code1,
3559 enum tree_code code2, tree decl1,
3560 tree decl2, int op_type)
3563 tree vop0, vop1, new_tmp1, new_tmp2;
3564 gimple *new_stmt1, *new_stmt2;
3565 vec<tree> vec_tmp = vNULL;
3567 vec_tmp.create (vec_oprnds0->length () * 2);
3568 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3570 if (op_type == binary_op)
3571 vop1 = (*vec_oprnds1)[i];
3575 /* Generate the two halves of the promotion operation. */
3576 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3577 op_type, vec_dest, gsi, stmt);
3578 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3579 op_type, vec_dest, gsi, stmt);
3580 if (is_gimple_call (new_stmt1))
3582 new_tmp1 = gimple_call_lhs (new_stmt1);
3583 new_tmp2 = gimple_call_lhs (new_stmt2);
3587 new_tmp1 = gimple_assign_lhs (new_stmt1);
3588 new_tmp2 = gimple_assign_lhs (new_stmt2);
3591 /* Store the results for the next step. */
3592 vec_tmp.quick_push (new_tmp1);
3593 vec_tmp.quick_push (new_tmp2);
3596 vec_oprnds0->release ();
3597 *vec_oprnds0 = vec_tmp;
3601 /* Check if STMT performs a conversion operation that can be vectorized.
3602 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3603 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3604 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
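/* For illustration (a sketch): a V8SF -> V4DF extension is a WIDEN
   case emitting a low and a high half per input vector, a V4DF -> V8SF
   truncation is a NARROW case packing two inputs into one result, and
   a conversion such as short -> double may need an intermediate type,
   handled via MULTI_STEP_CVT and INTERM_TYPES below.  */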
3607 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3608 gimple **vec_stmt, slp_tree slp_node)
3612 tree op0, op1 = NULL_TREE;
3613 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3614 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3615 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3616 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3617 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3618 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3621 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3622 gimple *new_stmt = NULL;
3623 stmt_vec_info prev_stmt_info;
3626 tree vectype_out, vectype_in;
3628 tree lhs_type, rhs_type;
3629 enum { NARROW, NONE, WIDEN } modifier;
3630 vec<tree> vec_oprnds0 = vNULL;
3631 vec<tree> vec_oprnds1 = vNULL;
3633 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3634 vec_info *vinfo = stmt_info->vinfo;
3635 int multi_step_cvt = 0;
3636 vec<tree> vec_dsts = vNULL;
3637 vec<tree> interm_types = vNULL;
3638 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3640 machine_mode rhs_mode;
3641 unsigned short fltsz;
3643 /* Is STMT a vectorizable conversion? */
3645 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3648 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3652 if (!is_gimple_assign (stmt))
3655 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3658 code = gimple_assign_rhs_code (stmt);
3659 if (!CONVERT_EXPR_CODE_P (code)
3660 && code != FIX_TRUNC_EXPR
3661 && code != FLOAT_EXPR
3662 && code != WIDEN_MULT_EXPR
3663 && code != WIDEN_LSHIFT_EXPR)
3666 op_type = TREE_CODE_LENGTH (code);
3668 /* Check types of lhs and rhs. */
3669 scalar_dest = gimple_assign_lhs (stmt);
3670 lhs_type = TREE_TYPE (scalar_dest);
3671 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3673 op0 = gimple_assign_rhs1 (stmt);
3674 rhs_type = TREE_TYPE (op0);
3676 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3677 && !((INTEGRAL_TYPE_P (lhs_type)
3678 && INTEGRAL_TYPE_P (rhs_type))
3679 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3680 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3683 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3684 && ((INTEGRAL_TYPE_P (lhs_type)
3685 && (TYPE_PRECISION (lhs_type)
3686 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3687 || (INTEGRAL_TYPE_P (rhs_type)
3688 && (TYPE_PRECISION (rhs_type)
3689 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3691 if (dump_enabled_p ())
3692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3693 "type conversion to/from bit-precision unsupported."
3698 /* Check the operands of the operation. */
3699 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3701 if (dump_enabled_p ())
3702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3703 "use not simple.\n");
3706 if (op_type == binary_op)
3710 op1 = gimple_assign_rhs2 (stmt);
3711 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3712 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3713 OP1.  */
3714 if (CONSTANT_CLASS_P (op0))
3715 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3717 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3721 if (dump_enabled_p ())
3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3723 "use not simple.\n");
3728 /* If op0 is an external or constant def, use a vector type of
3729 the same size as the output vector type. */
3731 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3733 gcc_assert (vectype_in);
3736 if (dump_enabled_p ())
3738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3739 "no vectype for scalar type ");
3740 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3741 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3747 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3748 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3750 if (dump_enabled_p ())
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3753 "can't convert between boolean and non "
3755 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3756 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3762 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3763 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3764 if (nunits_in < nunits_out)
3766 else if (nunits_out == nunits_in)
3771 /* Multiple types in SLP are handled by creating the appropriate number of
3772 vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3773 case of SLP.  */
3776 else if (modifier == NARROW)
3777 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3779 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3781 /* Sanity check: make sure that at least one copy of the vectorized stmt
3782 needs to be generated. */
3783 gcc_assert (ncopies >= 1);
3785 /* Supportable by target? */
3789 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3791 if (supportable_convert_operation (code, vectype_out, vectype_in,
3796 if (dump_enabled_p ())
3797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3798 "conversion not supported by target.\n");
3802 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3803 &code1, &code2, &multi_step_cvt,
3806 /* Binary widening operation can only be supported directly by the
3807 architecture.  */
3808 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3812 if (code != FLOAT_EXPR
3813 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3814 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3817 rhs_mode = TYPE_MODE (rhs_type);
3818 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3819 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3820 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3821 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3824 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3825 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3826 if (cvt_type == NULL_TREE)
3829 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3831 if (!supportable_convert_operation (code, vectype_out,
3832 cvt_type, &decl1, &codecvt1))
3835 else if (!supportable_widening_operation (code, stmt, vectype_out,
3836 cvt_type, &codecvt1,
3837 &codecvt2, &multi_step_cvt,
3841 gcc_assert (multi_step_cvt == 0);
3843 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3844 vectype_in, &code1, &code2,
3845 &multi_step_cvt, &interm_types))
3849 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3852 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3853 codecvt2 = ERROR_MARK;
3857 interm_types.safe_push (cvt_type);
3858 cvt_type = NULL_TREE;
3863 gcc_assert (op_type == unary_op);
3864 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3865 &code1, &multi_step_cvt,
3869 if (code != FIX_TRUNC_EXPR
3870 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3871 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3874 rhs_mode = TYPE_MODE (rhs_type);
3876 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3877 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3878 if (cvt_type == NULL_TREE)
3880 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3883 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3884 &code1, &multi_step_cvt,
3893 if (!vec_stmt) /* transformation not required. */
3895 if (dump_enabled_p ())
3896 dump_printf_loc (MSG_NOTE, vect_location,
3897 "=== vectorizable_conversion ===\n");
3898 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3900 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3901 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3903 else if (modifier == NARROW)
3905 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3906 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3910 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3911 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3913 interm_types.release ();
3918 if (dump_enabled_p ())
3919 dump_printf_loc (MSG_NOTE, vect_location,
3920 "transform conversion. ncopies = %d.\n", ncopies);
3922 if (op_type == binary_op)
3924 if (CONSTANT_CLASS_P (op0))
3925 op0 = fold_convert (TREE_TYPE (op1), op0);
3926 else if (CONSTANT_CLASS_P (op1))
3927 op1 = fold_convert (TREE_TYPE (op0), op1);
3930 /* In case of multi-step conversion, we first generate conversion operations
3931 to the intermediate types, and then from those types to the final one.
3932 We create vector destinations for the intermediate type (TYPES) received
3933 from supportable_*_operation, and store them in the correct order
3934 for future use in vect_create_vectorized_*_stmts (). */
3935 vec_dsts.create (multi_step_cvt + 1);
3936 vec_dest = vect_create_destination_var (scalar_dest,
3937 (cvt_type && modifier == WIDEN)
3938 ? cvt_type : vectype_out);
3939 vec_dsts.quick_push (vec_dest);
3943 for (i = interm_types.length () - 1;
3944 interm_types.iterate (i, &intermediate_type); i--)
3946 vec_dest = vect_create_destination_var (scalar_dest,
3948 vec_dsts.quick_push (vec_dest);
3953 vec_dest = vect_create_destination_var (scalar_dest,
3955 ? vectype_out : cvt_type);
3959 if (modifier == WIDEN)
3961 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3962 if (op_type == binary_op)
3963 vec_oprnds1.create (1);
3965 else if (modifier == NARROW)
3966 vec_oprnds0.create (
3967 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3969 else if (code == WIDEN_LSHIFT_EXPR)
3970 vec_oprnds1.create (slp_node->vec_stmts_size);
3973 prev_stmt_info = NULL;
3977 for (j = 0; j < ncopies; j++)
3980 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3983 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3985 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3987 /* Arguments are ready.  Create the new vector stmt. */
3988 if (code1 == CALL_EXPR)
3990 new_stmt = gimple_build_call (decl1, 1, vop0);
3991 new_temp = make_ssa_name (vec_dest, new_stmt);
3992 gimple_call_set_lhs (new_stmt, new_temp);
3996 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3997 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3998 new_temp = make_ssa_name (vec_dest, new_stmt);
3999 gimple_assign_set_lhs (new_stmt, new_temp);
4002 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4004 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4007 if (!prev_stmt_info)
4008 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4011 prev_stmt_info = vinfo_for_stmt (new_stmt);
4018 /* In case the vectorization factor (VF) is bigger than the number
4019 of elements that we can fit in a vectype (nunits), we have to
4020 generate more than one vector stmt, i.e., we need to "unroll"
4021 the vector stmt by a factor VF/nunits. */
4022 for (j = 0; j < ncopies; j++)
4029 if (code == WIDEN_LSHIFT_EXPR)
4034 /* Store vec_oprnd1 for every vector stmt to be created
4035 for SLP_NODE. We check during the analysis that all
4036 the shift arguments are the same. */
4037 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4038 vec_oprnds1.quick_push (vec_oprnd1);
4040 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4044 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4045 &vec_oprnds1, slp_node, -1);
4049 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4050 vec_oprnds0.quick_push (vec_oprnd0);
4051 if (op_type == binary_op)
4053 if (code == WIDEN_LSHIFT_EXPR)
4056 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4057 vec_oprnds1.quick_push (vec_oprnd1);
4063 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4064 vec_oprnds0.truncate (0);
4065 vec_oprnds0.quick_push (vec_oprnd0);
4066 if (op_type == binary_op)
4068 if (code == WIDEN_LSHIFT_EXPR)
4071 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4073 vec_oprnds1.truncate (0);
4074 vec_oprnds1.quick_push (vec_oprnd1);
4078 /* Arguments are ready. Create the new vector stmts. */
4079 for (i = multi_step_cvt; i >= 0; i--)
4081 tree this_dest = vec_dsts[i];
4082 enum tree_code c1 = code1, c2 = code2;
4083 if (i == 0 && codecvt2 != ERROR_MARK)
4088 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4090 stmt, this_dest, gsi,
4091 c1, c2, decl1, decl2,
4095 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4099 if (codecvt1 == CALL_EXPR)
4101 new_stmt = gimple_build_call (decl1, 1, vop0);
4102 new_temp = make_ssa_name (vec_dest, new_stmt);
4103 gimple_call_set_lhs (new_stmt, new_temp);
4107 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4108 new_temp = make_ssa_name (vec_dest);
4109 new_stmt = gimple_build_assign (new_temp, codecvt1,
4113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4116 new_stmt = SSA_NAME_DEF_STMT (vop0);
4119 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4122 if (!prev_stmt_info)
4123 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4126 prev_stmt_info = vinfo_for_stmt (new_stmt);
4131 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4135 /* In case the vectorization factor (VF) is bigger than the number
4136 of elements that we can fit in a vectype (nunits), we have to
4137 generate more than one vector stmt, i.e., we need to "unroll"
4138 the vector stmt by a factor VF/nunits. */
4139 for (j = 0; j < ncopies; j++)
4143 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4147 vec_oprnds0.truncate (0);
4148 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4149 vect_pow2 (multi_step_cvt) - 1);
4152 /* Arguments are ready. Create the new vector stmts. */
4154 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4156 if (codecvt1 == CALL_EXPR)
4158 new_stmt = gimple_build_call (decl1, 1, vop0);
4159 new_temp = make_ssa_name (vec_dest, new_stmt);
4160 gimple_call_set_lhs (new_stmt, new_temp);
4164 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4165 new_temp = make_ssa_name (vec_dest);
4166 new_stmt = gimple_build_assign (new_temp, codecvt1,
4170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4171 vec_oprnds0[i] = new_temp;
4174 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4175 stmt, vec_dsts, gsi,
4180 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4184 vec_oprnds0.release ();
4185 vec_oprnds1.release ();
4186 vec_dsts.release ();
4187 interm_types.release ();
4193 /* Function vectorizable_assignment.
4195 Check if STMT performs an assignment (copy) that can be vectorized.
4196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4197 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
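/* For example (hypothetical names): a relevant scalar copy
     x_1 = y_2;
   or a no-op conversion such as
     x_1 = (unsigned int) y_2;
   becomes a plain vector-to-vector copy, with the conversion
   represented as a VIEW_CONVERT_EXPR of the vector operand.  */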
4201 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4202 gimple **vec_stmt, slp_tree slp_node)
4207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4211 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4214 vec<tree> vec_oprnds = vNULL;
4216 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4217 vec_info *vinfo = stmt_info->vinfo;
4218 gimple *new_stmt = NULL;
4219 stmt_vec_info prev_stmt_info = NULL;
4220 enum tree_code code;
4223 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4226 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4230 /* Is vectorizable assignment? */
4231 if (!is_gimple_assign (stmt))
4234 scalar_dest = gimple_assign_lhs (stmt);
4235 if (TREE_CODE (scalar_dest) != SSA_NAME)
4238 code = gimple_assign_rhs_code (stmt);
4239 if (gimple_assign_single_p (stmt)
4240 || code == PAREN_EXPR
4241 || CONVERT_EXPR_CODE_P (code))
4242 op = gimple_assign_rhs1 (stmt);
4246 if (code == VIEW_CONVERT_EXPR)
4247 op = TREE_OPERAND (op, 0);
4249 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4250 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4252 /* Multiple types in SLP are handled by creating the appropriate number of
4253 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
4258 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4260 gcc_assert (ncopies >= 1);
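   /* Worked example (illustrative, not from the original source): with a
      vectorization factor of 8 and a V4SI vectype (nunits == 4), ncopies
      evaluates to 2, i.e. two vector stmts are generated per scalar stmt.  */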
4262 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4264 if (dump_enabled_p ())
4265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4266 "use not simple.\n");
4270 /* We can handle NOP_EXPR conversions that do not change the number
4271 of elements or the vector size. */
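  /* Illustrative sketch of such a conversion:

	int a = ...;
	unsigned int b = (unsigned int) a;

     Here neither the element count nor the vector size changes, so the
     transform below can implement it as a single VIEW_CONVERT_EXPR
     between, e.g., a vector of int and a vector of unsigned int.  */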
4272 if ((CONVERT_EXPR_CODE_P (code)
4273 || code == VIEW_CONVERT_EXPR)
4275 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4276 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4277 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4280 /* We do not handle bit-precision changes. */
4281 if ((CONVERT_EXPR_CODE_P (code)
4282 || code == VIEW_CONVERT_EXPR)
4283 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4284 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4285 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4286 || ((TYPE_PRECISION (TREE_TYPE (op))
4287 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4288 /* But a conversion that does not change the bit-pattern is ok. */
4289 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4290 > TYPE_PRECISION (TREE_TYPE (op)))
4291 && TYPE_UNSIGNED (TREE_TYPE (op)))
4292 /* Conversion between boolean types of different sizes is
4293 a simple assignment in case their vectypes are the same boolean vectors. */
4295 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4296 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4300 "type conversion to/from bit-precision "
4305 if (!vec_stmt) /* transformation not required. */
4307 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_NOTE, vect_location,
4310 "=== vectorizable_assignment ===\n");
4311 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4316 if (dump_enabled_p ())
4317 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4320 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4323 for (j = 0; j < ncopies; j++)
4327 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4329 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4331 /* Arguments are ready. Create the new vector stmt. */
4332 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4334 if (CONVERT_EXPR_CODE_P (code)
4335 || code == VIEW_CONVERT_EXPR)
4336 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4337 new_stmt = gimple_build_assign (vec_dest, vop);
4338 new_temp = make_ssa_name (vec_dest, new_stmt);
4339 gimple_assign_set_lhs (new_stmt, new_temp);
4340 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4342 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4349 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4351 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4353 prev_stmt_info = vinfo_for_stmt (new_stmt);
4356 vec_oprnds.release ();
4361 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4362 either as shift by a scalar or by a vector. */
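/* Illustrative: for CODE == LSHIFT_EXPR this asks whether the target
   provides either the vector-shift-by-scalar form (optab_scalar) or the
   vector-shift-by-vector form (optab_vector) for the vector mode derived
   from SCALAR_TYPE; either one makes the shift supportable.  */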
4365 vect_supportable_shift (enum tree_code code, tree scalar_type)
4368 machine_mode vec_mode;
4373 vectype = get_vectype_for_scalar_type (scalar_type);
4377 optab = optab_for_tree_code (code, vectype, optab_scalar);
4379 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4381 optab = optab_for_tree_code (code, vectype, optab_vector);
4383 || (optab_handler (optab, TYPE_MODE (vectype))
4384 == CODE_FOR_nothing))
4388 vec_mode = TYPE_MODE (vectype);
4389 icode = (int) optab_handler (optab, vec_mode);
4390 if (icode == CODE_FOR_nothing)
4397 /* Function vectorizable_shift.
4399 Check if STMT performs a shift operation that can be vectorized.
4400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4401 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4405 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4406 gimple **vec_stmt, slp_tree slp_node)
4410 tree op0, op1 = NULL;
4411 tree vec_oprnd1 = NULL_TREE;
4412 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4414 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4415 enum tree_code code;
4416 machine_mode vec_mode;
4420 machine_mode optab_op2_mode;
4422 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4423 gimple *new_stmt = NULL;
4424 stmt_vec_info prev_stmt_info;
4431 vec<tree> vec_oprnds0 = vNULL;
4432 vec<tree> vec_oprnds1 = vNULL;
4435 bool scalar_shift_arg = true;
4436 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4437 vec_info *vinfo = stmt_info->vinfo;
4440 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4443 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4447 /* Is STMT a vectorizable shift operation? */
4448 if (!is_gimple_assign (stmt))
4451 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4454 code = gimple_assign_rhs_code (stmt);
4456 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4457 || code == RROTATE_EXPR))
4460 scalar_dest = gimple_assign_lhs (stmt);
4461 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4462 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4463 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4467 "bit-precision shifts not supported.\n");
4471 op0 = gimple_assign_rhs1 (stmt);
4472 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4474 if (dump_enabled_p ())
4475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4476 "use not simple.\n");
4479 /* If op0 is an external or constant def use a vector type with
4480 the same size as the output vector type. */
4482 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4484 gcc_assert (vectype);
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4489 "no vectype for scalar type\n");
4493 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4494 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4495 if (nunits_out != nunits_in)
4498 op1 = gimple_assign_rhs2 (stmt);
4499 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4501 if (dump_enabled_p ())
4502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4503 "use not simple.\n");
4508 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4512 /* Multiple types in SLP are handled by creating the appropriate number of
4513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
4518 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4520 gcc_assert (ncopies >= 1);
4522 /* Determine whether the shift amount is a vector or a scalar. If the
4523 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4525 if ((dt[1] == vect_internal_def
4526 || dt[1] == vect_induction_def)
4528 scalar_shift_arg = false;
4529 else if (dt[1] == vect_constant_def
4530 || dt[1] == vect_external_def
4531 || dt[1] == vect_internal_def)
4533 /* In SLP, need to check whether the shift count is the same,
4534 in loops if it is a constant or invariant, it is always a scalar shift. */
4538 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4541 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4542 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4543 scalar_shift_arg = false;
4546 /* If the shift amount is computed by a pattern stmt we cannot
4547 use the scalar amount directly; thus give up and use a vector shift. */
4549 if (dt[1] == vect_internal_def)
4551 gimple *def = SSA_NAME_DEF_STMT (op1);
4552 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4553 scalar_shift_arg = false;
4558 if (dump_enabled_p ())
4559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4560 "operand mode requires invariant argument.\n");
4564 /* Vector shifted by vector. */
4565 if (!scalar_shift_arg)
4567 optab = optab_for_tree_code (code, vectype, optab_vector);
4568 if (dump_enabled_p ())
4569 dump_printf_loc (MSG_NOTE, vect_location,
4570 "vector/vector shift/rotate found.\n");
4573 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4574 if (op1_vectype == NULL_TREE
4575 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4579 "unusable type for last operand in"
4580 " vector/vector shift/rotate.\n");
4584 /* See if the machine has a vector shifted by scalar insn, and if not,
4585 then see if it has a vector shifted by vector insn. */
4588 optab = optab_for_tree_code (code, vectype, optab_scalar);
4590 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_NOTE, vect_location,
4594 "vector/scalar shift/rotate found.\n");
4598 optab = optab_for_tree_code (code, vectype, optab_vector);
4600 && (optab_handler (optab, TYPE_MODE (vectype))
4601 != CODE_FOR_nothing))
4603 scalar_shift_arg = false;
4605 if (dump_enabled_p ())
4606 dump_printf_loc (MSG_NOTE, vect_location,
4607 "vector/vector shift/rotate found.\n");
4609 /* Unlike the other binary operators, shifts/rotates have
4610 the rhs being int, instead of the same type as the lhs,
4611 so make sure the scalar is the right type if we are
4612 dealing with vectors of long long/long/short/char. */
4613 if (dt[1] == vect_constant_def)
4614 op1 = fold_convert (TREE_TYPE (vectype), op1);
4615 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4619 && TYPE_MODE (TREE_TYPE (vectype))
4620 != TYPE_MODE (TREE_TYPE (op1)))
4622 if (dump_enabled_p ())
4623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4624 "unusable type for last operand in"
4625 " vector/vector shift/rotate.\n");
4628 if (vec_stmt && !slp_node)
4630 op1 = fold_convert (TREE_TYPE (vectype), op1);
4631 op1 = vect_init_vector (stmt, op1,
4632 TREE_TYPE (vectype), NULL);
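	      /* E.g., for a vector of long long shifted by the literal 2
		 (whose type is int), the fold_convert above rebuilds the
		 constant in the element type so that vect_init_vector can
		 materialize a well-typed invariant vector such as
		 { 2, 2 } (illustrative).  */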
4639 /* Supportable by target? */
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4647 vec_mode = TYPE_MODE (vectype);
4648 icode = (int) optab_handler (optab, vec_mode);
4649 if (icode == CODE_FOR_nothing)
4651 if (dump_enabled_p ())
4652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4653 "op not supported by target.\n");
4654 /* Check only during analysis. */
4655 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4656 || (vf < vect_min_worthwhile_factor (code)
4659 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_NOTE, vect_location,
4661 "proceeding using word mode.\n");
4664 /* Worthwhile without SIMD support? Check only during analysis. */
4665 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4666 && vf < vect_min_worthwhile_factor (code)
4669 if (dump_enabled_p ())
4670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4671 "not worthwhile without SIMD support.\n");
4675 if (!vec_stmt) /* transformation not required. */
4677 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_NOTE, vect_location,
4680 "=== vectorizable_shift ===\n");
4681 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4687 if (dump_enabled_p ())
4688 dump_printf_loc (MSG_NOTE, vect_location,
4689 "transform binary/unary operation.\n");
4692 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4694 prev_stmt_info = NULL;
4695 for (j = 0; j < ncopies; j++)
4700 if (scalar_shift_arg)
4702 /* Vector shl and shr insn patterns can be defined with scalar
4703 operand 2 (shift operand). In this case, use constant or loop
4704 invariant op1 directly, without extending it to vector mode first. */
4706 optab_op2_mode = insn_data[icode].operand[2].mode;
4707 if (!VECTOR_MODE_P (optab_op2_mode))
4709 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_NOTE, vect_location,
4711 "operand 1 using scalar mode.\n");
4713 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4714 vec_oprnds1.quick_push (vec_oprnd1);
4717 /* Store vec_oprnd1 for every vector stmt to be created
4718 for SLP_NODE. We check during the analysis that all
4719 the shift arguments are the same.
4720 TODO: Allow different constants for different vector
4721 stmts generated for an SLP instance. */
4722 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4723 vec_oprnds1.quick_push (vec_oprnd1);
4728 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4729 (a special case for certain kinds of vector shifts); otherwise,
4730 operand 1 should be of a vector type (the usual case). */
4732 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4735 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4739 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4741 /* Arguments are ready. Create the new vector stmt. */
4742 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4744 vop1 = vec_oprnds1[i];
4745 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4746 new_temp = make_ssa_name (vec_dest, new_stmt);
4747 gimple_assign_set_lhs (new_stmt, new_temp);
4748 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4750 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4757 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4759 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4760 prev_stmt_info = vinfo_for_stmt (new_stmt);
4763 vec_oprnds0.release ();
4764 vec_oprnds1.release ();
4770 /* Function vectorizable_operation.
4772 Check if STMT performs a binary, unary or ternary operation that can be vectorized.
4774 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4775 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4776 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4779 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4780 gimple **vec_stmt, slp_tree slp_node)
4784 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4785 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4787 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4788 enum tree_code code;
4789 machine_mode vec_mode;
4793 bool target_support_p;
4795 enum vect_def_type dt[3]
4796 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4797 gimple *new_stmt = NULL;
4798 stmt_vec_info prev_stmt_info;
4804 vec<tree> vec_oprnds0 = vNULL;
4805 vec<tree> vec_oprnds1 = vNULL;
4806 vec<tree> vec_oprnds2 = vNULL;
4807 tree vop0, vop1, vop2;
4808 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4809 vec_info *vinfo = stmt_info->vinfo;
4812 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4815 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4819 /* Is STMT a vectorizable binary/unary operation? */
4820 if (!is_gimple_assign (stmt))
4823 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4826 code = gimple_assign_rhs_code (stmt);
4828 /* For pointer addition, we should use the normal plus for
4829 the vector addition. */
4830 if (code == POINTER_PLUS_EXPR)
4833 /* Support only unary, binary or ternary operations. */
4834 op_type = TREE_CODE_LENGTH (code);
4835 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4837 if (dump_enabled_p ())
4838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4839 "num. args = %d (not unary/binary/ternary op).\n",
4844 scalar_dest = gimple_assign_lhs (stmt);
4845 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4847 /* Most operations cannot handle bit-precision types without extra truncations. */
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4850 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4851 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4852 /* Exceptions are bitwise binary operations. */
4853 && code != BIT_IOR_EXPR
4854 && code != BIT_XOR_EXPR
4855 && code != BIT_AND_EXPR)
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4859 "bit-precision arithmetic not supported.\n");
4863 op0 = gimple_assign_rhs1 (stmt);
4864 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4866 if (dump_enabled_p ())
4867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4868 "use not simple.\n");
4871 /* If op0 is an external or constant def use a vector type with
4872 the same size as the output vector type. */
4875 /* For boolean type we cannot determine vectype by
4876 invariant value (don't know whether it is a vector
4877 of booleans or vector of integers). We use output
4878 vectype because operations on boolean don't change type. */
4880 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4882 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4884 if (dump_enabled_p ())
4885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4886 "not supported operation on bool value.\n");
4889 vectype = vectype_out;
4892 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4895 gcc_assert (vectype);
4898 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4901 "no vectype for scalar type ");
4902 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4904 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4910 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4911 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4912 if (nunits_out != nunits_in)
4915 if (op_type == binary_op || op_type == ternary_op)
4917 op1 = gimple_assign_rhs2 (stmt);
4918 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4920 if (dump_enabled_p ())
4921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4922 "use not simple.\n");
4926 if (op_type == ternary_op)
4928 op2 = gimple_assign_rhs3 (stmt);
4929 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4931 if (dump_enabled_p ())
4932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4933 "use not simple.\n");
4939 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4943 /* Multiple types in SLP are handled by creating the appropriate number of
4944 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
4949 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4951 gcc_assert (ncopies >= 1);
4953 /* Shifts are handled in vectorizable_shift (). */
4954 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4955 || code == RROTATE_EXPR)
4958 /* Supportable by target? */
4960 vec_mode = TYPE_MODE (vectype);
4961 if (code == MULT_HIGHPART_EXPR)
4962 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4965 optab = optab_for_tree_code (code, vectype, optab_default);
4968 if (dump_enabled_p ())
4969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4973 target_support_p = (optab_handler (optab, vec_mode)
4974 != CODE_FOR_nothing);
4977 if (!target_support_p)
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4981 "op not supported by target.\n");
4982 /* Check only during analysis. */
4983 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4984 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_NOTE, vect_location,
4988 "proceeding using word mode.\n");
4991 /* Worthwhile without SIMD support? Check only during analysis. */
4992 if (!VECTOR_MODE_P (vec_mode)
4994 && vf < vect_min_worthwhile_factor (code))
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4998 "not worthwhile without SIMD support.\n");
5002 if (!vec_stmt) /* transformation not required. */
5004 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5005 if (dump_enabled_p ())
5006 dump_printf_loc (MSG_NOTE, vect_location,
5007 "=== vectorizable_operation ===\n");
5008 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5014 if (dump_enabled_p ())
5015 dump_printf_loc (MSG_NOTE, vect_location,
5016 "transform binary/unary operation.\n");
5019 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5021 /* In case the vectorization factor (VF) is bigger than the number
5022 of elements that we can fit in a vectype (nunits), we have to generate
5023 more than one vector stmt - i.e., we need to "unroll" the
5024 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5025 from one copy of the vector stmt to the next, in the field
5026 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5027 stages to find the correct vector defs to be used when vectorizing
5028 stmts that use the defs of the current stmt. The example below
5029 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5030 we need to create 4 vectorized stmts):
5032 before vectorization:
5033 RELATED_STMT VEC_STMT
5037 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5039 RELATED_STMT VEC_STMT
5040 VS1_0: vx0 = memref0 VS1_1 -
5041 VS1_1: vx1 = memref1 VS1_2 -
5042 VS1_2: vx2 = memref2 VS1_3 -
5043 VS1_3: vx3 = memref3 - -
5044 S1: x = load - VS1_0
5047 step2: vectorize stmt S2 (done here):
5048 To vectorize stmt S2 we first need to find the relevant vector
5049 def for the first operand 'x'. This is, as usual, obtained from
5050 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5051 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5052 relevant vector def 'vx0'. Having found 'vx0' we can generate
5053 the vector stmt VS2_0, and as usual, record it in the
5054 STMT_VINFO_VEC_STMT of stmt S2.
5055 When creating the second copy (VS2_1), we obtain the relevant vector
5056 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5057 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5058 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5059 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5060 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5061 chain of stmts and pointers:
5062 RELATED_STMT VEC_STMT
5063 VS1_0: vx0 = memref0 VS1_1 -
5064 VS1_1: vx1 = memref1 VS1_2 -
5065 VS1_2: vx2 = memref2 VS1_3 -
5066 VS1_3: vx3 = memref3 - -
5067 S1: x = load - VS1_0
5068 VS2_0: vz0 = vx0 + v1 VS2_1 -
5069 VS2_1: vz1 = vx1 + v1 VS2_2 -
5070 VS2_2: vz2 = vx2 + v1 VS2_3 -
5071 VS2_3: vz3 = vx3 + v1 - -
5072 S2: z = x + 1 - VS2_0 */
5074 prev_stmt_info = NULL;
5075 for (j = 0; j < ncopies; j++)
5080 if (op_type == binary_op || op_type == ternary_op)
5081 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5084 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5086 if (op_type == ternary_op)
5088 vec_oprnds2.create (1);
5089 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
5095 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5096 if (op_type == ternary_op)
5098 tree vec_oprnd = vec_oprnds2.pop ();
5099 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5104 /* Arguments are ready. Create the new vector stmt. */
5105 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5107 vop1 = ((op_type == binary_op || op_type == ternary_op)
5108 ? vec_oprnds1[i] : NULL_TREE);
5109 vop2 = ((op_type == ternary_op)
5110 ? vec_oprnds2[i] : NULL_TREE);
5111 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5112 new_temp = make_ssa_name (vec_dest, new_stmt);
5113 gimple_assign_set_lhs (new_stmt, new_temp);
5114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5116 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5126 prev_stmt_info = vinfo_for_stmt (new_stmt);
5129 vec_oprnds0.release ();
5130 vec_oprnds1.release ();
5131 vec_oprnds2.release ();
5136 /* A helper function to ensure data reference DR's base alignment for STMT_INFO. */
5140 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5145 if (DR_VECT_AUX (dr)->base_misaligned)
5147 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5148 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5150 if (decl_in_symtab_p (base_decl))
5151 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5154 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5155 DECL_USER_ALIGN (base_decl) = 1;
5157 DR_VECT_AUX (dr)->base_misaligned = false;
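/* Illustrative effect: if the vectorizer assumed, say, 128-bit alignment
   for V4SF accesses based on a decl that was only 32-bit aligned, the
   code above raises the decl's alignment (through the symbol table for
   symtab decls, or DECL_ALIGN directly) so the assumption becomes true.  */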
5162 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5163 reversal of the vector elements. If that is impossible to do, returns NULL. */
5167 perm_mask_for_reverse (tree vectype)
5172 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5173 sel = XALLOCAVEC (unsigned char, nunits);
5175 for (i = 0; i < nunits; ++i)
5176 sel[i] = nunits - 1 - i;
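  /* E.g., for nunits == 4 this builds the selector {3, 2, 1, 0}.  */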
5178 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5180 return vect_gen_perm_mask_checked (vectype, sel);
5183 /* Function vectorizable_store.
5185 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that can be vectorized.
5187 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5188 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5189 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5192 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5198 tree vec_oprnd = NULL_TREE;
5199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5200 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5202 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5203 struct loop *loop = NULL;
5204 machine_mode vec_mode;
5206 enum dr_alignment_support alignment_support_scheme;
5208 enum vect_def_type dt;
5209 stmt_vec_info prev_stmt_info = NULL;
5210 tree dataref_ptr = NULL_TREE;
5211 tree dataref_offset = NULL_TREE;
5212 gimple *ptr_incr = NULL;
5215 gimple *next_stmt, *first_stmt = NULL;
5216 bool grouped_store = false;
5217 bool store_lanes_p = false;
5218 unsigned int group_size, i;
5219 vec<tree> dr_chain = vNULL;
5220 vec<tree> oprnds = vNULL;
5221 vec<tree> result_chain = vNULL;
5223 bool negative = false;
5224 tree offset = NULL_TREE;
5225 vec<tree> vec_oprnds = vNULL;
5226 bool slp = (slp_node != NULL);
5227 unsigned int vec_num;
5228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5229 vec_info *vinfo = stmt_info->vinfo;
5231 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5232 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5233 int scatter_scale = 1;
5234 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5235 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5238 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5241 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5245 /* Is vectorizable store? */
5247 if (!is_gimple_assign (stmt))
5250 scalar_dest = gimple_assign_lhs (stmt);
5251 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5252 && is_pattern_stmt_p (stmt_info))
5253 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5254 if (TREE_CODE (scalar_dest) != ARRAY_REF
5255 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5256 && TREE_CODE (scalar_dest) != INDIRECT_REF
5257 && TREE_CODE (scalar_dest) != COMPONENT_REF
5258 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5259 && TREE_CODE (scalar_dest) != REALPART_EXPR
5260 && TREE_CODE (scalar_dest) != MEM_REF)
5263 /* Cannot have hybrid store SLP -- that would mean storing to the
5264 same location twice. */
5265 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5267 gcc_assert (gimple_assign_single_p (stmt));
5269 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5270 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5273 loop = LOOP_VINFO_LOOP (loop_vinfo);
5275 /* Multiple types in SLP are handled by creating the appropriate number of
5276 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
5281 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5283 gcc_assert (ncopies >= 1);
5285 /* FORNOW. This restriction should be relaxed. */
5286 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5288 if (dump_enabled_p ())
5289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5290 "multiple types in nested loop.\n");
5294 op = gimple_assign_rhs1 (stmt);
5296 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5298 if (dump_enabled_p ())
5299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5300 "use not simple.\n");
5304 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5307 elem_type = TREE_TYPE (vectype);
5308 vec_mode = TYPE_MODE (vectype);
5310 /* FORNOW. In some cases can vectorize even if data-type not supported
5311 (e.g. - array initialization with 0). */
5312 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5315 if (!STMT_VINFO_DATA_REF (stmt_info))
5318 if (!STMT_VINFO_STRIDED_P (stmt_info))
5321 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5322 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5323 size_zero_node) < 0;
5324 if (negative && ncopies > 1)
5326 if (dump_enabled_p ())
5327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5328 "multiple types with negative step.\n");
5333 gcc_assert (!grouped_store);
5334 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5335 if (alignment_support_scheme != dr_aligned
5336 && alignment_support_scheme != dr_unaligned_supported)
5338 if (dump_enabled_p ())
5339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5340 "negative step but alignment required.\n");
5343 if (dt != vect_constant_def
5344 && dt != vect_external_def
5345 && !perm_mask_for_reverse (vectype))
5347 if (dump_enabled_p ())
5348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5349 "negative step and reversing not supported.\n");
5355 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5357 grouped_store = true;
5358 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5359 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5360 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
5362 if (vect_store_lanes_supported (vectype, group_size))
5363 store_lanes_p = true;
5364 else if (!vect_grouped_store_supported (vectype, group_size))
5368 if (STMT_VINFO_STRIDED_P (stmt_info)
5370 && (group_size > nunits
5371 || nunits % group_size != 0))
5373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5374 "unhandled strided group store\n");
5378 if (first_stmt == stmt)
5380 /* STMT is the leader of the group. Check the operands of all the
5381 stmts of the group. */
5382 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5385 gcc_assert (gimple_assign_single_p (next_stmt));
5386 op = gimple_assign_rhs1 (next_stmt);
5387 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5389 if (dump_enabled_p ())
5390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5391 "use not simple.\n");
5394 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5399 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5402 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5403 &scatter_off, &scatter_scale);
5404 gcc_assert (scatter_decl);
5405 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5406 &scatter_off_vectype))
5408 if (dump_enabled_p ())
5409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5410 "scatter index use not simple.");
5415 if (!vec_stmt) /* transformation not required. */
5417 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5418 /* The SLP costs are calculated during SLP analysis. */
5419 if (!PURE_SLP_STMT (stmt_info))
5420 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5427 ensure_base_align (stmt_info, dr);
5429 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5431 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5432 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5433 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5434 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5435 edge pe = loop_preheader_edge (loop);
5438 enum { NARROW, NONE, WIDEN } modifier;
5439 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5441 if (nunits == (unsigned int) scatter_off_nunits)
5443 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5445 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5448 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5449 sel[i] = i | nunits;
5451 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5452 gcc_assert (perm_mask != NULL_TREE);
5454 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5456 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5459 for (i = 0; i < (unsigned int) nunits; ++i)
5460 sel[i] = i | scatter_off_nunits;
5462 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5463 gcc_assert (perm_mask != NULL_TREE);
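      /* Illustrative: with four-element data and eight-element offsets the
	 WIDEN case above applies and the offset vector is the one permuted;
	 with eight-element data and four-element offsets the NARROW case
	 applies and the data vector is permuted instead.  */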
5469 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5470 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5471 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5472 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5473 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5474 scaletype = TREE_VALUE (arglist);
5476 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5477 && TREE_CODE (rettype) == VOID_TYPE);
5479 ptr = fold_convert (ptrtype, scatter_base);
5480 if (!is_gimple_min_invariant (ptr))
5482 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5483 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5484 gcc_assert (!new_bb);
5487 /* Currently we support only unconditional scatter stores,
5488 so mask should be all ones. */
5489 mask = build_int_cst (masktype, -1);
5490 mask = vect_init_vector (stmt, mask, masktype, NULL);
5492 scale = build_int_cst (scaletype, scatter_scale);
5494 prev_stmt_info = NULL;
5495 for (j = 0; j < ncopies; ++j)
5500 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5502 = vect_get_vec_def_for_operand (scatter_off, stmt);
5504 else if (modifier != NONE && (j & 1))
5506 if (modifier == WIDEN)
5509 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5510 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5513 else if (modifier == NARROW)
5515 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5518 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5526 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5528 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5531 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5533 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5534 == TYPE_VECTOR_SUBPARTS (srctype));
5535 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5536 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5537 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5538 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5542 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5544 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5545 == TYPE_VECTOR_SUBPARTS (idxtype));
5546 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5547 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5548 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5549 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5554 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5558 if (prev_stmt_info == NULL)
5559 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5561 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5562 prev_stmt_info = vinfo_for_stmt (new_stmt);
5569 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5570 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5572 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5575 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5577 /* We vectorize all the stmts of the interleaving group when we
5578 reach the last stmt in the group. */
5579 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5580 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5589 grouped_store = false;
5590 /* VEC_NUM is the number of vect stmts to be created for this SLP instance. */
5592 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5593 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5594 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5595 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5596 op = gimple_assign_rhs1 (first_stmt);
5599 /* VEC_NUM is the number of vect stmts to be created for this group. */
5601 vec_num = group_size;
5607 group_size = vec_num = 1;
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_NOTE, vect_location,
5612 "transform store. ncopies = %d\n", ncopies);
5614 if (STMT_VINFO_STRIDED_P (stmt_info))
5616 gimple_stmt_iterator incr_gsi;
5622 gimple_seq stmts = NULL;
5623 tree stride_base, stride_step, alias_off;
5627 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5630 = fold_build_pointer_plus
5631 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5632 size_binop (PLUS_EXPR,
5633 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5634 convert_to_ptrofftype (DR_INIT (first_dr))));
5635 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5637 /* For a store with loop-invariant (but other than power-of-2)
5638 stride (i.e. not a grouped access) like so:
5640 for (i = 0; i < n; i += stride)
       array[i] = ...;
5643 we generate a new induction variable and new stores from
5644 the components of the (vectorized) rhs:
5646 for (j = 0; ; j += VF*stride)
       tmp1 = ...;
       tmp2 = ...;
       ...
       array[j] = tmp1;
5651 array[j + stride] = tmp2;
       ...  */
5655 unsigned nstores = nunits;
5656 tree ltype = elem_type;
5659 nstores = nunits / group_size;
5660 if (group_size < nunits)
5661 ltype = build_vector_type (elem_type, group_size);
5664 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5665 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5669 ivstep = stride_step;
5670 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5671 build_int_cst (TREE_TYPE (ivstep),
5672 ncopies * nstores));
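	  /* E.g., a V4SI rhs emitted as four scalar stores (nstores == 4)
	     with ncopies == 2 makes the IV advance by 8 * stride_step per
	     vectorized iteration (illustrative).  */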
5674 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5676 create_iv (stride_base, ivstep, NULL,
5677 loop, &incr_gsi, insert_after,
5679 incr = gsi_stmt (incr_gsi);
5680 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5682 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5684 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5686 prev_stmt_info = NULL;
5687 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5688 next_stmt = first_stmt;
5689 for (g = 0; g < group_size; g++)
5691 running_off = offvar;
5694 tree size = TYPE_SIZE_UNIT (ltype);
5695 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5697 tree newoff = copy_ssa_name (running_off, NULL);
5698 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5700 vect_finish_stmt_generation (stmt, incr, gsi);
5701 running_off = newoff;
5703 for (j = 0; j < ncopies; j++)
5705 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5706 and first_stmt == stmt. */
5711 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5713 vec_oprnd = vec_oprnds[0];
5717 gcc_assert (gimple_assign_single_p (next_stmt));
5718 op = gimple_assign_rhs1 (next_stmt);
5719 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5725 vec_oprnd = vec_oprnds[j];
5728 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5729 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5733 for (i = 0; i < nstores; i++)
5735 tree newref, newoff;
5736 gimple *incr, *assign;
5737 tree size = TYPE_SIZE (ltype);
5738 /* Extract the i'th component. */
5739 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5740 bitsize_int (i), size);
5741 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5744 elem = force_gimple_operand_gsi (gsi, elem, true,
5748 newref = build2 (MEM_REF, ltype,
5749 running_off, alias_off);
5751 /* And store it to *running_off. */
5752 assign = gimple_build_assign (newref, elem);
5753 vect_finish_stmt_generation (stmt, assign, gsi);
5755 newoff = copy_ssa_name (running_off, NULL);
5756 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5757 running_off, stride_step);
5758 vect_finish_stmt_generation (stmt, incr, gsi);
5760 running_off = newoff;
5761 if (g == group_size - 1
5764 if (j == 0 && i == 0)
5765 STMT_VINFO_VEC_STMT (stmt_info)
5766 = *vec_stmt = assign;
5768 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5769 prev_stmt_info = vinfo_for_stmt (assign);
5773 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5778 dr_chain.create (group_size);
5779 oprnds.create (group_size);
5781 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5782 gcc_assert (alignment_support_scheme);
5783 /* Targets with store-lane instructions must not require explicit realignment. */
5785 gcc_assert (!store_lanes_p
5786 || alignment_support_scheme == dr_aligned
5787 || alignment_support_scheme == dr_unaligned_supported);
5790 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5793 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5795 aggr_type = vectype;
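  /* E.g., storing two V4SI vectors via store-lanes uses the array type
     int[8] as the access type, matching the IFN_STORE_LANES interface;
     ordinary vector stores just use the vector type (illustrative).  */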
5797 /* In case the vectorization factor (VF) is bigger than the number
5798 of elements that we can fit in a vectype (nunits), we have to generate
5799 more than one vector stmt - i.e., we need to "unroll" the
5800 vector stmt by a factor VF/nunits. For more details see documentation in
5801 vect_get_vec_def_for_copy_stmt. */
5803 /* In case of interleaving (non-unit grouped access):

       S1:  &base + 2 = x2
       S2:  &base = x0
       S3:  &base + 1 = x1
       S4:  &base + 3 = x3

5810 We create vectorized stores starting from base address (the access of the
5811 first stmt in the chain (S2 in the above example), when the last store stmt
5812 of the chain (S4) is reached:

     VS1: &base = vx2
5815 VS2: &base + vec_size*1 = vx0
5816 VS3: &base + vec_size*2 = vx1
5817 VS4: &base + vec_size*3 = vx3
5819 Then permutation statements are generated:
5821 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5822 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5825 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5826 (the order of the data-refs in the output of vect_permute_store_chain
5827 corresponds to the order of scalar stmts in the interleaving chain - see
5828 the documentation of vect_permute_store_chain()).
5830 In case of both multiple types and interleaving, above vector stores and
5831 permutation stmts are created for every copy. The result vector stmts are
5832 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5833 STMT_VINFO_RELATED_STMT for the next copies.
5836 prev_stmt_info = NULL;
5837 for (j = 0; j < ncopies; j++)
5844 /* Get vectorized arguments for SLP_NODE. */
5845 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5846 NULL, slp_node, -1);
5848 vec_oprnd = vec_oprnds[0];
5852 /* For interleaved stores we collect vectorized defs for all the
5853 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5854 used as an input to vect_permute_store_chain(), and OPRNDS as
5855 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5857 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5858 OPRNDS are of size 1. */
5859 next_stmt = first_stmt;
5860 for (i = 0; i < group_size; i++)
5862 /* Since gaps are not supported for interleaved stores,
5863 GROUP_SIZE is the exact number of stmts in the chain.
5864 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5865 there is no interleaving, GROUP_SIZE is 1, and only one
5866 iteration of the loop will be executed. */
5867 gcc_assert (next_stmt
5868 && gimple_assign_single_p (next_stmt));
5869 op = gimple_assign_rhs1 (next_stmt);
5871 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5872 dr_chain.quick_push (vec_oprnd);
5873 oprnds.quick_push (vec_oprnd);
5874 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5878 /* We should have caught mismatched types earlier. */
5879 gcc_assert (useless_type_conversion_p (vectype,
5880 TREE_TYPE (vec_oprnd)));
5881 bool simd_lane_access_p
5882 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5883 if (simd_lane_access_p
5884 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5885 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5886 && integer_zerop (DR_OFFSET (first_dr))
5887 && integer_zerop (DR_INIT (first_dr))
5888 && alias_sets_conflict_p (get_alias_set (aggr_type),
5889 get_alias_set (DR_REF (first_dr))))
5891 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5892 dataref_offset = build_int_cst (reference_alias_ptr_type
5893 (DR_REF (first_dr)), 0);
5898 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5899 simd_lane_access_p ? loop : NULL,
5900 offset, &dummy, gsi, &ptr_incr,
5901 simd_lane_access_p, &inv_p);
5902 gcc_assert (bb_vinfo || !inv_p);
5906 /* For interleaved stores we created vectorized defs for all the
5907 defs stored in OPRNDS in the previous iteration (previous copy).
5908 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5909 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5911 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5912 OPRNDS are of size 1. */
5913 for (i = 0; i < group_size; i++)
5916 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5917 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5918 dr_chain[i] = vec_oprnd;
5919 oprnds[i] = vec_oprnd;
5923 = int_const_binop (PLUS_EXPR, dataref_offset,
5924 TYPE_SIZE_UNIT (aggr_type));
5926 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5927 TYPE_SIZE_UNIT (aggr_type));
5934 /* Combine all the vectors into an array. */
5935 vec_array = create_vector_array (vectype, vec_num);
5936 for (i = 0; i < vec_num; i++)
5938 vec_oprnd = dr_chain[i];
5939 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5943 /* Emit: MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5944 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5945 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5946 gimple_call_set_lhs (new_stmt, data_ref);
5947 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5955 result_chain.create (group_size);
5957 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5961 next_stmt = first_stmt;
5962 for (i = 0; i < vec_num; i++)
5964 unsigned align, misalign;
5967 /* Bump the vector pointer. */
5968 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5972 vec_oprnd = vec_oprnds[i];
5973 else if (grouped_store)
5974 /* For grouped stores vectorized defs are interleaved in
5975 vect_permute_store_chain(). */
5976 vec_oprnd = result_chain[i];
5978 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5982 : build_int_cst (reference_alias_ptr_type
5983 (DR_REF (first_dr)), 0));
5984 align = TYPE_ALIGN_UNIT (vectype);
5985 if (aligned_access_p (first_dr))
5987 else if (DR_MISALIGNMENT (first_dr) == -1)
5989 if (DR_VECT_AUX (first_dr)->base_element_aligned)
5990 align = TYPE_ALIGN_UNIT (elem_type);
5992 align = get_object_alignment (DR_REF (first_dr))
5995 TREE_TYPE (data_ref)
5996 = build_aligned_type (TREE_TYPE (data_ref),
5997 align * BITS_PER_UNIT);
6001 TREE_TYPE (data_ref)
6002 = build_aligned_type (TREE_TYPE (data_ref),
6003 TYPE_ALIGN (elem_type));
6004 misalign = DR_MISALIGNMENT (first_dr);
6006 if (dataref_offset == NULL_TREE
6007 && TREE_CODE (dataref_ptr) == SSA_NAME)
6008 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6012 && dt != vect_constant_def
6013 && dt != vect_external_def)
6015 tree perm_mask = perm_mask_for_reverse (vectype);
6017 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6019 tree new_temp = make_ssa_name (perm_dest);
6021 /* Generate the permute statement. */
6023 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6024 vec_oprnd, perm_mask);
6025 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6027 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6028 vec_oprnd = new_temp;
6031 /* Arguments are ready. Create the new vector stmt. */
6032 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6033 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6038 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6046 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6048 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6049 prev_stmt_info = vinfo_for_stmt (new_stmt);
6053 dr_chain.release ();
6055 result_chain.release ();
6056 vec_oprnds.release ();
6061 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6062 VECTOR_CST mask. No checks are made that the target platform supports the
6063 mask, so callers may wish to test can_vec_perm_p separately, or use
6064 vect_gen_perm_mask_checked. */
6067 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6069 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6072 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6074 mask_elt_type = lang_hooks.types.type_for_mode
6075 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6076 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6078 mask_elts = XALLOCAVEC (tree, nunits);
6079 for (i = nunits - 1; i >= 0; i--)
6080 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6081 mask_vec = build_vector (mask_type, mask_elts);
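  /* E.g., for a four-element vector type and SEL == {1, 0, 3, 2} this
     yields the integer VECTOR_CST {1, 0, 3, 2}, suitable as the selector
     operand of a VEC_PERM_EXPR (illustrative).  */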
6086 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6087 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6090 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6092 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6093 return vect_gen_perm_mask_any (vectype, sel);
6096 /* Given vector variables X and Y that were generated for the scalar
6097 STMT, generate instructions to permute the vector elements of X and Y
6098 using permutation mask MASK_VEC, insert them at *GSI and return the
6099 permuted vector variable. */
6102 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6103 gimple_stmt_iterator *gsi)
6105 tree vectype = TREE_TYPE (x);
6106 tree perm_dest, data_ref;
6109 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6110 data_ref = make_ssa_name (perm_dest);
6112 /* Generate the permute statement. */
6113 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6114 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6119 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6120 inserting them on the loop's preheader edge. Returns true if we
6121 were successful in doing so (and thus STMT can be moved then),
6122 otherwise returns false. */
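/* Illustrative: if STMT is "x = *p" and "p = &a + off" is defined inside
   LOOP while all operands of that definition come from outside the loop,
   the definition of p is moved to the preheader, after which STMT itself
   becomes movable; deeper use webs are rejected by the check below.  */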
6125 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6131 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6133 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6134 if (!gimple_nop_p (def_stmt)
6135 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6137 /* Make sure we don't need to recurse. While we could do
6138 so in simple cases, when there are more complex use webs
6139 we don't have an easy way to preserve stmt order to fulfil
6140 dependencies within them. */
6143 if (gimple_code (def_stmt) == GIMPLE_PHI)
6145 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6147 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6148 if (!gimple_nop_p (def_stmt2)
6149 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6159 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6161 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6162 if (!gimple_nop_p (def_stmt)
6163 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6165 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6166 gsi_remove (&gsi, false);
6167 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6174 /* vectorizable_load.
6176 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that can be vectorized.
6178 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6179 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6180 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6183 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6184 slp_tree slp_node, slp_instance slp_node_instance)
6187 tree vec_dest = NULL;
6188 tree data_ref = NULL;
6189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6190 stmt_vec_info prev_stmt_info;
6191 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6192 struct loop *loop = NULL;
6193 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6194 bool nested_in_vect_loop = false;
6195 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6199 gimple *new_stmt = NULL;
6201 enum dr_alignment_support alignment_support_scheme;
6202 tree dataref_ptr = NULL_TREE;
6203 tree dataref_offset = NULL_TREE;
6204 gimple *ptr_incr = NULL;
6206 int i, j, group_size = -1, group_gap_adj;
6207 tree msq = NULL_TREE, lsq;
6208 tree offset = NULL_TREE;
6209 tree byte_offset = NULL_TREE;
6210 tree realignment_token = NULL_TREE;
6212 vec<tree> dr_chain = vNULL;
6213 bool grouped_load = false;
6214 bool load_lanes_p = false;
6216 gimple *first_stmt_for_drptr = NULL;
6218 bool negative = false;
6219 bool compute_in_loop = false;
6220 struct loop *at_loop;
6222 bool slp = (slp_node != NULL);
6223 bool slp_perm = false;
6224 enum tree_code code;
6225 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6228 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6229 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6230 int gather_scale = 1;
6231 enum vect_def_type gather_dt = vect_unknown_def_type;
6232 vec_info *vinfo = stmt_info->vinfo;
6234 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6237 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6241 /* Is vectorizable load? */
6242 if (!is_gimple_assign (stmt))
6245 scalar_dest = gimple_assign_lhs (stmt);
6246 if (TREE_CODE (scalar_dest) != SSA_NAME)
6249 code = gimple_assign_rhs_code (stmt);
6250 if (code != ARRAY_REF
6251 && code != BIT_FIELD_REF
6252 && code != INDIRECT_REF
6253 && code != COMPONENT_REF
6254 && code != IMAGPART_EXPR
6255 && code != REALPART_EXPR
6257 && TREE_CODE_CLASS (code) != tcc_declaration)
6260 if (!STMT_VINFO_DATA_REF (stmt_info))
6263 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6264 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6268 loop = LOOP_VINFO_LOOP (loop_vinfo);
6269 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6270 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6275 /* Multiple types in SLP are handled by creating the appropriate number of
6276 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
6281 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6283 gcc_assert (ncopies >= 1);
6285 /* FORNOW. This restriction should be relaxed. */
6286 if (nested_in_vect_loop && ncopies > 1)
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6290 "multiple types in nested loop.\n");
6294 /* Invalidate assumptions made by dependence analysis when vectorization
6295 on the unrolled body effectively re-orders stmts. */
6297 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6298 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6299 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6301 if (dump_enabled_p ())
6302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6303 "cannot perform implicit CSE when unrolling "
6304 "with negative dependence distance\n");
6308 elem_type = TREE_TYPE (vectype);
6309 mode = TYPE_MODE (vectype);
6311 /* FORNOW. In some cases can vectorize even if data-type not supported
6312 (e.g. - data copies). */
6313 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6315 if (dump_enabled_p ())
6316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6317 "Aligned load, but unsupported type.\n");
6321 /* Check if the load is a part of an interleaving chain. */
6322 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6324 grouped_load = true;
6326 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6328 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6329 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6331 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
6333 if (vect_load_lanes_supported (vectype, group_size))
6334 load_lanes_p = true;
6335 else if (!vect_grouped_load_supported (vectype, group_size))
6339 /* If this is single-element interleaving with an element distance
6340 that leaves unused vector loads around, punt - we at least create
6341 very sub-optimal code in that case (and blow up memory,
6343 if (first_stmt == stmt
6344 && !GROUP_NEXT_ELEMENT (stmt_info))
6346 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6348 if (dump_enabled_p ())
6349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6350 "single-element interleaving not supported "
6351 "for not adjacent vector loads\n");
6355 /* Single-element interleaving requires peeling for gaps. */
6356 gcc_assert (GROUP_GAP (stmt_info));
6359 /* If there is a gap at the end of the group or the group size cannot
6360 be made a multiple of the vector element count then we access excess
6361 elements in the last iteration and thus need to peel that off. */
6363 && ! STMT_VINFO_STRIDED_P (stmt_info)
6364 && (GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
6365 || (!slp && !load_lanes_p && vf % group_size != 0)))
6367 if (dump_enabled_p ())
6368 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6369 "Data access with gaps requires scalar "
6373 if (dump_enabled_p ())
6374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6375 "Peeling for outer loop is not supported\n");
6376 return false;
6377 }
6379 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
6382 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6383 slp_perm = true;
6385 /* ??? The following is overly pessimistic (as well as the loop
6386 case above) in the case we can statically determine the excess
6387 elements loaded are within the bounds of a decl that is accessed.
6388 Likewise, for BB vectorization, using masked loads is a possibility. */
6389 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6392 "BB vectorization with gaps at the end of a load "
6393 "is not supported\n");
6397 /* Invalidate assumptions made by dependence analysis when vectorization
6398 on the unrolled body effectively re-orders stmts. */
6399 if (!PURE_SLP_STMT (stmt_info)
6400 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6401 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6402 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6404 if (dump_enabled_p ())
6405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6406 "cannot perform implicit CSE when performing "
6407 "group loads with negative dependence distance\n");
6411 /* Similarly when the stmt is a load that is both part of a SLP
6412 instance and a loop vectorized stmt via the same-dr mechanism
6413 we have to give up. */
6414 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6415 && (STMT_SLP_TYPE (stmt_info)
6416 != STMT_SLP_TYPE (vinfo_for_stmt
6417 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6419 if (dump_enabled_p ())
6420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6421 "conflicting SLP types for CSEd load\n");
6427 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6430 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6431 &gather_off, &gather_scale);
6432 gcc_assert (gather_decl);
6433 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6434 &gather_off_vectype))
6436 if (dump_enabled_p ())
6437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6438 "gather index use not simple.\n");
6442 else if (STMT_VINFO_STRIDED_P (stmt_info))
6446 negative = tree_int_cst_compare (nested_in_vect_loop
6447 ? STMT_VINFO_DR_STEP (stmt_info)
6448 : DR_STEP (dr),
6449 size_zero_node) < 0;
6450 if (negative && ncopies > 1)
6452 if (dump_enabled_p ())
6453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6454 "multiple types with negative step.\n");
6462 if (dump_enabled_p ())
6463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6464 "negative step for group load not supported"
6468 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6469 if (alignment_support_scheme != dr_aligned
6470 && alignment_support_scheme != dr_unaligned_supported)
6472 if (dump_enabled_p ())
6473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6474 "negative step but alignment required.\n");
6477 if (!perm_mask_for_reverse (vectype))
6479 if (dump_enabled_p ())
6480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6481 "negative step and reversing not supported."
6488 if (!vec_stmt) /* transformation not required. */
6490 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6491 /* The SLP costs are calculated during SLP analysis. */
6492 if (!PURE_SLP_STMT (stmt_info))
6493 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6498 if (dump_enabled_p ())
6499 dump_printf_loc (MSG_NOTE, vect_location,
6500 "transform load. ncopies = %d\n", ncopies);
6504 ensure_base_align (stmt_info, dr);
6506 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6508 tree vec_oprnd0 = NULL_TREE, op;
6509 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6510 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6511 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6512 edge pe = loop_preheader_edge (loop);
6513 gimple_seq seq;
6514 basic_block new_bb;
6515 enum { NARROW, NONE, WIDEN } modifier;
6516 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6518 if (nunits == gather_off_nunits)
6519 modifier = NONE;
6520 else if (nunits == gather_off_nunits / 2)
6522 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6523 modifier = WIDEN;
6525 for (i = 0; i < gather_off_nunits; ++i)
6526 sel[i] = i | nunits;
6528 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6530 else if (nunits == gather_off_nunits * 2)
6532 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6533 modifier = NARROW;
6535 for (i = 0; i < nunits; ++i)
6536 sel[i] = i < gather_off_nunits
6537 ? i : i + nunits - gather_off_nunits;
6539 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6540 ncopies *= 2;
6545 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6546 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6547 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6548 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6549 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6550 scaletype = TREE_VALUE (arglist);
6551 gcc_checking_assert (types_compatible_p (srctype, rettype));
6553 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6555 ptr = fold_convert (ptrtype, gather_base);
6556 if (!is_gimple_min_invariant (ptr))
6558 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6559 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6560 gcc_assert (!new_bb);
6563 /* Currently we support only unconditional gather loads,
6564 so mask should be all ones. */
6565 if (TREE_CODE (masktype) == INTEGER_TYPE)
6566 mask = build_int_cst (masktype, -1);
6567 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6569 mask = build_int_cst (TREE_TYPE (masktype), -1);
6570 mask = build_vector_from_val (masktype, mask);
6571 mask = vect_init_vector (stmt, mask, masktype, NULL);
6573 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6574 {
6575 REAL_VALUE_TYPE r;
6576 long tmp[6];
6577 for (j = 0; j < 6; ++j)
6578 tmp[j] = -1;
6579 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6580 mask = build_real (TREE_TYPE (masktype), r);
6581 mask = build_vector_from_val (masktype, mask);
6582 mask = vect_init_vector (stmt, mask, masktype, NULL);
6587 scale = build_int_cst (scaletype, gather_scale);
6589 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6590 merge = build_int_cst (TREE_TYPE (rettype), 0);
6591 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6592 {
6593 REAL_VALUE_TYPE r;
6594 long tmp[6];
6595 for (j = 0; j < 6; ++j)
6596 tmp[j] = 0;
6597 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6598 merge = build_real (TREE_TYPE (rettype), r);
6602 merge = build_vector_from_val (rettype, merge);
6603 merge = vect_init_vector (stmt, merge, rettype, NULL);
6605 prev_stmt_info = NULL;
6606 for (j = 0; j < ncopies; ++j)
6608 if (modifier == WIDEN && (j & 1))
6609 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6610 perm_mask, stmt, gsi);
6611 else if (j == 0)
6612 op = vec_oprnd0
6613 = vect_get_vec_def_for_operand (gather_off, stmt);
6614 else
6615 op = vec_oprnd0
6616 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6618 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6620 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6621 == TYPE_VECTOR_SUBPARTS (idxtype));
6622 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6623 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6624 new_stmt
6625 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6626 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6627 op = var;
6628 }
6630 new_stmt
6631 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6633 if (!useless_type_conversion_p (vectype, rettype))
6635 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6636 == TYPE_VECTOR_SUBPARTS (rettype));
6637 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6638 gimple_call_set_lhs (new_stmt, op);
6639 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6640 var = make_ssa_name (vec_dest);
6641 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6642 new_stmt
6643 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6644 }
6645 else
6646 {
6647 var = make_ssa_name (vec_dest, new_stmt);
6648 gimple_call_set_lhs (new_stmt, var);
6651 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6653 if (modifier == NARROW)
6654 {
6655 if ((j & 1) == 0)
6656 {
6657 prev_res = var;
6658 continue;
6659 }
6660 var = permute_vec_elements (prev_res, var,
6661 perm_mask, stmt, gsi);
6662 new_stmt = SSA_NAME_DEF_STMT (var);
6665 if (prev_stmt_info == NULL)
6666 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6667 else
6668 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6669 prev_stmt_info = vinfo_for_stmt (new_stmt);
6673 else if (STMT_VINFO_STRIDED_P (stmt_info))
6675 gimple_stmt_iterator incr_gsi;
6676 bool insert_after;
6677 gimple *incr;
6678 tree offvar;
6679 tree ivstep;
6680 tree running_off;
6681 vec<constructor_elt, va_gc> *v = NULL;
6682 gimple_seq stmts = NULL;
6683 tree stride_base, stride_step, alias_off;
6685 gcc_assert (!nested_in_vect_loop);
6687 if (slp && grouped_load)
6688 first_dr = STMT_VINFO_DATA_REF
6689 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6690 else
6691 first_dr = dr;
6693 stride_base
6694 = fold_build_pointer_plus
6695 (DR_BASE_ADDRESS (first_dr),
6696 size_binop (PLUS_EXPR,
6697 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6698 convert_to_ptrofftype (DR_INIT (first_dr))));
6699 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6701 /* For a load with loop-invariant (but other than power-of-2)
6702 stride (i.e. not a grouped access) like so:
6704 for (i = 0; i < n; i += stride)
6705 ... = array[i];
6707 we generate a new induction variable and new accesses to
6708 form a new vector (or vectors, depending on ncopies):
6710 for (j = 0; ; j += VF*stride)
6711 tmp1 = array[j];
6712 tmp2 = array[j + stride];
6713 ...
6714 vectemp = {tmp1, tmp2, ...}
6715 ... */
6717 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6718 build_int_cst (TREE_TYPE (stride_step), vf));
6720 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6722 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6723 loop, &incr_gsi, insert_after,
6724 &offvar, NULL);
6725 incr = gsi_stmt (incr_gsi);
6726 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6728 stride_step = force_gimple_operand (unshare_expr (stride_step),
6729 &stmts, true, NULL_TREE);
6730 if (stmts)
6731 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6733 prev_stmt_info = NULL;
6734 running_off = offvar;
6735 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6736 int nloads = nunits;
6738 tree ltype = TREE_TYPE (vectype);
6739 auto_vec<tree> dr_chain;
6742 if (group_size < nunits
6743 && nunits % group_size == 0)
6745 nloads = nunits / group_size;
6747 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6748 ltype = build_aligned_type (ltype,
6749 TYPE_ALIGN (TREE_TYPE (vectype)));
6751 else if (group_size >= nunits
6752 && group_size % nunits == 0)
6757 ltype = build_aligned_type (ltype,
6758 TYPE_ALIGN (TREE_TYPE (vectype)));
6760 /* For SLP permutation support we need to load the whole group,
6761 not only the number of vector stmts the permutation result
6762 fits in. */
6763 if (slp_perm)
6764 {
6765 ncopies = (group_size * vf + nunits - 1) / nunits;
6766 dr_chain.create (ncopies);
6769 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6772 unsigned HOST_WIDE_INT
6773 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6774 for (j = 0; j < ncopies; j++)
6777 vec_alloc (v, nloads);
6778 for (i = 0; i < nloads; i++)
6780 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6781 group_el * elsz);
6782 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6783 build2 (MEM_REF, ltype,
6784 running_off, this_off));
6785 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6787 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
6788 gimple_assign_lhs (new_stmt));
6790 group_el += lnel;
6791 if (! slp
6792 || group_el == group_size)
6794 tree newoff = copy_ssa_name (running_off);
6795 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6796 running_off, stride_step);
6797 vect_finish_stmt_generation (stmt, incr, gsi);
6799 running_off = newoff;
6805 tree vec_inv = build_constructor (vectype, v);
6806 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6807 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6813 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6815 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6816 }
6817 else
6818 {
6819 if (j == 0)
6820 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6821 else
6822 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6823 prev_stmt_info = vinfo_for_stmt (new_stmt);
6824 }
6826 if (slp_perm)
6827 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6828 slp_node_instance, false);
6834 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6835 /* For SLP vectorization we directly vectorize a subchain
6836 without permutation. */
6837 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6838 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6839 /* For BB vectorization always use the first stmt to base
6840 the data ref pointer on. */
6841 if (bb_vinfo)
6842 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6844 /* Check if the chain of loads is already vectorized. */
6845 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6846 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6847 ??? But we can only do so if there is exactly one
6848 as we have no way to get at the rest. Leave the CSE
6849 opportunity alone.
6850 ??? With the group load eventually participating
6851 in multiple different permutations (having multiple
6852 slp nodes which refer to the same group) the CSE
6853 is even wrong code. See PR56270. */
6854 && !slp)
6855 {
6856 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6857 return true;
6858 }
6859 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6860 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6863 /* VEC_NUM is the number of vect stmts to be created for this group. */
6864 if (slp)
6865 {
6866 grouped_load = false;
6867 /* For SLP permutation support we need to load the whole group,
6868 not only the number of vector stmts the permutation result
6869 fits in. */
6870 if (slp_perm)
6871 vec_num = (group_size * vf + nunits - 1) / nunits;
6872 else
6873 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6874 group_gap_adj = vf * group_size - nunits * vec_num;
6875 }
6876 else
6877 vec_num = group_size;
6878 }
6879 else
6880 {
6881 first_stmt = stmt;
6882 first_dr = dr;
6883 group_size = vec_num = 1;
6884 group_gap_adj = 0;
6885 }
6887 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6888 gcc_assert (alignment_support_scheme);
6889 /* Targets with load-lane instructions must not require explicit
6890 realignment. */
6891 gcc_assert (!load_lanes_p
6892 || alignment_support_scheme == dr_aligned
6893 || alignment_support_scheme == dr_unaligned_supported);
6895 /* In case the vectorization factor (VF) is bigger than the number
6896 of elements that we can fit in a vectype (nunits), we have to generate
6897 more than one vector stmt - i.e - we need to "unroll" the
6898 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6899 from one copy of the vector stmt to the next, in the field
6900 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6901 stages to find the correct vector defs to be used when vectorizing
6902 stmts that use the defs of the current stmt. The example below
6903 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6904 need to create 4 vectorized stmts):
6906 before vectorization:
6907 RELATED_STMT VEC_STMT
6908 S1: x = memref - -
6909 S2: z = x + 1 - -
6911 step 1: vectorize stmt S1:
6912 We first create the vector stmt VS1_0, and, as usual, record a
6913 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6914 Next, we create the vector stmt VS1_1, and record a pointer to
6915 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6916 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6917 stmts and pointers:
6918 RELATED_STMT VEC_STMT
6919 VS1_0: vx0 = memref0 VS1_1 -
6920 VS1_1: vx1 = memref1 VS1_2 -
6921 VS1_2: vx2 = memref2 VS1_3 -
6922 VS1_3: vx3 = memref3 - -
6923 S1: x = load - VS1_0
6924 S2: z = x + 1 - -
6926 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6927 information we recorded in the RELATED_STMT field is used to vectorize
6928 stmt S2. */
6930 /* In case of interleaving (non-unit grouped access):
6932 S1: x2 = &base + 2
6933 S2: x0 = &base
6934 S3: x1 = &base + 1
6935 S4: x3 = &base + 3
6937 Vectorized loads are created in the order of memory accesses
6938 starting from the access of the first stmt of the chain:
6940 VS1: vx0 = &base
6941 VS2: vx1 = &base + vec_size*1
6942 VS3: vx3 = &base + vec_size*2
6943 VS4: vx4 = &base + vec_size*3
6945 Then permutation statements are generated:
6947 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6948 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6951 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6952 (the order of the data-refs in the output of vect_permute_load_chain
6953 corresponds to the order of scalar stmts in the interleaving chain - see
6954 the documentation of vect_permute_load_chain()).
6955 The generation of permutation stmts and recording them in
6956 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6958 In case of both multiple types and interleaving, the vector loads and
6959 permutation stmts above are created for every copy. The result vector
6960 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6961 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6963 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6964 on a target that supports unaligned accesses (dr_unaligned_supported)
6965 we generate the following code:
6966 p = initial_addr;
6967 indx = 0;
6968 loop {
6969 p = p + indx * vectype_size;
6970 vec_dest = *(p);
6971 indx = indx + 1;
6972 }
6974 Otherwise, the data reference is potentially unaligned on a target that
6975 does not support unaligned accesses (dr_explicit_realign_optimized) -
6976 then generate the following code, in which the data in each iteration is
6977 obtained by two vector loads, one from the previous iteration, and one
6978 from the current iteration:
6980 msq_init = *(floor(p1))
6981 p2 = initial_addr + VS - 1;
6982 realignment_token = call target_builtin;
6983 indx = 0;
6984 loop {
6985 p2 = p2 + indx * vectype_size
6986 lsq = *(floor(p2))
6987 vec_dest = realign_load (msq, lsq, realignment_token)
6988 indx = indx + 1;
6989 msq = lsq;
6990 } */
6992 /* If the misalignment remains the same throughout the execution of the
6993 loop, we can create the init_addr and permutation mask at the loop
6994 preheader. Otherwise, it needs to be created inside the loop.
6995 This can only occur when vectorizing memory accesses in the inner-loop
6996 nested within an outer-loop that is being vectorized. */
6998 if (nested_in_vect_loop
6999 && (TREE_INT_CST_LOW (DR_STEP (dr))
7000 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7002 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7003 compute_in_loop = true;
7006 if ((alignment_support_scheme == dr_explicit_realign_optimized
7007 || alignment_support_scheme == dr_explicit_realign)
7008 && !compute_in_loop)
7010 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7011 alignment_support_scheme, NULL_TREE,
7013 if (alignment_support_scheme == dr_explicit_realign_optimized)
7015 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7016 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7024 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7027 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7029 aggr_type = vectype;
7031 prev_stmt_info = NULL;
7032 for (j = 0; j < ncopies; j++)
7034 /* 1. Create the vector or array pointer update chain. */
7037 bool simd_lane_access_p
7038 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7039 if (simd_lane_access_p
7040 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7041 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7042 && integer_zerop (DR_OFFSET (first_dr))
7043 && integer_zerop (DR_INIT (first_dr))
7044 && alias_sets_conflict_p (get_alias_set (aggr_type),
7045 get_alias_set (DR_REF (first_dr)))
7046 && (alignment_support_scheme == dr_aligned
7047 || alignment_support_scheme == dr_unaligned_supported))
7049 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7050 dataref_offset = build_int_cst (reference_alias_ptr_type
7051 (DR_REF (first_dr)), 0);
7054 else if (first_stmt_for_drptr
7055 && first_stmt != first_stmt_for_drptr)
7058 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7059 at_loop, offset, &dummy, gsi,
7060 &ptr_incr, simd_lane_access_p,
7061 &inv_p, byte_offset);
7062 /* Adjust the pointer by the difference to first_stmt. */
7063 data_reference_p ptrdr
7064 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7065 tree diff = fold_convert (sizetype,
7066 size_binop (MINUS_EXPR,
7069 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7074 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7075 offset, &dummy, gsi, &ptr_incr,
7076 simd_lane_access_p, &inv_p,
7079 else if (dataref_offset)
7080 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7081 TYPE_SIZE_UNIT (aggr_type));
7083 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7084 TYPE_SIZE_UNIT (aggr_type));
7086 if (grouped_load || slp_perm)
7087 dr_chain.create (vec_num);
7093 vec_array = create_vector_array (vectype, vec_num);
7095 /* Emit:
7096 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7097 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
7098 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7099 gimple_call_set_lhs (new_stmt, vec_array);
7100 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7102 /* Extract each vector into an SSA_NAME. */
7103 for (i = 0; i < vec_num; i++)
7105 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7106 vec_array, i);
7107 dr_chain.quick_push (new_temp);
7110 /* Record the mapping between SSA_NAMEs and statements. */
7111 vect_record_grouped_load_vectors (stmt, dr_chain);
7115 for (i = 0; i < vec_num; i++)
7118 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7121 /* 2. Create the vector-load in the loop. */
7122 switch (alignment_support_scheme)
7123 {
7124 case dr_aligned:
7125 case dr_unaligned_supported:
7127 unsigned int align, misalign;
7129 data_ref
7130 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7131 dataref_offset
7132 ? dataref_offset
7133 : build_int_cst (reference_alias_ptr_type
7134 (DR_REF (first_dr)), 0));
7135 align = TYPE_ALIGN_UNIT (vectype);
7136 if (alignment_support_scheme == dr_aligned)
7138 gcc_assert (aligned_access_p (first_dr));
7141 else if (DR_MISALIGNMENT (first_dr) == -1)
7143 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7144 align = TYPE_ALIGN_UNIT (elem_type);
7146 align = (get_object_alignment (DR_REF (first_dr))
7149 TREE_TYPE (data_ref)
7150 = build_aligned_type (TREE_TYPE (data_ref),
7151 align * BITS_PER_UNIT);
7155 TREE_TYPE (data_ref)
7156 = build_aligned_type (TREE_TYPE (data_ref),
7157 TYPE_ALIGN (elem_type));
7158 misalign = DR_MISALIGNMENT (first_dr);
7160 if (dataref_offset == NULL_TREE
7161 && TREE_CODE (dataref_ptr) == SSA_NAME)
7162 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7166 case dr_explicit_realign:
7170 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7172 if (compute_in_loop)
7173 msq = vect_setup_realignment (first_stmt, gsi,
7175 dr_explicit_realign,
7178 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7179 ptr = copy_ssa_name (dataref_ptr);
7181 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7182 new_stmt = gimple_build_assign
7183 (ptr, BIT_AND_EXPR, dataref_ptr,
7184 build_int_cst
7185 (TREE_TYPE (dataref_ptr),
7186 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7188 data_ref
7189 = build2 (MEM_REF, vectype, ptr,
7190 build_int_cst (reference_alias_ptr_type
7191 (DR_REF (first_dr)), 0));
7192 vec_dest = vect_create_destination_var (scalar_dest,
7194 new_stmt = gimple_build_assign (vec_dest, data_ref);
7195 new_temp = make_ssa_name (vec_dest, new_stmt);
7196 gimple_assign_set_lhs (new_stmt, new_temp);
7197 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7198 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7199 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7202 bump = size_binop (MULT_EXPR, vs,
7203 TYPE_SIZE_UNIT (elem_type));
7204 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7205 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7206 new_stmt = gimple_build_assign
7207 (NULL_TREE, BIT_AND_EXPR, ptr,
7208 build_int_cst
7209 (TREE_TYPE (ptr),
7210 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7211 ptr = copy_ssa_name (ptr, new_stmt);
7212 gimple_assign_set_lhs (new_stmt, ptr);
7213 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7214 data_ref
7215 = build2 (MEM_REF, vectype, ptr,
7216 build_int_cst (reference_alias_ptr_type
7217 (DR_REF (first_dr)), 0));
7220 case dr_explicit_realign_optimized:
7221 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7222 new_temp = copy_ssa_name (dataref_ptr);
7224 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7225 new_stmt = gimple_build_assign
7226 (new_temp, BIT_AND_EXPR, dataref_ptr,
7227 build_int_cst
7228 (TREE_TYPE (dataref_ptr),
7229 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7231 data_ref
7232 = build2 (MEM_REF, vectype, new_temp,
7233 build_int_cst (reference_alias_ptr_type
7234 (DR_REF (first_dr)), 0));
7239 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7240 new_stmt = gimple_build_assign (vec_dest, data_ref);
7241 new_temp = make_ssa_name (vec_dest, new_stmt);
7242 gimple_assign_set_lhs (new_stmt, new_temp);
7243 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7245 /* 3. Handle explicit realignment if necessary/supported.
7246 Create in loop:
7247 vec_dest = realign_load (msq, lsq, realignment_token) */
7248 if (alignment_support_scheme == dr_explicit_realign_optimized
7249 || alignment_support_scheme == dr_explicit_realign)
7251 lsq = gimple_assign_lhs (new_stmt);
7252 if (!realignment_token)
7253 realignment_token = dataref_ptr;
7254 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7255 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7256 msq, lsq, realignment_token);
7257 new_temp = make_ssa_name (vec_dest, new_stmt);
7258 gimple_assign_set_lhs (new_stmt, new_temp);
7259 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7261 if (alignment_support_scheme == dr_explicit_realign_optimized)
7264 if (i == vec_num - 1 && j == ncopies - 1)
7265 add_phi_arg (phi, lsq,
7266 loop_latch_edge (containing_loop),
7272 /* 4. Handle invariant-load. */
7273 if (inv_p && !bb_vinfo)
7275 gcc_assert (!grouped_load);
7276 /* If we have versioned for aliasing or the loop doesn't
7277 have any data dependencies that would preclude this,
7278 then we are sure this is a loop invariant load and
7279 thus we can insert it on the preheader edge. */
7280 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7281 && !nested_in_vect_loop
7282 && hoist_defs_of_uses (stmt, loop))
7284 if (dump_enabled_p ())
7286 dump_printf_loc (MSG_NOTE, vect_location,
7287 "hoisting out of the vectorized "
7289 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7291 tree tem = copy_ssa_name (scalar_dest);
7292 gsi_insert_on_edge_immediate
7293 (loop_preheader_edge (loop),
7294 gimple_build_assign (tem,
7296 (gimple_assign_rhs1 (stmt))));
7297 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7298 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7299 set_vinfo_for_stmt (new_stmt,
7300 new_stmt_vec_info (new_stmt, vinfo));
7304 gimple_stmt_iterator gsi2 = *gsi;
7306 new_temp = vect_init_vector (stmt, scalar_dest,
7308 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7314 tree perm_mask = perm_mask_for_reverse (vectype);
7315 new_temp = permute_vec_elements (new_temp, new_temp,
7316 perm_mask, stmt, gsi);
7317 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7320 /* Collect vector loads and later create their permutation in
7321 vect_transform_grouped_load (). */
7322 if (grouped_load || slp_perm)
7323 dr_chain.quick_push (new_temp);
7325 /* Store vector loads in the corresponding SLP_NODE. */
7326 if (slp && !slp_perm)
7327 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7329 /* Bump the vector pointer to account for a gap or for excess
7330 elements loaded for a permuted SLP load. */
7331 if (group_gap_adj != 0)
7335 = wide_int_to_tree (sizetype,
7336 wi::smul (TYPE_SIZE_UNIT (elem_type),
7337 group_gap_adj, &ovf));
7338 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7343 if (slp && !slp_perm)
7344 continue;
7346 if (slp_perm)
7347 {
7348 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7349 slp_node_instance, false))
7351 dr_chain.release ();
7352 return false;
7353 }
7354 }
7355 else
7356 {
7357 if (grouped_load)
7358 {
7359 if (!load_lanes_p)
7360 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7361 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7362 }
7363 else
7364 {
7365 if (j == 0)
7366 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7367 else
7368 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7369 prev_stmt_info = vinfo_for_stmt (new_stmt);
7372 dr_chain.release ();
7378 /* Function vect_is_simple_cond.
7380 Input:
7381 LOOP - the loop that is being vectorized.
7382 COND - Condition that is checked for simple use.
7384 Output:
7385 *COMP_VECTYPE - the vector type for the comparison.
7387 Returns whether a COND can be vectorized. Checks whether
7388 condition operands are supportable using vect_is_simple_use. */
7391 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7394 enum vect_def_type dt;
7395 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7398 if (TREE_CODE (cond) == SSA_NAME
7399 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7401 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7402 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7405 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7410 if (!COMPARISON_CLASS_P (cond))
7413 lhs = TREE_OPERAND (cond, 0);
7414 rhs = TREE_OPERAND (cond, 1);
7416 if (TREE_CODE (lhs) == SSA_NAME)
7418 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7419 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7422 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7423 && TREE_CODE (lhs) != FIXED_CST)
7426 if (TREE_CODE (rhs) == SSA_NAME)
7428 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7429 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7432 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7433 && TREE_CODE (rhs) != FIXED_CST)
7436 if (vectype1 && vectype2
7437 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7438 return false;
7440 *comp_vectype = vectype1 ? vectype1 : vectype2;
7441 return true;
7442 }
7444 /* vectorizable_condition.
7446 Check if STMT is conditional modify expression that can be vectorized.
7447 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7448 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7449 at GSI.
7451 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7452 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7453 the else clause if it is 2).
7455 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7458 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7459 gimple **vec_stmt, tree reduc_def, int reduc_index,
7462 tree scalar_dest = NULL_TREE;
7463 tree vec_dest = NULL_TREE;
7464 tree cond_expr, then_clause, else_clause;
7465 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7466 tree comp_vectype = NULL_TREE;
7467 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7468 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7471 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7472 enum vect_def_type dt, dts[4];
7474 enum tree_code code;
7475 stmt_vec_info prev_stmt_info = NULL;
7477 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7478 vec<tree> vec_oprnds0 = vNULL;
7479 vec<tree> vec_oprnds1 = vNULL;
7480 vec<tree> vec_oprnds2 = vNULL;
7481 vec<tree> vec_oprnds3 = vNULL;
7483 bool masked = false;
7485 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7488 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7490 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7493 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7494 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7498 /* FORNOW: not yet supported. */
7499 if (STMT_VINFO_LIVE_P (stmt_info))
7501 if (dump_enabled_p ())
7502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7503 "value used after loop.\n");
7508 /* Is vectorizable conditional operation? */
7509 if (!is_gimple_assign (stmt))
7512 code = gimple_assign_rhs_code (stmt);
7514 if (code != COND_EXPR)
7517 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7518 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7519 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7524 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7526 gcc_assert (ncopies >= 1);
7527 if (reduc_index && ncopies > 1)
7528 return false; /* FORNOW */
7530 cond_expr = gimple_assign_rhs1 (stmt);
7531 then_clause = gimple_assign_rhs2 (stmt);
7532 else_clause = gimple_assign_rhs3 (stmt);
7534 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7535 || !comp_vectype)
7536 return false;
7539 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7540 &vectype1))
7541 return false;
7542 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7543 &vectype2))
7544 return false;
7546 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7549 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7552 masked = !COMPARISON_CLASS_P (cond_expr);
7553 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7555 if (vec_cmp_type == NULL_TREE)
7556 return false;
7558 if (!vec_stmt)
7559 {
7560 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7561 return expand_vec_cond_expr_p (vectype, comp_vectype);
7568 vec_oprnds0.create (1);
7569 vec_oprnds1.create (1);
7570 vec_oprnds2.create (1);
7571 vec_oprnds3.create (1);
7575 scalar_dest = gimple_assign_lhs (stmt);
7576 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7578 /* Handle cond expr. */
7579 for (j = 0; j < ncopies; j++)
7581 gassign *new_stmt = NULL;
7586 auto_vec<tree, 4> ops;
7587 auto_vec<vec<tree>, 4> vec_defs;
7589 if (masked)
7590 ops.safe_push (cond_expr);
7591 else
7592 {
7593 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7594 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7595 }
7596 ops.safe_push (then_clause);
7597 ops.safe_push (else_clause);
7598 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7599 vec_oprnds3 = vec_defs.pop ();
7600 vec_oprnds2 = vec_defs.pop ();
7601 if (!masked)
7602 vec_oprnds1 = vec_defs.pop ();
7603 vec_oprnds0 = vec_defs.pop ();
7606 vec_defs.release ();
7607 }
7608 else
7609 {
7610 gimple *gtemp;
7611 if (masked)
7612 {
7613 vec_cond_lhs
7614 = vect_get_vec_def_for_operand (cond_expr, stmt,
7615 comp_vectype);
7616 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7617 &gtemp, &dts[0]);
7618 }
7619 else
7620 {
7621 vec_cond_lhs =
7622 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7623 stmt, comp_vectype);
7624 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7625 loop_vinfo, &gtemp, &dts[0]);
7628 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7629 stmt, comp_vectype);
7630 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7631 loop_vinfo, &gtemp, &dts[1]);
7633 if (reduc_index == 1)
7634 vec_then_clause = reduc_def;
7637 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7638 stmt);
7639 vect_is_simple_use (then_clause, loop_vinfo,
7640 &gtemp, &dts[2]);
7642 if (reduc_index == 2)
7643 vec_else_clause = reduc_def;
7646 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7647 stmt);
7648 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7654 vec_cond_lhs
7655 = vect_get_vec_def_for_stmt_copy (dts[0],
7656 vec_oprnds0.pop ());
7657 if (!masked)
7658 vec_cond_rhs
7659 = vect_get_vec_def_for_stmt_copy (dts[1],
7660 vec_oprnds1.pop ());
7662 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7663 vec_oprnds2.pop ());
7664 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7665 vec_oprnds3.pop ());
7670 vec_oprnds0.quick_push (vec_cond_lhs);
7671 if (!masked)
7672 vec_oprnds1.quick_push (vec_cond_rhs);
7673 vec_oprnds2.quick_push (vec_then_clause);
7674 vec_oprnds3.quick_push (vec_else_clause);
7677 /* Arguments are ready. Create the new vector stmt. */
7678 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7680 vec_then_clause = vec_oprnds2[i];
7681 vec_else_clause = vec_oprnds3[i];
7683 if (masked)
7684 vec_compare = vec_cond_lhs;
7685 else
7686 {
7687 vec_cond_rhs = vec_oprnds1[i];
7688 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7689 vec_cond_lhs, vec_cond_rhs);
7691 new_temp = make_ssa_name (vec_dest);
7692 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7693 vec_compare, vec_then_clause,
7695 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7696 if (slp_node)
7697 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7698 }
7700 if (slp_node)
7701 continue;
7703 if (j == 0)
7704 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7705 else
7706 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7708 prev_stmt_info = vinfo_for_stmt (new_stmt);
7711 vec_oprnds0.release ();
7712 vec_oprnds1.release ();
7713 vec_oprnds2.release ();
7714 vec_oprnds3.release ();
7719 /* vectorizable_comparison.
7721 Check if STMT is a comparison expression that can be vectorized.
7722 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7723 comparison, put it in VEC_STMT, and insert it at GSI.
7725 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7728 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7729 gimple **vec_stmt, tree reduc_def,
7732 tree lhs, rhs1, rhs2;
7733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7734 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7735 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7736 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7738 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7739 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7742 enum tree_code code;
7743 stmt_vec_info prev_stmt_info = NULL;
7745 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7746 vec<tree> vec_oprnds0 = vNULL;
7747 vec<tree> vec_oprnds1 = vNULL;
7752 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7755 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7758 mask_type = vectype;
7759 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7764 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7766 gcc_assert (ncopies >= 1);
7767 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7768 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7772 if (STMT_VINFO_LIVE_P (stmt_info))
7774 if (dump_enabled_p ())
7775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7776 "value used after loop.\n");
7780 if (!is_gimple_assign (stmt))
7783 code = gimple_assign_rhs_code (stmt);
7785 if (TREE_CODE_CLASS (code) != tcc_comparison)
7788 rhs1 = gimple_assign_rhs1 (stmt);
7789 rhs2 = gimple_assign_rhs2 (stmt);
7791 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7792 &dts[0], &vectype1))
7793 return false;
7795 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7796 &dts[1], &vectype2))
7797 return false;
7799 if (vectype1 && vectype2
7800 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7801 return false;
7803 vectype = vectype1 ? vectype1 : vectype2;
7805 /* Invariant comparison. */
7806 if (!vectype)
7807 {
7808 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7809 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7810 return false;
7811 }
7812 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7813 return false;
7815 if (!vec_stmt)
7816 {
7817 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7818 vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
7819 return expand_vec_cmp_expr_p (vectype, mask_type);
7825 vec_oprnds0.create (1);
7826 vec_oprnds1.create (1);
7830 lhs = gimple_assign_lhs (stmt);
7831 mask = vect_create_destination_var (lhs, mask_type);
7833 /* Handle cmp expr. */
7834 for (j = 0; j < ncopies; j++)
7836 gassign *new_stmt = NULL;
7841 auto_vec<tree, 2> ops;
7842 auto_vec<vec<tree>, 2> vec_defs;
7844 ops.safe_push (rhs1);
7845 ops.safe_push (rhs2);
7846 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7847 vec_oprnds1 = vec_defs.pop ();
7848 vec_oprnds0 = vec_defs.pop ();
7852 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
7853 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
7858 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
7859 vec_oprnds0.pop ());
7860 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
7861 vec_oprnds1.pop ());
7866 vec_oprnds0.quick_push (vec_rhs1);
7867 vec_oprnds1.quick_push (vec_rhs2);
7870 /* Arguments are ready. Create the new vector stmt. */
7871 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
7873 vec_rhs2 = vec_oprnds1[i];
7875 new_temp = make_ssa_name (mask);
7876 new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
7877 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7878 if (slp_node)
7879 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7880 }
7882 if (slp_node)
7883 continue;
7885 if (j == 0)
7886 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7887 else
7888 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7890 prev_stmt_info = vinfo_for_stmt (new_stmt);
7893 vec_oprnds0.release ();
7894 vec_oprnds1.release ();
7899 /* Make sure the statement is vectorizable. */
7902 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
7904 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7905 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7906 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7908 tree scalar_type, vectype;
7909 gimple *pattern_stmt;
7910 gimple_seq pattern_def_seq;
7912 if (dump_enabled_p ())
7914 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7915 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7918 if (gimple_has_volatile_ops (stmt))
7920 if (dump_enabled_p ())
7921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7922 "not vectorized: stmt has volatile operands\n");
7927 /* Skip stmts that do not need to be vectorized. In loops this is expected
7928 to include:
7929 - the COND_EXPR which is the loop exit condition
7930 - any LABEL_EXPRs in the loop
7931 - computations that are used only for array indexing or loop control.
7932 In basic blocks we only analyze statements that are a part of some SLP
7933 instance, therefore, all the statements are relevant.
7935 Pattern statement needs to be analyzed instead of the original statement
7936 if the original statement is not relevant. Otherwise, we analyze both
7937 statements. In basic blocks we are called from some SLP instance
7938 traversal; there we do not analyze pattern stmts separately, since the
7939 pattern stmts are already part of an SLP instance. */
7941 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7942 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7943 && !STMT_VINFO_LIVE_P (stmt_info))
7945 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7947 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7948 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7950 /* Analyze PATTERN_STMT instead of the original stmt. */
7951 stmt = pattern_stmt;
7952 stmt_info = vinfo_for_stmt (pattern_stmt);
7953 if (dump_enabled_p ())
7955 dump_printf_loc (MSG_NOTE, vect_location,
7956 "==> examining pattern statement: ");
7957 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7962 if (dump_enabled_p ())
7963 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7968 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7971 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7972 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7974 /* Analyze PATTERN_STMT too. */
7975 if (dump_enabled_p ())
7977 dump_printf_loc (MSG_NOTE, vect_location,
7978 "==> examining pattern statement: ");
7979 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7982 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7986 if (is_pattern_stmt_p (stmt_info)
7988 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7990 gimple_stmt_iterator si;
7992 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7994 gimple *pattern_def_stmt = gsi_stmt (si);
7995 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7996 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7998 /* Analyze def stmt of STMT if it's a pattern stmt. */
7999 if (dump_enabled_p ())
8001 dump_printf_loc (MSG_NOTE, vect_location,
8002 "==> examining pattern def statement: ");
8003 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8006 if (!vect_analyze_stmt (pattern_def_stmt,
8007 need_to_vectorize, node))
8013 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8015 case vect_internal_def:
8018 case vect_reduction_def:
8019 case vect_nested_cycle:
8020 gcc_assert (!bb_vinfo
8021 && (relevance == vect_used_in_outer
8022 || relevance == vect_used_in_outer_by_reduction
8023 || relevance == vect_used_by_reduction
8024 || relevance == vect_unused_in_scope
8025 || relevance == vect_used_only_live));
8028 case vect_induction_def:
8029 case vect_constant_def:
8030 case vect_external_def:
8031 case vect_unknown_def_type:
8038 gcc_assert (PURE_SLP_STMT (stmt_info));
8040 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8041 if (dump_enabled_p ())
8043 dump_printf_loc (MSG_NOTE, vect_location,
8044 "get vectype for scalar type: ");
8045 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8046 dump_printf (MSG_NOTE, "\n");
8049 vectype = get_vectype_for_scalar_type (scalar_type);
8052 if (dump_enabled_p ())
8054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8055 "not SLPed: unsupported data-type ");
8056 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8058 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8063 if (dump_enabled_p ())
8065 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8066 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8067 dump_printf (MSG_NOTE, "\n");
8070 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8073 if (STMT_VINFO_RELEVANT_P (stmt_info))
8075 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8076 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8077 || (is_gimple_call (stmt)
8078 && gimple_call_lhs (stmt) == NULL_TREE));
8079 *need_to_vectorize = true;
8082 if (PURE_SLP_STMT (stmt_info) && !node)
8084 dump_printf_loc (MSG_NOTE, vect_location,
8085 "handled only by SLP analysis\n");
8091 && (STMT_VINFO_RELEVANT_P (stmt_info)
8092 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8093 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8094 || vectorizable_conversion (stmt, NULL, NULL, node)
8095 || vectorizable_shift (stmt, NULL, NULL, node)
8096 || vectorizable_operation (stmt, NULL, NULL, node)
8097 || vectorizable_assignment (stmt, NULL, NULL, node)
8098 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8099 || vectorizable_call (stmt, NULL, NULL, node)
8100 || vectorizable_store (stmt, NULL, NULL, node)
8101 || vectorizable_reduction (stmt, NULL, NULL, node)
8102 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8103 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8107 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8108 || vectorizable_conversion (stmt, NULL, NULL, node)
8109 || vectorizable_shift (stmt, NULL, NULL, node)
8110 || vectorizable_operation (stmt, NULL, NULL, node)
8111 || vectorizable_assignment (stmt, NULL, NULL, node)
8112 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8113 || vectorizable_call (stmt, NULL, NULL, node)
8114 || vectorizable_store (stmt, NULL, NULL, node)
8115 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8116 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8121 if (dump_enabled_p ())
8123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8124 "not vectorized: relevant stmt not ");
8125 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8126 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8135 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8136 need extra handling, except for vectorizable reductions. */
8137 if (STMT_VINFO_LIVE_P (stmt_info)
8138 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8139 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8143 if (dump_enabled_p ())
8145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8146 "not vectorized: live stmt not ");
8147 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8148 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8158 /* Function vect_transform_stmt.
8160 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8163 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8164 bool *grouped_store, slp_tree slp_node,
8165 slp_instance slp_node_instance)
8167 bool is_store = false;
8168 gimple *vec_stmt = NULL;
8169 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8172 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8173 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8175 switch (STMT_VINFO_TYPE (stmt_info))
8177 case type_demotion_vec_info_type:
8178 case type_promotion_vec_info_type:
8179 case type_conversion_vec_info_type:
8180 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8184 case induc_vec_info_type:
8185 gcc_assert (!slp_node);
8186 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8190 case shift_vec_info_type:
8191 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8195 case op_vec_info_type:
8196 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8200 case assignment_vec_info_type:
8201 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8205 case load_vec_info_type:
8206 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8211 case store_vec_info_type:
8212 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8214 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8216 /* In case of interleaving, the whole chain is vectorized when the
8217 last store in the chain is reached. Store stmts before the last
8218 one are skipped, and their vec_stmt_info shouldn't be freed
8219 meanwhile. */
8220 *grouped_store = true;
8221 if (STMT_VINFO_VEC_STMT (stmt_info))
8228 case condition_vec_info_type:
8229 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8233 case comparison_vec_info_type:
8234 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8238 case call_vec_info_type:
8239 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8240 stmt = gsi_stmt (*gsi);
8241 if (is_gimple_call (stmt)
8242 && gimple_call_internal_p (stmt)
8243 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8247 case call_simd_clone_vec_info_type:
8248 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8249 stmt = gsi_stmt (*gsi);
8252 case reduc_vec_info_type:
8253 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8258 if (!STMT_VINFO_LIVE_P (stmt_info))
8260 if (dump_enabled_p ())
8261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8262 "stmt not supported.\n");
8267 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8268 This would break hybrid SLP vectorization. */
8270 gcc_assert (!vec_stmt
8271 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8273 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8274 is being vectorized, but outside the immediately enclosing loop. */
8276 && STMT_VINFO_LOOP_VINFO (stmt_info)
8277 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8278 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8279 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8280 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8281 || STMT_VINFO_RELEVANT (stmt_info) ==
8282 vect_used_in_outer_by_reduction))
8284 struct loop *innerloop = LOOP_VINFO_LOOP (
8285 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8286 imm_use_iterator imm_iter;
8287 use_operand_p use_p;
8291 if (dump_enabled_p ())
8292 dump_printf_loc (MSG_NOTE, vect_location,
8293 "Record the vdef for outer-loop vectorization.\n");
8295 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8296 (to be used when vectorizing outer-loop stmts that use the DEF of
8297 STMT). */
8298 if (gimple_code (stmt) == GIMPLE_PHI)
8299 scalar_dest = PHI_RESULT (stmt);
8301 scalar_dest = gimple_assign_lhs (stmt);
8303 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8305 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8307 exit_phi = USE_STMT (use_p);
8308 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8313 /* Handle stmts whose DEF is used outside the loop-nest that is
8314 being vectorized. */
8319 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8321 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8322 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8323 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8325 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8331 else if (STMT_VINFO_LIVE_P (stmt_info)
8332 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8334 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8339 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8345 /* Remove a group of stores (for SLP or interleaving), free their
8346 stmt_vec_info. */
8348 void
8349 vect_remove_stores (gimple *first_stmt)
8351 gimple *next = first_stmt;
8353 gimple_stmt_iterator next_si;
8357 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8359 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8360 if (is_pattern_stmt_p (stmt_info))
8361 next = STMT_VINFO_RELATED_STMT (stmt_info);
8362 /* Free the attached stmt_vec_info and remove the stmt. */
8363 next_si = gsi_for_stmt (next);
8364 unlink_stmt_vdef (next);
8365 gsi_remove (&next_si, true);
8366 release_defs (next);
8367 free_stmt_vec_info (next);
8373 /* Function new_stmt_vec_info.
8375 Create and initialize a new stmt_vec_info struct for STMT. */
8378 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8381 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8383 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8384 STMT_VINFO_STMT (res) = stmt;
8386 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8387 STMT_VINFO_LIVE_P (res) = false;
8388 STMT_VINFO_VECTYPE (res) = NULL;
8389 STMT_VINFO_VEC_STMT (res) = NULL;
8390 STMT_VINFO_VECTORIZABLE (res) = true;
8391 STMT_VINFO_IN_PATTERN_P (res) = false;
8392 STMT_VINFO_RELATED_STMT (res) = NULL;
8393 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8394 STMT_VINFO_DATA_REF (res) = NULL;
8395 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8397 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8398 STMT_VINFO_DR_OFFSET (res) = NULL;
8399 STMT_VINFO_DR_INIT (res) = NULL;
8400 STMT_VINFO_DR_STEP (res) = NULL;
8401 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8403 if (gimple_code (stmt) == GIMPLE_PHI
8404 && is_loop_header_bb_p (gimple_bb (stmt)))
8405 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8407 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8409 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8410 STMT_SLP_TYPE (res) = loop_vect;
8411 STMT_VINFO_NUM_SLP_USES (res) = 0;
8413 GROUP_FIRST_ELEMENT (res) = NULL;
8414 GROUP_NEXT_ELEMENT (res) = NULL;
8415 GROUP_SIZE (res) = 0;
8416 GROUP_STORE_COUNT (res) = 0;
8417 GROUP_GAP (res) = 0;
8418 GROUP_SAME_DR_STMT (res) = NULL;
8424 /* Create the vector that holds stmt_vec_info structs. */
8427 init_stmt_vec_info_vec (void)
8429 gcc_assert (!stmt_vec_info_vec.exists ());
8430 stmt_vec_info_vec.create (50);
8434 /* Free the stmt_vec_info vector. */
8437 free_stmt_vec_info_vec (void)
8441 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8443 free_stmt_vec_info (STMT_VINFO_STMT (info));
8444 gcc_assert (stmt_vec_info_vec.exists ());
8445 stmt_vec_info_vec.release ();
8449 /* Free stmt vectorization related info. */
8452 free_stmt_vec_info (gimple *stmt)
8454 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8459 /* Check if this statement has a related "pattern stmt"
8460 (introduced by the vectorizer during the pattern recognition
8461 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8463 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8465 stmt_vec_info patt_info
8466 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8469 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8470 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8471 gimple_set_bb (patt_stmt, NULL);
8472 tree lhs = gimple_get_lhs (patt_stmt);
8473 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8474 release_ssa_name (lhs);
8477 gimple_stmt_iterator si;
8478 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8480 gimple *seq_stmt = gsi_stmt (si);
8481 gimple_set_bb (seq_stmt, NULL);
8482 lhs = gimple_get_lhs (seq_stmt);
8483 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8484 release_ssa_name (lhs);
8485 free_stmt_vec_info (seq_stmt);
8488 free_stmt_vec_info (patt_stmt);
8492 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8493 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8494 set_vinfo_for_stmt (stmt, NULL);
8499 /* Function get_vectype_for_scalar_type_and_size.
8501 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8502 by the target. */
8505 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8507 machine_mode inner_mode = TYPE_MODE (scalar_type);
8508 machine_mode simd_mode;
8509 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8516 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8517 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8520 /* For vector types of elements whose mode precision doesn't
8521 match their type's precision we use an element type of mode
8522 precision. The vectorization routines will have to make sure
8523 they support the proper result truncation/extension.
8524 We also make sure to build vector types with INTEGER_TYPE
8525 component type only. */
8526 if (INTEGRAL_TYPE_P (scalar_type)
8527 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8528 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8529 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8530 TYPE_UNSIGNED (scalar_type));
  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test, simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     look up a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}

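/* As a worked example (illustrative, not from the original source): on a
   target whose preferred SIMD mode for SImode vectors is 16 bytes wide,
   calling get_vectype_for_scalar_type_and_size with a 4-byte int and
   SIZE == 0 yields a 4-element integer vector type (nbytes == 4,
   nunits == 16 / 4); with SIZE == 32 it would instead look up an
   8-element, 32-byte vector mode, failing if the target has none.  */
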
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

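/* A usage note (illustrative, not from the original source): the first
   successful call latches current_vector_size to the byte size of the
   vector type chosen via the target's preferred SIMD mode, so that all
   later queries in the same vectorization attempt produce vector types
   of one uniform size.  */
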
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
                                  current_vector_size);
}

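/* For instance (illustrative, not from the original source): if comparing
   vectors of int yields 4-element vectors, the mask type is a 4-element
   boolean vector; its representation (a bitmask mode or an integer vector
   of the full width) follows from current_vector_size and the target.  */
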
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

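/* An illustrative example (not from the original source): in

     for (i = 0; i < n; i++)
       a[i] = b[i] * x + 3;

   the operand 3 is vect_constant_def, x (defined before the loop) is
   vect_external_def, and the product b[i] * x is vect_internal_def.
   In s += a[i] the running sum s is instead carried between
   iterations and is classified as a reduction.  */
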
bool
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
        {
        case vect_uninitialized_def:
          dump_printf (MSG_NOTE, "uninitialized\n");
          break;
        case vect_constant_def:
          dump_printf (MSG_NOTE, "constant\n");
          break;
        case vect_external_def:
          dump_printf (MSG_NOTE, "external\n");
          break;
        case vect_internal_def:
          dump_printf (MSG_NOTE, "internal\n");
          break;
        case vect_induction_def:
          dump_printf (MSG_NOTE, "induction\n");
          break;
        case vect_reduction_def:
          dump_printf (MSG_NOTE, "reduction\n");
          break;
        case vect_double_reduction_def:
          dump_printf (MSG_NOTE, "double reduction\n");
          break;
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
          break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
        }
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}

/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

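/* A concrete illustration (not from the original source): for

     short a, b;
     int c = (int) a * (int) b;

   VECTYPE_IN might be an 8-element vector of shorts and VECTYPE_OUT a
   4-element vector of ints; on most targets CODE1/CODE2 would become
   VEC_WIDEN_MULT_LO_EXPR/VEC_WIDEN_MULT_HI_EXPR, each producing one of
   the two output vectors, with MULTI_STEP_CVT == 0.  */
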
bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have the
             same operation.  One such example is s += a * b, where elements
             in a and b cannot be reordered.  Here we check if the vector defined
             by STMT is only directly used in the reduction statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) / 2
                == TYPE_VECTOR_SUBPARTS (wide_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        return false;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
                    == TYPE_VECTOR_SUBPARTS (wide_vectype)));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

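/* A concrete illustration (not from the original source): for

     int i;
     char c = (char) i;

   CODE1 would be VEC_PACK_TRUNC_EXPR.  Packing int vectors directly to
   char vectors is usually not available, so the conversion is done in
   two steps, int->short and short->char; MULTI_STEP_CVT is then 1 and
   INTERM_TYPES holds the short vector type.  */
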
bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) * 2
                == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed float-to-integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often
     more costly than signed.  */
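  /* For instance (an illustrative note, not from the original source):
     a float -> unsigned short conversion can perform its float-to-integer
     step in signed form, which many targets implement more cheaply,
     leaving only the final narrowing steps.  */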
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        return false;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
                    == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}